% SemEval-2020 Task 3 paper: "Graded Word Similarity in Context".
% Notes on this entry:
%   - `url` holds the ACL Anthology link. A field may appear only once per
%     entry, so the author-hosted PDF link is stored in the separate,
%     non-standard `preprinturl` field (ignored by standard styles).
%   - `annote` repeats the ISBN that is also given in `isbn`.
%   - `{SemEval}` is braced as a whole word so that styles which apply
%     sentence casing to titles keep its capitalisation.
@InProceedings{ArmendarizEtAl20SemEval,
    author      = {Armendariz, Carlos Santos and
                   Purver, Matthew and
                   Pollak, Senja and
                   Ljube{\v{s}}i{\'c}, Nikola and
                   Ul{\v{c}}ar, Matej and
                   Vuli{\'c}, Ivan and
                   Pilehvar, Mohammad Taher},
    title       = {{SemEval}-2020 Task 3: Graded Word Similarity in Context},
    booktitle   = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
    pages       = {36--49},
    month       = dec,
    year        = {2020},
    address     = {Barcelona (online)},
    publisher   = {International Committee for Computational Linguistics},
    isbn        = {978-1-952148-31-6},
    url         = {https://www.aclweb.org/anthology/2020.semeval-1.3},
    preprinturl = {http://www.eecs.qmul.ac.uk/~mpurver/papers/armendariz-et-al20semeval.pdf},
    annote      = {ISBN 978-1-952148-31-6},
    abstract    = {This paper presents the Graded Word Similarity in Context (GWSC) task which asked participants to predict the effects of context on human perception of similarity in English, Croatian, Slovene and Finnish. We received 15 submissions and 11 system description papers. A new dataset (CoSimLex) was created for evaluation in this task: it contains pairs of words, each annotated within two different contexts. Systems beat the baselines by significant margins, but few did well in more than one language or subtask. Almost every system employed a Transformer model, but with many variations in the details: WordNet sense embeddings, translation of contexts, TF-IDF weightings, and the automatic creation of datasets for fine-tuning were all used to good effect.},
}