@inproceedings{DelBosqueEtAl21ReInAct,
  author    = {Del-Bosque-Trevino, Jorge and Hough, Julian and Purver, Matthew},
  title     = {Communicative Grounding of Analogical Explanations in Dialogue: A Corpus Study of Conversational Management Acts and Statistical Sequence Models for Tutoring through Analogy},
  booktitle = {Proceedings of the Conference on Reasoning and Interaction (ReInAct)},
  year      = {2021},
  month     = oct,
  address   = {Gothenburg, Sweden},
  publisher = {Association for Computational Linguistics},
  pages     = {23--31},
  isbn      = {978-1-955917-07-0},
  annote    = {ISBN 978-1-955917-07-0},
  url       = {https://aclanthology.org/2021.reinact-1.4},
  url       = {http://www.eecs.qmul.ac.uk/~mpurver/papers/delbosque-et-al21reinact.pdf},
}
@inproceedings{GanEtAl2021ACL,
  author    = {Gan, Yujian and
               Chen, Xinyun and
               Huang, Qiuping and
               Purver, Matthew and
               Woodward, John R. and
               Xie, Jinxia and
               Huang, Pengsheng},
  title     = {Towards Robustness of Text-to-{SQL} Models against Synonym Substitution},
  booktitle = {Proceedings of the Joint Conference of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (ACL-IJCNLP 2021)},
  year      = {2021},
  month     = aug,
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  doi       = {10.18653/v1/2021.acl-long.195},
  url       = {https://aclanthology.org/2021.acl-long.195.pdf},
  url       = {https://arxiv.org/abs/2106.01065},
  abstract  = {Recently, there has been significant progress in studying neural networks to translate text descriptions into SQL queries. Despite achieving good performance on some public benchmarks, existing text-to-SQL models typically rely on the lexical matching between words in natural language (NL) questions and tokens in table schemas, which may render the models vulnerable to attacks that break the schema linking mechanism. In this work, we investigate the robustness of text-to-SQL models to synonym substitution. In particular, we introduce Spider-Syn, a human-curated dataset based on the Spider benchmark for text-to-SQL translation. NL questions in Spider-Syn are modified from Spider, by replacing their schema-related words with manually selected synonyms that reflect real-world question paraphrases. We observe that the accuracy dramatically drops by eliminating such explicit correspondence between NL questions and table schemas, even if the synonyms are not adversarially selected to conduct worst-case adversarial attacks. Finally, we present two categories of approaches to improve the model robustness. The first category of approaches utilizes additional synonym annotations for table schemas by modifying the model input, while the second category is based on adversarial training. We demonstrate that both categories of approaches significantly outperform their counterparts without the defense, and the first category of approaches are more effective.},
}
@inproceedings{GanEtAl2021EMNLP,
  author    = {Gan, Yujian and Chen, Xinyun and Purver, Matthew},
  title     = {Exploring Underexplored Limitations of Cross-Domain Text-to-{SQL} Generalization},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  year      = {2021},
  month     = nov,
  address   = {Online and Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  pages     = {8926--8931},
  isbn      = {978-1-955917-09-4},
  annote    = {ISBN 978-1-955917-09-4},
  doi       = {10.18653/v1/2021.emnlp-main.702},
  url       = {http://doi.org/10.18653/v1/2021.emnlp-main.702},
  url       = {https://aclanthology.org/2021.emnlp-main.702/},
  url       = {https://arxiv.org/abs/2109.05157},
}
@inproceedings{GanEtAl2021FindingsEMNLP,
  author    = {Gan, Yujian and Chen, Xinyun and Xie, Jinxia and Purver, Matthew and Woodward, John R. and Drake, John and Zhang, Qiaofu},
  title     = {Natural {SQL}: Making {SQL} Easier to Infer from Natural Language Specifications},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  year      = {2021},
  month     = nov,
  address   = {Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  pages     = {2030--2042},
  isbn      = {978-1-955917-10-0},
  annote    = {ISBN 978-1-955917-10-0},
  doi       = {10.18653/v1/2021.findings-emnlp.174},
  url       = {http://doi.org/10.18653/v1/2021.findings-emnlp.174},
  url       = {https://aclanthology.org/2021.findings-emnlp.174},
}
@inproceedings{KaranEtAl2021SIGDIAL,
  title     = "Mitigating Topic Bias when Detecting Decisions in Dialogue",
  author    = "Karan, Mladen and
               Khare, Prashant and
               Healey, Patrick and
               Purver, Matthew",
  booktitle = "Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue",
  publisher = "Association for Computational Linguistics",
  address   = "Singapore and Online",
  year      = "2021",
  month     = jul,
  pages     = "542--547",
  url       = "https://aclanthology.org/2021.sigdial-1.56",
  abstract  = "This work revisits the task of detecting decision-related utterances in multi-party dialogue. We explore performance of a traditional approach and a deep learning-based approach based on transformer language models, with the latter providing modest improvements. We then analyze topic bias in the models using topic information obtained by manual annotation. Our finding is that when detecting some types of decisions in our data, models rely more on topic specific words that decisions are about rather than on words that more generally indicate decision making. We further explore this by removing topic information from the train data. We show that this resolves the bias issues to an extent and, surprisingly, sometimes even boosts performance.",
}
@inproceedings{McQuistinEtAl2021IMC,
  author    = {McQuistin, Stephen and
               Karan, Mladen and
               Khare, Prashant and
               Perkins, Colin and
               Tyson, Gareth and
               Purver, Matthew and
               Healey, Patrick and
               Iqbal, Waleed and
               Qadir, Junaid and
               Castro, Ignacio},
  title     = {Characterising the {IETF} through the Lens of {RFC} Deployment},
  booktitle = {Proceedings of the 21st {ACM} Internet Measurement Conference},
  year      = {2021},
  location  = {online},
  pages     = {137--149},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450391290},
  doi       = {10.1145/3487552.3487821},
  url       = {https://doi.org/10.1145/3487552.3487821},
}
@article{NasreenEtAl21Frontiers,
  author   = {Nasreen, Shamila and Rohanian, Morteza and Hough, Julian and Purver, Matthew},
  title    = {Alzheimer's Dementia Recognition From Spontaneous Speech Using Disfluency and Interactional Features},
  journal  = {Frontiers in Computer Science},
  volume   = {3},
  pages    = {49},
  year     = {2021},
  url      = {https://www.frontiersin.org/article/10.3389/fcomp.2021.640669},
  url      = {http://doi.org/10.3389/fcomp.2021.640669},
  doi      = {10.3389/fcomp.2021.640669},
  issn     = {2624-9898},
  annote   = {ISSN 2624-9898},
  abstract = {Alzheimer's disease (AD) is a progressive, neurodegenerative disorder mainly characterized by memory loss with deficits in other cognitive domains, including language, visuospatial abilities, and changes in behavior. Detecting diagnostic biomarkers that are noninvasive and cost-effective is of great value not only for clinical assessments and diagnostics but also for research purposes. Several previous studies have investigated AD diagnosis via the acoustic, lexical, syntactic, and semantic aspects of speech and language. Other studies include approaches from conversation analysis that look at more interactional aspects, showing that disfluencies such as fillers and repairs, and purely nonverbal features such as inter-speaker silence, can be key features of AD conversations. These kinds of features, if useful for diagnosis, may have many advantages: They are simple to extract and relatively language-, topic-, and task-independent. This study aims to quantify the role and contribution of these features of interaction structure in predicting whether a dialogue participant has AD. We used a subset of the Carolinas Conversation Collection dataset of patients with AD at moderate stage within the age range 60--89 and similar-aged non-AD patients with other health conditions. Our feature analysis comprised two sets: disfluency features, including indicators such as self-repairs and fillers, and interactional features, including overlaps, turn-taking behavior, and distributions of different types of silence both within patient speech and between patient and interviewer speech. Statistical analysis showed significant differences between AD and non-AD groups for several disfluency features (edit terms, verbatim repeats, and substitutions) and interactional features (lapses, gaps, attributable silences, turn switches per minute, standardized phonation time, and turn length). For the classification of AD patient conversations vs. non-AD patient conversations, we achieved 83% accuracy with disfluency features, 83% accuracy with interactional features, and an overall accuracy of 90% when combining both feature sets using support vector machine classifiers. The discriminative power of these features, perhaps combined with more conventional linguistic features, therefore shows potential for integration into noninvasive clinical assessments for AD at advanced stages.},
}
@inproceedings{NasreenEtAl21Interspeech,
  author    = {Nasreen, Shamila and Hough, Julian and Purver, Matthew},
  title     = {Detecting Alzheimer's Disease Using Interactional and Acoustic Features from Spontaneous Speech},
  booktitle = {Proceedings of {INTERSPEECH}},
  pages     = {1962--1966},
  month     = sep,
  year      = {2021},
  address   = {Brno, Czechia},
  publisher = {ISCA},
  issn      = {1990-9772},
  note      = {ISSN 1990-9772},
  doi       = {10.21437/Interspeech.2021-1526},
  url       = {http://www.eecs.qmul.ac.uk/~mpurver/papers/nasreen-et-al21interspeech.pdf},
  url       = {http://doi.org/10.21437/Interspeech.2021-1526},
}
@inproceedings{NasreenEtAl2021SIGDIAL,
  author    = {Nasreen, Shamila and Hough, Julian and Purver, Matthew},
  title     = {Rare-Class Dialogue Act Tagging for Alzheimer's Disease Diagnosis},
  booktitle = {Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month     = jul,
  year      = {2021},
  address   = {Singapore and Online},
  publisher = {Association for Computational Linguistics},
  pages     = {304--314},
  abstract  = {Alzheimer's Disease (AD) is associated with many characteristic changes, not only in an individual's language but also in the interactive patterns observed in dialogue. The most indicative changes of this latter kind tend to be associated with relatively rare dialogue acts (DAs), such as those involved in clarification exchanges and responses to particular kinds of questions. However, most existing work in DA tagging focuses on improving average performance, effectively prioritizing more frequent classes; it thus gives a poor performance on these rarer classes and is not suited for application to AD analysis. In this paper, we investigate tagging specifically for rare class DAs, using a hierarchical BiLSTM model with various ways of incorporating information from previous utterances and DA tags in context. We show that this can give good performance for rare DA classes on both the general Switchboard corpus (SwDA) and an AD-specific conversational dataset, the Carolinas Conversation Collection (CCC); and that the tagger outputs then contribute useful information for distinguishing patients with and without AD.},
  url       = {https://aclanthology.org/2021.sigdial-1.33},
}
@inproceedings{PeliconEtAl21EACL,
  author    = {Pelicon, Andra{\v{z}} and
               Shekhar, Ravi and
               Martinc, Matej and
               {\v{S}}krlj, Bla{\v{z}} and
               Purver, Matthew and
               Pollak, Senja},
  title     = {Zero-shot Cross-lingual Content Filtering: Offensive Language and Hate Speech Detection},
  booktitle = {Proceedings of the {EACL} Hackashop on News Media Content Analysis and Automated Report Generation},
  editor    = {Toivonen, Hannu and Boggia, Michele},
  location  = {Kyiv (online)},
  year      = {2021},
  month     = apr,
  pages     = {30--34},
  isbn      = {978-1-954085-13-8},
  annote    = {ISBN 978-1-954085-13-8},
  url       = {http://www.eecs.qmul.ac.uk/~mpurver/papers/pelicon-et-al21eacl.pdf},
}
@article{PeliconEtAl21PeerJ,
  author  = {Pelicon, Andra{\v{z}} and
             Shekhar, Ravi and
             {\v{S}}krlj, Bla{\v{z}} and
             Purver, Matthew and
             Pollak, Senja},
  title   = {Investigating Cross-lingual Training for Offensive Language Detection},
  journal = {{PeerJ} Computer Science},
  year    = {2021},
  volume  = {7},
  pages   = {e559},
  doi     = {10.7717/peerj-cs.559},
  url     = {https://doi.org/10.7717/peerj-cs.559},
}
@inproceedings{PollakEtAl21EACL,
  author    = {Pollak, Senja and
               Robnik-{\v{S}}ikonja, Marko and
               Purver, Matthew and
               Boggia, Michele and
               Shekhar, Ravi and
               Pranji{\'{c}}, Marko and
               Salmela, Salla and
               Krustok, Ivar and
               Paju, Tarmo and
               Linden, Carl-Gustav and
               Lepp{\"{a}}nen, Leo and
               Zosa, Elaine and
               Ul{\v{c}}ar, Matej and
               Freiental, Linda and
               Traat, Silver and
               Cabrera-Diego, Luis Adri{\'{a}}n and
               Martinc, Matej and
               Lavra{\v{c}}, Nada and
               {\v{S}}krlj, Bla{\v{z}} and
               {\v{Z}}nidar{\v{s}}i{\v{c}}, Martin and
               Pelicon, Andra{\v{z}} and
               Koloski, Boshko and
               Pode{\v{c}}an, Vid and
               Kranjc, Janez and
               Sheehan, Shane and
               Boros, Emanuela and
               Moreno, Jose and
               Doucet, Antoine and
               Toivonen, Hannu},
  title     = {{EMBEDDIA} Tools, Datasets and Challenges: Resources and Hackathon Contributions},
  booktitle = {Proceedings of the {EACL} Hackashop on News Media Content Analysis and Automated Report Generation},
  editor    = {Toivonen, Hannu and Boggia, Michele},
  location  = {Kyiv (online)},
  year      = {2021},
  month     = apr,
  pages     = {99--109},
  isbn      = {978-1-954085-13-8},
  annote    = {ISBN 978-1-954085-13-8},
  url       = {http://www.eecs.qmul.ac.uk/~mpurver/papers/pollak-et-al21eacl.pdf},
}
@article{PurverEtAl21JOLLI,
  author  = {Purver, Matthew and
             Sadrzadeh, Mehrnoosh and
             Kempson, Ruth and
             Wijnholds, Gijs and
             Hough, Julian},
  title   = {Incremental Composition in Distributional Semantics},
  journal = {Journal of Logic, Language and Information},
  volume  = {30},
  number  = {2},
  pages   = {379--406},
  year    = {2021},
  month   = jun,
  issn    = {1572-9583},
  annote  = {ISSN 1572-9583},
  doi     = {10.1007/s10849-021-09337-8},
  url     = {http://doi.org/10.1007/s10849-021-09337-8},
  url     = {http://www.eecs.qmul.ac.uk/~mpurver/papers/purver-et-al21jolli.pdf},
}
@inproceedings{RohanianEtAl21Interspeech,
  author    = {Rohanian, Morteza and Hough, Julian and Purver, Matthew},
  title     = {Alzheimer's Dementia Recognition Using Acoustic, Lexical, Disfluency and Speech Pause Features Robust to Noisy Inputs},
  booktitle = {Proceedings of {INTERSPEECH}},
  pages     = {3820--3824},
  month     = sep,
  year      = {2021},
  address   = {Brno, Czechia},
  publisher = {ISCA},
  issn      = {1990-9772},
  note      = {ISSN 1990-9772},
  doi       = {10.21437/Interspeech.2021-1633},
  url       = {http://www.eecs.qmul.ac.uk/~mpurver/papers/rohanian-et-al21interspeech.pdf},
  url       = {http://doi.org/10.21437/Interspeech.2021-1633},
}
@inproceedings{WrightPurver21ICCC,
  author    = {Wright, George and Purver, Matthew},
  title     = {Evaluating Natural Language Descriptions Generated in a Workspace-Based Architecture},
  booktitle = {Proceedings of the 12th International Conference on Computational Creativity ({ICCC})},
  editor    = {{G{\'o}mez de Silva Garza}, A. and
               Veale, T. and
               Aguilar, W. and
               {P{\'e}rez y P{\'e}rez}, R.},
  month     = sep,
  year      = {2021},
  pages     = {87--91},
  address   = {Online / Mexico City},
  publisher = {Association for Computational Creativity},
  isbn      = {978-989-54160-3-5},
  annote    = {ISBN 978-989-54160-3-5},
  url       = {https://computationalcreativity.net/iccc21/wp-content/uploads/2021/09/ICCC_2021_paper_97.pdf},
  url       = {http://www.eecs.qmul.ac.uk/~mpurver/papers/wright-purver21iccc.pdf},
}
@inproceedings{WrightPurver2021STD,
  author       = {Wright, George and Purver, Matthew},
  title        = {Parsing Text in a Workspace for Language Generation},
  booktitle    = {Proceedings of the 2021 Society for Text \& Discourse Annual Conference},
  location     = {online},
  year         = {2021},
  month        = aug,
  howpublished = {EasyChair Preprint no.\ 6171},
  url          = {https://easychair.org/publications/preprint/l4c9},
}
@inproceedings{ZosaEtAl2021RANLP,
  author    = {Zosa, Elaine and Shekhar, Ravi and Karan, Mladen and Purver, Matthew},
  title     = {Not All Comments are Equal: Insights into Comment Moderation from a Topic-Aware Model},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP)},
  pages     = {1652--1662},
  month     = sep,
  year      = {2021},
  address   = {online},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Mitkov, Ruslan and
               Nikolova-Koleva, Ivelina},
  publisher = {INCOMA Ltd.},
  isbn      = {978-954-452-072-4},
  issn      = {1313-8502},
  annote    = {ISBN 978-954-452-072-4 / ISSN 1313-8502},
  doi       = {10.26615/978-954-452-072-4_185},
  url       = {https://doi.org/10.26615/978-954-452-072-4_185},
  url       = {https://aclanthology.org/2021.ranlp-1.185},
  url       = {https://ranlp.org/ranlp2021/proceedings-20Sep.pdf},
  url       = {http://www.eecs.qmul.ac.uk/~mpurver/papers/zosa-et-al21ranlp.pdf},
}