% Conference paper from the ICMR '23 proceedings (publisher: ACM).
% - pages uses the ASCII double-hyphen range form, not a Unicode en dash.
% - The ACM acronym in booktitle is brace-protected against style recasing.
% - NOTE(review): location/series carry the conference venue and short name
%   (ACM export convention, presumably); under biblatex, location is the
%   publisher location and may clash with address -- confirm the target style.
@inproceedings{GhinassiEtAl2023ICMR,
  author    = {Ghinassi, Iacopo and Wang, Lin and Newell, Chris and Purver, Matthew},
  title     = {Multimodal Topic Segmentation of Podcast Shows with Pre-Trained Neural Encoders},
  booktitle = {Proceedings of the 2023 {ACM} International Conference on Multimedia Retrieval},
  series    = {ICMR '23},
  location  = {Thessaloniki, Greece},
  pages     = {602--606},
  numpages  = {5},
  year      = {2023},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9798400701788},
  doi       = {10.1145/3591106.3592270},
  url       = {https://doi.org/10.1145/3591106.3592270},
  keywords  = {multi-modal, topic segmentation},
  abstract  = {We present two multimodal models for topic segmentation of podcasts built on pre-trained neural text and audio embeddings. We show that results can be improved by combining different modalities; but also by combining different encoders from the same modality, especially general-purpose sentence embeddings with specifically fine-tuned ones. We also show that audio embeddings can be substituted with two simple features related to sentence duration and inter-sentential pauses with comparable results. Finally, we publicly release our two datasets, the first in our knowledge publicly and freely available multimodal datasets for topic segmentation.},
}