% Conference paper in the ICMR '23 proceedings (ACM Digital Library export).
% Page ranges use the ASCII double hyphen so any style can typeset the en-dash.
% NOTE(review): `location` appears to hold the conference venue and `address`
% the publisher's city (ACM export convention) -- confirm against the target style.
@inproceedings{GhinassiEtAl2023ICMR,
  author    = {Ghinassi, Iacopo and Wang, Lin and Newell, Chris and Purver, Matthew},
  title     = {Multimodal Topic Segmentation of Podcast Shows with Pre-Trained Neural Encoders},
  booktitle = {Proceedings of the 2023 {ACM} International Conference on Multimedia Retrieval},
  series    = {ICMR '23},
  year      = {2023},
  pages     = {602--606},
  numpages  = {5},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Thessaloniki, Greece},
  isbn      = {9798400701788},
  doi       = {10.1145/3591106.3592270},
  url       = {https://doi.org/10.1145/3591106.3592270},
  keywords  = {multi-modal, topic segmentation},
  abstract  = {We present two multimodal models for topic segmentation of podcasts built on pre-trained neural text and audio embeddings. We show that results can be improved by combining different modalities; but also by combining different encoders from the same modality, especially general-purpose sentence embeddings with specifically fine-tuned ones. We also show that audio embeddings can be substituted with two simple features related to sentence duration and inter-sentential pauses with comparable results. Finally, we publicly release our two datasets, the first in our knowledge publicly and freely available multimodal datasets for topic segmentation.},
}