Dataset Groups Activity Stream Clotho Automated audio captioning is a cross-modal translation task for describing the content of audio clips with natural language sentences. BibTex:

% Citation record for the Clotho dataset as published by TIB (see the doi and url fields).
% Text outside the entry is ignored by BibTeX and Biber and is kept here only as context.
% NOTE(review): the institution value "No Organization" looks like an export placeholder;
% confirm whether it should be removed or replaced with the real organization.
@dataset{Arvind_Krishna_Sridhar_and_Yinyi_Guo_and_Erik_Visser_and_Rehana_Mahfuz_2024,
  author      = {Sridhar, Arvind Krishna and Guo, Yinyi and Visser, Erik and Mahfuz, Rehana},
  title       = {Clotho},
  abstract    = {Automated audio captioning is a cross-modal translation task for describing the content of audio clips with natural language sentences.},
  keywords    = {Clotho, FreeSound, Freesound, audio, audio captioning, audio clips, audio dataset, captioning, cross-modal translation, dataset, music information retrieval, natural language sentences, sound clips},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = nov,
  doi         = {10.57702/c1snqbd4},
  url         = {https://service.tib.eu/ldmservice/dataset/clotho},
}