Dataset Groups Activity Stream Clotho v2 Automated audio captioning is a cross-modal translation task for describing the content of audio clips with natural language sentences. BibTex:

@comment{
  Dataset record for "Clotho v2" (publisher field: TIB). The citation key is
  kept exactly as exported so existing citations continue to resolve.
  month uses the bare three-letter macro so styles can expand/localise it.
}
@dataset{Xinhao_Mei_and_Xubo_Liu_and_Jianyuan_Sun_and_Mark_D_Plumbley_and_Wenwu_Wang_2024,
  author    = {Mei, Xinhao and Liu, Xubo and Sun, Jianyuan and Plumbley, Mark D. and Wang, Wenwu},
  title     = {{Clotho} v2},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/rv90u3ey},
  url       = {https://service.tib.eu/ldmservice/dataset/clotho-v2},
  keywords  = {audio captioning, cross-modal translation, natural language sentences},
  abstract  = {Automated audio captioning is a cross-modal translation task for describing the content of audio clips with natural language sentences.},
}