Dataset Groups Activity Stream
Conceptual Captions 12M
The Conceptual Captions 12M (CC-12M) dataset consists of 12 million diverse and high-quality images paired with descriptive captions and titles.
BibTex:

% Citation record for the Conceptual Captions 12M (CC-12M) dataset as
% published by TIB (see the doi and url fields below).
%
% Notes for maintainers:
%   - The citation key is kept exactly as exported so that existing
%     citations of this record keep resolving.
%   - Authors are stored in "Last, First" form, in the exported order.
%   - Title words are individually braced so sentence-casing styles do not
%     lowercase the dataset name.
%   - month uses the predefined macro (bare dec), not a braced string.
%   - keywords is a plain comma-separated list; the values are the ones the
%     export supplied, including its case variants.
%   - No institution/organization is recorded for this dataset; the export
%     carried only the placeholder text "No Organization".
@dataset{Liangliang_Cao_and_Bowen_Zhang_and_Chen_Chen_and_Yinfei_Yang_and_Xianzhi_Du_and_Wencong_Zhang_and_Zhiyun_Lu_and_Yantao_Zheng_2024,
  author    = {Cao, Liangliang and Zhang, Bowen and Chen, Chen and Yang, Yinfei and Du, Xianzhi and Zhang, Wencong and Lu, Zhiyun and Zheng, Yantao},
  title     = {{Conceptual} {Captions} {12M}},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/vsbq74bb},
  url       = {https://service.tib.eu/ldmservice/dataset/conceptual-captions-12m},
  keywords  = {CC-12M, Image Captioning, Image-Text Data, Large-Scale Dataset, image captioning, large-scale image-text pairs, visual question answering},
  abstract  = {The Conceptual Captions 12M (CC-12M) dataset consists of 12 million diverse and high-quality images paired with descriptive captions and titles.},
}