Dataset Groups Activity Stream

CC-3M
CC-3M is a large-scale dataset of 300,000 image-caption pairs.

BibTeX:

@dataset{Jaisidh_Singh_and_Ishaan_Shrivastava_and_Mayank_Vatsa_and_Richa_Singh_and_Aparna_Bharati_2024,
  author      = {Singh, Jaisidh and Shrivastava, Ishaan and Vatsa, Mayank and Singh, Richa and Bharati, Aparna},
  title       = {{CC-3M}},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/fx4p8xy5},
  url         = {https://service.tib.eu/ldmservice/dataset/cc-3m},
  keywords    = {dataset, image-caption, vision-language models},
  abstract    = {CC-3M is a large-scale dataset of 300,000 image-caption pairs.},
}