Dataset Groups Activity Stream
Conceptual Captions 12M and RedCaps
The dataset used in the paper is Conceptual Captions 12M (CC12M) and RedCaps.

BibTeX:
(Text outside the entry below is ignored by BibTeX and serves only as commentary.)
NOTE(review): the institution value "No Organization" looks like an export
placeholder; confirm with the data provider before relying on or removing it.

@dataset{Soravit_Changpinyo_and_Piyush_Sharma_and_Nan_Ding_and_Radu_Soricut_2024,
  author      = {Changpinyo, Soravit and Sharma, Piyush and Ding, Nan and Soricut, Radu},
  title       = {{Conceptual Captions 12M} and {RedCaps}},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/pf4d0kkm},
  url         = {https://service.tib.eu/ldmservice/dataset/conceptual-captions-12m-and-redcaps},
  keywords    = {dataset, image captioning, image-text pairs, vision-language models},
  abstract    = {The dataset used in the paper is Conceptual Captions 12M (CC12M) and RedCaps.},
}