Dataset Groups Activity Stream
Conceptual Captions 3M
The Conceptual Captions 3M dataset is a large-scale image-text dataset used for vision-language pre-training.

BibTeX:

Citation record for the Conceptual Captions 3M dataset (see the doi and url fields below).
The citation key is kept exactly as exported so that existing \cite commands keep resolving.
NOTE(review): the institution field holds the literal "No Organization", which looks like a
repository placeholder rather than a real affiliation; confirm before relying on it.

@dataset{Yunhao_Gou_and_Tom_Ko_and_Hansi_Yang_and_Mingxuan_Wang_and_James_Kwok_and_Yu_Zhang_2024,
  author      = {Gou, Yunhao and Ko, Tom and Yang, Hansi and Wang, Mingxuan and Kwok, James and Zhang, Yu},
  title       = {Conceptual {Captions} {3M}},
  abstract    = {The Conceptual Captions 3M dataset is a large-scale image-text dataset used for vision-language pre-training.},
  keywords    = {image captioning, image-text pairs, text-image pairs, unpaired learning, vision-language pre-training},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/jq7xdpir},
  url         = {https://service.tib.eu/ldmservice/dataset/conceptual-captions-3m},
}