% CC14M
% Large-scale image-text dataset for pre-training a collaborative two-stream
% vision-language model for cross-modal retrieval.
%
% Text captured in front of the entry (appears to be page navigation, kept for
% reference): Dataset Groups Activity Stream
%
% Notes on this entry:
%   - The citation key is kept exactly as exported so existing citations of it
%     keep resolving.
%   - month uses the bare three-letter macro so styles can expand/localise it.
%   - The title is brace-protected so sentence-casing styles do not turn the
%     dataset name into "Cc14m".
%   - keywords is a plain comma-separated list (the export had quoted,
%     list-literal style items under the field name "keyword").
%   - NOTE(review): the institution value "No Organization" looks like an
%     export placeholder -- confirm, and drop the field if so.
%
% BibTeX:
@dataset{Haoyu_Lu_and_Nanyi_Fei_and_Yuqi_Huo_and_Yizhao_Gao_and_Zhiwu_Lu_2024,
  author      = {Lu, Haoyu and Fei, Nanyi and Huo, Yuqi and Gao, Yizhao and Lu, Zhiwu},
  title       = {{CC14M}},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/eo9lstlo},
  url         = {https://service.tib.eu/ldmservice/dataset/cc14m},
  keywords    = {cross-modal retrieval, image-text, pre-training},
  abstract    = {Large-scale image-text dataset for pre-training a collaborative two-stream vision-language model for cross-modal retrieval.},
}