Dataset Groups Activity Stream COCO 5K The dataset used in the paper for unpaired vision-language pre-training via cross-modal CutMix. BibTeX:

@comment{
  Entry for the "COCO 5K" dataset record published by TIB.
  The citation key is kept exactly as exported so existing citations keep resolving.
  Names use the "Last, First" form, the month uses the predefined macro,
  and acronyms in the title are brace-protected against style recasing.
}
@dataset{Teng_Wang_and_Wenhao_Jiang_and_Zhichao_Lu_and_Feng_Zheng_and_Ran_Cheng_and_Chengguo_Yin_and_Ping_Luo_2024,
  author      = {Wang, Teng and Jiang, Wenhao and Lu, Zhichao and Zheng, Feng and Cheng, Ran and Yin, Chengguo and Luo, Ping},
  title       = {{COCO} {5K}},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/b0ojmz96},
  url         = {https://service.tib.eu/ldmservice/dataset/coco-5k},
  keywords    = {image retrieval, unpaired learning, vision-language pre-training},
  abstract    = {The dataset used in the paper for unpaired vision-language pre-training via cross-modal CutMix.},
}