% Citation record for the CC12M dataset as published via TIB (see doi/url below).
% The lines in this header sit outside any entry, so BibTeX and Biber ignore them.
%
% Dataset Groups Activity Stream
% CC12M dataset
% CC12M dataset is used for training and testing the proposed method.
% It contains 12 million images with 12 million captions.
%
% BibTeX:
@dataset{Zheng_Ma_and_Changxin_Wang_and_Yawen_Ouyang_and_Fei_Zhao_and_Jianbing_Zhang_and_Shujian_Huang_and_Jiajun_Chen_2024,
  author      = {Ma, Zheng and Wang, Changxin and Ouyang, Yawen and Zhao, Fei and Zhang, Jianbing and Huang, Shujian and Chen, Jiajun},
  title       = {{CC12M} dataset},
  abstract    = {CC12M dataset is used for training and testing the proposed method. It contains 12 million images with 12 million captions.},
  keywords    = {CC12M, dataset, image captioning},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/p4296kqf},
  url         = {https://service.tib.eu/ldmservice/dataset/cc12m-dataset},
}