@comment{
  Page text that accompanied this record when it was exported:
  Dataset Groups Activity Stream SBU Captions
  The SBU Captions dataset is a large-scale image-text dataset used for
  vision-language pre-training.

  Notes on the entry below:
  - The citation key is kept exactly as exported so existing citations resolve.
  - month uses the predefined BibTeX macro (unbraced) so styles can localise it.
  - keywords is a plain comma-separated list.
  - The exporter placeholder institution "No Organization" is omitted so it
    is never printed as if it were a real institution.
}

@dataset{Sunan_He_and_Taian_Guo_and_Tao_Dai_and_Ruizhi_Qiao_and_Chen_Wu_and_Xiujun_Shu_and_Bo_Ren_2024,
  author    = {He, Sunan and Guo, Taian and Dai, Tao and Qiao, Ruizhi and Wu, Chen and Shu, Xiujun and Ren, Bo},
  title     = {{SBU} Captions},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/hc8227lz},
  url       = {https://service.tib.eu/ldmservice/dataset/sbu-captions},
  keywords  = {dataset, image-text pairs, image-text retrieval, vision and language, vision-language pre-training, visual question answering},
  abstract  = {The SBU Captions dataset is a large-scale image-text dataset used for vision-language pre-training.},
}