% Dataset Groups Activity Stream
% MARIO-10M
% The MARIO-10M dataset is a collection of about 10 million high-quality and
% diverse image-text pairs from various data sources such as natural images,
% posters, and book covers.
%
% BibTeX:
% (Text outside an entry is ignored by BibTeX, so these header lines act as
% comments. The exporter's placeholder `institution = No Organization` was
% removed because it carries no bibliographic information.)

@dataset{Jingye_Chen_and_Yupan_Huang_and_Tengchao_Lv_and_Lei_Cui_and_Qifeng_Chen_and_Furu_Wei_2024,
  author    = {Chen, Jingye and Huang, Yupan and Lv, Tengchao and Cui, Lei and Chen, Qifeng and Wei, Furu},
  title     = {{MARIO-10M}},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/8sua297g},
  url       = {https://service.tib.eu/ldmservice/dataset/mario-10m},
  keywords  = {book covers, image-text pairs, natural images, posters},
  abstract  = {The MARIO-10M dataset is a collection of about 10 million high-quality and diverse image-text pairs from various data sources such as natural images, posters, and book covers.},
}