Dataset Groups Activity Stream Wikipedia Image Text
Wikipedia Image Text (WIT) dataset is a large-scale multimodal learning dataset used for training and evaluation of the MURAL model.
BibTeX:

@comment{
  Dataset record for Wikipedia Image Text (WIT).
  The citation key is kept exactly as exported so existing cite commands keep resolving.
  month uses the bare three-letter macro so the bibliography style can expand and localise it.
  keywords is a plain comma-separated list, as expected by biblatex keyword filters.
}
@dataset{Krishna_Srinivasan_and_Karthik_Raman_and_Jiecao_Chen_and_Michael_Bendersky_and_Marc_Najork_2024,
  author    = {Srinivasan, Krishna and Raman, Karthik and Chen, Jiecao and Bendersky, Michael and Najork, Marc},
  title     = {{Wikipedia} Image Text},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/w1yaarhx},
  url       = {https://service.tib.eu/ldmservice/dataset/wikipedia-image-text},
  keywords  = {Large-Scale Learning, Multimodal Learning, Wikipedia Image Text},
  abstract  = {Wikipedia Image Text (WIT) dataset is a large-scale multimodal learning dataset used for training and evaluation of the MURAL model.},
}