@comment{
  Page text captured together with this record (kept for provenance only, not citation data):
  Dataset Groups Activity Stream VATEX The dataset used in the paper is a video question answering dataset, which is a large-scale video-language pre-training task. BibTex:

  Notes on the entry below:
  - The dataset entry type comes from the biblatex data model; classic BibTeX styles are expected to treat it as a generic entry (confirm against the style in use).
  - month is the bare dec macro so that the style can expand and localise it.
  - keywords is a plain comma-separated list; the export had a quoted list under the unrecognised field name keyword.
  - The export's institution placeholder, No Organization, is omitted because it names no real body.
  - The title acronym is brace-protected so sentence-casing styles keep its capitals.
}

@dataset{V_Gabeur_and_S_K_Gorti_and_N_Vouitsis_and_J_Ma_and_G_Golestan_and_M_Volkovs_and_A_Garg_and_G_Yu_2024,
  author    = {Gabeur, V. and Gorti, S. K. and Vouitsis, N. and Ma, J. and Golestan, G. and Volkovs, M. and Garg, A. and Yu, G.},
  title     = {{VATEX}},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/r7bbm16b},
  url       = {https://service.tib.eu/ldmservice/dataset/vatex},
  keywords  = {Large-scale, Multi-modal Transformer, Multilingual, Retrieval, Video Captioning, Video Retrieval, Video-Text, activity recognition, temporal modeling, video question answering},
  abstract  = {The dataset used in the paper is a video question answering dataset, which is a large-scale video-language pre-training task.},
}