VQA

The VQA dataset is a large-scale visual question answering dataset consisting of images paired with open-ended questions that require natural language answers.

BibTeX:

@dataset{Yash_Goyal_and_Tejas_Khot_and_Douglas_Summers-Stay_and_Dhruv_Batra_and_Devi_Parikh_2024,
  abstract    = {The VQA dataset is a large-scale visual question answering dataset consisting of images paired with open-ended questions that require natural language answers.},
  author      = {Yash Goyal and Tejas Khot and Douglas Summers-Stay and Dhruv Batra and Devi Parikh},
  doi         = {10.57702/fjcazpne},
  institution = {No Organization},
  keyword     = {Image, Multimodal, Question Answering, Representation Learning, Semantic Reasoning, Text, VQA, Visual Question Answering, consistency, image captioning, image classification, natural language processing, question answering, visual question answering},
  month       = {dec},
  publisher   = {TIB},
  title       = {VQA},
  url         = {https://service.tib.eu/ldmservice/dataset/vqa},
  year        = {2024}
}
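For illustration only, a minimal sketch of the record structure the description implies: an image paired with a natural-language question and free-form answer(s). The field names and the sample values below are hypothetical and do not reflect the dataset's official schema or annotation format.

from dataclasses import dataclass
from typing import List

@dataclass
class VQAExample:
    # One visual question answering record: an image paired with a
    # natural-language question and its answer(s).
    # Field names are illustrative, not the official VQA schema.
    image_path: str
    question: str
    answers: List[str]

# Hypothetical example record in the spirit of the dataset description.
example = VQAExample(
    image_path="images/000001.jpg",
    question="What color is the umbrella?",
    answers=["red"],
)

print(example.question, "->", example.answers[0])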