Dataset Groups Activity Stream
Mutan: Multimodal Tucker Fusion for Visual Question Answering
The dataset used in the paper is a collection of images and corresponding referring expressions.

BibTeX:

Review note (free text outside an entry is ignored by BibTeX): the citation key
below is kept exactly as exported so that existing citations still resolve. It
contains a non-ASCII acute accent, which classic BibTeX toolchains may not
accept; rename it to an ASCII-only key only together with every document that
cites it. The institution value looks like an export placeholder and is kept
unchanged pending confirmation.

@dataset{Hedi_Ben-Younes_and_R´emi_Cadene_and_Matthieu_Cord_and_Nicolas_Thome_2024,
  author      = {Ben-Younes, Hedi and Cadene, R{\'e}mi and Cord, Matthieu and Thome, Nicolas},
  title       = {{Mutan}: Multimodal {Tucker} Fusion for Visual Question Answering},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/rzhvbow6},
  url         = {https://service.tib.eu/ldmservice/dataset/mutan--multimodal-tucker-fusion-for-visual-question-answering},
  keywords    = {multimodal fusion, visual question answering},
  abstract    = {The dataset used in the paper is a collection of images and corresponding referring expressions.},
}