@comment{
  Multimodal Visual Patterns (MMVP) Benchmark

  The Multimodal Visual Patterns (MMVP) benchmark is a dataset used to
  evaluate the visual question answering capabilities of multimodal large
  language models (MLLMs).

  BibTeX record exported from the TIB data portal. The citation key is kept
  exactly as exported so existing citations keep resolving. The export's
  placeholder institution "No Organization" was dropped because it is not
  real metadata.
}

@dataset{Shengbang_Tong_and_Yi_Ma_and_Zhuang_Liu_and_Yann_LeCun_and_Yuexiang_Zhai_and_Saining_Xie_2024,
  author    = {Tong, Shengbang and Ma, Yi and Liu, Zhuang and LeCun, Yann and Zhai, Yuexiang and Xie, Saining},
  title     = {Multimodal Visual Patterns ({MMVP}) Benchmark},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/7mfkee4u},
  url       = {https://service.tib.eu/ldmservice/dataset/multimodal-visual-patterns--mmvp--benchmark},
  keywords  = {large language models, multimodal learning, visual question answering},
  abstract  = {The Multimodal Visual Patterns (MMVP) benchmark is a dataset used to evaluate the visual question answering capabilities of multimodal large language models (MLLMs).},
}