Dataset Groups Activity Stream When and why Vision-Language Models behave like Bags-of-Words, and what to do about it? When and why Vision-Language Models behave like Bags-of-Words, and what to do about it? BibTex:

% Dataset record published by TIB. Text outside the entry (including these
% lines) is ignored by BibTeX and Biber.
% The citation key is kept exactly as exported so existing citations resolve.
% The exporter's placeholder institution "No Organization" was dropped because
% it names no real body. The abstract merely repeats the title, as exported.
@dataset{Mert_Yuksekgonul_and_Federico_Bianchi_and_Pratyusha_Kalluri_and_Dan_Jurafsky_and_James_Zou_2024,
  author    = {Yuksekgonul, Mert and Bianchi, Federico and Kalluri, Pratyusha and Jurafsky, Dan and Zou, James},
  title     = {When and why Vision-Language Models behave like Bags-of-Words, and what to do about it?},
  abstract  = {When and why Vision-Language Models behave like Bags-of-Words, and what to do about it?},
  keywords  = {Bags-of-Words, Vision-Language Models},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/pxdwqv15},
  url       = {https://service.tib.eu/ldmservice/dataset/when-and-why-vision-language-models-behave-like-bags-of-words--and-what-to-do-about-it-},
}