Dataset Groups Activity Stream InstructBLIP The InstructBLIP dataset is a vision-language model for comprehensive scene understanding and textual descriptions. BibTex:

@comment{
  Citation record for the InstructBLIP dataset page referenced by the url
  and doi fields below (publisher field: TIB).
  - The citation key is kept exactly as exported so existing citations of
    this record keep resolving.
  - Author names use the unambiguous "Last, First" form.
  - The title is brace-protected so sentence-casing styles keep the
    mixed-case spelling.
  - month uses the predefined macro (bare, not braced).
  - keywords is a plain comma-separated list.
  - institution holds the exporter's "No Organization" value; it is
    retained unchanged. NOTE(review): looks like a placeholder, confirm
    whether it should be dropped.
}
@dataset{Wenliang_Dai_and_Junnan_Li_and_Dongxu_Li_and_Anthony_Meng_Huat_Tiong_2024,
  author      = {Dai, Wenliang and Li, Junnan and Li, Dongxu and Tiong, Anthony Meng Huat},
  title       = {{InstructBLIP}},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/euv6djqk},
  url         = {https://service.tib.eu/ldmservice/dataset/instructblip},
  keywords    = {blindness, instruction tuning, low vision, multimodal question answering, vision-language inference, vision-language models},
  abstract    = {The InstructBLIP dataset is a vision-language model for comprehensive scene understanding and textual descriptions.},
}