% Page text that preceded the entry in the export (outside any entry, so
% BibTeX ignores it; kept verbatim):
%   Dataset Groups Activity Stream Video-LLaVA Video-LLaVA: Learning united visual representation by alignment before projection. BibTex:
%
% Dataset record for Video-LLaVA, published by TIB (see doi/url below).
% Notes for maintainers:
%   - The citation key is kept exactly as exported so existing \cite commands
%     continue to resolve.
%   - The exported placeholder institution "No Organization" is omitted: it
%     is not bibliographic data and must not be printed by a style.
%   - The title is brace-protected so sentence-casing styles keep the
%     mixed-case name intact.

@dataset{Jiaqi_Xu_and_Cuiling_Lan_and_Wenxuan_Xie_and_Xuejin_Chen_and_Yan_Lu_2024,
  author    = {Xu, Jiaqi and Lan, Cuiling and Xie, Wenxuan and Chen, Xuejin and Lu, Yan},
  title     = {{Video-LLaVA}},
  abstract  = {Video-LLaVA: Learning united visual representation by alignment before projection.},
  keywords  = {united visual representation, video understanding},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/12swgcvf},
  url       = {https://service.tib.eu/ldmservice/dataset/video-llava},
}