Dataset Groups Activity Stream EgoSchema
EgoSchema is a diagnostic benchmark for assessing very long-form video-language understanding capabilities of modern multimodal systems.
BibTeX:

@comment{
  Dataset record for EgoSchema as published through the TIB service (see the
  url and doi fields). Names are stored in "Last, First" form, the month uses
  the bare BibTeX macro, and the camelCase title is brace-protected so that
  sentence-casing styles do not lowercase it.
  NOTE(review): institution = "No Organization" looks like an exporter
  placeholder -- confirm whether it should be dropped or replaced.
}

@dataset{Karttikeya_Mangalam_and_Raiymbek_Akshulakov_and_Jitendra_Malik_2024,
  author      = {Mangalam, Karttikeya and Akshulakov, Raiymbek and Malik, Jitendra},
  title       = {{EgoSchema}},
  abstract    = {EgoSchema is a diagnostic benchmark for assessing very long-form video-language understanding capabilities of modern multimodal systems.},
  keywords    = {language understanding, long-form video, multimodal learning, very long-form, video language understanding},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/fzd4ep5q},
  url         = {https://service.tib.eu/ldmservice/dataset/egoschema},
}