@comment{
  Scraped page header, kept verbatim from the source landing page:
  Dataset Groups Activity Stream Fine-tuned CLIP Models are Efficient Video Learners This work explores the capability of a simple baseline called ViFi-CLIP (Video Fine-tuned CLIP) for adapting image-based CLIP to video domain. BibTex:

  Notes on the entry below:
  - The citation key is unchanged so existing citations keep resolving.
  - month uses the predefined, unbraced macro so styles can localise it.
  - The acronym in the title is brace-protected against sentence-casing.
  - The placeholder institution value (No Organization) is omitted.
}

@dataset{Hanoona_Rasheed_and_Muhammad_Uzair_Khattak_and_Salman_Khan_and_Fahad_Shahbaz_Khan_2024,
  author    = {Rasheed, Hanoona and Khattak, Muhammad Uzair and Khan, Salman and Khan, Fahad Shahbaz},
  title     = {Fine-tuned {CLIP} Models are Efficient Video Learners},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/nuu1d0jy},
  url       = {https://service.tib.eu/ldmservice/dataset/fine-tuned-clip-models-are-efficient-video-learners},
  keywords  = {CLIP, fine-tuning, video action recognition},
  abstract  = {This work explores the capability of a simple baseline called ViFi-CLIP (Video Fine-tuned CLIP) for adapting image-based CLIP to video domain.},
}