Dataset Groups Activity Stream EPIC: Leveraging Per Image-Token Consistency for Vision-Language Pre-training The proposed EPIC method is a pre-training approach that leverages more text tokens for learning vision-language associations. BibTex:

% Dataset record for the EPIC vision-language pre-training entry published via TIB.
% The citation key is kept exactly as exported so existing \cite commands keep resolving.
% `month` uses the predefined BibTeX macro (unbraced) so styles can localise it.
% `keywords` is a plain comma-separated list, as biblatex expects for keyword filtering.
@dataset{Yunhao_Gou_and_Tom_Ko_and_Hansi_Yang_and_Mingxuan_Wang_and_James_Kwok_and_Yu_Zhang_2024,
  author      = {Gou, Yunhao and Ko, Tom and Yang, Hansi and Wang, Mingxuan and Kwok, James and Zhang, Yu},
  title       = {{EPIC}: Leveraging Per Image-Token Consistency for Vision-Language Pre-training},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/4s3ecd7l},
  url         = {https://service.tib.eu/ldmservice/dataset/epic--leveraging-per-image-token-consistency-for-vision-language-pre-training},
  keywords    = {cross-modal learning, image-text pairs, vision-language pre-training},
  abstract    = {The proposed EPIC method is a pre-training approach that leverages more text tokens for learning vision-language associations.},
}