@comment{
  Text that preceded the entry in the original file, kept verbatim. It looks
  like page-navigation residue rather than bibliographic data (confirm before
  removing):
  Dataset Groups Activity Stream ALIGN Scaling up visual and vision-language representation learning with noisy text supervision. BibTex:
}

@comment{
  Dataset record for ALIGN. The citation key is left unchanged so that
  existing citations keep resolving. The institution value looks like a
  placeholder and is kept as found (TODO confirm whether it can be dropped).
}

@dataset{Soravit_Changpinyo_and_Yinfei_Yang_and_Ye_Xia_and_Yi-Ting_Chen_and_Zarana_Parekh_and_Hieu_Pham_and_Quoc_Le_and_Yun-Hsuan_Sung_and_Zhen_Li_and_Tom_Duerig_2024,
  author      = {Changpinyo, Soravit and Yang, Yinfei and Xia, Ye and Chen, Yi-Ting and Parekh, Zarana and Pham, Hieu and Le, Quoc and Sung, Yun-Hsuan and Li, Zhen and Duerig, Tom},
  title       = {{ALIGN}},
  abstract    = {Scaling up visual and vision-language representation learning with noisy text supervision.},
  keywords    = {noisy text supervision, vision-language pre-training},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/1siwlt77},
  url         = {https://service.tib.eu/ldmservice/dataset/align},
}