Dataset Groups Activity Stream Flickr30k entities: Collecting region-to-phrase correspondences for richer image-to-sentence models A dataset for multimodal learning tasks, focusing on region-to-phrase correspondences for image-to-sentence models. BibTex:

@comment{
  Dataset record exported from the TIB data service. The citation key is the
  exporter-generated one and is kept unchanged so existing citations resolve.
  The month uses the bare three-letter macro so styles can expand it.
}
@dataset{Bryan_A_Plummer_and_Liwei_Wang_and_Chris_M_Cervantes_and_Juan_C_Caicedo_and_Julia_Hockenmaier_and_Svetlana_Lazebnik_2024,
  author      = {Plummer, Bryan A. and Wang, Liwei and Cervantes, Chris M. and Caicedo, Juan C. and Hockenmaier, Julia and Lazebnik, Svetlana},
  title       = {{Flickr30k} Entities: Collecting Region-to-Phrase Correspondences for Richer Image-to-Sentence Models},
  abstract    = {A dataset for multimodal learning tasks, focusing on region-to-phrase correspondences for image-to-sentence models.},
  keywords    = {image-to-sentence models, multimodal learning, region-to-phrase correspondences},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/xtar17ow},
  url         = {https://service.tib.eu/ldmservice/dataset/flickr30k-entities--collecting-region-to-phrase-correspondences-for-richer-image-to-sentence-models},
}