Dataset Groups Activity Stream ReferItGame Visual grounding is the task of localizing a language query in an image. The output is often a bounding box as drawn in the yellow color. BibTex:

% ReferItGame dataset record. Text outside the @dataset{...} entry is ignored
% by BibTeX/Biber. The citation key is kept exactly as exported so existing
% \cite commands keep resolving.
@dataset{Zhengyuan_Yang_and_Boqing_Gong_and_Liwei_Wang_and_Wenbing_Huang_and_Dong_Yu_and_Jiebo_Luo_2024,
  author    = {Yang, Zhengyuan and Gong, Boqing and Wang, Liwei and Huang, Wenbing and Yu, Dong and Luo, Jiebo},
  title     = {{ReferItGame}},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/dbq66v1j},
  url       = {https://service.tib.eu/ldmservice/dataset/referitgame},
  keywords  = {image annotation, natural language processing, visual grounding},
  abstract  = {Visual grounding is the task of localizing a language query in an image. The output is often a bounding box as drawn in the yellow color.},
}