Dataset Groups Activity Stream

RefCOCO, RefCOCO+, and RefCOCOg

Visual Grounding is a task that aims to locate a target object according to a natural language expression. The dataset used in this paper is RefCOCO, RefCOCO+, and RefCOCOg.

BibTeX:

@comment{
  Dataset record for RefCOCO, RefCOCO+, and RefCOCOg as exported from the
  TIB service page given in the url field below.
  Conventions used in this entry:
    - authors are stored in "Last, First" form, separated by " and ";
    - mixed-case dataset names in the title are brace-protected so that
      bibliography styles cannot lowercase them;
    - month uses the predefined three-letter macro, unbraced;
    - keywords is a plain comma-separated list.
  NOTE(review): the institution value looks like an export placeholder;
  it is kept verbatim -- confirm whether it should be removed.
}
@dataset{Yucheng_Suo_and_Linchao_Zhu_and_Yi_Yang_2024,
  author      = {Suo, Yucheng and Zhu, Linchao and Yang, Yi},
  title       = {{RefCOCO}, {RefCOCO+}, and {RefCOCOg}},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/a6ushrmc},
  url         = {https://service.tib.eu/ldmservice/dataset/refcoco--refcoco---and-refcocog},
  keywords    = {Image Captioning, Image-Text Matching, Referring Expression Comprehension, Referring Image Segmentation, Visual Grounding, Zero-shot Segmentation},
  abstract    = {Visual Grounding is a task that aims to locate a target object according to a natural language expression. The dataset used in this paper is RefCOCO, RefCOCO+, and RefCOCOg.},
}