@comment{
  VGDiffZero: Text-to-Image Diffusion Models Can Be Zero-Shot Visual Grounders

  VGDiffZero is a zero-shot visual grounding framework that leverages
  pre-trained text-to-image diffusion models' vision-language alignment
  abilities.

  Dataset record published by TIB; the BibTeX entry for citing it follows.
}

@dataset{Xuyang_Liu_and_Siteng_Huang_and_Yachen_Kang_and_Honggang_Chen_and_Donglin_Wang_2024,
  author    = {Liu, Xuyang and Huang, Siteng and Kang, Yachen and Chen, Honggang and Wang, Donglin},
  title     = {{VGDiffZero}: Text-to-Image Diffusion Models Can Be Zero-Shot Visual Grounders},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/5c7ldj53},
  url       = {https://service.tib.eu/ldmservice/dataset/vgdiffzero--text-to-image-diffusion-models-can-be-zero-shot-visual-grounders},
  keywords  = {text-to-image diffusion models, visual grounding, zero-shot learning},
  abstract  = {VGDiffZero is a zero-shot visual grounding framework that leverages pre-trained text-to-image diffusion models' vision-language alignment abilities.},
}