Dataset Groups Activity Stream VLMixer: Unpaired Vision-Language Pre-training via Cross-Modal CutMix Unpaired vision-language pre-training via cross-modal CutMix. BibTex:

@comment{
  Citation record for the VLMixer dataset entry published by TIB
  (DOI 10.57702/nv69pofd). The long citation key is kept exactly as
  exported so that existing citations keep resolving.
  NOTE(review): the institution value looks like an export placeholder;
  confirm with the data provider before relying on it.
}

@dataset{T_Wang_and_W_Jiang_and_Z_Lu_and_F_Zheng_and_R_Cheng_and_C_Yin_and_P_Luo_2024,
  author      = {Wang, T. and Jiang, W. and Lu, Z. and Zheng, F. and Cheng, R. and Yin, C. and Luo, P.},
  title       = {{VLMixer}: Unpaired Vision-Language Pre-training via Cross-Modal {CutMix}},
  abstract    = {Unpaired vision-language pre-training via cross-modal CutMix.},
  keywords    = {VLMixer, multimodal learning},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/nv69pofd},
  url         = {https://service.tib.eu/ldmservice/dataset/vlmixer--unpaired-vision-language-pre-training-via-cross-modal-cutmix},
}