@comment{
  Text that preceded the entry in the original record (apparently page
  navigation, the dataset title, and its one-line description):

    Dataset Groups Activity Stream
    WanJuan: A Comprehensive Multimodal Dataset for Advancing English and Chinese Large Models
    WanJuan: A comprehensive multimodal dataset for advancing English and Chinese large models.
    BibTex:

  Notes on the entry below:
    - The citation key is unchanged so existing citations keep resolving.
    - month uses the predefined macro (bare dec) so styles can localise it.
    - keywords replaces the unrecognised field name "keyword"; the
      Python-style quoting around each keyword was removed.
    - The original institution value "No Organization" was an empty
      placeholder and has been dropped so it cannot be printed by a style.
    - Braces in the title protect words whose capitalisation must survive
      sentence-casing styles.
}

@dataset{Conghui_He_and_Wei_Li_and_Zhenjiang_Jin_and_Chao_Xu_and_Bin_Wang_and_Dahua_Lin_2024,
  author    = {He, Conghui and Li, Wei and Jin, Zhenjiang and Xu, Chao and Wang, Bin and Lin, Dahua},
  title     = {{WanJuan}: A Comprehensive Multimodal Dataset for Advancing {English} and {Chinese} Large Models},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/eh9ai9h5},
  url       = {https://service.tib.eu/ldmservice/dataset/wanjuan--a-comprehensive-multimodal-dataset-for-advancing-english-and-chinese-large-models},
  keywords  = {Large Models, Multimodal Learning, Natural Language Processing},
  abstract  = {WanJuan: A comprehensive multimodal dataset for advancing English and Chinese large models.},
}