@comment{Dataset Groups Activity Stream Voxceleb2 The Voxceleb2 dataset is a large-scale speaker recognition dataset, containing 2442 hours raw speech from 6112 speakers. BibTex:}

@comment{Citation record for the Voxceleb2 dataset as published by TIB under the DOI below. The citation key is kept exactly as exported so existing cite commands keep resolving.}

@dataset{J_S_Chung_and_A_Nagrani_and_A_Zisserman_2025,
  author    = {Chung, J. S. and Nagrani, A. and Zisserman, A.},
  title     = {Voxceleb2},
  year      = {2025},
  month     = jan,
  publisher = {TIB},
  doi       = {10.57702/otue2x0v},
  url       = {https://service.tib.eu/ldmservice/dataset/voxceleb2},
  keywords  = {Audio, Audio Dataset, Audio-Visual, Face, Speaker Recognition, Speech, Speech Separation, Visual, Voxceleb2, audio-visual speech recognition, speaker recognition},
  abstract  = {The Voxceleb2 dataset is a large-scale speaker recognition dataset, containing 2442 hours raw speech from 6112 speakers.},
}