Dataset Groups Activity Stream LRS2
The LRS2 dataset consists of 48,164 video clips from outdoor shows on BBC television. Each video is accompanied by an audio corresponding to a sentence with up to 100 characters.
BibTex:

@comment{
  Dataset record for LRS2 as published by the TIB data service.
  - The citation key is kept exactly as exported so existing cite commands keep resolving.
  - The institution value looks like an export placeholder; confirm before relying on it.
  - Keywords are stored as a plain comma-separated list, including case variants from the export.
}
@dataset{Ziqiao_Peng_and_Wentao_Hu_and_Yue_Shi_and_Xiangyu_Zhu_and_Xiaomei_Zhang_and_Hao_Zhao_and_Jun_He_and_Hongyan_Liu_and_Zhaoxin_Fan_2024,
  author      = {Peng, Ziqiao and Hu, Wentao and Shi, Yue and Zhu, Xiangyu and Zhang, Xiaomei and Zhao, Hao and He, Jun and Liu, Hongyan and Fan, Zhaoxin},
  title       = {{LRS2}},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/o6o0hfm6},
  url         = {https://service.tib.eu/ldmservice/dataset/lrs2},
  keywords    = {ASR, AV-ASR, Audio-Visual Speech Recognition, BBC News, Dubbing, Duration Controllable, LRS2, Multimodal TTS, Speech Enhancement, Speech Recognition, Speech Separation, Text-to-Speech, VSR, Video Corpus, audio-visual, clean speech, conditional image generation, facial expressions, head poses, image manipulation, lip movements, multi-channel, noise, overlapped speech, overlapped-noisy-reverberant, single image, speech dereverberation, speech enhancement, speech recognition, speech separation, synchronized},
  abstract    = {The LRS2 dataset consists of 48,164 video clips from outdoor shows on BBC television. Each video is accompanied by an audio corresponding to a sentence with up to 100 characters.},
}