% Dataset | Groups | Activity Stream
% LibriLight: A Benchmark for ASR with Limited or No Supervision
% The LibriLight dataset is a large-scale speech corpus used for
% self-supervised speech recognition tasks.
%
% BibTeX record for the dataset described above. These header lines sit
% outside any entry, so BibTeX and Biber ignore them.
% NOTE(review): the author list is reproduced as exported by the data
% portal; confirm it against the dataset's original publication.

@dataset{Alexei_Baevski_and_Henry_Zhou_and_Abdelrahman_Mohamed_and_Michael_Auli_2025,
  author    = {Baevski, Alexei and Zhou, Henry and Mohamed, Abdelrahman and Auli, Michael},
  title     = {{LibriLight}: A Benchmark for {ASR} with Limited or No Supervision},
  publisher = {TIB},
  year      = {2025},
  month     = jan,
  doi       = {10.57702/1y5slbfc},
  url       = {https://service.tib.eu/ldmservice/dataset/librilight--a-benchmark-for-asr-with-limited-or-no-supervision},
  keywords  = {LibriLight, Self-Supervised Learning, Speech Recognition},
  abstract  = {The LibriLight dataset is a large-scale speech corpus used for self-supervised speech recognition tasks.},
}