Dataset Groups Activity Stream Enwik8 The Enwik8 dataset is a large-scale language modeling dataset. BibTex:

% Record for the Enwik8 dataset as published by TIB (DOI 10.57702/yr8b2f73).
% The citation key is kept exactly as exported, including its non-ASCII
% accent marks, so existing citations keep resolving; rename it only together
% with every document that cites it.
% Author accents are written as BibTeX special characters so that sorting
% and label generation treat each accented letter as a single letter.
@dataset{Tomas_Mikolov_and_Martin_Karafi´at_and_Luk´as_Burget_and_Jan_Cernock´y_and_Sanjeev_Khudanpur_2024,
  author    = {Mikolov, Tomas and Karafi{\'a}t, Martin and Burget, Luk{\'a}{\v{s}} and {\v{C}}ernock{\'y}, Jan and Khudanpur, Sanjeev},
  title     = {Enwik8},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/yr8b2f73},
  url       = {https://service.tib.eu/ldmservice/dataset/enwik8},
  keywords  = {LSTM, Language Modeling, Regularization},
  abstract  = {The Enwik8 dataset is a large-scale language modeling dataset.},
}