Dataset Groups Activity Stream OpenWebTextCorpus The OpenWebText corpus is a collection of text data from the web. BibTeX:

% Dataset record for OpenWebTextCorpus as published by TIB (see the doi and url fields).
% Notes on this entry:
%   - the citation key is kept exactly as exported so existing citations still resolve;
%   - author names use the unambiguous "Last, First" form;
%   - the one-word title is braced so sentence-casing styles keep its camelCase spelling;
%   - month uses the predefined, unquoted dec macro instead of a braced string;
%   - keywords is a plain comma-separated list without quote characters;
%   - the exporter placeholder institution value "No Organization" is omitted.
@dataset{Wes_Gurnee_and_Neel_Nanda_and_Matthew_Pauly_and_Katherine_Harvey_and_Dmitrii_Troitskii_and_Dimitris_Bertsimas_2024,
  author    = {Gurnee, Wes and Nanda, Neel and Pauly, Matthew and Harvey, Katherine and Troitskii, Dmitrii and Bertsimas, Dimitris},
  title     = {{OpenWebTextCorpus}},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/hcb1yh2b},
  url       = {https://service.tib.eu/ldmservice/dataset/openwebtextcorpus},
  keywords  = {openwebtext, text analysis, web scraping},
  abstract  = {The OpenWebText corpus is a collection of text data from the web.},
}