% Dataset: LongPile
% LongPile is a diverse dataset derived from the Pile corpus.
%
% Notes on this record:
%   - The exported record carried the placeholder institution "No Organization";
%     it is omitted here because it is not bibliographic data.
%   - "dataset" is a biblatex entry type. NOTE(review): classic BibTeX styles
%     may not know it and fall back to their default handling -- confirm
%     against the style in use.
%   - The citation key is kept exactly as exported so existing citations
%     continue to resolve.

@dataset{Ali_Safaya_and_Deniz_Yuret_2024,
  author    = {Safaya, Ali and Yuret, Deniz},
  title     = {{LongPile}},
  year      = {2024},
  month     = dec,
  publisher = {TIB},
  doi       = {10.57702/lz6t9hu0},
  url       = {https://service.tib.eu/ldmservice/dataset/longpile},
  keywords  = {Language Models, Natural Language Processing},
  abstract  = {LongPile is a diverse dataset derived from the Pile corpus.},
}