@comment{
  Gutenberg Corpus

  A dataset of 2,857 books written by 141 authors, used for pre-training and
  fine-tuning a language model for author-stylized text generation.

  Page-header residue from the scraped landing page: Dataset / Groups / Activity Stream.

  Notes on the entry below:
  - The citation key is the exporter-generated key, kept unchanged so that
    existing citations continue to resolve.
  - The exported institution field only held the placeholder value
    No Organization and is therefore omitted.
  - month uses the predefined dec macro (unbraced) so styles can format it.
}

@dataset{Bakhtiyar_Syed_and_Gaurav_Verma_and_Balaji_Vasan_Srinivasan_and_Anandhavelu_Natarajan_and_Vasudeva_Varma_2024,
  author    = {Syed, Bakhtiyar and Verma, Gaurav and Srinivasan, Balaji Vasan and Natarajan, Anandhavelu and Varma, Vasudeva},
  title     = {{Gutenberg} Corpus},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/f9n0n0hu},
  url       = {https://service.tib.eu/ldmservice/dataset/gutenberg-corpus},
  keywords  = {Authorship Analysis, Books, Corpus, Gutenberg Corpus, author-stylized text, language model, text generation},
  abstract  = {A dataset of 2,857 books written by 141 authors, used for pre-training and fine-tuning a language model for author-stylized text generation.},
}