@comment{
  Page text captured alongside this record (navigation labels, record
  title, and description), kept for provenance:

    Dataset | Groups | Activity Stream
    C4 dataset
    The dataset used in the paper is not explicitly mentioned, but it is
    mentioned that the authors trained a GPT2 transformer language model
    on the C4 dataset.
    BibTex:

  NOTE(review): the institution value "No Organization" looks like an
  export placeholder rather than a real affiliation -- confirm, and drop
  the field if so.
  NOTE(review): no access date is recorded for the URL; add a urldate
  field once the retrieval date is known.
}

@dataset{Ziyi_Guan_and_Hantao_Huang_and_Yupeng_Su_and_Hong_Huang_and_Ngai_Wong_and_Hao_Yu_2024,
  author      = {Guan, Ziyi and Huang, Hantao and Su, Yupeng and Huang, Hong and Wong, Ngai and Yu, Hao},
  title       = {{C4} dataset},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/bsbjlzeg},
  url         = {https://service.tib.eu/ldmservice/dataset/c4-dataset},
  keywords    = {C4 dataset, GPT2, Large Language Models, news articles, text classification, transformer language model},
  abstract    = {The dataset used in the paper is not explicitly mentioned, but it is mentioned that the authors trained a GPT2 transformer language model on the C4 dataset.},
}