Dataset Groups Activity Stream Corpora Generation for Grammatical Error Correction Two approaches for generating large parallel datasets for Grammatical Error Correction (GEC) using publicly available Wikipedia data. BibTex:

@dataset{Jared_Lichtarge_and_Chris_Alberti_and_Shankar_Kumar_and_Noam_Shazeer_and_Niki_Parmar_and_Simon_Tong_2025,
  author      = {Lichtarge, Jared and Alberti, Chris and Kumar, Shankar and Shazeer, Noam and Parmar, Niki and Tong, Simon},
  title       = {Corpora Generation for Grammatical Error Correction},
  abstract    = {Two approaches for generating large parallel datasets for Grammatical Error Correction (GEC) using publicly available Wikipedia data.},
  keywords    = {Grammatical Error Correction, Machine Learning, Natural Language Processing, Wikipedia},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2025},
  month       = jan,
  doi         = {10.57702/h484rdy9},
  url         = {https://service.tib.eu/ldmservice/dataset/corpora-generation-for-grammatical-error-correction},
}