Dataset Groups Activity Stream CleanEval CleanEval is the largest publicly available dataset for boilerplate removal. BibTex:

% Citation record for the CleanEval dataset (publisher: TIB; see doi/url below).
% - author is a corporate name, so it is double-braced to be kept as one literal unit.
% - title is brace-protected so sentence-casing styles keep the CamelCase spelling.
% - month uses the predefined bare macro (dec) rather than a braced string.
% - keywords is a plain comma-separated list under the field name biblatex recognises.
% NOTE(review): institution = {No Organization} looks like an export placeholder;
%   confirm whether it should be dropped or replaced with the real institution.
@dataset{CleanEval_2024,
  author      = {{CleanEval}},
  title       = {{CleanEval}},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/n8qmki4t},
  url         = {https://service.tib.eu/ldmservice/dataset/cleaneval},
  keywords    = {HTML pages, boilerplate removal},
  abstract    = {CleanEval is the largest publicly available dataset for boilerplate removal.},
}