Dataset Groups Activity Stream

Penn Treebank corpus
The Penn Treebank corpus contains 49,208 sentences and over 1 million words, and is used to test the proposed algorithm on a real-world dataset.

BibTex:
@dataset{Peter_Macgregor_and_He_Sun_2025,
  author      = {Macgregor, Peter and Sun, He},
  title       = {{Penn Treebank} corpus},
  year        = {2025},
  month       = jan,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/cbes8y63},
  url         = {https://service.tib.eu/ldmservice/dataset/penn-treebank-corpus},
  keywords    = {natural language processing, sentences, words},
  abstract    = {The Penn Treebank corpus contains 49,208 sentences and over 1 million words, and is used to test the proposed algorithm on a real-world dataset.},
}