Dataset Groups Activity Stream LM-Extraction benchmark The LM-Extraction benchmark is derived from The Pile (Gao et al., 2020) dataset, which contains 15,000 pairs of prefixes and suffixes derived from The Pile dataset (Gao et al., 2020). BibTex:

@comment{
  Citation record for the LM-Extraction benchmark dataset, published by TIB
  (see the doi and url fields). The citation key is kept exactly as exported
  so that existing citations of this entry keep resolving.
}

@dataset{Zhexin_Zhang_and_Jiaxin_Wen_and_Minlie_Huang_2024,
  author      = {Zhang, Zhexin and Wen, Jiaxin and Huang, Minlie},
  title       = {{LM-Extraction} benchmark},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/oyk61fti},
  url         = {https://service.tib.eu/ldmservice/dataset/lm-extraction-benchmark},
  keywords    = {language model, named entity recognition, text classification},
  abstract    = {The LM-Extraction benchmark is derived from The Pile (Gao et al., 2020) dataset, which contains 15,000 pairs of prefixes and suffixes derived from The Pile dataset (Gao et al., 2020).},
}