% Dataset | Groups | Activity Stream
%
% TL;DR Reddit corpus
%
% The TL;DR Reddit corpus consists of approximately 3 million content-summary
% pairs mined from Reddit, designed for the TL;DR challenge focusing on text
% summarization.
%
% BibTeX:
%
% NOTE(review): the citation key below contains a non-ASCII character. It is
% kept unchanged so that existing \cite commands still resolve, but it needs a
% UTF-8-aware backend (e.g. Biber); classic 8-bit BibTeX may not handle it.
% NOTE(review): the value of the institution field looks like an exporter
% placeholder rather than a real organization -- confirm, and drop the field
% if so.

@dataset{Michael_Völske_and_Martin_Potthast_and_Shahbaz_Syed_and_Benno_Stein_2024,
  author      = {Völske, Michael and Potthast, Martin and Syed, Shahbaz and Stein, Benno},
  title       = {{TL;DR} {Reddit} corpus},
  abstract    = {The TL;DR Reddit corpus consists of approximately 3 million content-summary pairs mined from Reddit, designed for the TL;DR challenge focusing on text summarization.},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = nov,
  doi         = {10.57702/2heldboi},
  url         = {https://service.tib.eu/ldmservice/dataset/tl-dr-reddit-corpus},
}