Dataset Groups Activity Stream GIGA-CM dataset GIGA-CM is a large-scale dataset comprising millions of documents, created to facilitate the pre-training of hierarchical document encoding models for summarization tasks. BibTex:

@comment{
  Record for the GIGA-CM dataset as exported from the landing page given in
  the url field. Changes relative to the raw export:
  the corporate author is double-braced so it is parsed as one indivisible
  name; the acronym in the title is brace-protected against style recasing;
  month uses the predefined macro instead of a literal string.
  NOTE(review): institution = "No Organization" looks like an exporter
  placeholder rather than real metadata -- confirm before relying on it.
}
@dataset{Microsoft_Research_2024,
  author      = {{Microsoft Research}},
  title       = {{GIGA-CM} dataset},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = nov,
  doi         = {10.57702/fsfa4x4x},
  url         = {https://service.tib.eu/ldmservice/dataset/giga-cm-dataset},
  abstract    = {GIGA-CM is a large-scale dataset comprising millions of documents, created to facilitate the pre-training of hierarchical document encoding models for summarization tasks.},
}