Dataset Groups Activity Stream

DocRepair dataset

The dataset used for testing the DocRepair model, containing 30m groups of 4 consecutive sentences in English and Russian.

BibTeX:

@comment{
  Citation record for the DocRepair dataset, published by TIB.
  The citation key is kept exactly as exported so existing \cite commands
  continue to resolve.
  NOTE(review): institution = "No Organization" looks like an export
  placeholder rather than a real affiliation -- confirm before relying on it.
}

@dataset{Elena_Voita_and_Rico_Sennrich_and_Ivan_Titov_2024,
  author      = {Voita, Elena and Sennrich, Rico and Titov, Ivan},
  title       = {{DocRepair} dataset},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/3qeel4sd},
  url         = {https://service.tib.eu/ldmservice/dataset/docrepair-dataset},
  keywords    = {DocRepair, Machine Translation},
  abstract    = {The dataset used for testing the DocRepair model, containing 30m groups of 4 consecutive sentences in English and Russian.},
}