Dataset Groups Activity Stream BookCorpus The dataset used in this paper for unsupervised sentence representation learning, consisting of paragraphs from unlabeled text. BibTex:

@comment{
  Citation record for the BookCorpus dataset entry (DOI 10.57702/wgy6lj2h, publisher TIB).
  The citation key is left unchanged so that existing citations keep resolving.
  NOTE(review): institution = {No Organization} looks like an export placeholder;
  confirm whether a real institution should be recorded here.
}
@dataset{Shuai_Tang_and_Hailin_Jin_and_Chen_Fang_and_Zhaowen_Wang_and_Virginia_R_de_Sa_2024,
  author      = {Tang, Shuai and Jin, Hailin and Fang, Chen and Wang, Zhaowen and de Sa, Virginia R.},
  title       = {{BookCorpus}},
  year        = {2024},
  month       = nov,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/wgy6lj2h},
  url         = {https://service.tib.eu/ldmservice/dataset/bookcorpus},
  keywords    = {BookCorpus, Text Corpus, bookcorpus, books, language models, natural language processing, paragraphs, sentence representation, sentence representation learning, sentences, text analysis, text classification, text generation, unlabeled text, unpaired learning, vision-language pre-training},
  abstract    = {The dataset used in this paper for unsupervised sentence representation learning, consisting of paragraphs from unlabeled text.},
}