@comment{
  Page text captured together with the record; it is not part of the entry:
  Dataset Groups Activity Stream
  Distributed representations of words and phrases and their compositionality
  The word2vec dataset is a word embedding dataset that contains 3 million words.
  BibTex:
}

@comment{
  Dataset record published by TIB for the word2vec word embeddings.
  Maintenance notes:
  - The citation key is kept exactly as exported so existing cite commands
    keep resolving.
  - Author names are stored in "Last, First" form so name parsing is
    unambiguous.
  - The month is the bare jan macro, so styles can localise or abbreviate it.
  - Keywords are a plain comma-separated list; the quote characters left over
    from the export were removed and every keyword was kept.
  - The exporter placeholder institution value "No Organization" was dropped
    because it carries no information and would otherwise be printed
    literally by styles that render that field.
}

@dataset{Tomas_Mikolov_and_Ilya_Sutskever_and_Kai_Chen_and_Greg_S_Corrado_and_Jeff_Dean_2025,
  author    = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S. and Dean, Jeff},
  title     = {Distributed representations of words and phrases and their compositionality},
  publisher = {TIB},
  year      = {2025},
  month     = jan,
  doi       = {10.57702/kcdhx0zi},
  url       = {https://service.tib.eu/ldmservice/dataset/distributed-representations-of-words-and-phrases-and-their-compositionality},
  keywords  = {Compositionality, Word Embeddings, word embeddings, word2vec},
  abstract  = {The word2vec dataset is a word embedding dataset that contains 3 million words.},
}