Dataset Groups Activity Stream RedPajama The RedPajama dataset is an open-source recipe to reproduce the LLaMA training dataset. BibTex:

% Citation record for the RedPajama dataset, published through TIB
% (see the doi and url fields below).
% - author is double-braced so the corporate name is kept as one unit
%   instead of being split into first/last name parts.
% - title is brace-protected so sentence-casing styles keep the
%   camelCase spelling.
% - month uses the bare three-letter macro so styles can expand it.
% - keywords is a plain comma-separated list.
% NOTE(review): the institution value looks like an export placeholder;
% confirm whether it should be dropped.
@dataset{Together_Computer_2024,
  author      = {{Together Computer}},
  title       = {{RedPajama}},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/qqx3qox1},
  url         = {https://service.tib.eu/ldmservice/dataset/redpajama},
  keywords    = {Dataset Creation, Language Model, Pretraining Data, RedPajama},
  abstract    = {The RedPajama dataset is an open-source recipe to reproduce the LLaMA training dataset.},
}