Dataset Groups Activity Stream OSCAR Dataset The dataset used in the paper is a large corpus of real-world programs for pre-training a neural network model to learn better code representation. BibTex:

% Text outside an entry is ignored by BibTeX, so the page text above and these notes are inert.
% NOTE(review): the institution value (No Organization) looks like an export placeholder; confirm or drop it.
% NOTE(review): no access date is recorded for the URL; add urldate once known.

@dataset{Dinglan_Peng_and_Shuxin_Zheng_and_Yatao_Li_and_Guolin_Ke_and_Di_He_and_Tie-Yan_Liu_2024,
  author      = {Peng, Dinglan and Zheng, Shuxin and Li, Yatao and Ke, Guolin and He, Di and Liu, Tie-Yan},
  title       = {{OSCAR} Dataset},
  publisher   = {TIB},
  institution = {No Organization},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/gp2sfidn},
  url         = {https://service.tib.eu/ldmservice/dataset/oscar-dataset},
  keywords    = {code representation, neural networks, pre-training, programming languages},
  abstract    = {The dataset used in the paper is a large corpus of real-world programs for pre-training a neural network model to learn better code representation.},
}