% Text that preceded the entry in the original file, kept verbatim. It lies
% outside any entry, so BibTeX/Biber do not read it as bibliographic data:
%
%   Dataset Groups Activity Stream CodeContest The dataset used in the paper for training and testing the DPO and PPO models. BibTex:
%
% CodeContest dataset record (publisher: TIB, DOI 10.57702/1vqkr7cn).
%
% Notes on this entry:
%   - The citation key is kept exactly as exported so that existing \cite
%     commands continue to resolve.
%   - Authors use the unambiguous "Last, First" form.
%   - The title is brace-protected so sentence-casing styles do not turn the
%     camelCase name into "Codecontest".
%   - `month` uses the bare predefined macro (dec) instead of a braced string,
%     so the style/backend can expand it.
%   - `keywords` replaces the unrecognised `keyword` field; the quote marks
%     left over from the exported list ('...', '...') were removed.
%   - The export also carried `institution = {No Organization}`. It appears to
%     be a placeholder rather than a real institution (TODO confirm), so it is
%     recorded here instead of in a field a style could print.
@dataset{Shusheng_Xu_and_Wei_Fu_and_Jiaxuan_Gao_and_Wenjie_Ye_and_Weilin_Liu_and_Zhiyu_Mei_and_Guangju_Wang_and_Chao_Yu_and_Yi_Wu_2024,
  author    = {Xu, Shusheng and Fu, Wei and Gao, Jiaxuan and Ye, Wenjie and Liu, Weilin and Mei, Zhiyu and Wang, Guangju and Yu, Chao and Wu, Yi},
  title     = {{CodeContest}},
  abstract  = {The dataset used in the paper for training and testing the DPO and PPO models.},
  keywords  = {CodeContest, Human Feedback, Reinforcement Learning},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/1vqkr7cn},
  url       = {https://service.tib.eu/ldmservice/dataset/codecontest},
}