% Dataset Groups Activity Stream
% Ultrafeedback
% The dataset used in the paper is Ultrafeedback, which is a preference dataset
% that contains 63k preference pairs sampled from models other than the SFT model.
%
% BibTeX record for the Ultrafeedback dataset entry published by TIB
% (DOI 10.57702/06vp56te). The citation key is kept exactly as exported so
% existing \cite commands keep resolving.
%
% NOTE(review): `institution = {No Organization}` looks like an exporter
% placeholder rather than a real institution -- confirm before relying on it.
@dataset{Wenxuan_Zhou_and_Ravi_Agrawal_and_Shujian_Zhang_and_Sathish_Reddy_Indurthi_and_Sanqiang_Zhao_and_Kaiqiang_Song_and_Silei_Xu_and_Chenguang_Zhu_2024,
  author      = {Zhou, Wenxuan and Agrawal, Ravi and Zhang, Shujian and Indurthi, Sathish Reddy and Zhao, Sanqiang and Song, Kaiqiang and Xu, Silei and Zhu, Chenguang},
  title       = {Ultrafeedback},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/06vp56te},
  url         = {https://service.tib.eu/ldmservice/dataset/ultrafeedback},
  keywords    = {Ultrafeedback, language model, preference alignment, preference optimization, sDPO},
  abstract    = {The dataset used in the paper is Ultrafeedback, which is a preference dataset that contains 63k preference pairs sampled from models other than the SFT model.},
}