Dataset Groups Activity Stream Dense Reward for Free in RLHF The dataset used in the paper is not explicitly described, but it is mentioned that it is a preference dataset for language models. BibTex:

% Dataset record for "Dense Reward for Free in RLHF" (publisher: TIB).
% The citation key is kept exactly as exported so existing \cite commands still resolve.
% NOTE(review): institution "No Organization" looks like an export placeholder -- confirm before relying on it.
@dataset{Alex_J_Chan_and_Hao_Sun_and_Samuel_Holt_and_Mihaela_van_der_Schaar_2024,
  author      = {Chan, Alex J. and Sun, Hao and Holt, Samuel and van der Schaar, Mihaela},
  title       = {Dense Reward for Free in {RLHF}},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/netp9p5i},
  url         = {https://service.tib.eu/ldmservice/dataset/dense-reward-for-free-in-rlhf},
  keywords    = {human feedback, language models, reinforcement learning},
  abstract    = {The dataset used in the paper is not explicitly described, but it is mentioned that it is a preference dataset for language models.},
}