Fine-tuning Language Models with Advantage-Induced Policy Alignment

The datasets used in the paper are the Anthropic Helpfulness and Harmlessness dataset and the StackExchange dataset.

BibTeX:

@dataset{Banghua_Zhu_and_Hiteshi_Sharma_and_Felipe_Vieira_Frujeri_and_Shi_Dong_and_Michael_I_Jordan_and_Jiantao_Jiao_2024,
  abstract    = {The dataset used in the paper is the Anthropic Helpfulness and Harmlessness dataset and the StackExchange dataset.},
  author      = {Banghua Zhu and Hiteshi Sharma and Felipe Vieira Frujeri and Shi Dong and Michael I. Jordan and Jiantao Jiao},
  doi         = {10.57702/3oqqdleq},
  institution = {No Organization},
  keyword     = {'Advantage-Induced Policy Alignment', 'Language Models', 'Reinforcement Learning'},
  month       = {dec},
  publisher   = {TIB},
  title       = {Fine-tuning Language Models with Advantage-Induced Policy Alignment},
  url         = {https://service.tib.eu/ldmservice/dataset/fine-tuning-language-models-with-advantage-induced-policy-alignment},
  year        = {2024}
}