% Dataset Groups Activity Stream
% On the Theory of Reinforcement Learning
% The dataset is used to study a theory of reinforcement learning (RL) in which
% the learner receives binary feedback only once at the end of an episode.
% BibTeX:
%
% Cleanup notes for the entry below (citation key kept as exported so that
% existing citations keep resolving):
%   - month uses the predefined bare macro instead of a braced literal string;
%   - keywords is the standard field name, stored as a plain comma-separated
%     list without the quote characters from the export;
%   - the export's placeholder institution value ("No Organization") was
%     dropped because it carries no information and could be printed by a style;
%   - author names use the unambiguous "Last, First" form.
@dataset{Niladri_S_Chatterji_and_Aldo_Pacchiano_and_Peter_L_Bartlett_and_Michael_I_Jordan_2024,
  author    = {Chatterji, Niladri S. and Pacchiano, Aldo and Bartlett, Peter L. and Jordan, Michael I.},
  title     = {On the Theory of Reinforcement Learning},
  publisher = {TIB},
  year      = {2024},
  month     = dec,
  doi       = {10.57702/tcscztm0},
  url       = {https://service.tib.eu/ldmservice/dataset/on-the-theory-of-reinforcement-learning},
  keywords  = {binary feedback, episodic learning, reinforcement learning},
  abstract  = {The dataset is used to study a theory of reinforcement learning (RL) in which the learner receives binary feedback only once at the end of an episode.},
}