Dataset Groups Activity Stream
Posterior Sampling for Reinforcement Learning
The dataset used in the paper is a random finite horizon Markov decision process (MDP) with states S, actions A, and horizon τ.
BibTex:

@comment{
  Citation record for the dataset page linked in the url field below.
  The citation key is kept exactly as exported so that existing citations
  keep resolving. Author names use the "Last, First" form so that the
  two-word surname "Van Roy" is not split. The institution value looks
  like an exporter placeholder; confirm before relying on it.
}
@dataset{Ian_Osband_and_Benjamin_Van_Roy_and_Daniel_Russo_2024,
  author      = {Osband, Ian and Van Roy, Benjamin and Russo, Daniel},
  title       = {Posterior Sampling for Reinforcement Learning},
  year        = {2024},
  month       = dec,
  publisher   = {TIB},
  institution = {No Organization},
  doi         = {10.57702/shyazt9q},
  url         = {https://service.tib.eu/ldmservice/dataset/posterior-sampling-for-reinforcement-learning},
  keywords    = {Markov decision processes, posterior sampling, reinforcement learning},
  abstract    = {The dataset used in the paper is a random finite horizon Markov decision process (MDP) with states S, actions A, and horizon τ.},
}