Dataset Groups Activity Stream Policy Optimization for Low-rank MDPs (POLO) Learning Adversarial Low-rank Markov Decision Processes with Unknown Transition and Full-information Feedback BibTex:

% Dataset record for "Policy Optimization for Low-rank MDPs (POLO)",
% published by TIB under DOI 10.57702/p5oat7zd.
% The text above the entry is landing-page residue; BibTeX ignores anything
% outside an entry.
% The citation key is kept exactly as exported so existing \cite commands
% continue to resolve.
% NOTE(review): institution = {No Organization} looks like an export
% placeholder rather than a real affiliation -- confirm before relying on it.
@dataset{Canzhe_Zhao_and_Ruofeng_Yang_and_Baoxiang_Wang_and_Xuezhou_Zhang_and_Shuai_Li_2024,
  author      = {Zhao, Canzhe and Yang, Ruofeng and Wang, Baoxiang and Zhang, Xuezhou and Li, Shuai},
  title       = {Policy Optimization for Low-rank {MDPs} ({POLO})},
  abstract    = {Learning Adversarial Low-rank Markov Decision Processes with Unknown Transition and Full-information Feedback},
  keywords    = {adversarial loss functions, low-rank MDPs, policy optimization},
  institution = {No Organization},
  publisher   = {TIB},
  year        = {2024},
  month       = dec,
  doi         = {10.57702/p5oat7zd},
  url         = {https://service.tib.eu/ldmservice/dataset/policy-optimization-for-low-rank-mdps--polo-},
}