{ "id": "2210.07338", "version": "v1", "published": "2022-10-13T20:16:19.000Z", "updated": "2022-10-13T20:16:19.000Z", "title": "Reinforcement Learning with Unbiased Policy Evaluation and Linear Function Approximation", "authors": [ "Anna Winnicki", "R. Srikant" ], "comment": "9 pages, 0 figures", "categories": [ "cs.LG", "cs.SY", "eess.SY" ], "abstract": "We provide performance guarantees for a variant of simulation-based policy iteration for controlling Markov decision processes that involves the use of stochastic approximation algorithms along with state-of-the-art techniques that are useful for very large MDPs, including lookahead, function approximation, and gradient descent. Specifically, we analyze two algorithms; the first algorithm involves a least squares approach where a new set of weights associated with feature vectors is obtained via least squares minimization at each iteration and the second algorithm involves a two-time-scale stochastic approximation algorithm taking several steps of gradient descent towards the least squares solution before obtaining the next iterate using a stochastic approximation algorithm.", "revisions": [ { "version": "v1", "updated": "2022-10-13T20:16:19.000Z" } ], "analyses": { "keywords": [ "linear function approximation", "unbiased policy evaluation", "reinforcement learning", "two-time-scale stochastic approximation algorithm", "gradient descent" ], "note": { "typesetting": "TeX", "pages": 9, "language": "en", "license": "arXiv", "status": "editable" } } }