{ "id": "2210.07338", "version": "v1", "published": "2022-10-13T20:16:19.000Z", "updated": "2022-10-13T20:16:19.000Z", "title": "Reinforcement Learning with Unbiased Policy Evaluation and Linear Function Approximation", "authors": [ "Anna Winnicki", "R. Srikant" ], "comment": "9 pages, 0 figures", "categories": [ "cs.LG", "cs.SY", "eess.SY" ], "abstract": "We provide performance guarantees for a variant of simulation-based policy iteration for controlling Markov decision processes that involves the use of stochastic approximation algorithms along with state-of-the-art techniques that are useful for very large MDPs, including lookahead, function approximation, and gradient descent. Specifically, we analyze two algorithms; the first algorithm involves a least squares approach where a new set of weights associated with feature vectors is obtained via least squares minimization at each iteration and the second algorithm involves a two-time-scale stochastic approximation algorithm taking several steps of gradient descent towards the least squares solution before obtaining the next iterate using a stochastic approximation algorithm.", "revisions": [ { "version": "v1", "updated": "2022-10-13T20:16:19.000Z" } ], "analyses": { "keywords": [ "linear function approximation", "unbiased policy evaluation", "reinforcement learning", "two-time-scale stochastic approximation algorithm", "gradient descent" ], "note": { "typesetting": "TeX", "pages": 9, "language": "en", "license": "arXiv", "status": "editable" } } }