{ "id": "2005.10175", "version": "v1", "published": "2020-05-20T16:35:19.000Z", "updated": "2020-05-20T16:35:19.000Z", "title": "Finite-sample Analysis of Greedy-GQ with Linear Function Approximation under Markovian Noise", "authors": [ "Yue Wang", "Shaofeng Zou" ], "comment": "UAI 2020", "categories": [ "cs.LG", "stat.ML" ], "abstract": "Greedy-GQ is an off-policy two timescale algorithm for optimal control in reinforcement learning. This paper develops the first finite-sample analysis for the Greedy-GQ algorithm with linear function approximation under Markovian noise. Our finite-sample analysis provides theoretical justification for choosing stepsizes for this two timescale algorithm for faster convergence in practice, and suggests a trade-off between the convergence rate and the quality of the obtained policy. Our paper extends the finite-sample analyses of two timescale reinforcement learning algorithms from policy evaluation to optimal control, which is of more practical interest. Specifically, in contrast to existing finite-sample analyses for two timescale methods, e.g., GTD, GTD2 and TDC, where their objective functions are convex, the objective function of the Greedy-GQ algorithm is non-convex. Moreover, the Greedy-GQ algorithm is also not a linear two-timescale stochastic approximation algorithm. Our techniques in this paper provide a general framework for finite-sample analysis of non-convex value-based reinforcement learning algorithms for optimal control.", "revisions": [ { "version": "v1", "updated": "2020-05-20T16:35:19.000Z" } ], "analyses": { "keywords": [ "linear function approximation", "finite-sample analysis", "markovian noise", "value-based reinforcement learning algorithms", "optimal control" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }