{ "id": "2210.05918", "version": "v1", "published": "2022-10-12T04:37:54.000Z", "updated": "2022-10-12T04:37:54.000Z", "title": "Finite time analysis of temporal difference learning with linear function approximation: Tail averaging and regularisation", "authors": [ "Gandharv Patil", "Prashanth L. A.", "Dheeraj Nagaraj", "Doina Precup" ], "categories": [ "cs.LG", "cs.AI", "cs.SY", "eess.SY", "stat.ML" ], "abstract": "We study the finite-time behaviour of the popular temporal difference (TD) learning algorithm when combined with tail-averaging. We derive finite time bounds on the parameter error of the tail-averaged TD iterate under a step-size choice that does not require information about the eigenvalues of the matrix underlying the projected TD fixed point. Our analysis shows that tail-averaged TD converges at the optimal $O\\left(1/t\\right)$ rate, both in expectation and with high probability. In addition, our bounds exhibit a sharper rate of decay for the initial error (bias), which is an improvement over averaging all iterates. We also propose and analyse a variant of TD that incorporates regularisation. From analysis, we conclude that the regularised version of TD is useful for problems with ill-conditioned features.", "revisions": [ { "version": "v1", "updated": "2022-10-12T04:37:54.000Z" } ], "analyses": { "keywords": [ "finite time analysis", "linear function approximation", "temporal difference learning", "tail averaging", "regularisation" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }