{ "id": "2210.05918", "version": "v1", "published": "2022-10-12T04:37:54.000Z", "updated": "2022-10-12T04:37:54.000Z", "title": "Finite time analysis of temporal difference learning with linear function approximation: Tail averaging and regularisation", "authors": [ "Gandharv Patil", "Prashanth L. A.", "Dheeraj Nagaraj", "Doina Precup" ], "categories": [ "cs.LG", "cs.AI", "cs.SY", "eess.SY", "stat.ML" ], "abstract": "We study the finite-time behaviour of the popular temporal difference (TD) learning algorithm when combined with tail-averaging. We derive finite time bounds on the parameter error of the tail-averaged TD iterate under a step-size choice that does not require information about the eigenvalues of the matrix underlying the projected TD fixed point. Our analysis shows that tail-averaged TD converges at the optimal $O\\left(1/t\\right)$ rate, both in expectation and with high probability. In addition, our bounds exhibit a sharper rate of decay for the initial error (bias), which is an improvement over averaging all iterates. We also propose and analyse a variant of TD that incorporates regularisation. From analysis, we conclude that the regularised version of TD is useful for problems with ill-conditioned features.", "revisions": [ { "version": "v1", "updated": "2022-10-12T04:37:54.000Z" } ], "analyses": { "keywords": [ "finite time analysis", "linear function approximation", "temporal difference learning", "tail averaging", "regularisation" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }