{ "id": "1907.09765", "version": "v1", "published": "2019-07-23T08:56:08.000Z", "updated": "2019-07-23T08:56:08.000Z", "title": "Variance Reduction in Actor Critic Methods (ACM)", "authors": [ "Eric Benhamou" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "After presenting Actor Critic Methods (ACM), we show ACM are control variate estimators. Using the projection theorem, we prove that the Q and Advantage Actor Critic (A2C) methods are optimal in the sense of the $L^2$ norm for the control variate estimators spanned by functions conditioned by the current state and action. This straightforward application of Pythagoras theorem provides a theoretical justification of the strong performance of QAC and AAC most often referred to as A2C methods in deep policy gradient methods. This enables us to derive a new formulation for Advantage Actor Critic methods that has lower variance and improves the traditional A2C method.", "revisions": [ { "version": "v1", "updated": "2019-07-23T08:56:08.000Z" } ], "analyses": { "keywords": [ "variance reduction", "control variate estimators", "advantage actor critic methods", "deep policy gradient methods", "traditional a2c method" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }