{ "id": "1907.09765", "version": "v1", "published": "2019-07-23T08:56:08.000Z", "updated": "2019-07-23T08:56:08.000Z", "title": "Variance Reduction in Actor Critic Methods (ACM)", "authors": [ "Eric Benhamou" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "After presenting Actor Critic Methods (ACM), we show ACM are control variate estimators. Using the projection theorem, we prove that the Q and Advantage Actor Critic (A2C) methods are optimal in the sense of the $L^2$ norm for the control variate estimators spanned by functions conditioned by the current state and action. This straightforward application of Pythagoras theorem provides a theoretical justification of the strong performance of QAC and AAC most often referred to as A2C methods in deep policy gradient methods. This enables us to derive a new formulation for Advantage Actor Critic methods that has lower variance and improves the traditional A2C method.", "revisions": [ { "version": "v1", "updated": "2019-07-23T08:56:08.000Z" } ], "analyses": { "keywords": [ "variance reduction", "control variate estimators", "advantage actor critic methods", "deep policy gradient methods", "traditional a2c method" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }