{
  "id": "1809.06098",
  "version": "v1",
  "published": "2018-09-17T09:42:26.000Z",
  "updated": "2018-09-17T09:42:26.000Z",
  "title": "Policy Optimization via Importance Sampling",
  "authors": [
    "Alberto Maria Metelli",
    "Matteo Papini",
    "Francesco Faccio",
    "Marcello Restelli"
  ],
  "categories": [
    "cs.LG",
    "cs.AI",
    "stat.ML"
  ],
  "abstract": "Policy optimization is an effective reinforcement learning approach to solve continuous control tasks. Recent achievements have shown that alternating on-line and off-line optimization is a successful choice for efficient trajectory reuse. However, deciding when to stop optimizing and collect new trajectories is non-trivial as it requires to account for the variance of the objective function estimate. In this paper, we propose a novel model-free policy search algorithm, POIS, applicable in both control-based and parameter-based settings. We first derive a high-confidence bound for importance sampling estimation and then we define a surrogate objective function which is optimized off-line using a batch of trajectories. Finally, the algorithm is tested on a selection of continuous control tasks, with both linear and deep policies, and compared with the state-of-the-art policy optimization methods.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2018-09-17T09:42:26.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "importance sampling",
      "continuous control tasks",
      "novel model-free policy search algorithm",
      "state-of-the-art policy optimization methods",
      "objective function"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}