{ "id": "1809.06098", "version": "v1", "published": "2018-09-17T09:42:26.000Z", "updated": "2018-09-17T09:42:26.000Z", "title": "Policy Optimization via Importance Sampling", "authors": [ "Alberto Maria Metelli", "Matteo Papini", "Francesco Faccio", "Marcello Restelli" ], "categories": [ "cs.LG", "cs.AI", "stat.ML" ], "abstract": "Policy optimization is an effective reinforcement learning approach to solve continuous control tasks. Recent achievements have shown that alternating on-line and off-line optimization is a successful choice for efficient trajectory reuse. However, deciding when to stop optimizing and collect new trajectories is non-trivial as it requires to account for the variance of the objective function estimate. In this paper, we propose a novel model-free policy search algorithm, POIS, applicable in both control-based and parameter-based settings. We first derive a high-confidence bound for importance sampling estimation and then we define a surrogate objective function which is optimized off-line using a batch of trajectories. Finally, the algorithm is tested on a selection of continuous control tasks, with both linear and deep policies, and compared with the state-of-the-art policy optimization methods.", "revisions": [ { "version": "v1", "updated": "2018-09-17T09:42:26.000Z" } ], "analyses": { "keywords": [ "importance sampling", "continuous control tasks", "novel model-free policy search algorithm", "state-of-the-art policy optimization methods", "objective function" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }