{ "id": "2106.10075", "version": "v1", "published": "2021-06-18T11:51:49.000Z", "updated": "2021-06-18T11:51:49.000Z", "title": "Learning to Plan via a Multi-Step Policy Regression Method", "authors": [ "Stefan Wagner", "Michael Janschek", "Tobias Uelwer", "Stefan Harmeling" ], "comment": "Accepted at the 30th International Conference on Artificial Neural Networks (ICANN 2021)", "categories": [ "cs.LG", "cs.AI", "cs.RO" ], "abstract": "We propose a new approach to increase inference performance in environments that require a specific sequence of actions in order to be solved. This is for example the case for maze environments where ideally an optimal path is determined. Instead of learning a policy for a single step, we want to learn a policy that can predict n actions in advance. Our proposed method called policy horizon regression (PHR) uses knowledge of the environment sampled by A2C to learn an n dimensional policy vector in a policy distillation setup which yields n sequential actions per observation. We test our method on the MiniGrid and Pong environments and show drastic speedup during inference time by successfully predicting sequences of actions on a single observation.", "revisions": [ { "version": "v1", "updated": "2021-06-18T11:51:49.000Z" } ], "analyses": { "keywords": [ "multi-step policy regression method", "environment", "dimensional policy vector", "policy distillation setup", "increase inference performance" ], "tags": [ "conference paper" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }