{ "id": "2106.10075", "version": "v1", "published": "2021-06-18T11:51:49.000Z", "updated": "2021-06-18T11:51:49.000Z", "title": "Learning to Plan via a Multi-Step Policy Regression Method", "authors": [ "Stefan Wagner", "Michael Janschek", "Tobias Uelwer", "Stefan Harmeling" ], "comment": "Accepted at the 30th International Conference on Artificial Neural Networks (ICANN 2021)", "categories": [ "cs.LG", "cs.AI", "cs.RO" ], "abstract": "We propose a new approach to increase inference performance in environments that require a specific sequence of actions in order to be solved. This is for example the case for maze environments where ideally an optimal path is determined. Instead of learning a policy for a single step, we want to learn a policy that can predict n actions in advance. Our proposed method called policy horizon regression (PHR) uses knowledge of the environment sampled by A2C to learn an n dimensional policy vector in a policy distillation setup which yields n sequential actions per observation. We test our method on the MiniGrid and Pong environments and show drastic speedup during inference time by successfully predicting sequences of actions on a single observation.", "revisions": [ { "version": "v1", "updated": "2021-06-18T11:51:49.000Z" } ], "analyses": { "keywords": [ "multi-step policy regression method", "environment", "dimensional policy vector", "policy distillation setup", "increase inference performance" ], "tags": [ "conference paper" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }