{ "id": "1208.0984", "version": "v1", "published": "2012-08-05T06:34:44.000Z", "updated": "2012-08-05T06:34:44.000Z", "title": "APRIL: Active Preference-learning based Reinforcement Learning", "authors": [ "Riad Akrour", "Marc Schoenauer", "Michèle Sebag" ], "journal": "ECML PKDD 2012 7524 (2012) 116-131", "categories": [ "cs.LG" ], "abstract": "This paper focuses on reinforcement learning (RL) with limited prior knowledge. In the domain of swarm robotics for instance, the expert can hardly design a reward function or demonstrate the target behavior, forbidding the use of both standard RL and inverse reinforcement learning. Although with a limited expertise, the human expert is still often able to emit preferences and rank the agent demonstrations. Earlier work has presented an iterative preference-based RL framework: expert preferences are exploited to learn an approximate policy return, thus enabling the agent to achieve direct policy search. Iteratively, the agent selects a new candidate policy and demonstrates it; the expert ranks the new demonstration comparatively to the previous best one; the expert's ranking feedback enables the agent to refine the approximate policy return, and the process is iterated. In this paper, preference-based reinforcement learning is combined with active ranking in order to decrease the number of ranking queries to the expert needed to yield a satisfactory policy. Experiments on the mountain car and the cancer treatment testbeds witness that a couple of dozen rankings enable to learn a competent policy.", "revisions": [ { "version": "v1", "updated": "2012-08-05T06:34:44.000Z" } ], "analyses": { "keywords": [ "reinforcement learning", "approximate policy return", "active preference-learning", "cancer treatment testbeds witness", "achieve direct policy search" ], "tags": [ "journal article" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2012arXiv1208.0984A" } } }