{ "id": "1905.08165", "version": "v1", "published": "2019-05-20T15:23:13.000Z", "updated": "2019-05-20T15:23:13.000Z", "title": "Gradient Ascent for Active Exploration in Bandit Problems", "authors": [ "Pierre Ménard" ], "comment": "21 pages, 1 figure", "categories": [ "stat.ML", "cs.LG" ], "abstract": "We present a new algorithm based on a gradient ascent for a general Active Exploration bandit problem in the fixed confidence setting. This problem encompasses several well studied problems such as the Best Arm Identification or Thresholding Bandits. It consists of a new sampling rule based on an online lazy mirror ascent. We prove that this algorithm is asymptotically optimal and, most importantly, computationally efficient.", "revisions": [ { "version": "v1", "updated": "2019-05-20T15:23:13.000Z" } ], "analyses": { "keywords": [ "gradient ascent", "general active exploration bandit problem", "online lazy mirror ascent", "best arm identification", "problem encompasses" ], "note": { "typesetting": "TeX", "pages": 21, "language": "en", "license": "arXiv", "status": "editable" } } }