{ "id": "1905.08165", "version": "v1", "published": "2019-05-20T15:23:13.000Z", "updated": "2019-05-20T15:23:13.000Z", "title": "Gradient Ascent for Active Exploration in Bandit Problems", "authors": [ "Pierre Ménard" ], "comment": "21 pages, 1 figure", "categories": [ "stat.ML", "cs.LG" ], "abstract": "We present a new algorithm based on a gradient ascent for a general Active Exploration bandit problem in the fixed confidence setting. This problem encompasses several well studied problems such as the Best Arm Identification or Thresholding Bandits. It consists of a new sampling rule based on an online lazy mirror ascent. We prove that this algorithm is asymptotically optimal and, most importantly, computationally efficient.", "revisions": [ { "version": "v1", "updated": "2019-05-20T15:23:13.000Z" } ], "analyses": { "keywords": [ "gradient ascent", "general active exploration bandit problem", "online lazy mirror ascent", "best arm identification", "problem encompasses" ], "note": { "typesetting": "TeX", "pages": 21, "language": "en", "license": "arXiv", "status": "editable" } } }