{ "id": "2006.11647", "version": "v1", "published": "2020-06-20T20:21:33.000Z", "updated": "2020-06-20T20:21:33.000Z", "title": "An Optimal Elimination Algorithm for Learning a Best Arm", "authors": [ "Avinatan Hassidim", "Ron Kupfer", "Yaron Singer" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "We consider the classic problem of $(\\epsilon,\\delta)$-PAC learning a best arm where the goal is to identify with confidence $1-\\delta$ an arm whose mean is an $\\epsilon$-approximation to that of the highest mean arm in a multi-armed bandit setting. This problem is one of the most fundamental problems in statistics and learning theory, yet somewhat surprisingly its worst-case sample complexity is not well understood. In this paper, we propose a new approach for $(\\epsilon,\\delta)$-PAC learning a best arm. This approach leads to an algorithm whose sample complexity converges to \\emph{exactly} the optimal sample complexity of $(\\epsilon,\\delta)$-learning the mean of $n$ arms separately and we complement this result with a conditional matching lower bound. More specifically:", "revisions": [ { "version": "v1", "updated": "2020-06-20T20:21:33.000Z" } ], "analyses": { "keywords": [ "best arm", "optimal elimination algorithm", "worst-case sample complexity", "conditional matching lower bound", "sample complexity converges" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }