{ "id": "2210.05431", "version": "v1", "published": "2022-10-11T13:21:59.000Z", "updated": "2022-10-11T13:21:59.000Z", "title": "Non-Asymptotic Analysis of a UCB-based Top Two Algorithm", "authors": [ "Marc Jourdan", "Rémy Degenne" ], "comment": "32 pages, 5 figures, 3 tables", "categories": [ "stat.ML", "cs.LG" ], "abstract": "A Top Two sampling rule for bandit identification is a method which selects the next arm to sample from among two candidate arms, a leader and a challenger. Due to their simplicity and good empirical performance, they have received increased attention in recent years. For fixed-confidence best arm identification, theoretical guarantees for Top Two methods have only been obtained in the asymptotic regime, when the error level vanishes. We derive the first non-asymptotic upper bound on the expected sample complexity of a Top Two algorithm holding for any error level. Our analysis highlights sufficient properties for a regret minimization algorithm to be used as leader. They are satisfied by the UCB algorithm and our proposed UCB-based Top Two algorithm enjoys simultaneously non-asymptotic guarantees and competitive empirical performance.", "revisions": [ { "version": "v1", "updated": "2022-10-11T13:21:59.000Z" } ], "analyses": { "keywords": [ "non-asymptotic analysis", "analysis highlights sufficient properties", "fixed-confidence best arm identification", "algorithm enjoys simultaneously non-asymptotic guarantees", "first non-asymptotic upper bound" ], "note": { "typesetting": "TeX", "pages": 32, "language": "en", "license": "arXiv", "status": "editable" } } }