{
  "id": "1702.06103",
  "version": "v1",
  "published": "2017-02-20T18:43:05.000Z",
  "updated": "2017-02-20T18:43:05.000Z",
  "title": "An Improved Parametrization and Analysis of the EXP3++ Algorithm for Stochastic and Adversarial Bandits",
  "authors": [
    "Yevgeny Seldin",
    "Gábor Lugosi"
  ],
  "categories": [
    "cs.LG",
    "stat.ML"
  ],
  "abstract": "We present a new strategy for gap estimation in randomized algorithms for multiarmed bandits and combine it with the EXP3++ algorithm of Seldin and Slivkins (2014). In the stochastic regime the strategy reduces dependence of regret on a time horizon from $(\\ln t)^3$ to $(\\ln t)^2$ and replaces an additive factor of order $\\Delta e^{1/\\Delta^2}$ by an additive factor of order $1/\\Delta^7$, where $\\Delta$ is the minimal gap of a problem instance. In the adversarial regime regret guarantee remains unchanged.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2017-02-20T18:43:05.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "adversarial bandits",
      "stochastic",
      "adversarial regime regret guarantee remains",
      "parametrization",
      "strategy reduces dependence"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}