{
  "id": "2406.16802",
  "version": "v1",
  "published": "2024-06-24T17:14:31.000Z",
  "updated": "2024-06-24T17:14:31.000Z",
  "title": "Improved Regret Bounds for Bandits with Expert Advice",
  "authors": [
    "Nicolò Cesa-Bianchi",
    "Khaled Eldowa",
    "Emmanuel Esposito",
    "Julia Olkhovskaya"
  ],
  "categories": [
    "cs.LG",
    "stat.ML"
  ],
  "abstract": "In this research note, we revisit the bandits with expert advice problem. Under a restricted feedback model, we prove a lower bound of order $\\sqrt{K T \\ln(N/K)}$ for the worst-case regret, where $K$ is the number of actions, $N>K$ the number of experts, and $T$ the time horizon. This matches a previously known upper bound of the same order and improves upon the best available lower bound of $\\sqrt{K T (\\ln N) / (\\ln K)}$. For the standard feedback model, we prove a new instance-based upper bound that depends on the agreement between the experts and provides a logarithmic improvement compared to prior results.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2024-06-24T17:14:31.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "regret bounds",
      "lower bound",
      "expert advice problem",
      "standard feedback model",
      "restricted feedback model"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}