{
  "id": "1107.1744",
  "version": "v2",
  "published": "2011-07-08T22:18:05.000Z",
  "updated": "2011-10-08T06:06:43.000Z",
  "title": "Stochastic convex optimization with bandit feedback",
  "authors": [
    "Alekh Agarwal",
    "Dean P. Foster",
    "Daniel Hsu",
    "Sham M. Kakade",
    "Alexander Rakhlin"
  ],
  "categories": [
    "math.OC",
    "cs.LG",
    "cs.SY"
  ],
  "abstract": "This paper addresses the problem of minimizing a convex, Lipschitz function $f$ over a convex, compact set $\\xset$ under a stochastic bandit feedback model. In this model, the algorithm is allowed to observe noisy realizations of the function value $f(x)$ at any query point $x \\in \\xset$. The quantity of interest is the regret of the algorithm, which is the sum of the function values at algorithm's query points minus the optimal function value. We demonstrate a generalization of the ellipsoid algorithm that incurs $\\otil(\\poly(d)\\sqrt{T})$ regret. Since any algorithm has regret at least $\\Omega(\\sqrt{T})$ on this problem, our algorithm is optimal in terms of the scaling with $T$.",
  "revisions": [
    {
      "version": "v2",
      "updated": "2011-10-08T06:06:43.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "stochastic convex optimization",
      "stochastic bandit feedback model",
      "algorithms query points minus",
      "optimal function value",
      "paper addresses"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable",
      "adsabs": "2011arXiv1107.1744A"
    }
  }
}