{ "id": "1107.1744", "version": "v2", "published": "2011-07-08T22:18:05.000Z", "updated": "2011-10-08T06:06:43.000Z", "title": "Stochastic convex optimization with bandit feedback", "authors": [ "Alekh Agarwal", "Dean P. Foster", "Daniel Hsu", "Sham M. Kakade", "Alexander Rakhlin" ], "categories": [ "math.OC", "cs.LG", "cs.SY" ], "abstract": "This paper addresses the problem of minimizing a convex, Lipschitz function $f$ over a convex, compact set $\\xset$ under a stochastic bandit feedback model. In this model, the algorithm is allowed to observe noisy realizations of the function value $f(x)$ at any query point $x \\in \\xset$. The quantity of interest is the regret of the algorithm, which is the sum of the function values at algorithm's query points minus the optimal function value. We demonstrate a generalization of the ellipsoid algorithm that incurs $\\otil(\\poly(d)\\sqrt{T})$ regret. Since any algorithm has regret at least $\\Omega(\\sqrt{T})$ on this problem, our algorithm is optimal in terms of the scaling with $T$.", "revisions": [ { "version": "v2", "updated": "2011-10-08T06:06:43.000Z" } ], "analyses": { "keywords": [ "stochastic convex optimization", "stochastic bandit feedback model", "algorithms query points minus", "optimal function value", "paper addresses" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2011arXiv1107.1744A" } } }