{ "id": "1311.0468", "version": "v1", "published": "2013-11-03T14:18:56.000Z", "updated": "2013-11-03T14:18:56.000Z", "title": "Thompson Sampling for Online Learning with Linear Experts", "authors": [ "Aditya Gopalan" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "In this note, we present a version of the Thompson sampling algorithm for the problem of online linear generalization with full information (i.e., the experts setting), studied by Kalai and Vempala, 2005. The algorithm uses a Gaussian prior and time-varying Gaussian likelihoods, and we show that it essentially reduces to Kalai and Vempala's Follow-the-Perturbed-Leader strategy, with exponentially distributed noise replaced by Gaussian noise. This implies sqrt(T) regret bounds for Thompson sampling (with time-varying likelihood) for online learning with full information.", "revisions": [ { "version": "v1", "updated": "2013-11-03T14:18:56.000Z" } ], "analyses": { "keywords": [ "online learning", "linear experts", "full information", "vempalas follow-the-perturbed-leader strategy", "online linear generalization" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2013arXiv1311.0468G" } } }