{
  "id": "1311.0468",
  "version": "v1",
  "published": "2013-11-03T14:18:56.000Z",
  "updated": "2013-11-03T14:18:56.000Z",
  "title": "Thompson Sampling for Online Learning with Linear Experts",
  "authors": [
    "Aditya Gopalan"
  ],
  "categories": [
    "stat.ML",
    "cs.LG"
  ],
  "abstract": "In this note, we present a version of the Thompson sampling algorithm for the problem of online linear generalization with full information (i.e., the experts setting), studied by Kalai and Vempala, 2005. The algorithm uses a Gaussian prior and time-varying Gaussian likelihoods, and we show that it essentially reduces to Kalai and Vempala's Follow-the-Perturbed-Leader strategy, with exponentially distributed noise replaced by Gaussian noise. This implies sqrt(T) regret bounds for Thompson sampling (with time-varying likelihood) for online learning with full information.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2013-11-03T14:18:56.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "online learning",
      "linear experts",
      "full information",
      "vempalas follow-the-perturbed-leader strategy",
      "online linear generalization"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable",
      "adsabs": "2013arXiv1311.0468G"
    }
  }
}