{
  "id": "1403.5341",
  "version": "v2",
  "published": "2014-03-21T01:42:53.000Z",
  "updated": "2015-06-08T19:05:44.000Z",
  "title": "An Information-Theoretic Analysis of Thompson Sampling",
  "authors": [
    "Daniel Russo",
    "Benjamin Van Roy"
  ],
  "categories": [
    "cs.LG"
  ],
  "abstract": "We provide an information-theoretic analysis of Thompson sampling that applies across a broad range of online optimization problems in which a decision-maker must learn from partial feedback. This analysis inherits the simplicity and elegance of information theory and leads to regret bounds that scale with the entropy of the optimal-action distribution. This strengthens preexisting results and yields new insight into how information improves performance.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2014-03-21T01:42:53.000Z",
      "comment": null,
      "journal": null,
      "doi": null
    },
    {
      "version": "v2",
      "updated": "2015-06-08T19:05:44.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "information-theoretic analysis",
      "thompson sampling",
      "online optimization problems",
      "optimal-action distribution",
      "broad range"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable",
      "adsabs": "2014arXiv1403.5341R"
    }
  }
}