{ "id": "1403.5341", "version": "v2", "published": "2014-03-21T01:42:53.000Z", "updated": "2015-06-08T19:05:44.000Z", "title": "An Information-Theoretic Analysis of Thompson Sampling", "authors": [ "Daniel Russo", "Benjamin Van Roy" ], "categories": [ "cs.LG" ], "abstract": "We provide an information-theoretic analysis of Thompson sampling that applies across a broad range of online optimization problems in which a decision-maker must learn from partial feedback. This analysis inherits the simplicity and elegance of information theory and leads to regret bounds that scale with the entropy of the optimal-action distribution. This strengthens preexisting results and yields new insight into how information improves performance.", "revisions": [ { "version": "v1", "updated": "2014-03-21T01:42:53.000Z", "comment": null, "journal": null, "doi": null }, { "version": "v2", "updated": "2015-06-08T19:05:44.000Z" } ], "analyses": { "keywords": [ "information-theoretic analysis", "thompson sampling", "online optimization problems", "optimal-action distribution", "broad range" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2014arXiv1403.5341R" } } }