{ "id": "1606.03203", "version": "v1", "published": "2016-06-10T06:19:32.000Z", "updated": "2016-06-10T06:19:32.000Z", "title": "Causal Bandits: Learning Good Interventions via Causal Inference", "authors": [ "Finnian Lattimore", "Tor Lattimore", "Mark D. Reid" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "We study the problem of using causal models to improve the rate at which good interventions can be learned online in a stochastic environment. Our formalism combines multi-arm bandits and causal inference to model a novel type of bandit feedback that is not exploited by existing approaches. We propose a new algorithm that exploits the causal feedback and prove a bound on its simple regret that is strictly better (in all quantities) than algorithms that do not use the additional causal information.", "revisions": [ { "version": "v1", "updated": "2016-06-10T06:19:32.000Z" } ], "analyses": { "keywords": [ "causal inference", "causal bandits", "interventions", "additional causal information", "multi-arm bandits" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }