{
  "id": "1606.03203",
  "version": "v1",
  "published": "2016-06-10T06:19:32.000Z",
  "updated": "2016-06-10T06:19:32.000Z",
  "title": "Causal Bandits: Learning Good Interventions via Causal Inference",
  "authors": [
    "Finnian Lattimore",
    "Tor Lattimore",
    "Mark D. Reid"
  ],
  "categories": [
    "stat.ML",
    "cs.LG"
  ],
  "abstract": "We study the problem of using causal models to improve the rate at which good interventions can be learned online in a stochastic environment. Our formalism combines multi-arm bandits and causal inference to model a novel type of bandit feedback that is not exploited by existing approaches. We propose a new algorithm that exploits the causal feedback and prove a bound on its simple regret that is strictly better (in all quantities) than algorithms that do not use the additional causal information.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2016-06-10T06:19:32.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "causal inference",
      "causal bandits",
      "interventions",
      "additional causal information",
      "multi-arm bandits"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}