{ "id": "2004.13106", "version": "v1", "published": "2020-04-27T19:12:20.000Z", "updated": "2020-04-27T19:12:20.000Z", "title": "Learning to Rank in the Position Based Model with Bandit Feedback", "authors": [ "Beyza Ermis", "Patrick Ernst", "Yannik Stein", "Giovanni Zappella" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "Personalization is a crucial aspect of many online experiences. In particular, content ranking is often a key component in delivering sophisticated personalization results. Commonly, supervised learning-to-rank methods are applied, which suffer from bias introduced during data collection by production systems in charge of producing the ranking. To compensate for this problem, we leverage contextual multi-armed bandits. We propose novel extensions of two well-known algorithms viz. LinUCB and Linear Thompson Sampling to the ranking use-case. To account for the biases in a production environment, we employ the position-based click model. Finally, we show the validity of the proposed algorithms by conducting extensive offline experiments on synthetic datasets as well as customer-facing online A/B experiments.", "revisions": [ { "version": "v1", "updated": "2020-04-27T19:12:20.000Z" } ], "analyses": { "keywords": [ "bandit feedback", "customer facing online a/b experiments", "leverage contextual multi-armed bandits", "delivering sophisticated personalization results", "online experiences" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }