{
  "id": "2205.08295",
  "version": "v1",
  "published": "2022-05-17T12:51:54.000Z",
  "updated": "2022-05-17T12:51:54.000Z",
  "title": "Semi-Parametric Contextual Bandits with Graph-Laplacian Regularization",
  "authors": [
    "Young-Geun Choi",
    "Gi-Soo Kim",
    "Seunghoon Paik",
    "Myunghee Cho Paik"
  ],
  "categories": [
    "stat.ML",
    "cs.LG"
  ],
  "abstract": "Non-stationarity is ubiquitous in human behavior and addressing it in the contextual bandits is challenging. Several works have addressed the problem by investigating semi-parametric contextual bandits and warned that ignoring non-stationarity could harm performances. Another prevalent human behavior is social interaction which has become available in a form of a social network or graph structure. As a result, graph-based contextual bandits have received much attention. In this paper, we propose \"SemiGraphTS,\" a novel contextual Thompson-sampling algorithm for a graph-based semi-parametric reward model. Our algorithm is the first to be proposed in this setting. We derive an upper bound of the cumulative regret that can be expressed as a multiple of a factor depending on the graph structure and the order for the semi-parametric model without a graph. We evaluate the proposed and existing algorithms via simulation and real data example.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2022-05-17T12:51:54.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "graph-laplacian regularization",
      "graph structure",
      "prevalent human behavior",
      "real data example",
      "novel contextual thompson-sampling algorithm"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}