{ "id": "2205.08295", "version": "v1", "published": "2022-05-17T12:51:54.000Z", "updated": "2022-05-17T12:51:54.000Z", "title": "Semi-Parametric Contextual Bandits with Graph-Laplacian Regularization", "authors": [ "Young-Geun Choi", "Gi-Soo Kim", "Seunghoon Paik", "Myunghee Cho Paik" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "Non-stationarity is ubiquitous in human behavior and addressing it in the contextual bandits is challenging. Several works have addressed the problem by investigating semi-parametric contextual bandits and warned that ignoring non-stationarity could harm performances. Another prevalent human behavior is social interaction which has become available in a form of a social network or graph structure. As a result, graph-based contextual bandits have received much attention. In this paper, we propose \"SemiGraphTS,\" a novel contextual Thompson-sampling algorithm for a graph-based semi-parametric reward model. Our algorithm is the first to be proposed in this setting. We derive an upper bound of the cumulative regret that can be expressed as a multiple of a factor depending on the graph structure and the order for the semi-parametric model without a graph. We evaluate the proposed and existing algorithms via simulation and real data example.", "revisions": [ { "version": "v1", "updated": "2022-05-17T12:51:54.000Z" } ], "analyses": { "keywords": [ "graph-laplacian regularization", "graph structure", "prevalent human behavior", "real data example", "novel contextual thompson-sampling algorithm" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }