{ "id": "2002.06723", "version": "v1", "published": "2020-02-17T00:10:58.000Z", "updated": "2020-02-17T00:10:58.000Z", "title": "Reward Design for Driver Repositioning Using Multi-Agent Reinforcement Learning", "authors": [ "Zhenyu Shou", "Xuan Di" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "A large portion of passenger requests reportedly goes unserved, partly due to the cruising behavior of vacant for-hire drivers during the passenger-seeking process. This paper models the multi-driver repositioning task through a mean field multi-agent reinforcement learning (MARL) approach. Because directly applying MARL to the multi-driver system under a given reward mechanism will very likely yield a suboptimal equilibrium due to the selfishness of drivers, this study proposes a reward design scheme under which a more desirable equilibrium can be reached. To efficiently solve the resulting bilevel optimization problem, with the reward design at the upper level and a multi-agent system (MAS) at the lower level, a Bayesian optimization algorithm is adopted to speed up the learning process. We then test the proposed model on a synthetic dataset. The results show that, compared with no reward design, a simple platform service charge improves the weighted average of the order response rate and the overall service charge by 4%.", "revisions": [ { "version": "v1", "updated": "2020-02-17T00:10:58.000Z" } ], "analyses": { "keywords": [ "reward design", "driver repositioning", "simple platform service charge", "bayesian optimization algorithm", "bilevel optimization problem" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }