{ "id": "2202.06450", "version": "v1", "published": "2022-02-14T01:31:46.000Z", "updated": "2022-02-14T01:31:46.000Z", "title": "Towards Deployment-Efficient Reinforcement Learning: Lower Bound and Optimality", "authors": [ "Jiawei Huang", "Jinglin Chen", "Li Zhao", "Tao Qin", "Nan Jiang", "Tie-Yan Liu" ], "comment": "49 Pages; ICLR 2022", "categories": [ "cs.LG", "cs.AI", "stat.ML" ], "abstract": "Deployment efficiency is an important criterion for many real-world applications of reinforcement learning (RL). Despite the community's increasing interest, there lacks a formal theoretical formulation for the problem. In this paper, we propose such a formulation for deployment-efficient RL (DE-RL) from an \"optimization with constraints\" perspective: we are interested in exploring an MDP and obtaining a near-optimal policy within minimal \\emph{deployment complexity}, whereas in each deployment the policy can sample a large batch of data. Using finite-horizon linear MDPs as a concrete structural model, we reveal the fundamental limit in achieving deployment efficiency by establishing information-theoretic lower bounds, and provide algorithms that achieve the optimal deployment efficiency. Moreover, our formulation for DE-RL is flexible and can serve as a building block for other practically relevant settings; we give \"Safe DE-RL\" and \"Sample-Efficient DE-RL\" as two examples, which may be worth future investigation.", "revisions": [ { "version": "v1", "updated": "2022-02-14T01:31:46.000Z" } ], "analyses": { "keywords": [ "deployment-efficient reinforcement learning", "optimality", "optimal deployment efficiency", "formulation", "finite-horizon linear mdps" ], "note": { "typesetting": "TeX", "pages": 49, "language": "en", "license": "arXiv", "status": "editable" } } }