{ "id": "2212.06355", "version": "v1", "published": "2022-12-13T03:38:57.000Z", "updated": "2022-12-13T03:38:57.000Z", "title": "A Review of Off-Policy Evaluation in Reinforcement Learning", "authors": [ "Masatoshi Uehara", "Chengchun Shi", "Nathan Kallus" ], "comment": "Still under revision", "categories": [ "stat.ML", "cs.LG", "math.ST", "stat.ME", "stat.TH" ], "abstract": "Reinforcement learning (RL) is one of the most vibrant research frontiers in machine learning and has been recently applied to solve a number of challenging problems. In this paper, we primarily focus on off-policy evaluation (OPE), one of the most fundamental topics in RL. In recent years, a number of OPE methods have been developed in the statistics and computer science literature. We provide a discussion on the efficiency bound of OPE, some of the existing state-of-the-art OPE methods, their statistical properties and some other related research directions that are currently actively explored.", "revisions": [ { "version": "v1", "updated": "2022-12-13T03:38:57.000Z" } ], "analyses": { "keywords": [ "off-policy evaluation", "reinforcement learning", "existing state-of-the-art ope methods", "computer science literature", "vibrant research frontiers" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }