{ "id": "2212.06355", "version": "v1", "published": "2022-12-13T03:38:57.000Z", "updated": "2022-12-13T03:38:57.000Z", "title": "A Review of Off-Policy Evaluation in Reinforcement Learning", "authors": [ "Masatoshi Uehara", "Chengchun Shi", "Nathan Kallus" ], "comment": "Still under revision", "categories": [ "stat.ML", "cs.LG", "math.ST", "stat.ME", "stat.TH" ], "abstract": "Reinforcement learning (RL) is one of the most vibrant research frontiers in machine learning and has been recently applied to solve a number of challenging problems. In this paper, we primarily focus on off-policy evaluation (OPE), one of the most fundamental topics in RL. In recent years, a number of OPE methods have been developed in the statistics and computer science literature. We provide a discussion on the efficiency bound of OPE, some of the existing state-of-the-art OPE methods, their statistical properties and some other related research directions that are currently actively explored.", "revisions": [ { "version": "v1", "updated": "2022-12-13T03:38:57.000Z" } ], "analyses": { "keywords": [ "off-policy evaluation", "reinforcement learning", "existing state-of-the-art ope methods", "computer science literature", "vibrant research frontiers" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }