{ "id": "2109.08134", "version": "v1", "published": "2021-09-16T17:43:09.000Z", "updated": "2021-09-16T17:43:09.000Z", "title": "Comparison and Unification of Three Regularization Methods in Batch Reinforcement Learning", "authors": [ "Sarah Rathnam", "Susan A. Murphy", "Finale Doshi-Velez" ], "comment": "ICML Workshop on Reinforcement Learning Theory 2021", "categories": [ "cs.LG", "stat.ML" ], "abstract": "In batch reinforcement learning, there can be poorly explored state-action pairs resulting in poorly learned, inaccurate models and poorly performing associated policies. Various regularization methods can mitigate the problem of learning overly-complex models in Markov decision processes (MDPs), however they operate in technically and intuitively distinct ways and lack a common form in which to compare them. This paper unifies three regularization methods in a common framework -- a weighted average transition matrix. Considering regularization methods in this common form illuminates how the MDP structure and the state-action pair distribution of the batch data set influence the relative performance of regularization methods. We confirm intuitions generated from the common framework by empirical evaluation across a range of MDPs and data collection policies.", "revisions": [ { "version": "v1", "updated": "2021-09-16T17:43:09.000Z" } ], "analyses": { "keywords": [ "regularization methods", "batch reinforcement learning", "batch data set influence", "unification", "common framework" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }