{ "id": "2109.08134", "version": "v1", "published": "2021-09-16T17:43:09.000Z", "updated": "2021-09-16T17:43:09.000Z", "title": "Comparison and Unification of Three Regularization Methods in Batch Reinforcement Learning", "authors": [ "Sarah Rathnam", "Susan A. Murphy", "Finale Doshi-Velez" ], "comment": "ICML Workshop on Reinforcement Learning Theory 2021", "categories": [ "cs.LG", "stat.ML" ], "abstract": "In batch reinforcement learning, there can be poorly explored state-action pairs resulting in poorly learned, inaccurate models and poorly performing associated policies. Various regularization methods can mitigate the problem of learning overly-complex models in Markov decision processes (MDPs), however they operate in technically and intuitively distinct ways and lack a common form in which to compare them. This paper unifies three regularization methods in a common framework -- a weighted average transition matrix. Considering regularization methods in this common form illuminates how the MDP structure and the state-action pair distribution of the batch data set influence the relative performance of regularization methods. We confirm intuitions generated from the common framework by empirical evaluation across a range of MDPs and data collection policies.", "revisions": [ { "version": "v1", "updated": "2021-09-16T17:43:09.000Z" } ], "analyses": { "keywords": [ "regularization methods", "batch reinforcement learning", "batch data set influence", "unification", "common framework" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }