{
  "id": "2203.13446",
  "version": "v1",
  "published": "2022-03-25T04:33:15.000Z",
  "updated": "2022-03-25T04:33:15.000Z",
  "title": "Randomized Policy Optimization for Optimal Stopping",
  "authors": [
    "Xinyi Guan",
    "Velibor V. Mišić"
  ],
  "comment": "65 pages, 1 figure",
  "categories": [
    "math.OC",
    "cs.LG"
  ],
  "abstract": "Optimal stopping is the problem of determining when to stop a stochastic system in order to maximize reward, which is of practical importance in domains such as finance, operations management and healthcare. Existing methods for high-dimensional optimal stopping that are popular in practice produce deterministic linear policies -- policies that deterministically stop based on the sign of a weighted sum of basis functions -- but are not guaranteed to find the optimal policy within this policy class given a fixed basis function architecture. In this paper, we propose a new methodology for optimal stopping based on randomized linear policies, which choose to stop with a probability that is determined by a weighted sum of basis functions. We motivate these policies by establishing that under mild conditions, given a fixed basis function architecture, optimizing over randomized linear policies is equivalent to optimizing over deterministic linear policies. We formulate the problem of learning randomized linear policies from data as a smooth non-convex sample average approximation (SAA) problem. We theoretically prove the almost sure convergence of our randomized policy SAA problem and establish bounds on the out-of-sample performance of randomized policies obtained from our SAA problem based on Rademacher complexity. We also show that the SAA problem is in general NP-Hard, and consequently develop a practical heuristic for solving our randomized policy problem. Through numerical experiments on a benchmark family of option pricing problem instances, we show that our approach can substantially outperform state-of-the-art methods.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2022-03-25T04:33:15.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "optimal stopping",
      "randomized policy optimization",
      "randomized linear policies",
      "produce deterministic linear policies",
      "fixed basis function architecture"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 65,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}