{
  "id": "2412.16561",
  "version": "v1",
  "published": "2024-12-21T10:07:40.000Z",
  "updated": "2024-12-21T10:07:40.000Z",
  "title": "A learning-based approach to stochastic optimal control under reach-avoid constraint",
  "authors": [
    "Tingting Ni",
    "Maryam Kamgarpour"
  ],
  "categories": [
    "math.OC",
    "cs.LG"
  ],
  "abstract": "We develop a model-free approach to optimally control stochastic, Markovian systems subject to a reach-avoid constraint. Specifically, the state trajectory must remain within a safe set while reaching a target set within a finite time horizon. Due to the time-dependent nature of these constraints, we show that, in general, the optimal policy for this constrained stochastic control problem is non-Markovian, which increases the computational complexity. To address this challenge, we apply the state-augmentation technique from arXiv:2402.19360, reformulating the problem as a constrained Markov decision process (CMDP) on an extended state space. This transformation allows us to search for a Markovian policy, avoiding the complexity of non-Markovian policies. To learn the optimal policy without a system model, and using only trajectory data, we develop a log-barrier policy gradient approach. We prove that under suitable assumptions, the policy parameters converge to the optimal parameters, while ensuring that the system trajectories satisfy the stochastic reach-avoid constraint with high probability.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2024-12-21T10:07:40.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "stochastic optimal control",
      "learning-based approach",
      "optimal policy",
      "log-barrier policy gradient approach",
      "constrained stochastic control problem"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}