{ "id": "2308.03297", "version": "v1", "published": "2023-08-07T04:47:49.000Z", "updated": "2023-08-07T04:47:49.000Z", "title": "Approximate Constrained Discounted Dynamic Programming with Uniform Feasibility and Optimality", "authors": [ "Hyeong Soo Chang" ], "categories": [ "math.OC" ], "abstract": "We consider a dynamic programming (DP) approach to approximately solving an infinite-horizon constrained Markov decision process (CMDP) problem with a fixed initial-state for the expected total discounted-reward criterion with a uniform-feasibility constraint of the expected total discounted-cost in a deterministic, history-independent, and stationary policy set. We derive a DP-equation that recursively holds for a CMDP problem and its sub-CMDP problems, where each problem, induced from the parameters of the original CMDP problem, admits a uniformly-optimal feasible policy in its policy set associated with the inputs to the problem. A policy constructed from the DP-equation is shown to achieve the optimal values, defined for the CMDP problem the policy is a solution to, at all states. Based on the result, we discuss off-line and on-line computational algorithms, motivated from policy iteration for MDPs, whose output sequences have local convergences for the original CMDP problem.", "revisions": [ { "version": "v1", "updated": "2023-08-07T04:47:49.000Z" } ], "analyses": { "keywords": [ "approximate constrained discounted dynamic programming", "uniform feasibility", "original cmdp problem", "infinite-horizon constrained markov decision process", "optimality" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }