{ "id": "2406.03734", "version": "v1", "published": "2024-06-06T04:14:06.000Z", "updated": "2024-06-06T04:14:06.000Z", "title": "Policy Gradient Methods for the Cost-Constrained LQR: Strong Duality and Global Convergence", "authors": [ "Feiran Zhao", "Keyou You" ], "categories": [ "math.OC", "cs.SY", "eess.SY" ], "abstract": "In safety-critical applications, reinforcement learning (RL) needs to consider safety constraints. However, theoretical understandings of constrained RL for continuous control are largely absent. As a case study, this paper presents a cost-constrained LQR formulation, where a number of LQR costs with user-defined penalty matrices are subject to constraints. To solve it, we propose a policy gradient primal-dual method to find an optimal state feedback gain. Despite the non-convexity of the cost-constrained LQR problem, we provide a constructive proof for strong duality and a geometric interpretation of an optimal multiplier set. By proving that the concave dual function is Lipschitz smooth, we further provide convergence guarantees for the PG primal-dual method. Finally, we perform simulations to validate our theoretical findings.", "revisions": [ { "version": "v1", "updated": "2024-06-06T04:14:06.000Z" } ], "analyses": { "keywords": [ "policy gradient methods", "cost-constrained lqr", "strong duality", "global convergence", "policy gradient primal-dual method" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }