{ "id": "2406.03734", "version": "v1", "published": "2024-06-06T04:14:06.000Z", "updated": "2024-06-06T04:14:06.000Z", "title": "Policy Gradient Methods for the Cost-Constrained LQR: Strong Duality and Global Convergence", "authors": [ "Feiran Zhao", "Keyou You" ], "categories": [ "math.OC", "cs.SY", "eess.SY" ], "abstract": "In safety-critical applications, reinforcement learning (RL) needs to consider safety constraints. However, theoretical understandings of constrained RL for continuous control are largely absent. As a case study, this paper presents a cost-constrained LQR formulation, where a number of LQR costs with user-defined penalty matrices are subject to constraints. To solve it, we propose a policy gradient primal-dual method to find an optimal state feedback gain. Despite the non-convexity of the cost-constrained LQR problem, we provide a constructive proof for strong duality and a geometric interpretation of an optimal multiplier set. By proving that the concave dual function is Lipschitz smooth, we further provide convergence guarantees for the PG primal-dual method. Finally, we perform simulations to validate our theoretical findings.", "revisions": [ { "version": "v1", "updated": "2024-06-06T04:14:06.000Z" } ], "analyses": { "keywords": [ "policy gradient methods", "cost-constrained lqr", "strong duality", "global convergence", "policy gradient primal-dual method" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }