{ "id": "2108.03952", "version": "v1", "published": "2021-08-09T11:33:46.000Z", "updated": "2021-08-09T11:33:46.000Z", "title": "Safe Deep Reinforcement Learning for Multi-Agent Systems with Continuous Action Spaces", "authors": [ "Ziyad Sheebaelhamd", "Konstantinos Zisis", "Athina Nisioti", "Dimitris Gkouletsos", "Dario Pavllo", "Jonas Kohler" ], "comment": "ICML 2021 Workshop on Reinforcement Learning for Real Life", "categories": [ "cs.LG", "cs.RO" ], "abstract": "Multi-agent control problems constitute an interesting area of application for deep reinforcement learning models with continuous action spaces. Such real-world applications, however, typically come with critical safety constraints that must not be violated. In order to ensure safety, we enhance the well-known multi-agent deep deterministic policy gradient (MADDPG) framework by adding a safety layer to the deep policy network, which automatically corrects invalid actions. In particular, we extend the idea of linearizing the single-step transition dynamics, as was done for single-agent systems in Safe DDPG (Dalal et al., 2018), to multi-agent settings. We additionally propose to circumvent infeasibility problems in the action correction step using soft constraints (Kerrigan & Maciejowski, 2000). Results from the theory of exact penalty functions can be used to guarantee constraint satisfaction of the soft constraints under mild assumptions. We empirically find that the soft formulation achieves a dramatic decrease in constraint violations, making safety available even during the learning procedure.", "revisions": [ { "version": "v1", "updated": "2021-08-09T11:33:46.000Z" } ], "analyses": { "keywords": [ "safe deep reinforcement learning", "continuous action spaces", "multi-agent systems", "multi-agent deep deterministic policy", "deep deterministic policy gradient" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }