{ "id": "1912.10697", "version": "v1", "published": "2019-12-23T09:26:13.000Z", "updated": "2019-12-23T09:26:13.000Z", "title": "Hamilton-Jacobi-Bellman Equations for Q-Learning in Continuous Time", "authors": [ "Jeongho Kim", "Insoon Yang" ], "categories": [ "math.OC", "cs.LG", "cs.SY", "eess.SY" ], "abstract": "In this paper, we introduce Hamilton-Jacobi-Bellman (HJB) equations for Q-functions in continuous-time optimal control problems with Lipschitz continuous controls. The standard Q-function used in reinforcement learning is shown to be the unique viscosity solution of the HJB equation. A necessary and sufficient condition for optimality is provided using the viscosity solution framework. By using the HJB equation, we develop a Q-learning method for continuous-time dynamical systems. A DQN-like algorithm is also proposed for high-dimensional state and control spaces. The performance of the proposed Q-learning algorithm is demonstrated using 1-, 10- and 20-dimensional dynamical systems.", "revisions": [ { "version": "v1", "updated": "2019-12-23T09:26:13.000Z" } ], "analyses": { "keywords": [ "hamilton-jacobi-bellman equations", "q-learning", "continuous-time optimal control problems", "hjb equation", "viscosity solution framework" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }