{ "id": "1912.10697", "version": "v1", "published": "2019-12-23T09:26:13.000Z", "updated": "2019-12-23T09:26:13.000Z", "title": "Hamilton-Jacobi-Bellman Equations for Q-Learning in Continuous Time", "authors": [ "Jeongho Kim", "Insoon Yang" ], "categories": [ "math.OC", "cs.LG", "cs.SY", "eess.SY" ], "abstract": "In this paper, we introduce Hamilton-Jacobi-Bellman (HJB) equations for Q-functions in continuous-time optimal control problems with Lipschitz continuous controls. The standard Q-function used in reinforcement learning is shown to be the unique viscosity solution of the HJB equation. A necessary and sufficient condition for optimality is provided using the viscosity solution framework. By using the HJB equation, we develop a Q-learning method for continuous-time dynamical systems. A DQN-like algorithm is also proposed for high-dimensional state and control spaces. The performance of the proposed Q-learning algorithm is demonstrated using 1-, 10- and 20-dimensional dynamical systems.", "revisions": [ { "version": "v1", "updated": "2019-12-23T09:26:13.000Z" } ], "analyses": { "keywords": [ "hamilton-jacobi-bellman equations", "q-learning", "continuous-time optimal control problems", "hjb equation", "viscosity solution framework" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }