{ "id": "1802.07668", "version": "v1", "published": "2018-02-21T17:07:06.000Z", "updated": "2018-02-21T17:07:06.000Z", "title": "A model for system uncertainty in reinforcement learning", "authors": [ "Ryan Murray", "Michele Palladino" ], "categories": [ "math.OC" ], "abstract": "This work provides a rigorous framework for studying continuous time control problems in uncertain environments. The framework considered models uncertainty in state dynamics as a measure on the space of functions. This measure is considered to change over time as agents learn their environment. This model can be seem as a variant of either Bayesian reinforcement learning or adaptive control. We study necessary conditions for locally optimal trajectories within this model, in particular deriving an appropriate dynamic programming principle and Hamilton-Jacobi equations. This model provides one possible framework for studying the tradeoff between exploration and exploitation in reinforcement learning.", "revisions": [ { "version": "v1", "updated": "2018-02-21T17:07:06.000Z" } ], "analyses": { "keywords": [ "reinforcement learning", "system uncertainty", "studying continuous time control problems", "appropriate dynamic programming principle", "study necessary conditions" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }