{
  "id": "2212.14449",
  "version": "v1",
  "published": "2022-12-29T20:25:18.000Z",
  "updated": "2022-12-29T20:25:18.000Z",
  "title": "Policy Mirror Ascent for Efficient and Independent Learning in Mean Field Games",
  "authors": [
    "Batuhan Yardim",
    "Semih Cayci",
    "Matthieu Geist",
    "Niao He"
  ],
  "comment": "43 pages",
  "categories": [
    "math.OC",
    "cs.GT",
    "cs.LG",
    "stat.ML"
  ],
  "abstract": "Mean-field games have been used as a theoretical tool to obtain an approximate Nash equilibrium for symmetric and anonymous $N$-player games in literature. However, limiting applicability, existing theoretical results assume variations of a \"population generative model\", which allows arbitrary modifications of the population distribution by the learning algorithm. Instead, we show that $N$ agents running policy mirror ascent converge to the Nash equilibrium of the regularized game within $\\tilde{\\mathcal{O}}(\\varepsilon^{-2})$ samples from a single sample trajectory without a population generative model, up to a standard $\\mathcal{O}(\\frac{1}{\\sqrt{N}})$ error due to the mean field. Taking a divergent approach from literature, instead of working with the best-response map we first show that a policy mirror ascent map can be used to construct a contractive operator having the Nash equilibrium as its fixed point. Next, we prove that conditional TD-learning in $N$-agent games can learn value functions within $\\tilde{\\mathcal{O}}(\\varepsilon^{-2})$ time steps. These results allow proving sample complexity guarantees in the oracle-free setting by only relying on a sample path from the $N$ agent simulator. Furthermore, we demonstrate that our methodology allows for independent learning by $N$ agents with finite sample guarantees.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2022-12-29T20:25:18.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "mean field games",
      "independent learning",
      "policy mirror ascent converge",
      "running policy mirror ascent",
      "theoretical results assume variations"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 43,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}