{ "id": "2006.14606", "version": "v1", "published": "2020-06-25T17:48:14.000Z", "updated": "2020-06-25T17:48:14.000Z", "title": "Global Convergence and Induced Kernels of Gradient-Based Meta-Learning with Neural Nets", "authors": [ "Haoxiang Wang", "Ruoyu Sun", "Bo Li" ], "comment": "Under review", "categories": [ "cs.LG", "stat.ML" ], "abstract": "Gradient-based meta-learning (GBML) with deep neural nets (DNNs) has become a popular approach for few-shot learning. However, due to the non-convexity of DNNs and the complex bi-level optimization in GBML, the theoretical properties of GBML with DNNs remain largely unknown. In this paper, we first develop a novel theoretical analysis to answer the following questions: Does GBML with DNNs have global convergence guarantees? We provide a positive answer to this question by proving that GBML with over-parameterized DNNs is guaranteed to converge to global optima at a linear rate. The second question we aim to address is: How does GBML achieve fast adaption to new tasks with experience on past similar tasks? To answer it, we prove that GBML is equivalent to a functional gradient descent operation that explicitly propagates experience from the past tasks to new ones. Finally, inspired by our theoretical analysis, we develop a new kernel-based meta-learning approach. We show that the proposed approach outperforms GBML with standard DNNs on the Omniglot dataset when the number of past tasks for meta-training is small. The code is available at https://github.com/ AI-secure/Meta-Neural-Kernel .", "revisions": [ { "version": "v1", "updated": "2020-06-25T17:48:14.000Z" } ], "analyses": { "keywords": [ "neural nets", "induced kernels", "gradient-based meta-learning", "gbml achieve fast adaption", "past tasks" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }