{ "id": "1705.10412", "version": "v1", "published": "2017-05-29T23:03:01.000Z", "updated": "2017-05-29T23:03:01.000Z", "title": "Gradient Descent Can Take Exponential Time to Escape Saddle Points", "authors": [ "Simon S. Du", "Chi Jin", "Jason D. Lee", "Michael I. Jordan", "Barnabas Poczos", "Aarti Singh" ], "categories": [ "math.OC", "cs.LG", "stat.ML" ], "abstract": "Although gradient descent (GD) almost always escapes saddle points asymptotically [Lee et al., 2016], this paper shows that even with fairly natural random initialization schemes and non-pathological functions, GD can be significantly slowed down by saddle points, taking exponential time to escape. On the other hand, gradient descent with perturbations [Ge et al., 2015, Jin et al., 2017] is not slowed down by saddle points - it can find an approximate local minimizer in polynomial time. This result implies that GD is inherently slower than perturbed GD, and justifies the importance of adding perturbations for efficient non-convex optimization. While our focus is theoretical, we also present experiments that illustrate our theoretical findings.", "revisions": [ { "version": "v1", "updated": "2017-05-29T23:03:01.000Z" } ], "analyses": { "keywords": [ "escape saddle points", "gradient descent", "exponential time", "fairly natural random initialization schemes", "escapes saddle points" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }