{ "id": "1607.06534", "version": "v1", "published": "2016-07-22T01:37:30.000Z", "updated": "2016-07-22T01:37:30.000Z", "title": "The Landscape of Empirical Risk for Non-convex Losses", "authors": [ "Song Mei", "Yu Bai", "Andrea Montanari" ], "comment": "Submitted to NIPS 2016", "categories": [ "stat.ML" ], "abstract": "We revisit the problem of learning a noisy linear classifier by minimizing the empirical risk associated with the square loss. While the empirical risk is non-convex, we prove that its structure is remarkably simple. Namely, when the sample size is larger than $C \\, d\\log d$ (with $d$ the dimension and $C$ a constant), the following hold with high probability: $(a)$ the empirical risk has a unique local minimum, which is also the global minimum; $(b)$ gradient descent converges exponentially fast to the global minimizer from any initialization; $(c)$ the global minimizer approaches the true parameter at a nearly optimal rate. The core of our argument is a uniform convergence result for the gradients and Hessians of the empirical risk.", "revisions": [ { "version": "v1", "updated": "2016-07-22T01:37:30.000Z" } ], "analyses": { "subjects": [ "68Q32" ], "keywords": [ "empirical risk", "non-convex losses", "gradient descent converges exponentially fast", "global minimizer approaches", "noisy linear classifier" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }