{ "id": "1511.07948", "version": "v1", "published": "2015-11-25T04:41:20.000Z", "updated": "2015-11-25T04:41:20.000Z", "title": "Learning Halfspaces and Neural Networks with Random Initialization", "authors": [ "Yuchen Zhang", "Jason D. Lee", "Martin J. Wainwright", "Michael I. Jordan" ], "comment": "31 pages", "categories": [ "cs.LG" ], "abstract": "We study non-convex empirical risk minimization for learning halfspaces and neural networks. For loss functions that are $L$-Lipschitz continuous, we present algorithms to learn halfspaces and multi-layer neural networks that achieve arbitrarily small excess risk $\\epsilon>0$. The time complexity is polynomial in the input dimension $d$ and the sample size $n$, but exponential in the quantity $(L/\\epsilon^2)\\log(L/\\epsilon)$. These algorithms run multiple rounds of random initialization followed by arbitrary optimization steps. We further show that if the data is separable by some neural network with constant margin $\\gamma>0$, then there is a polynomial-time algorithm for learning a neural network that separates the training data with margin $\\Omega(\\gamma)$. As a consequence, the algorithm achieves arbitrary generalization error $\\epsilon>0$ with ${\\rm poly}(d,1/\\epsilon)$ sample and time complexity. We establish the same learnability result when the labels are randomly flipped with probability $\\eta<1/2$.", "revisions": [ { "version": "v1", "updated": "2015-11-25T04:41:20.000Z" } ], "analyses": { "keywords": [ "neural network", "random initialization", "learning halfspaces", "arbitrarily small excess risk", "non-convex empirical risk minimization" ], "note": { "typesetting": "TeX", "pages": 31, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2015arXiv151107948Z" } } }