{ "id": "1511.07948", "version": "v1", "published": "2015-11-25T04:41:20.000Z", "updated": "2015-11-25T04:41:20.000Z", "title": "Learning Halfspaces and Neural Networks with Random Initialization", "authors": [ "Yuchen Zhang", "Jason D. Lee", "Martin J. Wainwright", "Michael I. Jordan" ], "comment": "31 pages", "categories": [ "cs.LG" ], "abstract": "We study non-convex empirical risk minimization for learning halfspaces and neural networks. For loss functions that are $L$-Lipschitz continuous, we present algorithms to learn halfspaces and multi-layer neural networks that achieve arbitrarily small excess risk $\\epsilon>0$. The time complexity is polynomial in the input dimension $d$ and the sample size $n$, but exponential in the quantity $(L/\\epsilon^2)\\log(L/\\epsilon)$. These algorithms run multiple rounds of random initialization followed by arbitrary optimization steps. We further show that if the data is separable by some neural network with constant margin $\\gamma>0$, then there is a polynomial-time algorithm for learning a neural network that separates the training data with margin $\\Omega(\\gamma)$. As a consequence, the algorithm achieves arbitrary generalization error $\\epsilon>0$ with ${\\rm poly}(d,1/\\epsilon)$ sample and time complexity. We establish the same learnability result when the labels are randomly flipped with probability $\\eta<1/2$.", "revisions": [ { "version": "v1", "updated": "2015-11-25T04:41:20.000Z" } ], "analyses": { "keywords": [ "neural network", "random initialization", "learning halfspaces", "arbitrarily small excess risk", "non-convex empirical risk minimization" ], "note": { "typesetting": "TeX", "pages": 31, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2015arXiv151107948Z" } } }