{ "id": "1905.13742", "version": "v1", "published": "2019-05-31T17:52:26.000Z", "updated": "2019-05-31T17:52:26.000Z", "title": "High Dimensional Classification via Empirical Risk Minimization: Improvements and Optimality", "authors": [ "Xiaoyi Mai", "Zhenyu Liao" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "In this article, we investigate a family of classification algorithms defined by the principle of empirical risk minimization, in the high dimensional regime where the feature dimension $p$ and data number $n$ are both large and comparable. Based on recent advances in high dimensional statistics and random matrix theory, we provide under a mixture data model a unified stochastic characterization of classifiers learned with different loss functions. Our results are instrumental to an in-depth understanding as well as practical improvements on this fundamental classification approach. As the main outcome, we demonstrate the existence of a universally optimal loss function which yields the best high dimensional performance at any given $n/p$ ratio.", "revisions": [ { "version": "v1", "updated": "2019-05-31T17:52:26.000Z" } ], "analyses": { "keywords": [ "empirical risk minimization", "high dimensional classification", "improvements", "optimality", "best high dimensional performance" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }