{ "id": "1905.13742", "version": "v1", "published": "2019-05-31T17:52:26.000Z", "updated": "2019-05-31T17:52:26.000Z", "title": "High Dimensional Classification via Empirical Risk Minimization: Improvements and Optimality", "authors": [ "Xiaoyi Mai", "Zhenyu Liao" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "In this article, we investigate a family of classification algorithms defined by the principle of empirical risk minimization, in the high dimensional regime where the feature dimension $p$ and data number $n$ are both large and comparable. Based on recent advances in high dimensional statistics and random matrix theory, we provide under a mixture data model a unified stochastic characterization of classifiers learned with different loss functions. Our results are instrumental to an in-depth understanding as well as practical improvements on this fundamental classification approach. As the main outcome, we demonstrate the existence of a universally optimal loss function which yields the best high dimensional performance at any given $n/p$ ratio.", "revisions": [ { "version": "v1", "updated": "2019-05-31T17:52:26.000Z" } ], "analyses": { "keywords": [ "empirical risk minimization", "high dimensional classification", "improvements", "optimality", "best high dimensional performance" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }