{
  "id": "1710.09412",
  "version": "v1",
  "published": "2017-10-25T18:30:49.000Z",
  "updated": "2017-10-25T18:30:49.000Z",
  "title": "mixup: Beyond Empirical Risk Minimization",
  "authors": [
    "Hongyi Zhang",
    "Moustapha Cisse",
    "Yann N. Dauphin",
    "David Lopez-Paz"
  ],
  "categories": [
    "cs.LG",
    "stat.ML"
  ],
  "abstract": "Large deep neural networks are powerful, but exhibit undesirable behaviors such as memorization and sensitivity to adversarial examples. In this work, we propose mixup, a simple learning principle to alleviate these issues. In essence, mixup trains a neural network on convex combinations of pairs of examples and their labels. By doing so, mixup regularizes the neural network to favor simple linear behavior in-between training examples. Our experiments on the ImageNet-2012, CIFAR-10, CIFAR-100, Google commands and UCI datasets show that mixup improves the generalization of state-of-the-art neural network architectures. We also find that mixup reduces the memorization of corrupt labels, increases the robustness to adversarial examples, and stabilizes the training of generative adversarial networks.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2017-10-25T18:30:49.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "empirical risk minimization",
      "favor simple linear behavior in-between",
      "adversarial examples",
      "state-of-the-art neural network architectures",
      "linear behavior in-between training examples"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}