{ "id": "1710.09412", "version": "v1", "published": "2017-10-25T18:30:49.000Z", "updated": "2017-10-25T18:30:49.000Z", "title": "mixup: Beyond Empirical Risk Minimization", "authors": [ "Hongyi Zhang", "Moustapha Cisse", "Yann N. Dauphin", "David Lopez-Paz" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "Large deep neural networks are powerful, but exhibit undesirable behaviors such as memorization and sensitivity to adversarial examples. In this work, we propose mixup, a simple learning principle to alleviate these issues. In essence, mixup trains a neural network on convex combinations of pairs of examples and their labels. By doing so, mixup regularizes the neural network to favor simple linear behavior in-between training examples. Our experiments on the ImageNet-2012, CIFAR-10, CIFAR-100, Google commands and UCI datasets show that mixup improves the generalization of state-of-the-art neural network architectures. We also find that mixup reduces the memorization of corrupt labels, increases the robustness to adversarial examples, and stabilizes the training of generative adversarial networks.", "revisions": [ { "version": "v1", "updated": "2017-10-25T18:30:49.000Z" } ], "analyses": { "keywords": [ "empirical risk minimization", "favor simple linear behavior in-between", "adversarial examples", "state-of-the-art neural network architectures", "linear behavior in-between training examples" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }