{ "id": "2004.12289", "version": "v1", "published": "2020-04-26T05:15:36.000Z", "updated": "2020-04-26T05:15:36.000Z", "title": "Deep k-NN for Noisy Labels", "authors": [ "Dara Bahri", "Heinrich Jiang", "Maya Gupta" ], "comment": "Full paper (including supplemental) can be found at https://github.com/dbahri/deepknn", "categories": [ "cs.LG", "cs.AI", "stat.ML" ], "abstract": "Modern machine learning models are often trained on examples with noisy labels that hurt performance and are hard to identify. In this paper, we provide an empirical study showing that a simple $k$-nearest neighbor-based filtering approach on the logit layer of a preliminary model can remove mislabeled training data and produce more accurate models than many recently proposed methods. We also provide new statistical guarantees into its efficacy.", "revisions": [ { "version": "v1", "updated": "2020-04-26T05:15:36.000Z" } ], "analyses": { "keywords": [ "noisy labels", "deep k-nn", "modern machine learning models", "remove mislabeled training data", "nearest neighbor-based filtering approach" ], "tags": [ "github project" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }