{ "id": "2102.12959", "version": "v1", "published": "2021-02-24T12:35:43.000Z", "updated": "2021-02-24T12:35:43.000Z", "title": "A statistical theory of out-of-distribution detection", "authors": [ "Xi Wang", "Laurence Aitchison" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "We introduce a principled approach to detecting out-of-distribution (OOD) data by exploiting a connection to data curation. In data curation, we exclude ambiguous or difficult-to-classify input points from the dataset, and these excluded points are by definition OOD. We can therefore obtain the likelihood for OOD points by using a principled generative model of data-curation initially developed to explain the cold-posterior effect in Bayesian neural networks (Aitchison 2020). This model gives higher OOD probabilities when predictive uncertainty is higher and can be trained using maximum-likelihood jointly over the in-distribution and OOD points. This approach gives superior performance to past methods that did not provide a probability for OOD points, and therefore could not be trained using maximum-likelihood.", "revisions": [ { "version": "v1", "updated": "2021-02-24T12:35:43.000Z" } ], "analyses": { "keywords": [ "out-of-distribution detection", "statistical theory", "ood points", "data curation", "bayesian neural networks" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }