{ "id": "1704.01155", "version": "v1", "published": "2017-04-04T18:56:53.000Z", "updated": "2017-04-04T18:56:53.000Z", "title": "Feature Squeezing: Detecting Adversarial Examples in Deep Neural Networks", "authors": [ "Weilin Xu", "David Evans", "Yanjun Qi" ], "categories": [ "cs.CV", "cs.CR", "cs.LG" ], "abstract": "Although deep neural networks (DNNs) have achieved great success in many computer vision tasks, recent studies have shown they are vulnerable to adversarial examples. Such examples, typically generated by adding small but purposeful distortions, can frequently fool DNN models. Previous studies to defend against adversarial examples mostly focused on refining the DNN models. They have either shown limited success or suffer from the expensive computation. We propose a new strategy, \\emph{feature squeezing}, that can be used to harden DNN models by detecting adversarial examples. Feature squeezing reduces the search space available to an adversary by coalescing samples that correspond to many different feature vectors in the original space into a single sample. By comparing a DNN model's prediction on the original input with that on the squeezed input, feature squeezing detects adversarial examples with high accuracy and few false positives. This paper explores two instances of feature squeezing: reducing the color bit depth of each pixel and smoothing using a spatial filter. These strategies are straightforward, inexpensive, and complementary to defensive methods that operate on the underlying model, such as adversarial training.", "revisions": [ { "version": "v1", "updated": "2017-04-04T18:56:53.000Z" } ], "analyses": { "keywords": [ "deep neural networks", "detecting adversarial examples", "feature squeezing detects adversarial examples", "harden dnn models", "computer vision tasks" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }