{ "id": "1812.11337", "version": "v1", "published": "2018-12-29T11:06:39.000Z", "updated": "2018-12-29T11:06:39.000Z", "title": "Quantized Guided Pruning for Efficient Hardware Implementations of Convolutional Neural Networks", "authors": [ "Ghouthi Boukli Hacene", "Vincent Gripon", "Matthieu Arzel", "Nicolas Farrugia", "Yoshua Bengio" ], "categories": [ "cs.LG", "cs.CV", "cs.NE" ], "abstract": "Convolutional Neural Networks (CNNs) are state-of-the-art in numerous computer vision tasks such as object classification and detection. However, the large amount of parameters they contain leads to a high computational complexity and strongly limits their usability in budget-constrained devices such as embedded devices. In this paper, we propose a combination of a new pruning technique and a quantization scheme that effectively reduce the complexity and memory usage of convolutional layers of CNNs, and replace the complex convolutional operation by a low-cost multiplexer. We perform experiments on the CIFAR10, CIFAR100 and SVHN and show that the proposed method achieves almost state-of-the-art accuracy, while drastically reducing the computational and memory footprints. We also propose an efficient hardware architecture to accelerate CNN operations. The proposed hardware architecture is a pipeline and accommodates multiple layers working at the same time to speed up the inference process.", "revisions": [ { "version": "v1", "updated": "2018-12-29T11:06:39.000Z" } ], "analyses": { "keywords": [ "convolutional neural networks", "efficient hardware implementations", "quantized guided pruning", "high computational complexity", "accommodates multiple layers" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }