{
  "id": "1812.11337",
  "version": "v1",
  "published": "2018-12-29T11:06:39.000Z",
  "updated": "2018-12-29T11:06:39.000Z",
  "title": "Quantized Guided Pruning for Efficient Hardware Implementations of Convolutional Neural Networks",
  "authors": [
    "Ghouthi Boukli Hacene",
    "Vincent Gripon",
    "Matthieu Arzel",
    "Nicolas Farrugia",
    "Yoshua Bengio"
  ],
  "categories": [
    "cs.LG",
    "cs.CV",
    "cs.NE"
  ],
  "abstract": "Convolutional Neural Networks (CNNs) are state-of-the-art in numerous computer vision tasks such as object classification and detection. However, the large amount of parameters they contain leads to a high computational complexity and strongly limits their usability in budget-constrained devices such as embedded devices. In this paper, we propose a combination of a new pruning technique and a quantization scheme that effectively reduce the complexity and memory usage of convolutional layers of CNNs, and replace the complex convolutional operation by a low-cost multiplexer. We perform experiments on the CIFAR10, CIFAR100 and SVHN and show that the proposed method achieves almost state-of-the-art accuracy, while drastically reducing the computational and memory footprints. We also propose an efficient hardware architecture to accelerate CNN operations. The proposed hardware architecture is a pipeline and accommodates multiple layers working at the same time to speed up the inference process.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2018-12-29T11:06:39.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "convolutional neural networks",
      "efficient hardware implementations",
      "quantized guided pruning",
      "high computational complexity",
      "accommodates multiple layers"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}