{
  "id": "2007.14917",
  "version": "v1",
  "published": "2020-07-29T15:43:19.000Z",
  "updated": "2020-07-29T15:43:19.000Z",
  "title": "Compressing Deep Neural Networks via Layer Fusion",
  "authors": [ "James O'Neill", "Greg Ver Steeg", "Aram Galstyan" ],
  "categories": [ "cs.LG", "stat.ML" ],
  "abstract": "This paper proposes \\textit{layer fusion} - a model compression technique that discovers which weights to combine and then fuses the weights of similar fully-connected, convolutional and attention layers. Layer fusion can significantly reduce the number of layers in the original network with little additional computational overhead, while maintaining competitive performance. In experiments on CIFAR-10, we find that various deep convolutional neural networks can remain within 2\\% accuracy points of the original networks up to a compression ratio of 3.33 when iteratively retrained with layer fusion. In experiments on the WikiText-2 language modelling dataset with pretrained transformer models, we achieve compression to 20\\% of the original network size while remaining within 5 perplexity points of the original network. We also find that other well-established compression techniques can achieve competitive performance compared to their original networks given a sufficient number of retraining steps. Generally, we observe a clear inflection point in performance as the amount of compression increases, suggesting a bound on the amount of compression that can be achieved before performance degrades exponentially.",
  "revisions": [ { "version": "v1", "updated": "2020-07-29T15:43:19.000Z" } ],
  "analyses": {
    "keywords": [
      "compressing deep neural networks",
      "layer fusion",
      "original network",
      "deep convolutional neural networks",
      "little additional computational overhead"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}