{ "id": "1607.02720", "version": "v1", "published": "2016-07-10T09:23:34.000Z", "updated": "2016-07-10T09:23:34.000Z", "title": "Memory Efficient Nonuniform Quantization for Deep Convolutional Neural Network", "authors": [ "Fangxuan Sun", "Jun Lin" ], "categories": [ "cs.CV" ], "abstract": "Convolutional neural network (CNN) is one of the most famous algorithms for deep learning. It has been applied in various applications due to its remarkable performance. The real-time hardware implement of CNN is highly demanded due to its excellent performance in computer vision. However, the cost of memory of a deep CNN is very huge which increases the area of hardware implementation. In this paper, we apply several methods in the quantization of CNN and use about 5 bits for convolutional layers. The accuracy lost is less than $2\\%$ without fine tuning. Our experiment is depending on the VGG-16 net and Alex net. In VGG-16 net, the total memory needed after uniform quantization is 16.85 MB per image and the total memory needed after our quantization is only about 8.42 MB. Our quantization method has saved $50.0\\%$ of the memory needed in VGG-16 and Alex net compared with the state-of-art quantization method.", "revisions": [ { "version": "v1", "updated": "2016-07-10T09:23:34.000Z" } ], "analyses": { "keywords": [ "deep convolutional neural network", "memory efficient nonuniform quantization", "quantization method", "total memory", "alex net" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }