{ "id": "1607.02720", "version": "v1", "published": "2016-07-10T09:23:34.000Z", "updated": "2016-07-10T09:23:34.000Z", "title": "Memory Efficient Nonuniform Quantization for Deep Convolutional Neural Network", "authors": [ "Fangxuan Sun", "Jun Lin" ], "categories": [ "cs.CV" ], "abstract": "Convolutional neural network (CNN) is one of the most famous algorithms for deep learning. It has been applied in various applications due to its remarkable performance. The real-time hardware implement of CNN is highly demanded due to its excellent performance in computer vision. However, the cost of memory of a deep CNN is very huge which increases the area of hardware implementation. In this paper, we apply several methods in the quantization of CNN and use about 5 bits for convolutional layers. The accuracy lost is less than $2\\%$ without fine tuning. Our experiment is depending on the VGG-16 net and Alex net. In VGG-16 net, the total memory needed after uniform quantization is 16.85 MB per image and the total memory needed after our quantization is only about 8.42 MB. Our quantization method has saved $50.0\\%$ of the memory needed in VGG-16 and Alex net compared with the state-of-art quantization method.", "revisions": [ { "version": "v1", "updated": "2016-07-10T09:23:34.000Z" } ], "analyses": { "keywords": [ "deep convolutional neural network", "memory efficient nonuniform quantization", "quantization method", "total memory", "alex net" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }