% QUENN: quantization engine of the LPDNN framework (de Prado, Denna, Benini, Pazos).
% biblatex-style record (date / eventtitle / langid / urldate).
% The doi carries the ACM prefix (10.1145); url and eprint point at arXiv version 1.
% NOTE(review): date is presumably the arXiv posting date (identifier 1811.* = Nov 2018),
% not the date of the CF '18 conference itself -- confirm against the ACM record.
@inproceedings{de_prado_quenn:_2018,
  author     = {de Prado, Miguel and Denna, Maurizio and Benini, Luca and Pazos, Nuria},
  title      = {{QUENN}: {QUantization} Engine for low-power Neural Networks},
  shorttitle = {{QUENN}},
  booktitle  = {{CF} '18 Proceedings of the 15th {ACM} International Conference on Computing Frontiers},
  eventtitle = {{CF} '18 15th {ACM} International Conference on Computing Frontiers},
  date       = {2018-11-14},
  doi        = {10.1145/3203217.3203282},
  eprint     = {1811.05896},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1811.05896v1},
  urldate    = {2019-01-29},
  langid     = {english},
  note       = {{ACM} International Conference on Computing Frontiers 2018},
  abstract   = {Deep Learning is moving to edge devices, ushering in a new age of distributed Artificial Intelligence ({AI}). The high demand of computational resources required by deep neural networks may be alleviated by approximate computing techniques, and most notably reduced-precision arithmetic with coarsely quantized numerical representations. In this context, Bonseyes comes in as an initiative to enable stakeholders to bring {AI} to low-power and autonomous environments such as: Automotive, Medical Healthcare and Consumer Electronics. To achieve this, we introduce {LPDNN}, a framework for optimized deployment of Deep Neural Networks on heterogeneous embedded devices. In this work, we detail the quantization engine that is integrated in {LPDNN}. The engine depends on a fine-grained workflow which enables a Neural Network Design Exploration and a sensitivity analysis of each layer for quantization. We demonstrate the engine with a case study on Alexnet and {VGG}16 for three different techniques for direct quantization: standard fixed-point, dynamic fixed-point and k-means clustering, and demonstrate the potential of the latter. We argue that using a Gaussian quantizer with k-means clustering can achieve better performance than linear quantizers.
Without retraining, we achieve over 55.64\% saving for weights' storage and 69.17\% for run-time memory accesses with less than 1\% drop in top5 accuracy in Imagenet.},
}