@article{anderson_low-memory_2017,
  title        = {Low-memory {GEMM}-based convolution algorithms for deep neural networks},
  author       = {Anderson, Andrew and Vasudevan, Aravind and Keane, Cormac and Gregg, David},
  date         = {2017-09-08},
  journaltitle = {arXiv:1709.03395 [cs]},
  eprinttype   = {arxiv},
  eprint       = {1709.03395},
  url          = {http://arxiv.org/abs/1709.03395},
  urldate      = {2018-06-05},
  keywords     = {Computer Science - Computer Vision and Pattern Recognition},
  abstract     = {Deep neural networks (DNNs) require very large amounts of computation both for training and for inference when deployed in the field. A common approach to implementing DNNs is to recast the most computationally expensive operations as general matrix multiplication (GEMM). However, as we demonstrate in this paper, there are a great many different ways to express DNN convolution operations using GEMM. Although different approaches all perform the same number of operations, the size of temporary data structures differs significantly. Convolution of an input matrix with dimensions $C \times H \times W$ requires $O(K^2CHW)$ additional space using the classical im2col approach. More recently, memory-efficient approaches requiring just $O(KCHW)$ auxiliary space have been proposed. We present two novel GEMM-based algorithms that require just $O(MHW)$ and $O(KW)$ additional space respectively, where $M$ is the number of channels in the result of the convolution. These algorithms dramatically reduce the space overhead of DNN convolution, making it much more suitable for memory-limited embedded systems. Experimental evaluation shows that our low-memory algorithms are just as fast as the best patch-building approaches despite requiring just a fraction of the amount of additional memory. Our low-memory algorithms have excellent data locality, which gives them a further edge over patch-building algorithms when multiple cores are used. As a result, our low-memory algorithms often outperform the best patch-building algorithms using multiple threads.},
}
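
The $O(K^2CHW)$ figure quoted in the abstract for the classical im2col approach corresponds to the patch matrix that im2col materializes before the single GEMM call. The sketch below is an illustrative reconstruction, not code from the paper; it assumes stride 1, an odd kernel size K, and "same" zero padding, and uses NumPy only to make the size of that temporary buffer explicit.

import numpy as np

def im2col(x, K):
    # Build the (C*K*K) x (H*W) patch matrix for a C x H x W input and a
    # K x K kernel (stride 1, "same" zero padding). This buffer is the
    # O(K^2 * C * H * W) temporary referred to in the abstract.
    C, H, W = x.shape
    pad = K // 2
    xp = np.pad(x, ((0, 0), (pad, pad), (pad, pad)))
    cols = np.empty((C * K * K, H * W), dtype=x.dtype)
    row = 0
    for c in range(C):
        for di in range(K):
            for dj in range(K):
                cols[row] = xp[c, di:di + H, dj:dj + W].reshape(-1)
                row += 1
    return cols

def conv_as_gemm(x, w):
    # Convolution expressed as one GEMM: the M x (C*K*K) weight matrix
    # multiplied by the patch matrix yields the M x H x W result.
    M, C, K, _ = w.shape
    cols = im2col(x, K)
    return (w.reshape(M, -1) @ cols).reshape(M, x.shape[1], x.shape[2])

The two algorithms contributed by the paper avoid building this patch matrix, keeping only $O(MHW)$ or $O(KW)$ of auxiliary state as stated in the abstract.
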