Conference paper entry. Acronyms in the title are brace-protected so that
sentence-casing styles keep their capitalisation; the proceedings title is
stored once, in booktitle.

@inproceedings{df8af90c72a6412eaf08b78ea2441ac1,
  author    = {Guo, Jia and Teodorescu, Radu and Agrawal, Gagan},
  title     = {Fused {DSConv}: Optimizing sparse {CNN} inference for execution on edge devices},
  booktitle = {Proceedings - 21st IEEE/ACM International Symposium on Cluster, Cloud and Internet Computing, CCGrid 2021},
  editor    = {Lefevre, Laurent and Patterson, Stacy and Lee, Young Choon and Shen, Haiying and Ilager, Shashikant and Goudarzi, Mohammad and Toosi, Adel N. and Buyya, Rajkumar},
  pages     = {545--554},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2021},
  month     = may,
  doi       = {10.1109/CCGrid51090.2021.00064},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2021 IEEE.; 21st IEEE/ACM International Symposium on Cluster, Cloud and Internet Computing, CCGrid 2021 ; Conference date: 10-05-2021 Through 13-05-2021},
  abstract  = {Accelerating CNN on resource-constrained edge devices is becoming an increasingly important problem with the emergence of IoT and edge computing. This paper proposes an execution strategy and an implementation for efficient execution of CNNs. Our execution strategy combines two previously published, but not widely used, ideas - direct sparse convolution and fusion of two convolution layers. Together with a scheme for caching intermediate results, this results in a very efficient mechanism for speeding up inference after the model has been sparsified. We also demonstrate an efficient implementation that uses both multi-core and SIMD parallelism. Our experimental results demonstrate that our scheme significantly outperforms existing implementations on an edge device, while also scaling better in a server environment.},
}