% ACM Digital Library exports: one journal article and two conference papers.
% NOTE(review): the key Vasilache:2019:NAL:3366460.3355606 is defined a second
% time later in this file (that copy carries issue_date "October 2019", this one
% "November 2019"). BibTeX reports repeated keys as an error; confirm which
% issue_date is right and keep only one copy.
@article{Vasilache:2019:NAL:3366460.3355606,
  author     = {Vasilache, Nicolas and Zinenko, Oleksandr and Theodoridis, Theodoros and Goyal, Priya and Devito, Zachary and Moses, William S. and Verdoolaege, Sven and Adams, Andrew and Cohen, Albert},
  title      = {The Next 700 Accelerated Layers: From Mathematical Expressions of Network Computation Graphs to Accelerated {GPU} Kernels, Automatically},
  journal    = {ACM Trans. Archit. Code Optim.},
  issue_date = {November 2019},
  volume     = {16},
  number     = {4},
  month      = oct,
  year       = {2019},
  issn       = {1544-3566},
  pages      = {38:1--38:26},
  articleno  = {38},
  numpages   = {26},
  url        = {http://doi.acm.org/10.1145/3355606},
  doi        = {10.1145/3355606},
  acmid      = {3355606},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {Deep learning layers, GPU acceleration, polyhedral compilation},
}

@inproceedings{Augustine:2019:GPC:3314221.3314615,
  author    = {Augustine, Travis and Sarma, Janarthanan and Pouchet, Louis-No{\"e}l and Rodr{\'\i}guez, Gabriel},
  title     = {Generating Piecewise-regular Code from Irregular Structures},
  booktitle = {Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation},
  series    = {PLDI 2019},
  year      = {2019},
  isbn      = {978-1-4503-6712-7},
  location  = {Phoenix, AZ, USA},
  pages     = {625--639},
  numpages  = {15},
  url       = {http://doi.acm.org/10.1145/3314221.3314615},
  doi       = {10.1145/3314221.3314615},
  acmid     = {3314615},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {Polyhedral compilation, SpMV, sparse data structure, trace compression},
}

@inproceedings{Rawat:2016:ERM:2884045.2884047,
  author    = {Rawat, Prashant Singh and Hong, Changwan and Ravishankar, Mahesh and Grover, Vinod and Pouchet, Louis-No{\"e}l and Sadayappan, P.},
  title     = {Effective Resource Management for Enhancing Performance of {2D} and {3D} Stencils on {GPUs}},
  booktitle = {Proceedings of the 9th Annual Workshop on General Purpose Processing Using Graphics Processing Unit},
  series    = {GPGPU '16},
  year      = {2016},
  isbn      = {978-1-4503-4195-0},
  location  = {Barcelona, Spain},
  pages     = {92--102},
  numpages  = {11},
  url       = {http://doi.acm.org/10.1145/2884045.2884047},
  doi       = {10.1145/2884045.2884047},
  acmid     = {2884047},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {GPGPU, resource management, stencil computations, tiling},
}

% arXiv preprint (DBLP export); the arXiv identifier is carried in eprint/archivePrefix.
@article{DBLP:journals/corr/abs-1805-02566,
  author        = {Kwon, Hyoukjun and
                   Pellauer, Michael and
                   Krishna, Tushar},
  title         = {Understanding Reuse, Performance, and Hardware Cost of {DNN} Dataflows: A Data-Centric Approach},
  journal       = {CoRR},
  volume        = {abs/1805.02566},
  year          = {2018},
  url           = {http://arxiv.org/abs/1805.02566},
  archivePrefix = {arXiv},
  eprint        = {1805.02566},
  timestamp     = {Mon, 13 Aug 2018 16:46:45 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1805-02566},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}

% PLDI '14 conference paper (ACM Digital Library export).
@inproceedings{Stock:2014:FED:2594291.2594342,
  author    = {Stock, Kevin and Kong, Martin and Grosser, Tobias and Pouchet, Louis-No{\"e}l and Rastello, Fabrice and Ramanujam, J. and Sadayappan, P.},
  title     = {A Framework for Enhancing Data Reuse via Associative Reordering},
  booktitle = {Proceedings of the 35th ACM SIGPLAN Conference on Programming Language Design and Implementation},
  series    = {PLDI '14},
  year      = {2014},
  isbn      = {978-1-4503-2784-8},
  location  = {Edinburgh, United Kingdom},
  pages     = {65--76},
  numpages  = {12},
  url       = {http://doi.acm.org/10.1145/2594291.2594342},
  doi       = {10.1145/2594291.2594342},
  acmid     = {2594342},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

% IEEE JSSC journal article (IEEE Xplore export); the key is the Xplore document number.
@article{7738524,
  author  = {Chen, Y. H. and Krishna, T. and Emer, J. S. and Sze, V.},
  title   = {{Eyeriss}: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks},
  journal = {IEEE Journal of Solid-State Circuits},
  year    = {2017},
  volume  = {52},
  number  = {1},
  pages   = {127--138},
  month   = jan,
  issn    = {0018-9200},
  url     = {http://ieeexplore.ieee.org/document/7738524/},
  doi     = {10.1109/JSSC.2016.2616357},
}

@article{Vasilache:2019:NAL:3366460.3355606, author = {Vasilache, Nicolas and Zinenko, Oleksandr and Theodoridis, Theodoros and Goyal, Priya and Devito, Zachary and Moses, William S. and Verdoolaege, Sven and Adams, Andrew and Cohen, Albert}, title = {The Next 700 Accelerated Layers: From Mathematical Expressions of Network Computation Graphs to Accelerated GPU Kernels, Automatically}, journal = {ACM Trans. Archit. Code Optim.}, issue_date = {October 2019}, volume = {16}, number = {4}, month = oct, year = {2019}, issn = {1544-3566}, pages = {38:1–38:26}, articleno = {38}, numpages = {26}, url = {http://doi.acm.org/10.1145/3355606}, doi = {10.1145/3355606}, acmid = {3355606}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {Deep learning layers, GPU acceleration, polyhedral compilation},