@inproceedings{0f5f853794a2492a960649b7b7f582ce,
  author    = {Stolle, Kurt and Vogel, Sebastian and {van der Sommen}, Fons and Sanberg, Willem P.},
  title     = {Block-Level Surrogate Models for Inference Time Estimation in Hardware Aware Neural Architecture Search},
  editor    = {Amini, Massih-Reza and Canu, St{\'e}phane and Fischer, Asja and Guns, Tias and {Kralj Novak}, Petra and Tsoumakas, Grigorios},
  booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2022, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Germany},
  pages     = {463--479},
  year      = {2023},
  month     = mar,
  day       = {17},
  doi       = {10.1007/978-3-031-26419-1_28},
  isbn      = {978-3-031-26418-4},
  url       = {https://2022.ecmlpkdd.org/},
  language  = {English},
  keywords  = {AutoML, Inference time estimation, Neural network design},
  abstract  = {Hardware-Aware Neural Architecture Search (HA-NAS) is an attractive approach for discovering network architectures that balance task accuracy and deployment efficiency. In an iterative search algorithm, inference time is typically determined at every step by directly profiling architectures on hardware. This imposes limitations on the scalability of search processes because access to specialized devices for profiling is required. As such, the ability to assess inference time without hardware access is an important aspect to enable deep learning on resource-constrained embedded devices. Previous work estimates inference time by summing individual contributions of the architecture's parts. In this work, we propose using block-level inference time estimators to find the network-level inference time. Individual estimators are trained on collected datasets of independently sampled and profiled architecture block instances. Our experiments on isolated blocks commonly found in classification architectures show that gradient boosted decision trees serve as an accurate surrogate for inference time. More specifically, their Spearman correlation coefficient exceeds 0.98 on all tested platforms. When such blocks are connected in sequence, the sum of all block estimations correlates with the measured network inference time, having Spearman correlation coefficients above 0.71 on evaluated CPUs and an accelerator platform. Furthermore, we demonstrate the applicability of our Surrogate Model (SM) methodology in its intended HA-NAS context. To this end, we evaluate and compare two HA-NAS processes: one that relies on profiling via hardware-in-the-loop and one that leverages block-level surrogate models. We find that both processes yield similar Pareto-optimal architectures. This shows that our method facilitates a similar task-performance outcome without relying on hardware access for profiling during architecture search.},
  note      = {ID 737; 2022 European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, ECML PKDD 2022, ECML PKDD ; Conference date: 19-09-2022 Through 23-09-2022},
}