@article{29edb8f5bda9413d8b62bdc7f231c13d,
title = "Graph-based time-space trade-offs for approximate near neighbors",
abstract = " We take a first step towards a rigorous asymptotic analysis of graph-based approaches for finding (approximate) nearest neighbors in high-dimensional spaces, by analyzing the complexity of (randomized) greedy walks on the approximate near neighbor graph. For random data sets of size \$n = 2\textasciicircum{}\{o(d)\}\$ on the \$d\$-dimensional Euclidean unit sphere, using near neighbor graphs we can provably solve the approximate nearest neighbor problem with approximation factor \$c > 1\$ in query time \$n\textasciicircum{}\{\textbackslash{}rho\_q + o(1)\}\$ and space \$n\textasciicircum{}\{1 + \textbackslash{}rho\_s + o(1)\}\$, for arbitrary \$\textbackslash{}rho\_q, \textbackslash{}rho\_s \textbackslash{}geq 0\$ satisfying \textbackslash{}begin\{align\} (2c\textasciicircum{}2 - 1) \textbackslash{}rho\_q + 2 c\textasciicircum{}2 (c\textasciicircum{}2 - 1) \textbackslash{}sqrt\{\textbackslash{}rho\_s (1 - \textbackslash{}rho\_s)\} \textbackslash{}geq c\textasciicircum{}4. \textbackslash{}end\{align\} Graph-based near neighbor searching is especially competitive with hash-based methods for small \$c\$ and near-linear memory, and in this regime the asymptotic scaling of a greedy graph-based search matches the recent optimal hash-based trade-offs of Andoni-Laarhoven-Razenshteyn-Waingarten [SODA'17]. We further study how the trade-offs scale when the data set is of size \$n = 2\textasciicircum{}\{\textbackslash{}Theta(d)\}\$, and analyze asymptotic complexities when applying these results to lattice sieving. ",
keywords = "cs.DS, cs.CC, cs.CG, cs.CR, cs.IR",
author = "Thijs Laarhoven",
note = "26 pages, 4 figures",
year = "2017",
month = dec,
day = "8",
language = "English",
journal = "arXiv",
issn = "2331-8422",
publisher = "Cornell University Library",
}