Skip to main content

rust_igraph/algorithms/spatial/
nearest_neighbor_graph.rs

1//! k-nearest-neighbor graph of a spatial point set (ALGO-GEO-005).
2//!
3//! Counterpart of `igraph_nearest_neighbor_graph()` from
4//! `references/igraph/src/spatial/nearest_neighbor.cpp:144`.
5//!
6//! Each point is joined to at most `k` of its nearest *other* points, subject
7//! to an optional `cutoff` radius. The result is a directed graph where every
8//! arc `i → j` means "`j` is one of `i`'s `k` nearest neighbours within
9//! `cutoff`". When `directed` is `false` the directed graph is collapsed to
10//! undirected, merging a reciprocal pair `i → j`, `j → i` into a single
11//! undirected edge.
12//!
13//! The reference uses a k-d tree (nanoflann) for `O(n·log n)` queries; we use
14//! a straightforward `O(n²·d)` all-pairs scan, which yields the identical edge
15//! set on every tie-free configuration. The cutoff comparison is **strict**
16//! (`dist < cutoff`), mirroring nanoflann's `dist < worstDist()` admission
17//! test: a point lying exactly at distance `cutoff` is *not* connected. For
18//! the L2 metric distances are compared squared (so the squared radius
19//! `cutoff²` is the threshold), exactly as the reference squares the cutoff
20//! before the search.
21
22use crate::algorithms::spatial::edge_lengths::DistanceMetric;
23use crate::core::{Graph, IgraphError, IgraphResult};
24
25/// Build the k-nearest-neighbor graph of a point set.
26///
27/// `points` holds one row per point with a shared, arbitrary dimensionality
28/// (inferred from the first row). For each point the `k` nearest *other*
29/// points within `cutoff` are connected by an outgoing arc.
30///
31/// * `metric` — [`DistanceMetric::Euclidean`] (L2) or
32///   [`DistanceMetric::Manhattan`] (L1).
33/// * `k` — maximum neighbours per point; a negative value means "no limit"
34///   (every point within `cutoff`). `k == 0` produces an edgeless graph.
35/// * `cutoff` — maximum connection distance; a negative value (or
36///   [`f64::INFINITY`]) means "no cutoff". The comparison is strict, so a
37///   point exactly at distance `cutoff` is excluded.
38/// * `directed` — when `false`, the directed neighbour graph is collapsed to
39///   undirected, merging reciprocal arcs into one edge.
40///
41/// # Errors
42///
43/// - [`IgraphError::InvalidArgument`] if the points are zero-dimensional
44///   (and there is at least one point).
45/// - [`IgraphError::InvalidArgument`] if the rows have inconsistent
46///   dimensionality.
47///
48/// # Examples
49///
50/// ```
51/// use rust_igraph::{nearest_neighbor_graph, DistanceMetric};
52///
53/// // Three points on a line: each links to its single nearest neighbour.
54/// let pts = vec![vec![0.0], vec![1.0], vec![5.0]];
55/// let g = nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, 1, -1.0, true).unwrap();
56/// assert!(g.is_directed());
57/// assert_eq!(g.vcount(), 3);
58/// // 0→1, 1→0, 2→1
59/// assert_eq!(g.ecount(), 3);
60/// ```
61// `a`/`b` index points and `d` the dimension axis — the natural notation
62// for an all-pairs geometric scan.
63#[allow(clippy::many_single_char_names)]
64pub fn nearest_neighbor_graph(
65    points: &[Vec<f64>],
66    metric: DistanceMetric,
67    k: i64,
68    cutoff: f64,
69    directed: bool,
70) -> IgraphResult<Graph> {
71    let n = points.len();
72
73    // Null graph short-circuit: with zero points the dimensionality is
74    // meaningless, so a 0-dimensional empty set is not an error (matches the
75    // reference, which calls igraph_empty() before the dimension check).
76    if n == 0 {
77        return Graph::new(0, directed);
78    }
79
80    let dim = points[0].len();
81    if dim == 0 {
82        return Err(IgraphError::InvalidArgument(
83            "nearest_neighbor_graph: 0-dimensional points are not supported".into(),
84        ));
85    }
86    for (i, row) in points.iter().enumerate().skip(1) {
87        if row.len() != dim {
88            return Err(IgraphError::InvalidArgument(format!(
89                "nearest_neighbor_graph: point row {i} has dimension {} but expected {dim}",
90                row.len()
91            )));
92        }
93    }
94
95    let n_u32 = u32::try_from(n).map_err(|_| {
96        IgraphError::InvalidArgument("nearest_neighbor_graph: too many points".into())
97    })?;
98
99    // Negative cutoff => no cutoff. For L2 the threshold is the squared
100    // radius (distances are accumulated squared); for L1 it is the radius
101    // itself. INFINITY squared stays INFINITY, so an unbounded search admits
102    // every finite distance.
103    let radius = if cutoff < 0.0 { f64::INFINITY } else { cutoff };
104    let threshold = match metric {
105        DistanceMetric::Euclidean => radius * radius,
106        DistanceMetric::Manhattan => radius,
107    };
108
109    // k < 0 means "no neighbour limit": at most n - 1 others can qualify.
110    let neighbor_count = if k < 0 {
111        n
112    } else {
113        usize::try_from(k).unwrap_or(usize::MAX)
114    };
115
116    let mut directed_edges: Vec<(u32, u32)> = Vec::new();
117    let mut cands: Vec<(f64, usize)> = Vec::new();
118
119    for a in 0..n {
120        cands.clear();
121        for b in 0..n {
122            if a == b {
123                continue;
124            }
125            let dist = match metric {
126                DistanceMetric::Euclidean => points[a]
127                    .iter()
128                    .zip(points[b].iter())
129                    .map(|(&x, &y)| (x - y) * (x - y))
130                    .sum::<f64>(),
131                DistanceMetric::Manhattan => points[a]
132                    .iter()
133                    .zip(points[b].iter())
134                    .map(|(&x, &y)| (x - y).abs())
135                    .sum::<f64>(),
136            };
137            // Strict admission, mirroring nanoflann's `dist < worstDist()`.
138            if dist < threshold {
139                cands.push((dist, b));
140            }
141        }
142        // Keep the `neighbor_count` smallest; ties broken by point index so
143        // the output is deterministic (the reference leaves ties to k-d tree
144        // visit order, which is unspecified).
145        cands.sort_by(|p, q| p.0.total_cmp(&q.0).then(p.1.cmp(&q.1)));
146        let take = neighbor_count.min(cands.len());
147        let u = u32::try_from(a)
148            .map_err(|_| IgraphError::Internal("nearest_neighbor_graph: vertex id overflow"))?;
149        for &(_, b) in &cands[..take] {
150            let v = u32::try_from(b)
151                .map_err(|_| IgraphError::Internal("nearest_neighbor_graph: vertex id overflow"))?;
152            directed_edges.push((u, v));
153        }
154    }
155
156    if directed {
157        let mut graph = Graph::new(n_u32, true)?;
158        graph.add_edges(directed_edges)?;
159        Ok(graph)
160    } else {
161        // COLLAPSE to undirected: deduplicate unordered endpoint pairs so a
162        // reciprocal arc pair becomes a single edge.
163        let mut seen: std::collections::HashSet<(u32, u32)> = std::collections::HashSet::new();
164        let mut undirected_edges: Vec<(u32, u32)> = Vec::new();
165        for (u, v) in directed_edges {
166            let key = (u.min(v), u.max(v));
167            if seen.insert(key) {
168                undirected_edges.push(key);
169            }
170        }
171        let mut graph = Graph::new(n_u32, false)?;
172        graph.add_edges(undirected_edges)?;
173        Ok(graph)
174    }
175}
176
177#[cfg(test)]
178mod tests {
179    use super::*;
180
181    /// Directed arc set as a sorted `(from, to)` vec.
182    fn arc_set(g: &Graph) -> Vec<(u32, u32)> {
183        let mut edges: Vec<(u32, u32)> = (0..g.ecount())
184            .map(|e| {
185                g.edge(u32::try_from(e).expect("edge id fits in u32"))
186                    .expect("edge")
187            })
188            .collect();
189        edges.sort_unstable();
190        edges
191    }
192
193    /// Undirected edge set as a sorted `(min, max)` vec.
194    fn undirected_edge_set(g: &Graph) -> Vec<(u32, u32)> {
195        let mut edges: Vec<(u32, u32)> = (0..g.ecount())
196            .map(|e| {
197                let (u, v) = g
198                    .edge(u32::try_from(e).expect("edge id fits in u32"))
199                    .expect("edge");
200                (u.min(v), u.max(v))
201            })
202            .collect();
203        edges.sort_unstable();
204        edges
205    }
206
207    #[test]
208    fn empty_point_set() {
209        let g =
210            nearest_neighbor_graph(&[], DistanceMetric::Euclidean, 2, 5.0, true).expect("empty");
211        assert_eq!(g.vcount(), 0);
212        assert_eq!(g.ecount(), 0);
213        assert!(g.is_directed());
214    }
215
216    #[test]
217    fn single_point_no_edges() {
218        let g = nearest_neighbor_graph(
219            &[vec![1.0, 6.0, 4.0]],
220            DistanceMetric::Euclidean,
221            -1,
222            100.0,
223            true,
224        )
225        .expect("singleton");
226        assert_eq!(g.vcount(), 1);
227        assert_eq!(g.ecount(), 0);
228    }
229
230    #[test]
231    fn zero_dimensional_error() {
232        let err = nearest_neighbor_graph(&[vec![]], DistanceMetric::Euclidean, 1, 10.0, true)
233            .unwrap_err();
234        assert!(matches!(err, IgraphError::InvalidArgument(_)));
235    }
236
237    #[test]
238    fn inconsistent_dimensions_error() {
239        let err = nearest_neighbor_graph(
240            &[vec![0.0, 0.0], vec![1.0]],
241            DistanceMetric::Euclidean,
242            1,
243            10.0,
244            true,
245        )
246        .unwrap_err();
247        assert!(matches!(err, IgraphError::InvalidArgument(_)));
248    }
249
250    #[test]
251    fn zero_neighbors_is_edgeless() {
252        let pts = vec![vec![12.0, 8.0], vec![8.0, 6.0], vec![5.0, 12.0]];
253        let g =
254            nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, 0, 5.0, true).expect("zero k");
255        assert_eq!(g.ecount(), 0);
256    }
257
258    #[test]
259    fn zero_cutoff_is_edgeless() {
260        let pts = vec![vec![12.0, 8.0], vec![8.0, 6.0], vec![5.0, 12.0]];
261        let g = nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, -1, 0.0, true)
262            .expect("zero cut");
263        assert_eq!(g.ecount(), 0);
264    }
265
266    #[test]
267    fn strict_cutoff_excludes_boundary() {
268        // 1d points 8 and 5 are exactly distance 3 apart; with cutoff 3 the
269        // strict admission test (dist < cutoff) drops that connection.
270        let pts = vec![vec![8.0], vec![5.0]];
271        let g =
272            nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, -1, 3.0, true).expect("strict");
273        assert_eq!(g.ecount(), 0);
274        // Loosening the cutoff past 3 admits both reciprocal arcs.
275        let g2 =
276            nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, -1, 3.5, true).expect("loose");
277        assert_eq!(arc_set(&g2), vec![(0, 1), (1, 0)]);
278    }
279
280    // Authentic igraph C anchor (igraph_nearest_neighbor_graph.out, "2d 2
281    // neighbors, cutoff 5", L2). No k-boundary ties, so the directed edge set
282    // is unambiguous.
283    #[test]
284    fn c_anchor_2d_k2_cutoff5_l2() {
285        let pts = vec![
286            vec![12.0, 8.0],
287            vec![8.0, 6.0],
288            vec![5.0, 12.0],
289            vec![10.0, 1.0],
290            vec![12.0, 2.0],
291        ];
292        let g = nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, 2, 5.0, true).expect("2d");
293        assert_eq!(arc_set(&g), vec![(0, 1), (1, 0), (3, 4), (4, 3)]);
294    }
295
296    // Authentic igraph C anchor ("3d, 2 neighbors, cutoff INFINITY", L2): a
297    // dense, tie-free configuration exercising the k = 2 selection.
298    #[test]
299    fn c_anchor_3d_k2_cutoff_inf_l2() {
300        let pts = vec![
301            vec![1.0, 6.0, 4.0],
302            vec![6.0, 2.0, 3.0],
303            vec![3.0, 6.0, 6.0],
304            vec![3.0, 2.0, 2.0],
305            vec![2.0, 3.0, 3.0],
306        ];
307        let g = nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, 2, -1.0, true).expect("3d");
308        assert_eq!(
309            arc_set(&g),
310            vec![
311                (0, 2),
312                (0, 4),
313                (1, 3),
314                (1, 4),
315                (2, 0),
316                (2, 4),
317                (3, 1),
318                (3, 4),
319                (4, 0),
320                (4, 3),
321            ]
322        );
323    }
324
325    // Authentic igraph C anchor ("4d 1 neighbors, cutoff INFINITY", L2).
326    #[test]
327    fn c_anchor_4d_k1_cutoff_inf_l2() {
328        let pts = vec![
329            vec![1.0, 6.0, 4.0, 4.0],
330            vec![6.0, 2.0, 3.0, 3.0],
331            vec![3.0, 6.0, 6.0, 5.0],
332            vec![3.0, 2.0, 2.0, 3.0],
333            vec![2.0, 3.0, 3.0, 4.0],
334        ];
335        let g = nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, 1, -1.0, true).expect("4d");
336        assert_eq!(arc_set(&g), vec![(0, 2), (1, 3), (2, 0), (3, 4), (4, 3)]);
337    }
338
339    // Authentic igraph C anchor ("2d 2 neighbors, cutoff 5", L1).
340    #[test]
341    fn c_anchor_2d_k2_cutoff5_l1() {
342        let pts = vec![
343            vec![12.0, 8.0],
344            vec![8.0, 6.0],
345            vec![5.0, 12.0],
346            vec![10.0, 1.0],
347            vec![12.0, 2.0],
348        ];
349        let g = nearest_neighbor_graph(&pts, DistanceMetric::Manhattan, 2, 5.0, true).expect("l1");
350        assert_eq!(arc_set(&g), vec![(3, 4), (4, 3)]);
351    }
352
353    #[test]
354    fn undirected_collapses_reciprocal_arcs() {
355        // The 2d/k2/cutoff5/L2 anchor has two reciprocal pairs; COLLAPSE
356        // merges each into a single undirected edge.
357        let pts = vec![
358            vec![12.0, 8.0],
359            vec![8.0, 6.0],
360            vec![5.0, 12.0],
361            vec![10.0, 1.0],
362            vec![12.0, 2.0],
363        ];
364        let g =
365            nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, 2, 5.0, false).expect("undir");
366        assert!(!g.is_directed());
367        assert_eq!(undirected_edge_set(&g), vec![(0, 1), (3, 4)]);
368    }
369
370    #[test]
371    fn unlimited_neighbors_infinite_cutoff_is_complete() {
372        let pts = vec![vec![0.0], vec![1.0], vec![2.0]];
373        let g = nearest_neighbor_graph(&pts, DistanceMetric::Euclidean, -1, -1.0, true)
374            .expect("complete");
375        // Every ordered pair of distinct points: 3 * 2 = 6 arcs.
376        assert_eq!(g.ecount(), 6);
377    }
378}