Skip to main content

rust_igraph/algorithms/community/
walktrap.rs

1//! Walktrap random-walk community detection (ALGO-CO-008).
2//!
3//! Counterpart of `igraph_community_walktrap()` from
4//! `references/igraph/src/community/walktrap/`.
5//!
6//! Pons & Latapy (2005, 2006): two vertices are *close* when short
7//! random walks (length `t`, default 4) starting from each of them end
8//! up with similar probability distributions on the vertex set. The
9//! distance between communities `C1`, `C2` is the L2 norm of the
10//! probability vectors weighted by `1/sqrt(deg(v))`. Walktrap is a
11//! Ward-style hierarchical agglomeration that, at each step, merges the
12//! adjacent community pair whose merger minimises the increase in
13//! variance σ (equivalently, the lower-bound `Δσ`). The dendrogram is
14//! then cut at the level with maximum Newman-Girvan modularity.
15//!
16//! References:
17//! - P. Pons, M. Latapy. *Computing communities in large networks
18//!   using random walks*. J. Graph Algorithms Appl. 10 (2006), 191-218.
19//!   <https://doi.org/10.7155/jgaa.00124>
20//!
21//! Phase-1 scope: **undirected** graphs only — matches upstream C
22//! (`igraph_community_walktrap` does not support directed input). Edge
23//! weights, when given, must be finite and non-negative. Multi-edges
24//! are accepted and folded by weight-sum; self-loops are kept. Per the
25//! igraph #2043 fix, each vertex receives a synthesized self-loop of
26//! weight `mean(incident edge weight)` (or `1.0` if isolated) so
27//! diffusion is well-defined.
28//!
29//! Complexity: `O(t · |E| · nb_merges)` worst case in the present
30//! full-vector implementation. The sparse-vector trick from upstream is
31//! a future optimisation.
32
33#![allow(
34    clippy::cast_possible_truncation,
35    clippy::cast_possible_wrap,
36    clippy::cast_precision_loss,
37    clippy::cast_sign_loss,
38    clippy::float_cmp,
39    clippy::items_after_statements,
40    clippy::many_single_char_names,
41    clippy::needless_range_loop,
42    clippy::too_many_lines
43)]
44
45use crate::core::{Graph, IgraphError, IgraphResult};
46
47/// Default random-walk length matching the igraph reference (`steps = 4`).
48pub const WALKTRAP_DEFAULT_STEPS: u32 = 4;
49
50/// Tuning knobs for [`walktrap_with_options`].
51#[derive(Debug, Clone, Copy)]
52pub struct WalktrapOptions {
53    /// Random-walk length `t`. Must be `>= 1`; igraph default is `4`.
54    pub steps: u32,
55}
56
57impl Default for WalktrapOptions {
58    fn default() -> Self {
59        Self {
60            steps: WALKTRAP_DEFAULT_STEPS,
61        }
62    }
63}
64
65/// Result of [`walktrap`] / [`walktrap_weighted`] /
66/// [`walktrap_with_options`].
67#[derive(Debug, Clone)]
68pub struct WalktrapResult {
69    /// Per-vertex community label of the best-modularity dendrogram cut,
70    /// densified to `0..nb_clusters`.
71    pub membership: Vec<u32>,
72    /// Number of distinct communities in `membership`.
73    pub nb_clusters: u32,
74    /// Merges in dendrogram order. Each row `[c1, c2]` merges clusters
75    /// `c1` and `c2` into the new cluster `n + i` where `i` is the
76    /// merge index. Same encoding as `igraph_community_walktrap`.
77    pub merges: Vec<[u32; 2]>,
78    /// Modularity trajectory. `modularity[i]` is the modularity *after*
79    /// `i` merges starting from the all-singletons partition. Length =
80    /// `merges.len() + 1` when the graph has edges. For an edgeless
81    /// graph the single entry is `NaN`, matching the C convention.
82    pub modularity: Vec<f64>,
83}
84
85/// Run Walktrap on an unweighted, undirected graph with default `steps = 4`.
86///
87/// # Errors
88/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
89///
90/// # Examples
91///
92/// ```
93/// use rust_igraph::{Graph, walktrap};
94///
95/// // Triangle: walktrap puts all three vertices in one community.
96/// let mut g = Graph::with_vertices(3);
97/// for &(u, v) in &[(0, 1), (1, 2), (2, 0)] {
98///     g.add_edge(u, v).unwrap();
99/// }
100/// let r = walktrap(&g).unwrap();
101/// assert_eq!(r.nb_clusters, 1);
102/// assert_eq!(r.membership, vec![0, 0, 0]);
103/// ```
104pub fn walktrap(graph: &Graph) -> IgraphResult<WalktrapResult> {
105    walktrap_with_options(graph, None, WalktrapOptions::default())
106}
107
108/// Run Walktrap on a weighted, undirected graph with default `steps = 4`.
109///
110/// `weights[i]` is the weight of edge id `i`. Must be finite and
111/// non-negative; length must equal `graph.ecount()`.
112///
113/// # Errors
114/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
115/// - [`IgraphError::InvalidArgument`] if `weights.len() != ecount` or
116///   any weight is negative or non-finite.
117///
118/// # Examples
119///
120/// ```
121/// use rust_igraph::{Graph, walktrap_weighted};
122///
123/// let mut g = Graph::with_vertices(3);
124/// g.add_edge(0, 1).unwrap();
125/// g.add_edge(1, 2).unwrap();
126/// g.add_edge(2, 0).unwrap();
127/// let r = walktrap_weighted(&g, &[1.0, 2.0, 1.0]).unwrap();
128/// assert_eq!(r.nb_clusters, 1);
129/// ```
130pub fn walktrap_weighted(graph: &Graph, weights: &[f64]) -> IgraphResult<WalktrapResult> {
131    walktrap_with_options(graph, Some(weights), WalktrapOptions::default())
132}
133
134/// Run Walktrap with a custom [`WalktrapOptions`].
135///
136/// `weights = None` runs the unweighted variant; otherwise see
137/// [`walktrap_weighted`] for the weight contract.
138///
139/// # Errors
140/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
141/// - [`IgraphError::InvalidArgument`] for `opts.steps == 0`, weight
142///   length mismatch, negative / non-finite weights, or a vertex whose
143///   incident-edge total weight underflows to zero.
144///
145/// # Examples
146///
147/// ```
148/// use rust_igraph::{Graph, walktrap_with_options, WalktrapOptions};
149///
150/// let mut g = Graph::with_vertices(3);
151/// g.add_edge(0, 1).unwrap();
152/// g.add_edge(1, 2).unwrap();
153/// g.add_edge(2, 0).unwrap();
154/// let opts = WalktrapOptions { steps: 6 };
155/// let r = walktrap_with_options(&g, None, opts).unwrap();
156/// assert_eq!(r.nb_clusters, 1);
157/// ```
158pub fn walktrap_with_options(
159    graph: &Graph,
160    weights: Option<&[f64]>,
161    opts: WalktrapOptions,
162) -> IgraphResult<WalktrapResult> {
163    if graph.is_directed() {
164        return Err(IgraphError::Unsupported(
165            "walktrap is undirected-only (matches igraph C reference)",
166        ));
167    }
168    if opts.steps == 0 {
169        return Err(IgraphError::InvalidArgument(
170            "walktrap: steps must be >= 1".to_string(),
171        ));
172    }
173    if let Some(w) = weights {
174        if w.len() != graph.ecount() {
175            return Err(IgraphError::InvalidArgument(
176                "walktrap: weights.len() must equal graph.ecount()".to_string(),
177            ));
178        }
179        for &v in w {
180            if !v.is_finite() || v < 0.0 {
181                return Err(IgraphError::InvalidArgument(
182                    "walktrap: weights must be finite and non-negative".to_string(),
183                ));
184            }
185        }
186    }
187
188    run(graph, weights, opts.steps)
189}
190
191// ---------------------------------------------------------------------
192// Internal: graph adapter
193// ---------------------------------------------------------------------
194
195#[derive(Clone, Copy)]
196struct AdjEntry {
197    neighbor: u32,
198    weight: f64,
199}
200
201struct InternalGraph {
202    n: u32,
203    /// `vertices[v]` is a sorted-by-neighbor adjacency including a
204    /// synthesized self-loop at position 0 with weight = mean incident
205    /// edge weight (or `1.0` when the vertex has no incident edges).
206    vertices: Vec<Vec<AdjEntry>>,
207    /// `total_weight[v]` includes the synthesized self-loop, matching
208    /// upstream's "strength" used in `P · D⁻¹` diffusion.
209    total_weight: Vec<f64>,
210    /// Sum of all `total_weight[v]`; the global "2m + n·mean-self-loop"
211    /// normalising constant used in the modularity formula.
212    total_weight_global: f64,
213    /// Sum of original (non-synthesised) edge weights; the `m` in the
214    /// classical modularity `Q = Σ (e_ii - a_i²)`.
215    /// We retain it for the dendrogram modularity computed at the end.
216    raw_total_weight: f64,
217}
218
219fn build_internal_graph(graph: &Graph, weights: Option<&[f64]>) -> IgraphResult<InternalGraph> {
220    let n = graph.vcount();
221    let mut vertices: Vec<Vec<AdjEntry>> = vec![Vec::new(); n as usize];
222    let mut deg: Vec<u32> = vec![0; n as usize];
223    let mut total_weight: Vec<f64> = vec![0.0; n as usize];
224    let mut raw_total_weight = 0.0f64;
225
226    let m = graph.ecount();
227    for eid in 0..m {
228        let (u, v) = graph.edge(eid as u32)?;
229        let w = match weights {
230            Some(ws) => ws[eid],
231            None => 1.0,
232        };
233        deg[u as usize] = deg[u as usize].saturating_add(1);
234        deg[v as usize] = deg[v as usize].saturating_add(1);
235        total_weight[u as usize] += w;
236        total_weight[v as usize] += w;
237        raw_total_weight += w;
238
239        vertices[u as usize].push(AdjEntry {
240            neighbor: v,
241            weight: w,
242        });
243        if u != v {
244            vertices[v as usize].push(AdjEntry {
245                neighbor: u,
246                weight: w,
247            });
248        }
249    }
250
251    // Prepend a synthesized self-loop with weight = mean incident edge
252    // weight (or 1.0 if isolated). Mirrors walktrap_graph.cpp lines
253    // 182-190 and the #2043 fix.
254    for v in 0..n as usize {
255        let mean_w = if deg[v] == 0 {
256            1.0
257        } else {
258            total_weight[v] / f64::from(deg[v])
259        };
260        vertices[v].push(AdjEntry {
261            neighbor: v as u32,
262            weight: mean_w,
263        });
264        total_weight[v] += mean_w;
265    }
266
267    // Sort each adjacency by neighbor id and fold parallel edges.
268    for v in 0..n as usize {
269        vertices[v].sort_by_key(|e| e.neighbor);
270        let mut folded: Vec<AdjEntry> = Vec::with_capacity(vertices[v].len());
271        for entry in &vertices[v] {
272            if let Some(last) = folded.last_mut() {
273                if last.neighbor == entry.neighbor {
274                    last.weight += entry.weight;
275                    continue;
276                }
277            }
278            folded.push(*entry);
279        }
280        vertices[v] = folded;
281    }
282
283    let mut total_weight_global = 0.0f64;
284    for v in 0..n as usize {
285        if total_weight[v] <= 0.0 {
286            return Err(IgraphError::InvalidArgument(
287                "walktrap: vertex with zero strength found; all vertices must have positive strength"
288                    .to_string(),
289            ));
290        }
291        total_weight_global += total_weight[v];
292    }
293
294    Ok(InternalGraph {
295        n,
296        vertices,
297        total_weight,
298        total_weight_global,
299        raw_total_weight,
300    })
301}
302
303// ---------------------------------------------------------------------
304// Internal: probabilities
305// ---------------------------------------------------------------------
306
307/// Length-n probability vector representing a community's t-step random
308/// walk distribution. Full (dense) representation; the upstream
309/// sparse/full switch is a deferred optimisation.
310type Probabilities = Vec<f64>;
311
312/// Compute `P^t · D⁻¹ · A` starting from a uniform distribution over
313/// `members`. `D⁻¹` is the diagonal of `1 / total_weight[v]`, and
314/// neighbours are taken from `g.vertices` (which already includes the
315/// synthesized self-loops).
316fn compute_probabilities(g: &InternalGraph, members: &[u32], steps: u32) -> Probabilities {
317    let n = g.n as usize;
318    let mut current = vec![0.0f64; n];
319    let inv_size = 1.0 / members.len() as f64;
320    for &v in members {
321        current[v as usize] = inv_size;
322    }
323    let mut next = vec![0.0f64; n];
324    for _ in 0..steps {
325        for v in 0..n {
326            next[v] = 0.0;
327        }
328        for u in 0..n {
329            let pu = current[u];
330            if pu == 0.0 {
331                continue;
332            }
333            let factor = pu / g.total_weight[u];
334            for entry in &g.vertices[u] {
335                next[entry.neighbor as usize] += factor * entry.weight;
336            }
337        }
338        std::mem::swap(&mut current, &mut next);
339    }
340    current
341}
342
343/// Squared distance between two communities' probability vectors,
344/// `Σ_v (P_a[v] - P_b[v])² / total_weight[v]`.
345fn distance_sq(g: &InternalGraph, a: &Probabilities, b: &Probabilities) -> f64 {
346    let mut acc = 0.0f64;
347    for v in 0..g.n as usize {
348        let d = a[v] - b[v];
349        acc += d * d / g.total_weight[v];
350    }
351    acc
352}
353
354// ---------------------------------------------------------------------
355// Internal: communities + neighbor pool
356// ---------------------------------------------------------------------
357
358#[derive(Clone)]
359struct CommunityState {
360    /// `false` means this community has been merged away.
361    active: bool,
362    size: u32,
363    /// `internal_weight` — sum of original edge weights with both
364    /// endpoints in this community (loops counted once). At the
365    /// singleton level this is the self-loop sum from the original
366    /// graph (NOT the synthesised one).
367    internal_weight: f64,
368    /// `total_weight` — sum of original edge weights incident to this
369    /// community. At the singleton level this is the degree-weight of
370    /// the original vertex (excluding the synthesised self-loop).
371    total_weight: f64,
372    /// Cached `P^t`. `None` until first needed.
373    probabilities: Option<Probabilities>,
374    /// Members of this community, by original vertex id. Built only
375    /// when we need to compute probabilities.
376    members: Vec<u32>,
377}
378
379#[derive(Clone)]
380struct NeighborEntry {
381    c1: u32,
382    c2: u32,
383    /// Lower-bound `Δσ` until `exact` is set.
384    delta_sigma: f64,
385    /// True once `delta_sigma` has been refined via the exact
386    /// L2-distance formula.
387    exact: bool,
388    /// Sum of original edge weights between c1 and c2.
389    weight: f64,
390    /// `false` means this neighbour entry has been superseded by a
391    /// merge and should be skipped on heap pop.
392    alive: bool,
393}
394
395struct Communities {
396    g: InternalGraph,
397    steps: u32,
398    nodes: Vec<CommunityState>,
399    /// Stable storage for neighbour entries. Indices into this vec are
400    /// used as `NeighborId`.
401    neighbors_pool: Vec<NeighborEntry>,
402    /// For each community id, sorted list of `(other_id, neighbor_id)`
403    /// adjacencies. Always `other_id != id`. We keep both halves so we
404    /// can iterate from either community.
405    adj: Vec<Vec<(u32, u32)>>,
406    /// Min-heap on `delta_sigma` of alive neighbour entries.
407    heap: Vec<u32>,
408    merges: Vec<[u32; 2]>,
409    modularity_trajectory: Vec<f64>,
410}
411
412// ---------------------------------------------------------------------
413// Internal: min-heap on `neighbors_pool[id].delta_sigma`
414// ---------------------------------------------------------------------
415
416fn heap_key(pool: &[NeighborEntry], id: u32) -> f64 {
417    pool[id as usize].delta_sigma
418}
419
420fn heap_push(heap: &mut Vec<u32>, pool: &[NeighborEntry], id: u32) {
421    heap.push(id);
422    let last = heap.len() - 1;
423    sift_up(heap, pool, last);
424}
425
426fn heap_pop(heap: &mut Vec<u32>, pool: &[NeighborEntry]) -> Option<u32> {
427    let last = heap.pop()?;
428    if heap.is_empty() {
429        return Some(last);
430    }
431    let top = std::mem::replace(&mut heap[0], last);
432    sift_down(heap, pool, 0);
433    Some(top)
434}
435
436fn sift_up(heap: &mut [u32], pool: &[NeighborEntry], mut i: usize) {
437    while i > 0 {
438        let parent = (i - 1) / 2;
439        if heap_key(pool, heap[i]) < heap_key(pool, heap[parent]) {
440            heap.swap(i, parent);
441            i = parent;
442        } else {
443            break;
444        }
445    }
446}
447
448fn sift_down(heap: &mut [u32], pool: &[NeighborEntry], mut i: usize) {
449    let n = heap.len();
450    loop {
451        let l = 2 * i + 1;
452        let r = 2 * i + 2;
453        let mut smallest = i;
454        if l < n && heap_key(pool, heap[l]) < heap_key(pool, heap[smallest]) {
455            smallest = l;
456        }
457        if r < n && heap_key(pool, heap[r]) < heap_key(pool, heap[smallest]) {
458            smallest = r;
459        }
460        if smallest == i {
461            break;
462        }
463        heap.swap(i, smallest);
464        i = smallest;
465    }
466}
467
468// ---------------------------------------------------------------------
469// Top-level driver (placeholder — full body lands in Step 4).
470// ---------------------------------------------------------------------
471
472fn run(graph: &Graph, weights: Option<&[f64]>, steps: u32) -> IgraphResult<WalktrapResult> {
473    let g = build_internal_graph(graph, weights)?;
474    drive(g, graph, weights, steps)
475}
476
477fn drive(
478    g: InternalGraph,
479    graph: &Graph,
480    weights: Option<&[f64]>,
481    steps: u32,
482) -> IgraphResult<WalktrapResult> {
483    let n = g.n;
484
485    // Trivial cases.
486    if n == 0 {
487        return Ok(WalktrapResult {
488            membership: Vec::new(),
489            nb_clusters: 0,
490            merges: Vec::new(),
491            modularity: vec![f64::NAN],
492        });
493    }
494    if g.raw_total_weight <= 0.0 {
495        // Edgeless: identity membership, NaN modularity, no merges.
496        return Ok(WalktrapResult {
497            membership: (0..n).collect(),
498            nb_clusters: n,
499            merges: Vec::new(),
500            modularity: vec![f64::NAN],
501        });
502    }
503
504    // Build initial singleton communities (one per vertex).
505    let mut nodes: Vec<CommunityState> = Vec::with_capacity(2 * n as usize);
506    for v in 0..n as usize {
507        // For the singleton community {v}, the original-graph internal
508        // weight equals the synthesised loop minus the synthesised
509        // contribution. We need the *original* self-loop weight if any.
510        // We compute these from `graph + weights` directly to keep
511        // book-keeping simple and unambiguous.
512        nodes.push(CommunityState {
513            active: true,
514            size: 1,
515            internal_weight: 0.0,
516            total_weight: 0.0,
517            probabilities: None,
518            members: vec![v as u32],
519        });
520    }
521    // Fill total_weight and internal_weight from the original edges.
522    for eid in 0..graph.ecount() {
523        let (u, v) = graph.edge(eid as u32)?;
524        let w = match weights {
525            Some(ws) => ws[eid],
526            None => 1.0,
527        };
528        nodes[u as usize].total_weight += w;
529        if u == v {
530            nodes[u as usize].internal_weight += w;
531        }
532        if u != v {
533            nodes[v as usize].total_weight += w;
534        }
535    }
536
537    // Initialise neighbour list and heap.
538    let mut neighbors_pool: Vec<NeighborEntry> = Vec::new();
539    let mut adj: Vec<Vec<(u32, u32)>> = vec![Vec::new(); (2 * n) as usize];
540    let mut heap: Vec<u32> = Vec::new();
541
542    // Build community-level adjacency: collapse parallel edges between
543    // distinct endpoints; skip self-loops (a self-loop is internal to
544    // the singleton and not a neighbour pair).
545    use std::collections::BTreeMap;
546    let mut pair_weight: BTreeMap<(u32, u32), f64> = BTreeMap::new();
547    for eid in 0..graph.ecount() {
548        let (u, v) = graph.edge(eid as u32)?;
549        if u == v {
550            continue;
551        }
552        let w = match weights {
553            Some(ws) => ws[eid],
554            None => 1.0,
555        };
556        let (a, b) = if u < v { (u, v) } else { (v, u) };
557        *pair_weight.entry((a, b)).or_insert(0.0) += w;
558    }
559
560    for ((c1, c2), w) in pair_weight {
561        // Lower-bound Δσ = -1 / min(|adj(c1)| as community, |adj(c2)|).
562        // At the singleton level upstream uses 1/min(deg(c1), deg(c2))
563        // where degrees come from the synthesised adjacency.
564        let d1 = g.vertices[c1 as usize].len() as f64;
565        let d2 = g.vertices[c2 as usize].len() as f64;
566        let ds_lower = -1.0 / d1.min(d2);
567        let id = neighbors_pool.len() as u32;
568        neighbors_pool.push(NeighborEntry {
569            c1,
570            c2,
571            delta_sigma: ds_lower,
572            exact: false,
573            weight: w,
574            alive: true,
575        });
576        adj[c1 as usize].push((c2, id));
577        adj[c2 as usize].push((c1, id));
578        heap_push(&mut heap, &neighbors_pool, id);
579    }
580    for v in 0..(2 * n) as usize {
581        adj[v].sort_by_key(|&(other, _)| other);
582    }
583
584    let mut comms = Communities {
585        g,
586        steps,
587        nodes,
588        neighbors_pool,
589        adj,
590        heap,
591        merges: Vec::new(),
592        modularity_trajectory: Vec::new(),
593    };
594
595    // Initial modularity (all singletons): Q_0 = -Σ a_c² where
596    // a_c = total_weight_c / (2m). For loop-free graphs the diagonal
597    // term Σ e_cc = Σ loop_weight / m which is 0; the canonical
598    // Walktrap output matches this.
599    let m = graph_total_edge_weight(graph, weights);
600    comms
601        .modularity_trajectory
602        .push(initial_modularity(&comms, m));
603
604    // Greedy agglomeration loop: pop neighbour entries until none
605    // remain; refine non-exact entries on the fly.
606    while let Some(id) = pop_exact(&mut comms) {
607        merge_pair(&mut comms, id, m);
608    }
609
610    // Recover the partition at argmax modularity.
611    let merges = comms.merges.clone();
612    let modularity = comms.modularity_trajectory.clone();
613    let mut packed = membership_at_best_modularity(n, &merges, &modularity);
614    let nb_clusters = densify_membership(&mut packed);
615
616    Ok(WalktrapResult {
617        membership: packed,
618        nb_clusters,
619        merges,
620        modularity,
621    })
622}
623
624fn graph_total_edge_weight(graph: &Graph, weights: Option<&[f64]>) -> f64 {
625    if let Some(ws) = weights {
626        ws.iter().copied().sum()
627    } else {
628        graph.ecount() as f64
629    }
630}
631
632fn initial_modularity(comms: &Communities, m: f64) -> f64 {
633    if m <= 0.0 {
634        return f64::NAN;
635    }
636    let two_m = 2.0 * m;
637    let mut q = 0.0f64;
638    for c in &comms.nodes {
639        if !c.active {
640            continue;
641        }
642        let a = c.total_weight / two_m;
643        q += c.internal_weight / m - a * a;
644    }
645    q
646}
647
648fn pop_exact(comms: &mut Communities) -> Option<u32> {
649    loop {
650        let id = heap_pop(&mut comms.heap, &comms.neighbors_pool)?;
651        if !comms.neighbors_pool[id as usize].alive {
652            continue;
653        }
654        if comms.neighbors_pool[id as usize].exact {
655            return Some(id);
656        }
657        // Refine: compute the exact Δσ via the L2 distance.
658        refine_delta_sigma(comms, id);
659        comms.neighbors_pool[id as usize].exact = true;
660        heap_push(&mut comms.heap, &comms.neighbors_pool, id);
661    }
662}
663
664fn refine_delta_sigma(comms: &mut Communities, id: u32) {
665    let entry = comms.neighbors_pool[id as usize].clone();
666    let c1 = entry.c1 as usize;
667    let c2 = entry.c2 as usize;
668    ensure_probabilities(comms, entry.c1);
669    ensure_probabilities(comms, entry.c2);
670    let s1 = f64::from(comms.nodes[c1].size);
671    let s2 = f64::from(comms.nodes[c2].size);
672    let p1 = comms.nodes[c1]
673        .probabilities
674        .as_ref()
675        .map_or_else(Vec::new, std::clone::Clone::clone);
676    let p2 = comms.nodes[c2]
677        .probabilities
678        .as_ref()
679        .map_or_else(Vec::new, std::clone::Clone::clone);
680    let d2 = distance_sq(&comms.g, &p1, &p2);
681    // Pons-Latapy: Δσ_{c1, c2} = (s1 * s2 / (s1 + s2)) * d² / N where
682    // N = Σ_v total_weight[v] (the global walk normaliser). We use the
683    // factor exactly as in walktrap_communities.cpp.
684    let n_global = comms.g.total_weight_global;
685    let delta = (s1 * s2 / (s1 + s2)) * d2 / n_global;
686    comms.neighbors_pool[id as usize].delta_sigma = delta;
687}
688
689fn ensure_probabilities(comms: &mut Communities, c: u32) {
690    if comms.nodes[c as usize].probabilities.is_some() {
691        return;
692    }
693    let members = comms.nodes[c as usize].members.clone();
694    let p = compute_probabilities(&comms.g, &members, comms.steps);
695    comms.nodes[c as usize].probabilities = Some(p);
696}
697
698fn merge_pair(comms: &mut Communities, id: u32, m: f64) {
699    let entry = comms.neighbors_pool[id as usize].clone();
700    comms.neighbors_pool[id as usize].alive = false;
701    let c1 = entry.c1;
702    let c2 = entry.c2;
703    let new_id = comms.nodes.len() as u32;
704
705    // Build the merged community.
706    let size = comms.nodes[c1 as usize].size + comms.nodes[c2 as usize].size;
707    let internal = comms.nodes[c1 as usize].internal_weight
708        + comms.nodes[c2 as usize].internal_weight
709        + entry.weight;
710    let total = comms.nodes[c1 as usize].total_weight + comms.nodes[c2 as usize].total_weight;
711    let mut members = Vec::with_capacity(
712        comms.nodes[c1 as usize].members.len() + comms.nodes[c2 as usize].members.len(),
713    );
714    members.extend_from_slice(&comms.nodes[c1 as usize].members);
715    members.extend_from_slice(&comms.nodes[c2 as usize].members);
716
717    // Cache P^t for the merged community using the size-weighted blend.
718    let p_merged: Option<Probabilities> = if let (Some(p1), Some(p2)) = (
719        comms.nodes[c1 as usize].probabilities.as_ref(),
720        comms.nodes[c2 as usize].probabilities.as_ref(),
721    ) {
722        let s1 = f64::from(comms.nodes[c1 as usize].size);
723        let s2 = f64::from(comms.nodes[c2 as usize].size);
724        let denom = s1 + s2;
725        let mut p = vec![0.0f64; comms.g.n as usize];
726        for v in 0..comms.g.n as usize {
727            p[v] = (s1 * p1[v] + s2 * p2[v]) / denom;
728        }
729        Some(p)
730    } else {
731        None
732    };
733
734    comms.nodes.push(CommunityState {
735        active: true,
736        size,
737        internal_weight: internal,
738        total_weight: total,
739        probabilities: p_merged,
740        members,
741    });
742    comms.adj.push(Vec::new());
743    comms.nodes[c1 as usize].active = false;
744    comms.nodes[c2 as usize].active = false;
745    // Free old probability caches to keep peak memory bounded.
746    comms.nodes[c1 as usize].probabilities = None;
747    comms.nodes[c2 as usize].probabilities = None;
748
749    // Rewire adjacency: combine c1's and c2's neighbour lists into the
750    // new community. For each k adjacent to c1, c2, or both: create one
751    // new neighbour entry between new_id and k. Mark the old c1↔k and
752    // c2↔k entries as dead.
753    use std::collections::BTreeMap;
754    let mut combined: BTreeMap<u32, (f64, Option<f64>, Option<f64>)> = BTreeMap::new();
755    // Format: combined[k] = (weight_sum, ds_via_c1, ds_via_c2)
756    for &(k, nid) in &comms.adj[c1 as usize] {
757        let entry_k = &comms.neighbors_pool[nid as usize];
758        if !entry_k.alive {
759            continue;
760        }
761        let ds_known = if entry_k.exact {
762            Some(entry_k.delta_sigma)
763        } else {
764            None
765        };
766        let slot = combined.entry(k).or_insert((0.0, None, None));
767        slot.0 += entry_k.weight;
768        slot.1 = ds_known;
769    }
770    for &(k, nid) in &comms.adj[c2 as usize] {
771        let entry_k = &comms.neighbors_pool[nid as usize];
772        if !entry_k.alive {
773            continue;
774        }
775        let ds_known = if entry_k.exact {
776            Some(entry_k.delta_sigma)
777        } else {
778            None
779        };
780        let slot = combined.entry(k).or_insert((0.0, None, None));
781        slot.0 += entry_k.weight;
782        slot.2 = ds_known;
783    }
784    // Mark all c1's / c2's edges as dead.
785    for &(_, nid) in &comms.adj[c1 as usize] {
786        comms.neighbors_pool[nid as usize].alive = false;
787    }
788    for &(_, nid) in &comms.adj[c2 as usize] {
789        comms.neighbors_pool[nid as usize].alive = false;
790    }
791
792    // Create the merged neighbour entries.
793    for (k, (w, ds_c1, ds_c2)) in combined {
794        if k == new_id || !comms.nodes[k as usize].active {
795            continue;
796        }
797        // Compose a lower bound for Δσ from the three-formula rule.
798        // For the chain cases we don't know an exact value, so fall back
799        // to the singleton-style `-1/min(|adj(new)|, |adj(k)|)` lower
800        // bound which is always ≤ the true value.
801        let (delta, exact) = if let (Some(d1), Some(d2)) = (ds_c1, ds_c2) {
802            // Triangle: both old neighbours were exact.
803            let s1 = f64::from(comms.nodes[c1 as usize].size);
804            let s2 = f64::from(comms.nodes[c2 as usize].size);
805            let sk = f64::from(comms.nodes[k as usize].size);
806            let new_s = s1 + s2;
807            let triangle =
808                ((s1 + sk) * d1 + (s2 + sk) * d2 - sk * entry.delta_sigma) / (new_s + sk);
809            (triangle, true)
810        } else {
811            let d_new = (comms.adj[c1 as usize].len() + comms.adj[c2 as usize].len()) as f64;
812            let d_k = comms.adj[k as usize].len() as f64;
813            let denom = d_new.min(d_k).max(1.0);
814            (-1.0 / denom, false)
815        };
816
817        let id_new = comms.neighbors_pool.len() as u32;
818        comms.neighbors_pool.push(NeighborEntry {
819            c1: new_id.min(k),
820            c2: new_id.max(k),
821            delta_sigma: delta,
822            exact,
823            weight: w,
824            alive: true,
825        });
826        comms.adj[new_id as usize].push((k, id_new));
827        comms.adj[k as usize].push((new_id, id_new));
828        heap_push(&mut comms.heap, &comms.neighbors_pool, id_new);
829    }
830    comms.adj[new_id as usize].sort_by_key(|&(o, _)| o);
831    comms.adj[c1 as usize].clear();
832    comms.adj[c2 as usize].clear();
833
834    // Record merge + modularity step.
835    comms.merges.push([c1, c2]);
836    let q_prev = comms.modularity_trajectory.last().copied().unwrap_or(0.0);
837    // ΔQ = (entry.weight / m) - 2 · a_c1 · a_c2 where a_c = total/2m.
838    let two_m = 2.0 * m;
839    let a1 = comms.nodes[c1 as usize].total_weight / two_m;
840    let a2 = comms.nodes[c2 as usize].total_weight / two_m;
841    let delta_q = entry.weight / m - 2.0 * a1 * a2;
842    comms.modularity_trajectory.push(q_prev + delta_q);
843}
844
845fn membership_at_best_modularity(n: u32, merges: &[[u32; 2]], modularity: &[f64]) -> Vec<u32> {
846    // Find the merge prefix that maximises modularity.
847    let mut best = 0usize;
848    let mut best_q = f64::NEG_INFINITY;
849    for (i, &q) in modularity.iter().enumerate() {
850        if q.is_finite() && q > best_q {
851            best_q = q;
852            best = i;
853        }
854    }
855    // Apply the first `best` merges to a union-find seeded at identity.
856    let mut parent: Vec<u32> = (0..n).collect();
857    let mut rep: Vec<u32> = (0..(n + best as u32)).collect();
858    fn find(rep: &mut [u32], x: u32) -> u32 {
859        let mut r = x;
860        while rep[r as usize] != r {
861            r = rep[r as usize];
862        }
863        let mut cur = x;
864        while rep[cur as usize] != r {
865            let nxt = rep[cur as usize];
866            rep[cur as usize] = r;
867            cur = nxt;
868        }
869        r
870    }
871    for (i, m) in merges.iter().take(best).enumerate() {
872        let new_id = n + i as u32;
873        let r1 = find(&mut rep, m[0]);
874        let r2 = find(&mut rep, m[1]);
875        rep[r1 as usize] = new_id;
876        rep[r2 as usize] = new_id;
877    }
878    for v in 0..n {
879        parent[v as usize] = find(&mut rep, v);
880    }
881    parent
882}
883
884fn densify_membership(membership: &mut [u32]) -> u32 {
885    use std::collections::BTreeMap;
886    let mut remap: BTreeMap<u32, u32> = BTreeMap::new();
887    let mut next = 0u32;
888    for v in membership.iter_mut() {
889        let id = *remap.entry(*v).or_insert_with(|| {
890            let n = next;
891            next += 1;
892            n
893        });
894        *v = id;
895    }
896    next
897}
898
899// ---------------------------------------------------------------------
900// Tests
901// ---------------------------------------------------------------------
902
903#[cfg(test)]
904mod tests {
905    use super::*;
906
907    fn near(a: f64, b: f64, tol: f64) -> bool {
908        (a - b).abs() <= tol || (a.is_nan() && b.is_nan())
909    }
910
911    fn vec_near(a: &[f64], b: &[f64], tol: f64) -> bool {
912        a.len() == b.len() && a.iter().zip(b).all(|(&x, &y)| near(x, y, tol))
913    }
914
915    fn triangle() -> Graph {
916        let mut g = Graph::with_vertices(3);
917        for &(u, v) in &[(0, 1), (1, 2), (2, 0)] {
918            g.add_edge(u, v).expect("add edge");
919        }
920        g
921    }
922
923    /// Mirrors `community_walktrap.out` "Basic test".
924    #[test]
925    fn c_basic_triangle() {
926        let g = triangle();
927        let r = walktrap(&g).expect("walktrap on triangle");
928        assert_eq!(r.merges, vec![[1, 2], [0, 3]]);
929        assert!(vec_near(
930            &r.modularity,
931            &[-1.0 / 3.0, -2.0 / 9.0, 0.0],
932            1e-12
933        ));
934        assert_eq!(r.membership, vec![0, 0, 0]);
935        assert_eq!(r.nb_clusters, 1);
936    }
937
938    /// Mirrors `community_walktrap.out` "Bug 2042".
939    #[test]
940    fn c_bug2042_single_weighted_edge_with_isolate() {
941        let mut g = Graph::with_vertices(3);
942        g.add_edge(0, 1).expect("add edge");
943        let r = walktrap_weighted(&g, &[0.2]).expect("walktrap weighted");
944        assert_eq!(r.merges, vec![[0, 1]]);
945        assert!(vec_near(&r.modularity, &[-0.5, 0.0], 1e-12));
946        // Membership labels are remapped to dense `0..k`; the
947        // partition is `{0,1}` and `{2}`.
948        assert_eq!(r.membership[0], r.membership[1]);
949        assert_ne!(r.membership[0], r.membership[2]);
950        assert_eq!(r.nb_clusters, 2);
951    }
952
953    /// Mirrors `community_walktrap.out` "Small weighted graph".
954    #[test]
955    fn c_ring6_weighted() {
956        let mut g = Graph::with_vertices(6);
957        for &(u, v) in &[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 0)] {
958            g.add_edge(u, v).expect("add edge");
959        }
960        let weights = [1.0_f64, 0.5, 0.25, 0.75, 1.25, 1.5];
961        let r = walktrap_weighted(&g, &weights).expect("walktrap weighted");
962        assert_eq!(r.merges, vec![[3, 4], [1, 2], [0, 5], [7, 8], [6, 9]]);
963        let expected_mod = [
964            -0.196_145_124_716_553_3,
965            -0.089_569_160_997_732_43,
966            -0.014_739_229_024_943_318,
967            0.146_258_503_401_360_5,
968            0.122_448_979_591_836_7,
969            0.0, // zapsmall(1e-15) in C output
970        ];
971        assert_eq!(r.modularity.len(), expected_mod.len());
972        for (a, b) in r.modularity.iter().zip(&expected_mod) {
973            assert!(near(*a, *b, 1e-12), "mod mismatch: {a} vs {b}");
974        }
975        // Argmax index = 3 → membership at that cut:
976        //   merge 0: c3,c4 → c6; merge 1: c1,c2 → c7; merge 2: c0,c5 → c8.
977        // Three communities at cut: {0,5} {1,2} {3,4}. Densify-friendly assertions:
978        assert_eq!(r.nb_clusters, 3);
979        assert_eq!(r.membership[0], r.membership[5]);
980        assert_eq!(r.membership[1], r.membership[2]);
981        assert_eq!(r.membership[3], r.membership[4]);
982        assert_ne!(r.membership[0], r.membership[1]);
983        assert_ne!(r.membership[0], r.membership[3]);
984        assert_ne!(r.membership[1], r.membership[3]);
985    }
986
987    /// Mirrors `community_walktrap.out` "Isolated vertices".
988    #[test]
989    fn c_five_isolated() {
990        let g = Graph::with_vertices(5);
991        let r = walktrap(&g).expect("walktrap isolated");
992        assert!(r.merges.is_empty());
993        assert_eq!(r.modularity.len(), 1);
994        assert!(r.modularity[0].is_nan());
995        assert_eq!(r.membership, vec![0, 1, 2, 3, 4]);
996        assert_eq!(r.nb_clusters, 5);
997    }
998
999    #[test]
1000    fn rejects_directed_graph() {
1001        let g = Graph::new(3, true).expect("directed graph");
1002        assert!(matches!(walktrap(&g), Err(IgraphError::Unsupported(_))));
1003    }
1004
1005    #[test]
1006    fn rejects_steps_zero() {
1007        let g = triangle();
1008        let opts = WalktrapOptions { steps: 0 };
1009        assert!(matches!(
1010            walktrap_with_options(&g, None, opts),
1011            Err(IgraphError::InvalidArgument(_))
1012        ));
1013    }
1014
1015    #[test]
1016    fn rejects_negative_weight() {
1017        let g = triangle();
1018        let w = [1.0_f64, -0.1, 1.0];
1019        assert!(matches!(
1020            walktrap_weighted(&g, &w),
1021            Err(IgraphError::InvalidArgument(_))
1022        ));
1023    }
1024
1025    #[test]
1026    fn rejects_weight_length_mismatch() {
1027        let g = triangle();
1028        let w = [1.0_f64, 1.0];
1029        assert!(matches!(
1030            walktrap_weighted(&g, &w),
1031            Err(IgraphError::InvalidArgument(_))
1032        ));
1033    }
1034
1035    #[test]
1036    fn rejects_nan_weight() {
1037        let g = triangle();
1038        let w = [1.0_f64, f64::NAN, 1.0];
1039        assert!(matches!(
1040            walktrap_weighted(&g, &w),
1041            Err(IgraphError::InvalidArgument(_))
1042        ));
1043    }
1044
1045    #[test]
1046    fn empty_graph_returns_empty_membership() {
1047        let g = Graph::with_vertices(0);
1048        let r = walktrap(&g).expect("walktrap on empty graph");
1049        assert!(r.membership.is_empty());
1050        assert_eq!(r.nb_clusters, 0);
1051        assert!(r.merges.is_empty());
1052        assert_eq!(r.modularity.len(), 1);
1053        assert!(r.modularity[0].is_nan());
1054    }
1055
1056    #[test]
1057    fn single_vertex_no_edges() {
1058        let g = Graph::with_vertices(1);
1059        let r = walktrap(&g).expect("walktrap on single vertex");
1060        assert_eq!(r.membership, vec![0]);
1061        assert_eq!(r.nb_clusters, 1);
1062        assert!(r.merges.is_empty());
1063        // m == 0 → NaN trajectory.
1064        assert!(r.modularity[0].is_nan());
1065    }
1066
1067    /// Two K4 cliques bridged by a single light edge should split into
1068    /// two communities at the optimal modularity cut.
1069    #[test]
1070    fn two_k4_with_bridge() {
1071        let mut g = Graph::with_vertices(8);
1072        for &(u, v) in &[
1073            (0, 1),
1074            (0, 2),
1075            (0, 3),
1076            (1, 2),
1077            (1, 3),
1078            (2, 3),
1079            (4, 5),
1080            (4, 6),
1081            (4, 7),
1082            (5, 6),
1083            (5, 7),
1084            (6, 7),
1085            (3, 4),
1086        ] {
1087            g.add_edge(u, v).expect("add edge");
1088        }
1089        let r = walktrap(&g).expect("walktrap");
1090        assert_eq!(r.nb_clusters, 2);
1091        for v in 0..4u32 {
1092            assert_eq!(r.membership[v as usize], r.membership[0]);
1093        }
1094        for v in 4..8u32 {
1095            assert_eq!(r.membership[v as usize], r.membership[4]);
1096        }
1097        assert_ne!(r.membership[0], r.membership[4]);
1098    }
1099
1100    #[test]
1101    fn multi_edges_are_folded_by_weight_sum() {
1102        // Two K3s + a doubled bridge. Multi-edge handling shouldn't
1103        // change the partition.
1104        let mut g = Graph::with_vertices(6);
1105        for &(u, v) in &[
1106            (0, 1),
1107            (0, 2),
1108            (1, 2),
1109            (3, 4),
1110            (3, 5),
1111            (4, 5),
1112            (2, 3),
1113            (2, 3), // doubled bridge
1114        ] {
1115            g.add_edge(u, v).expect("add edge");
1116        }
1117        let r = walktrap(&g).expect("walktrap");
1118        // Smoke: must succeed and produce a valid partition.
1119        assert_eq!(r.membership.len(), 6);
1120        assert!((1..=6).contains(&r.nb_clusters));
1121    }
1122
1123    #[cfg(all(test, feature = "proptest-harness"))]
1124    mod prop {
1125        use super::*;
1126        use proptest::prelude::*;
1127
1128        prop_compose! {
1129            fn small_undirected_graph()(n in 2u32..=8u32, edges_seed in any::<u64>()) -> Graph {
1130                let mut g = Graph::with_vertices(n);
1131                let mut rng = edges_seed;
1132                let target_m = ((n * (n - 1)) / 2).min(n + 4) as usize;
1133                let mut added = 0usize;
1134                let mut guard = 0usize;
1135                while added < target_m && guard < target_m * 8 {
1136                    rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
1137                    let u = ((rng >> 33) % u64::from(n)) as u32;
1138                    rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
1139                    let v = ((rng >> 33) % u64::from(n)) as u32;
1140                    guard += 1;
1141                    if u == v { continue; }
1142                    if g.add_edge(u, v).is_ok() { added += 1; }
1143                }
1144                g
1145            }
1146        }
1147
1148        proptest! {
1149            #[test]
1150            fn walktrap_partition_is_valid(g in small_undirected_graph()) {
1151                let n = g.vcount();
1152                let r = walktrap(&g).expect("walktrap");
1153                prop_assert_eq!(r.membership.len(), n as usize);
1154                // Densified labels live in 0..nb_clusters.
1155                for &c in &r.membership {
1156                    prop_assert!(c < r.nb_clusters);
1157                }
1158                // Modularity trajectory length matches merges + 1.
1159                if g.ecount() > 0 {
1160                    prop_assert_eq!(r.modularity.len(), r.merges.len() + 1);
1161                    for &q in &r.modularity {
1162                        prop_assert!(q.is_finite(), "modularity must be finite for connected weighted ops");
1163                    }
1164                }
1165            }
1166
1167            #[test]
1168            fn walktrap_steps_in_range_does_not_crash(
1169                g in small_undirected_graph(),
1170                steps in 1u32..=8,
1171            ) {
1172                let r = walktrap_with_options(&g, None, WalktrapOptions { steps })
1173                    .expect("walktrap with options");
1174                prop_assert_eq!(r.membership.len(), g.vcount() as usize);
1175            }
1176        }
1177    }
1178}