Skip to main content

rust_igraph/algorithms/community/
fast_greedy_modularity.rs

1//! Fast greedy modularity community detection (ALGO-CO-007).
2//!
3//! Counterpart of `igraph_community_fastgreedy()` from
4//! `references/igraph/src/community/fast_modularity.c`.
5//!
6//! Clauset-Newman-Moore (2004): start with every vertex its own
7//! community, then greedily merge the community-pair whose merger
8//! yields the largest improvement in Newman-Girvan modularity. The
9//! Wakita-Tsurumi (2007) data-structure trick maintains a global
10//! max-heap over per-community max-`ΔQ` so each merge runs in
11//! `O((|N(to)| + |N(from)|) log V)`.
12//!
13//! References:
14//! - A. Clauset, M. E. J. Newman, C. Moore. *Finding community
15//!   structure in very large networks*, Phys. Rev. E 70 (2004).
16//!   <https://doi.org/10.1103/PhysRevE.70.066111>
17//! - K. Wakita, T. Tsurumi. *Finding community structure in mega-scale
18//!   social networks*, arXiv:cs/0702048.
19//!
20//! Phase-1 scope: **undirected**, optionally weighted with non-negative
21//! finite weights; rejects multi-edges (matches the upstream constraint)
22//! and directed graphs. Self-loops are accepted and contribute to the
23//! `loop_weight_sum` term, exactly as in C.
24//!
25//! Per-pair `ΔQ` update rules on each merge of `from → to`:
26//! - **Triangle** (both `to` and `from` already know `k`): the new
27//!   pair `(to, k)` has `ΔQ' = ΔQ(to, k) + ΔQ(from, k)`.
28//! - **Chain case 1** (only `to` knows `k`): `ΔQ' = ΔQ(to, k) -
29//!   2 · a[from] · a[k]` (the old `from`-`k` non-edge is now part of
30//!   `to`'s neighbourhood with implicit edge weight 0).
31//! - **Chain case 2** (only `from` knows `k`): `ΔQ' = ΔQ(from, k) -
32//!   2 · a[to] · a[k]`.
33//!
34//! `a[c]` is the fraction of total edge weight incident to community
35//! `c`. The running modularity `q` is updated as `q += ΔQ` on each
36//! accepted merge. The C drives the merge to completion (full
37//! dendrogram of `n-1` rows) and reports the partition at the highest
38//! Q-step; we do the same.
39//!
40//! Complexity: `O(|E| · log V)` per merge × up to `|V|` merges, so
41//! `O(|V| · |E| · log V)` worst case — matches the C reference.
42
43#![allow(
44    clippy::cast_possible_truncation,
45    clippy::cast_possible_wrap,
46    clippy::cast_precision_loss,
47    clippy::cast_sign_loss,
48    clippy::float_cmp,
49    clippy::items_after_statements,
50    clippy::many_single_char_names,
51    clippy::needless_range_loop,
52    clippy::too_many_lines
53)]
54
55use std::cmp::Ordering;
56use std::collections::{BTreeMap, BTreeSet, BinaryHeap};
57
58use crate::algorithms::properties::multiplicity::has_multiple;
59use crate::core::{Graph, IgraphError, IgraphResult};
60
61/// Result of [`fast_greedy_modularity`] / [`fast_greedy_modularity_weighted`].
62#[derive(Debug, Clone)]
63pub struct FastGreedyResult {
64    /// Per-vertex community label of the best-modularity partition,
65    /// densified to `0..nb_clusters`.
66    pub membership: Vec<u32>,
67    /// Number of distinct communities in `membership`.
68    pub nb_clusters: u32,
69    /// Merges in dendrogram order. Each row `[c1, c2]` merges clusters
70    /// `c1` and `c2` into the new cluster `n + i` where `i` is the
71    /// merge index. Same encoding as
72    /// `igraph_community_fastgreedy()` / Walktrap / EB.
73    pub merges: Vec<[u32; 2]>,
74    /// Modularity trajectory. `modularity[i]` is the modularity *after*
75    /// `i` merges have been applied to the all-singletons start
76    /// partition. Length = `merges.len() + 1`. For an edgeless graph
77    /// the single entry is `NaN`, matching the C convention.
78    pub modularity: Vec<f64>,
79}
80
81/// Run fast greedy modularity community detection on `graph` (unweighted).
82///
83/// # Errors
84/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
85/// - [`IgraphError::InvalidArgument`] if the graph has multi-edges.
86///
87/// # Examples
88///
89/// ```
90/// use rust_igraph::{Graph, fast_greedy_modularity};
91///
92/// // Two K5 cliques joined by a single bridge edge (0,5).
93/// let mut g = Graph::with_vertices(10);
94/// for &(u, v) in &[
95///     (0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4),
96///     (2, 3), (2, 4), (3, 4),
97///     (5, 6), (5, 7), (5, 8), (5, 9), (6, 7), (6, 8), (6, 9),
98///     (7, 8), (7, 9), (8, 9),
99///     (0, 5),
100/// ] {
101///     g.add_edge(u, v).unwrap();
102/// }
103/// let r = fast_greedy_modularity(&g).unwrap();
104/// assert_eq!(r.nb_clusters, 2);
105/// assert!((r.modularity.iter().copied().fold(f64::NEG_INFINITY, f64::max) - 0.452_381).abs() < 1e-5);
106/// ```
107pub fn fast_greedy_modularity(graph: &Graph) -> IgraphResult<FastGreedyResult> {
108    fast_greedy_modularity_impl(graph, None)
109}
110
111/// Run fast greedy modularity community detection on `graph` with edge `weights`.
112///
113/// `weights[i]` is the weight of edge id `i`; length must equal
114/// `graph.ecount()`. Weights must be finite and non-negative.
115///
116/// # Errors
117/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
118/// - [`IgraphError::InvalidArgument`] if the graph has multi-edges,
119///   `weights.len() != graph.ecount()`, or any weight is negative / NaN.
120///
121/// # Examples
122///
123/// ```
124/// use rust_igraph::{Graph, fast_greedy_modularity_weighted};
125///
126/// let mut g = Graph::with_vertices(4);
127/// g.add_edge(0, 1).unwrap();
128/// g.add_edge(1, 2).unwrap();
129/// g.add_edge(2, 3).unwrap();
130/// let r = fast_greedy_modularity_weighted(&g, &[1.0, 2.0, 1.0]).unwrap();
131/// assert!(r.nb_clusters >= 1);
132/// ```
133pub fn fast_greedy_modularity_weighted(
134    graph: &Graph,
135    weights: &[f64],
136) -> IgraphResult<FastGreedyResult> {
137    fast_greedy_modularity_impl(graph, Some(weights))
138}
139
140fn fast_greedy_modularity_impl(
141    graph: &Graph,
142    weights: Option<&[f64]>,
143) -> IgraphResult<FastGreedyResult> {
144    if graph.is_directed() {
145        return Err(IgraphError::Unsupported(
146            "fast_greedy_modularity is undirected-only; directed variant is a follow-up AWU",
147        ));
148    }
149    let n = graph.vcount();
150    let n_us = n as usize;
151    let m_us = graph.ecount();
152
153    if let Some(w) = weights {
154        if w.len() != m_us {
155            return Err(IgraphError::InvalidArgument(
156                "weights length must equal graph.ecount()".into(),
157            ));
158        }
159        for &x in w {
160            if x.is_nan() {
161                return Err(IgraphError::InvalidArgument(
162                    "weights must not be NaN".into(),
163                ));
164            }
165            if x < 0.0 {
166                return Err(IgraphError::InvalidArgument(
167                    "weights must be non-negative".into(),
168                ));
169            }
170        }
171    }
172    if has_multiple(graph)? {
173        return Err(IgraphError::InvalidArgument(
174            "fast_greedy_modularity requires graphs without multi-edges".into(),
175        ));
176    }
177
178    if n == 0 {
179        return Ok(FastGreedyResult {
180            membership: Vec::new(),
181            nb_clusters: 0,
182            merges: Vec::new(),
183            modularity: Vec::new(),
184        });
185    }
186
187    // a[c] starts as raw incident strength; normalised by 2W below.
188    let mut a = vec![0.0_f64; n_us];
189    let mut loop_weight_sum = 0.0_f64;
190    let mut weight_sum = 0.0_f64;
191
192    for e in 0..m_us {
193        let (u, v) = graph.edge(e as u32)?;
194        let w = match weights {
195            Some(ws) => ws[e],
196            None => 1.0,
197        };
198        weight_sum += w;
199        a[u as usize] += w;
200        a[v as usize] += w;
201        if u == v {
202            loop_weight_sum += 2.0 * w;
203        }
204    }
205
206    if m_us == 0 {
207        // No edges: all singletons; Q undefined → NaN (matches C).
208        return Ok(FastGreedyResult {
209            membership: (0..n).collect(),
210            nb_clusters: n,
211            merges: Vec::new(),
212            modularity: vec![f64::NAN],
213        });
214    }
215
216    let two_w = 2.0 * weight_sum;
217    // a[i] /= 2W
218    if two_w > 0.0 {
219        for slot in &mut a {
220            *slot /= two_w;
221        }
222    }
223
224    // Per-community sorted neighbour map (neighbour id -> ΔQ).
225    let mut neis: Vec<BTreeMap<u32, f64>> = vec![BTreeMap::new(); n_us];
226
227    // Initial ΔQ for each non-loop edge.
228    for e in 0..m_us {
229        let (u, v) = graph.edge(e as u32)?;
230        if u == v {
231            continue;
232        }
233        let w = match weights {
234            Some(ws) => ws[e],
235            None => 1.0,
236        };
237        // dq = 2 * (w / 2W - a[u] * a[v])   (after a has been normalised by 2W)
238        let dq = 2.0 * (w / two_w - a[u as usize] * a[v as usize]);
239        // No multi-edges by precondition: assert by inserting.
240        neis[u as usize].insert(v, dq);
241        neis[v as usize].insert(u, dq);
242    }
243
244    // Initial modularity: loop_weight_sum / 2W - sum(a^2).
245    let mut q = if two_w > 0.0 {
246        let mut s = loop_weight_sum / two_w;
247        for &ai in &a {
248            s -= ai * ai;
249        }
250        s
251    } else {
252        0.0
253    };
254
255    // Build heap of (dq, c1, c2) for each canonical (c1 < c2) live edge.
256    let mut heap: BinaryHeap<HeapEntry> = BinaryHeap::new();
257    for c in 0..n_us {
258        for (&k, &dq) in &neis[c] {
259            if (c as u32) < k {
260                heap.push(HeapEntry {
261                    dq,
262                    c1: c as u32,
263                    c2: k,
264                });
265            }
266        }
267    }
268
269    let mut alive = vec![true; n_us];
270    let mut size = vec![1_u32; n_us];
271    let mut id: Vec<u32> = (0..n).collect();
272
273    let total_merges_cap = n_us.saturating_sub(1);
274    let mut merges: Vec<[u32; 2]> = Vec::with_capacity(total_merges_cap);
275    let mut modularity_traj: Vec<f64> = Vec::with_capacity(total_merges_cap + 1);
276    modularity_traj.push(q);
277
278    let mut best_q = q;
279    let mut best_n_merges = 0_usize;
280
281    while merges.len() < total_merges_cap {
282        // Pop until we find a non-stale entry.
283        let chosen = loop {
284            let Some(e) = heap.pop() else {
285                break None;
286            };
287            if !alive[e.c1 as usize] || !alive[e.c2 as usize] {
288                continue;
289            }
290            // Validate the stored dq still matches.
291            if let Some(&cur) = neis[e.c1 as usize].get(&e.c2) {
292                if cur == e.dq {
293                    break Some(e);
294                }
295            }
296        };
297        let Some(entry) = chosen else {
298            // No more inter-community edges (disconnected components remain).
299            break;
300        };
301
302        // Choose `to` = smaller id, `from` = larger id. This matches the
303        // common convention; the dendrogram encoding stores [to, from].
304        let (to, from) = if entry.c1 < entry.c2 {
305            (entry.c1, entry.c2)
306        } else {
307            (entry.c2, entry.c1)
308        };
309
310        q += entry.dq;
311
312        // Take both neighbour maps out so we can iterate freely without
313        // overlapping borrows.
314        let to_neis = std::mem::take(&mut neis[to as usize]);
315        let from_neis = std::mem::take(&mut neis[from as usize]);
316
317        // Collect the union of neighbour keys, skipping the two endpoints.
318        let mut all_keys: BTreeSet<u32> = BTreeSet::new();
319        for &k in to_neis.keys() {
320            if k != from {
321                all_keys.insert(k);
322            }
323        }
324        for &k in from_neis.keys() {
325            if k != to {
326                all_keys.insert(k);
327            }
328        }
329
330        let mut new_to_neis: BTreeMap<u32, f64> = BTreeMap::new();
331        for k in all_keys {
332            let in_to = to_neis.get(&k).copied();
333            let in_from = from_neis.get(&k).copied();
334            let new_dq = match (in_to, in_from) {
335                (Some(t), Some(f)) => t + f, // triangle
336                (Some(t), None) => t - 2.0 * a[from as usize] * a[k as usize], // chain 1
337                (None, Some(f)) => f - 2.0 * a[to as usize] * a[k as usize], // chain 2
338                (None, None) => unreachable!(),
339            };
340            new_to_neis.insert(k, new_dq);
341
342            // Mirror in k's map: drop `from`, set `to` -> new_dq.
343            let km = &mut neis[k as usize];
344            km.remove(&from);
345            km.insert(to, new_dq);
346
347            // Push the updated edge onto the heap in canonical (c1<c2) order.
348            let (c1, c2) = if to < k { (to, k) } else { (k, to) };
349            heap.push(HeapEntry { dq: new_dq, c1, c2 });
350        }
351        neis[to as usize] = new_to_neis;
352
353        alive[from as usize] = false;
354        size[to as usize] += size[from as usize];
355        a[to as usize] += a[from as usize];
356        a[from as usize] = 0.0;
357
358        merges.push([id[to as usize], id[from as usize]]);
359        id[to as usize] = u32::try_from(n_us)
360            .map_err(|_| IgraphError::Internal("vcount exceeds u32::MAX"))?
361            + u32::try_from(merges.len() - 1)
362                .map_err(|_| IgraphError::Internal("merges.len exceeds u32::MAX"))?;
363
364        modularity_traj.push(q);
365
366        if q > best_q {
367            best_q = q;
368            best_n_merges = merges.len();
369        }
370    }
371
372    // Build membership by applying the first best_n_merges merges to a
373    // singleton labelling. label[v] = current cluster label.
374    let mut label: Vec<u32> = (0..n).collect();
375    // Walk merges in chronological order; each merge produces a fresh
376    // cluster id n+i. Rewrite all vertices whose current label matches
377    // either side of the merge to the new id.
378    for (i, m) in merges.iter().take(best_n_merges).enumerate() {
379        let [c_a, c_b] = *m;
380        let new_id = n + u32::try_from(i)
381            .map_err(|_| IgraphError::Internal("merge index exceeds u32::MAX"))?;
382        for slot in &mut label {
383            if *slot == c_a || *slot == c_b {
384                *slot = new_id;
385            }
386        }
387    }
388
389    let (membership, nb_clusters) = densify_labels(&label);
390
391    Ok(FastGreedyResult {
392        membership,
393        nb_clusters,
394        merges,
395        modularity: modularity_traj,
396    })
397}
398
399fn densify_labels(labels: &[u32]) -> (Vec<u32>, u32) {
400    let mut remap: Vec<(u32, u32)> = Vec::new();
401    let mut out = Vec::with_capacity(labels.len());
402    for &l in labels {
403        let dense = if let Some(&(_, d)) = remap.iter().find(|&&(orig, _)| orig == l) {
404            d
405        } else {
406            let d = u32::try_from(remap.len()).unwrap_or(u32::MAX);
407            remap.push((l, d));
408            d
409        };
410        out.push(dense);
411    }
412    let k = u32::try_from(remap.len()).unwrap_or(u32::MAX);
413    (out, k)
414}
415
416/// Heap entry: max-heap on `dq`, ties broken by smaller `(c1, c2)`
417/// winning, so the greedy choice is deterministic across runs.
418#[derive(Debug, Clone, Copy)]
419struct HeapEntry {
420    dq: f64,
421    c1: u32,
422    c2: u32,
423}
424
425impl PartialEq for HeapEntry {
426    fn eq(&self, other: &Self) -> bool {
427        self.dq.to_bits() == other.dq.to_bits() && self.c1 == other.c1 && self.c2 == other.c2
428    }
429}
430impl Eq for HeapEntry {}
431
432impl PartialOrd for HeapEntry {
433    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
434        Some(self.cmp(other))
435    }
436}
437
438impl Ord for HeapEntry {
439    fn cmp(&self, other: &Self) -> Ordering {
440        // primary: higher dq is "greater" (max-heap).
441        match self.dq.partial_cmp(&other.dq) {
442            Some(Ordering::Equal) | None => {
443                // tie: smaller (c1, c2) is "greater" so pops first.
444                other.c1.cmp(&self.c1).then_with(|| other.c2.cmp(&self.c2))
445            }
446            Some(o) => o,
447        }
448    }
449}
450
451#[cfg(test)]
452mod tests {
453    use super::*;
454
455    fn two_k5_bridge() -> Graph {
456        let mut g = Graph::with_vertices(10);
457        for u in 0..5u32 {
458            for v in (u + 1)..5 {
459                g.add_edge(u, v).expect("clique edge");
460            }
461        }
462        for u in 5..10u32 {
463            for v in (u + 1)..10 {
464                g.add_edge(u, v).expect("clique edge");
465            }
466        }
467        g.add_edge(0, 5).expect("bridge");
468        g
469    }
470
471    #[test]
472    fn empty_graph_returns_empty() {
473        let g = Graph::with_vertices(0);
474        let r = fast_greedy_modularity(&g).unwrap();
475        assert!(r.membership.is_empty());
476        assert_eq!(r.nb_clusters, 0);
477        assert!(r.merges.is_empty());
478        assert!(r.modularity.is_empty());
479    }
480
481    #[test]
482    fn edgeless_graph_yields_singletons_with_nan() {
483        let g = Graph::with_vertices(5);
484        let r = fast_greedy_modularity(&g).unwrap();
485        assert_eq!(r.membership, vec![0, 1, 2, 3, 4]);
486        assert_eq!(r.nb_clusters, 5);
487        assert!(r.merges.is_empty());
488        assert_eq!(r.modularity.len(), 1);
489        assert!(r.modularity[0].is_nan());
490    }
491
492    #[test]
493    fn single_vertex_no_edges() {
494        let g = Graph::with_vertices(1);
495        let r = fast_greedy_modularity(&g).unwrap();
496        assert_eq!(r.membership, vec![0]);
497        assert_eq!(r.nb_clusters, 1);
498        assert!(r.merges.is_empty());
499    }
500
501    #[test]
502    fn two_k5_bridge_q_matches_upstream() {
503        let g = two_k5_bridge();
504        let r = fast_greedy_modularity(&g).unwrap();
505        let best_q = r
506            .modularity
507            .iter()
508            .copied()
509            .fold(f64::NEG_INFINITY, f64::max);
510        assert!(
511            (best_q - 0.452_381).abs() < 1e-5,
512            "best Q = {best_q}, expected ≈ 0.452381"
513        );
514        assert_eq!(r.nb_clusters, 2);
515        // Vertices 0..4 share one community, 5..9 the other.
516        for v in 1..5 {
517            assert_eq!(r.membership[v], r.membership[0]);
518        }
519        for v in 6..10 {
520            assert_eq!(r.membership[v], r.membership[5]);
521        }
522        assert_ne!(r.membership[0], r.membership[5]);
523    }
524
525    #[test]
526    fn two_k5_bridge_with_uniform_weights_matches_unweighted() {
527        let g = two_k5_bridge();
528        let weights = vec![2.0_f64; g.ecount()];
529        let r = fast_greedy_modularity_weighted(&g, &weights).unwrap();
530        let best_q = r
531            .modularity
532            .iter()
533            .copied()
534            .fold(f64::NEG_INFINITY, f64::max);
535        assert!((best_q - 0.452_381).abs() < 1e-5);
536        assert_eq!(r.nb_clusters, 2);
537    }
538
539    #[test]
540    fn dendrogram_size_bounded_by_n_minus_1() {
541        let g = two_k5_bridge();
542        let r = fast_greedy_modularity(&g).unwrap();
543        assert!(r.merges.len() <= 9);
544        assert_eq!(r.modularity.len(), r.merges.len() + 1);
545    }
546
547    #[test]
548    fn two_disjoint_k4_yields_two_components() {
549        let mut g = Graph::with_vertices(8);
550        for u in 0..4u32 {
551            for v in (u + 1)..4 {
552                g.add_edge(u, v).unwrap();
553            }
554        }
555        for u in 4..8u32 {
556            for v in (u + 1)..8 {
557                g.add_edge(u, v).unwrap();
558            }
559        }
560        let r = fast_greedy_modularity(&g).unwrap();
561        // Disconnected → algorithm stops before crossing components.
562        assert_eq!(r.nb_clusters, 2);
563        for v in 1..4 {
564            assert_eq!(r.membership[v], r.membership[0]);
565        }
566        for v in 5..8 {
567            assert_eq!(r.membership[v], r.membership[4]);
568        }
569        assert_ne!(r.membership[0], r.membership[4]);
570    }
571
572    #[test]
573    fn rejects_directed_graph() {
574        let mut g = Graph::new(3, true).expect("3v directed graph");
575        g.add_edge(0, 1).expect("0-1");
576        let err = fast_greedy_modularity(&g).unwrap_err();
577        assert!(matches!(err, IgraphError::Unsupported(_)));
578    }
579
580    #[test]
581    fn rejects_multi_edges() {
582        let mut g = Graph::with_vertices(3);
583        g.add_edge(0, 1).unwrap();
584        g.add_edge(0, 1).unwrap();
585        g.add_edge(1, 2).unwrap();
586        let err = fast_greedy_modularity(&g).unwrap_err();
587        assert!(matches!(err, IgraphError::InvalidArgument(_)));
588    }
589
590    #[test]
591    fn rejects_negative_weight() {
592        let mut g = Graph::with_vertices(3);
593        g.add_edge(0, 1).unwrap();
594        g.add_edge(1, 2).unwrap();
595        let err = fast_greedy_modularity_weighted(&g, &[1.0, -0.5]).unwrap_err();
596        assert!(matches!(err, IgraphError::InvalidArgument(_)));
597    }
598
599    #[test]
600    fn rejects_weight_length_mismatch() {
601        let mut g = Graph::with_vertices(3);
602        g.add_edge(0, 1).unwrap();
603        g.add_edge(1, 2).unwrap();
604        let err = fast_greedy_modularity_weighted(&g, &[1.0]).unwrap_err();
605        assert!(matches!(err, IgraphError::InvalidArgument(_)));
606    }
607
608    #[test]
609    fn densify_labels_basic() {
610        let (m, k) = densify_labels(&[5, 5, 7, 5, 9, 7]);
611        assert_eq!(m, vec![0, 0, 1, 0, 2, 1]);
612        assert_eq!(k, 3);
613    }
614}