Skip to main content

rust_igraph/algorithms/community/
community_to_membership.rs

1//! ALGO-CM-013 — `community_to_membership` (dendrogram → membership).
2//!
3//! Cut a binary dendrogram (the same `merges` matrix produced by
4//! [`crate::walktrap`], [`crate::fast_greedy_modularity`],
5//! [`crate::edge_betweenness_community`], …) at a chosen number of
6//! merges. Returns a membership vector (per-vertex cluster id) and a
7//! cluster-size vector.
8//!
9//! Mirrors `igraph_community_to_membership` from
10//! `references/igraph/src/community/community_misc.c`. The C function
11//! traverses the requested `steps` from the bottom up, assigning a
12//! cluster id to every leaf reachable from each top-level surviving
13//! dendrogram node; leaves never touched stay as singleton clusters.
14//!
15//! Also provides [`le_community_to_membership`], the generalized form
16//! whose dendrogram leaves are arbitrary initial clusters rather than
17//! individual vertices (mirrors `igraph_le_community_to_membership`).
18
19use crate::core::error::{IgraphError, IgraphResult};
20
21/// Result of [`community_to_membership`]: per-vertex cluster ids
22/// densified to `0..k`, plus the size of each cluster in the same
23/// order.
24#[derive(Debug, Clone, PartialEq, Eq)]
25pub struct CommunityToMembershipResult {
26    /// `membership[v] ∈ [0, k)` — the cluster id of vertex `v` after
27    /// `steps` merges have been applied.
28    pub membership: Vec<u32>,
29    /// `csize[c]` — the number of vertices in cluster `c`.
30    pub csize: Vec<u32>,
31}
32
33/// Cut a binary dendrogram at `steps` merges, returning the resulting
34/// membership and cluster-size vectors.
35///
36/// `merges[i] = [c1, c2]` is the `i`-th merge: it combines dendrogram
37/// nodes `c1` and `c2` into the new node with id `nodes + i`. Leaves
38/// are numbered `0..nodes`; nodes `≥ nodes` are internal merges from
39/// earlier rows.
40///
41/// # Errors
42/// - `InvalidArgument` if `steps > merges.len()`.
43/// - `InvalidArgument` if a merge entry would be merged twice.
44/// - `InvalidArgument` if a merge entry references a dendrogram node
45///   that does not exist (`≥ nodes + i` at row `i`).
46///
47/// # Examples
48/// ```
49/// use rust_igraph::community_to_membership;
50///
51/// // Four leaves, one merge: {0,1} | {2} | {3}.
52/// let r = community_to_membership(&[[0, 1]], 4, 1).unwrap();
53/// assert_eq!(r.membership, vec![0, 0, 1, 2]);
54/// assert_eq!(r.csize, vec![2, 1, 1]);
55/// ```
56pub fn community_to_membership(
57    merges: &[[u32; 2]],
58    nodes: u32,
59    steps: u32,
60) -> IgraphResult<CommunityToMembershipResult> {
61    let n = nodes as usize;
62    let rows = merges.len();
63    let steps_usize = steps as usize;
64
65    if steps_usize > rows {
66        return Err(IgraphError::InvalidArgument(format!(
67            "Number of steps is greater than number of rows in merges matrix: \
68             found {steps} steps, {rows} rows."
69        )));
70    }
71
72    // `tmp[i]` is the 1-based cluster id assigned to the supercluster
73    // formed at merge row `i` (0 = unassigned). Mirrors the C version's
74    // 1-based scheme so the final densification pass can detect "never
75    // touched" leaves via 0.
76    let mut membership = vec![0u32; n];
77    // The maximum dendrogram-node id we may encounter is `n + steps - 1`
78    // (row `steps - 1` produces id `n + steps - 1`). The C version
79    // allocates `steps + nodes` slots — same here.
80    let total_nodes = n
81        .checked_add(steps_usize)
82        .ok_or_else(|| IgraphError::InvalidArgument("dendrogram size overflows usize".into()))?;
83    let mut already_merged = vec![false; total_nodes];
84    let mut tmp = vec![0u32; steps_usize];
85
86    let mut found: u32 = 0;
87
88    // Walk the requested steps from the top down so that the cluster
89    // id assigned at row `i` cascades to every leaf merged into it.
90    for i in (0..steps_usize).rev() {
91        let [c1, c2] = merges[i];
92        let limit = u32::try_from(n + i).map_err(|_| {
93            IgraphError::InvalidArgument(format!(
94                "Merges matrix row {i} dendrogram node id overflows u32."
95            ))
96        })?;
97        if c1 >= limit {
98            return Err(IgraphError::InvalidArgument(format!(
99                "Merges matrix row {i}: child {c1} exceeds the maximum valid \
100                 dendrogram node id ({} = nodes + i).",
101                limit - 1
102            )));
103        }
104        if c2 >= limit {
105            return Err(IgraphError::InvalidArgument(format!(
106                "Merges matrix row {i}: child {c2} exceeds the maximum valid \
107                 dendrogram node id ({} = nodes + i).",
108                limit - 1
109            )));
110        }
111        let c1_idx = c1 as usize;
112        let c2_idx = c2 as usize;
113        if already_merged[c1_idx] {
114            return Err(IgraphError::InvalidArgument(format!(
115                "Merges matrix contains multiple merges of cluster {c1}."
116            )));
117        }
118        already_merged[c1_idx] = true;
119        if already_merged[c2_idx] {
120            return Err(IgraphError::InvalidArgument(format!(
121                "Merges matrix contains multiple merges of cluster {c2}."
122            )));
123        }
124        already_merged[c2_idx] = true;
125
126        // New supercluster? Allocate a fresh 1-based id.
127        if tmp[i] == 0 {
128            found += 1;
129            tmp[i] = found;
130        }
131        let cid_one_based = tmp[i];
132
133        for &child in &[c1, c2] {
134            if (child as usize) < n {
135                membership[child as usize] = cid_one_based;
136            } else {
137                // Internal node — propagate the supercluster id down.
138                tmp[child as usize - n] = cid_one_based;
139            }
140        }
141    }
142
143    // Densify: leaves with cid == 0 stay as singletons; every other leaf
144    // becomes cid - 1.
145    let total_clusters_estimate = (n - usize::min(n, 2 * steps_usize)) + found as usize;
146    let mut csize: Vec<u32> = vec![0; found as usize];
147    csize.reserve(total_clusters_estimate);
148
149    for slot in &mut membership {
150        if *slot == 0 {
151            *slot = found;
152            csize.push(1);
153            found += 1;
154        } else {
155            let cid = *slot - 1;
156            *slot = cid;
157            csize[cid as usize] += 1;
158        }
159    }
160
161    Ok(CommunityToMembershipResult { membership, csize })
162}
163
164/// Cut an incomplete dendrogram after `steps` merges, starting from an
165/// initial (non-singleton) cluster assignment.
166///
167/// This is the more general form of [`community_to_membership`]: instead
168/// of assuming each dendrogram leaf is a single vertex, the leaves are
169/// the `m` contiguous clusters described by `membership` (so
170/// `membership[v] ∈ [0, m)`). The `merges` matrix then merges those
171/// clusters, and `steps` of those merges are applied. The output
172/// `membership` reassigns every vertex to its surviving cluster, and
173/// `csize` gives the vertex count of each.
174///
175/// This dendrogram shape is produced by divisive detectors that stop
176/// before splitting the graph down to individual vertices, such as
177/// igraph's leading-eigenvector method.
178///
179/// Mirrors `igraph_le_community_to_membership` from
180/// `references/igraph/src/community/leading_eigenvector.c`.
181///
182/// # Errors
183/// - `InvalidArgument` if `membership` is empty, or `steps >= m` where
184///   `m` is the number of initial clusters.
185/// - `InvalidArgument` if `membership` is not a contiguous `0..m`
186///   labelling (some cluster index in that range is unused).
187/// - Any error propagated from [`community_to_membership`] when applying
188///   the merges to the `m` initial clusters.
189///
190/// # Examples
191/// ```
192/// use rust_igraph::le_community_to_membership;
193///
194/// // Six vertices in three initial clusters: {0,1}=0, {2,3}=1, {4,5}=2.
195/// // One merge combines clusters 0 and 1.
196/// let membership = [0, 0, 1, 1, 2, 2];
197/// let r = le_community_to_membership(&[[0, 1]], 1, &membership).unwrap();
198/// // Clusters 0 and 1 fuse; cluster 2 stays separate.
199/// assert_eq!(r.membership, vec![0, 0, 0, 0, 1, 1]);
200/// assert_eq!(r.csize, vec![4, 2]);
201/// ```
202pub fn le_community_to_membership(
203    merges: &[[u32; 2]],
204    steps: u32,
205    membership: &[u32],
206) -> IgraphResult<CommunityToMembershipResult> {
207    let no_of_nodes = membership.len();
208
209    if no_of_nodes == 0 {
210        return Err(IgraphError::InvalidArgument(
211            "le_community_to_membership: membership vector must not be empty.".into(),
212        ));
213    }
214
215    let components = membership
216        .iter()
217        .copied()
218        .max()
219        .map_or(0u32, |m| m.saturating_add(1));
220
221    if components as usize > no_of_nodes {
222        return Err(IgraphError::InvalidArgument(format!(
223            "Invalid membership vector: number of components ({components}) must not be \
224             greater than the number of nodes ({no_of_nodes})."
225        )));
226    }
227
228    if steps >= components {
229        return Err(IgraphError::InvalidArgument(format!(
230            "Number of steps ({steps}) must be smaller than number of components ({components})."
231        )));
232    }
233
234    // Verify the initial assignment uses every cluster id in `0..components`
235    // (no empty cluster), mirroring the C reference's validation.
236    let mut seen = vec![false; components as usize];
237    for &c in membership {
238        seen[c as usize] = true;
239    }
240    if let Some(empty) = seen.iter().position(|&s| !s) {
241        return Err(IgraphError::InvalidArgument(format!(
242            "Invalid membership vector, empty cluster found at id {empty}."
243        )));
244    }
245
246    // Merge the `components` initial clusters as if they were leaves.
247    let comp = community_to_membership(merges, components, steps)?;
248
249    // Remap each vertex from its old cluster to its new merged cluster.
250    let new_cluster_count = comp.csize.len();
251    let mut new_membership = vec![0u32; no_of_nodes];
252    let mut csize = vec![0u32; new_cluster_count];
253    for (i, &old_cluster) in membership.iter().enumerate() {
254        let new_cluster = comp.membership[old_cluster as usize];
255        new_membership[i] = new_cluster;
256        csize[new_cluster as usize] += 1;
257    }
258
259    Ok(CommunityToMembershipResult {
260        membership: new_membership,
261        csize,
262    })
263}
264
265#[cfg(test)]
266mod tests {
267    use super::*;
268
269    #[test]
270    fn zero_steps_yields_singletons() {
271        let r = community_to_membership(&[], 4, 0).unwrap();
272        assert_eq!(r.membership, vec![0, 1, 2, 3]);
273        assert_eq!(r.csize, vec![1, 1, 1, 1]);
274    }
275
276    #[test]
277    fn full_collapse_to_one_cluster() {
278        // Binary balanced dendrogram on 4 leaves:
279        //   merges[0] = [0, 1] -> 4
280        //   merges[1] = [2, 3] -> 5
281        //   merges[2] = [4, 5] -> 6
282        let merges = vec![[0u32, 1], [2, 3], [4, 5]];
283        let r = community_to_membership(&merges, 4, 3).unwrap();
284        assert_eq!(r.membership, vec![0, 0, 0, 0]);
285        assert_eq!(r.csize, vec![4]);
286    }
287
288    #[test]
289    fn intermediate_cut_two_clusters() {
290        // Same dendrogram cut at 2 steps: {0,1} and {2,3} survive.
291        let merges = vec![[0u32, 1], [2, 3], [4, 5]];
292        let r = community_to_membership(&merges, 4, 2).unwrap();
293        // The C reference numbers clusters in the order it first
294        // touches them while walking merges from the top down — so the
295        // most recent merge (row 1 = [2, 3]) gets cluster 0, the
296        // earlier merge (row 0 = [0, 1]) gets cluster 1.
297        assert_eq!(r.membership, vec![1, 1, 0, 0]);
298        assert_eq!(r.csize, vec![2, 2]);
299    }
300
301    #[test]
302    fn one_merge_with_untouched_leaves() {
303        // 5 leaves, one merge of {0, 2} -> cluster A; 1, 3, 4 stay
304        // singletons.
305        let r = community_to_membership(&[[0u32, 2]], 5, 1).unwrap();
306        // Touched leaves come first (cluster 0); untouched leaves get
307        // ids in encounter order: 1 -> 1, 3 -> 2, 4 -> 3.
308        assert_eq!(r.membership, vec![0, 1, 0, 2, 3]);
309        assert_eq!(r.csize, vec![2, 1, 1, 1]);
310    }
311
312    #[test]
313    fn chained_merges_three_levels() {
314        // Linear chain: ((0,1)->4, (4,2)->5, (5,3)->6) on 4 leaves.
315        // Cut at 3 merges -> one cluster of all 4 leaves.
316        let merges = vec![[0u32, 1], [4, 2], [5, 3]];
317        let r = community_to_membership(&merges, 4, 3).unwrap();
318        assert_eq!(r.membership, vec![0, 0, 0, 0]);
319        assert_eq!(r.csize, vec![4]);
320    }
321
322    #[test]
323    fn empty_graph_zero_steps() {
324        let r = community_to_membership(&[], 0, 0).unwrap();
325        assert!(r.membership.is_empty());
326        assert!(r.csize.is_empty());
327    }
328
329    #[test]
330    fn err_steps_exceeds_rows() {
331        let err = community_to_membership(&[[0u32, 1]], 4, 2).unwrap_err();
332        match err {
333            IgraphError::InvalidArgument(m) => assert!(m.contains("greater than")),
334            other => panic!("unexpected error: {other:?}"),
335        }
336    }
337
338    #[test]
339    fn err_double_merge_of_same_cluster() {
340        // Bogus dendrogram: leaf 0 is "merged" twice.
341        let merges = vec![[0u32, 1], [0, 2]];
342        let err = community_to_membership(&merges, 4, 2).unwrap_err();
343        match err {
344            IgraphError::InvalidArgument(m) => assert!(m.contains("multiple merges")),
345            other => panic!("unexpected error: {other:?}"),
346        }
347    }
348
349    #[test]
350    fn err_merge_entry_references_future_node() {
351        // Row 0 may only reference leaves 0..n (id < n + 0 = n). Here
352        // it points at n + 0 = 4, which is the node row 0 itself
353        // creates — invalid.
354        let merges = vec![[0u32, 4]];
355        let err = community_to_membership(&merges, 4, 1).unwrap_err();
356        match err {
357            IgraphError::InvalidArgument(m) => assert!(m.contains("exceeds")),
358            other => panic!("unexpected error: {other:?}"),
359        }
360    }
361
362    #[test]
363    fn partial_dendrogram_with_fewer_steps_than_rows() {
364        // Three merges given but only cut at 1.
365        let merges = vec![[0u32, 1], [2, 3], [4, 5]];
366        let r = community_to_membership(&merges, 4, 1).unwrap();
367        assert_eq!(r.membership, vec![0, 0, 1, 2]);
368        assert_eq!(r.csize, vec![2, 1, 1]);
369    }
370
371    #[test]
372    fn matches_walktrap_dendrogram_on_two_k4_bridge() {
373        // End-to-end: drive Walktrap on two K4s joined by a bridge,
374        // then re-run the dendrogram through community_to_membership
375        // at the same step count and check the partition matches.
376        use crate::Graph;
377        use crate::walktrap;
378
379        let mut g = Graph::with_vertices(8);
380        for u in 0u32..4 {
381            for v in (u + 1)..4 {
382                g.add_edge(u, v).unwrap();
383            }
384        }
385        for u in 4u32..8 {
386            for v in (u + 1)..8 {
387                g.add_edge(u, v).unwrap();
388            }
389        }
390        g.add_edge(3, 4).unwrap(); // bridge
391
392        let r = walktrap(&g).unwrap();
393        assert_eq!(r.nb_clusters, 2);
394
395        // Best-Q cut is at step `n - nb_clusters`.
396        let step = 8u32 - r.nb_clusters;
397        let cut = community_to_membership(&r.merges, 8, step).unwrap();
398        assert_eq!(cut.membership.len(), 8);
399        // The two K4s should be in distinct clusters under the cut.
400        for u in 0u32..3 {
401            assert_eq!(
402                cut.membership[u as usize],
403                cut.membership[(u + 1) as usize],
404                "vertices {} and {} should share a cluster",
405                u,
406                u + 1
407            );
408        }
409        for u in 4u32..7 {
410            assert_eq!(cut.membership[u as usize], cut.membership[(u + 1) as usize]);
411        }
412        assert_ne!(cut.membership[0], cut.membership[7]);
413        assert_eq!(cut.csize.iter().sum::<u32>(), 8);
414        assert_eq!(cut.csize.len(), 2);
415    }
416
417    // ── le_community_to_membership tests ────────────────────────────
418
419    #[test]
420    fn le_zero_steps_keeps_initial_clusters() {
421        // Three initial clusters, no merges applied.
422        let membership = [0u32, 0, 1, 1, 2, 2];
423        let r = le_community_to_membership(&[], 0, &membership).unwrap();
424        assert_eq!(r.membership, vec![0, 0, 1, 1, 2, 2]);
425        assert_eq!(r.csize, vec![2, 2, 2]);
426    }
427
428    #[test]
429    fn le_one_merge_fuses_two_clusters() {
430        // {0,1}=0, {2,3}=1, {4,5}=2; merge clusters 0 and 1.
431        let membership = [0u32, 0, 1, 1, 2, 2];
432        let r = le_community_to_membership(&[[0, 1]], 1, &membership).unwrap();
433        assert_eq!(r.membership, vec![0, 0, 0, 0, 1, 1]);
434        assert_eq!(r.csize, vec![4, 2]);
435    }
436
437    #[test]
438    fn le_full_collapse() {
439        // Three clusters, two merges: ((0,1)->3, (3,2)->4) -> all one.
440        let membership = [0u32, 0, 1, 2, 2, 2];
441        let merges = [[0u32, 1], [3, 2]];
442        let r = le_community_to_membership(&merges, 2, &membership).unwrap();
443        assert_eq!(r.membership, vec![0, 0, 0, 0, 0, 0]);
444        assert_eq!(r.csize, vec![6]);
445    }
446
447    #[test]
448    fn le_csize_sums_to_node_count() {
449        let membership = [0u32, 1, 2, 3, 0, 1, 2, 3];
450        // Merge clusters 0,1 -> 4; leave 2 and 3 alone (1 step).
451        let r = le_community_to_membership(&[[0, 1]], 1, &membership).unwrap();
452        assert_eq!(r.csize.iter().sum::<u32>(), 8);
453        assert_eq!(r.csize.len(), 3); // 4 components - 1 step
454        for &c in &r.membership {
455            assert!((c as usize) < r.csize.len());
456        }
457    }
458
459    #[test]
460    fn le_err_empty_membership() {
461        let err = le_community_to_membership(&[], 0, &[]).unwrap_err();
462        match err {
463            IgraphError::InvalidArgument(m) => assert!(m.contains("must not be empty")),
464            other => panic!("unexpected error: {other:?}"),
465        }
466    }
467
468    #[test]
469    fn le_err_steps_not_smaller_than_components() {
470        // Two components, asking for 2 steps is invalid.
471        let membership = [0u32, 0, 1, 1];
472        let err = le_community_to_membership(&[[0, 1]], 2, &membership).unwrap_err();
473        match err {
474            IgraphError::InvalidArgument(m) => assert!(m.contains("must be smaller")),
475            other => panic!("unexpected error: {other:?}"),
476        }
477    }
478
479    #[test]
480    fn le_err_empty_cluster() {
481        // Cluster id 1 is skipped: ids present are {0, 2}.
482        let membership = [0u32, 0, 2, 2];
483        let err = le_community_to_membership(&[[0, 1]], 1, &membership).unwrap_err();
484        match err {
485            IgraphError::InvalidArgument(m) => assert!(m.contains("empty cluster")),
486            other => panic!("unexpected error: {other:?}"),
487        }
488    }
489
490    #[test]
491    fn le_matches_singleton_case() {
492        // When every initial cluster is a singleton, the result must
493        // equal community_to_membership on the same dendrogram.
494        let membership = [0u32, 1, 2, 3];
495        let merges = [[0u32, 1], [2, 3]];
496        let le = le_community_to_membership(&merges, 2, &membership).unwrap();
497        let plain = community_to_membership(&merges, 4, 2).unwrap();
498        assert_eq!(le.membership, plain.membership);
499        assert_eq!(le.csize, plain.csize);
500    }
501
502    #[cfg(all(test, feature = "proptest-harness"))]
503    mod prop {
504        use super::*;
505        use proptest::prelude::*;
506
507        // Generate a valid binary dendrogram on `n` leaves with at
508        // most `n - 1` merge rows. Use a SplitMix64-style PRNG to pick
509        // a random pair of live nodes at each step.
510        prop_compose! {
511            fn arb_dendrogram()(n in 2u32..=10u32, seed in any::<u64>())
512                -> (u32, Vec<[u32; 2]>)
513            {
514                let mut rng: u64 = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
515                let mut live: Vec<u32> = (0..n).collect();
516                let mut merges: Vec<[u32; 2]> = Vec::with_capacity((n as usize).saturating_sub(1));
517                let mut next_id: u32 = n;
518                while live.len() >= 2 {
519                    rng = rng.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1);
520                    let i = (rng >> 32) as usize % live.len();
521                    rng = rng.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1);
522                    let mut j = (rng >> 32) as usize % live.len();
523                    if j == i { j = (j + 1) % live.len(); }
524                    let (i, j) = if i < j { (i, j) } else { (j, i) };
525                    let c2 = live.remove(j);
526                    let c1 = live.remove(i);
527                    merges.push([c1, c2]);
528                    live.push(next_id);
529                    next_id += 1;
530                }
531                (n, merges)
532            }
533        }
534
535        proptest! {
536            #![proptest_config(ProptestConfig { cases: 80, ..ProptestConfig::default() })]
537
538            // Cutting at 0 steps reproduces the singleton partition.
539            #[test]
540            fn zero_steps_singletons((n, merges) in arb_dendrogram()) {
541                let r = community_to_membership(&merges, n, 0).unwrap();
542                let expected: Vec<u32> = (0..n).collect();
543                prop_assert_eq!(r.membership, expected);
544                prop_assert_eq!(r.csize, vec![1u32; n as usize]);
545            }
546
547            // Cutting at the full step count collapses to one cluster.
548            #[test]
549            fn full_collapse((n, merges) in arb_dendrogram()) {
550                let steps = u32::try_from(merges.len()).unwrap();
551                let r = community_to_membership(&merges, n, steps).unwrap();
552                let expected = vec![0u32; n as usize];
553                prop_assert_eq!(r.membership, expected);
554                prop_assert_eq!(r.csize, vec![n]);
555            }
556
557            // For every cut: csize sums to n, csize.len() == n - steps,
558            // and every membership entry is < csize.len().
559            #[test]
560            fn cut_invariants((n, merges) in arb_dendrogram(), step_frac in 0u32..=100) {
561                let rows = u32::try_from(merges.len()).unwrap();
562                let steps = (rows * step_frac) / 100;
563                let r = community_to_membership(&merges, n, steps).unwrap();
564                let expected_k = n - steps;
565                prop_assert_eq!(r.csize.len() as u32, expected_k);
566                prop_assert_eq!(r.csize.iter().sum::<u32>(), n);
567                for &c in &r.membership {
568                    prop_assert!(c < expected_k);
569                }
570                for c in 0..expected_k {
571                    let count = r.membership.iter().filter(|&&x| x == c).count() as u32;
572                    prop_assert_eq!(count, r.csize[c as usize]);
573                }
574            }
575        }
576    }
577}