Skip to main content

rust_igraph/algorithms/community/
reindex_membership.rs

1//! ALGO-CM-014 — `reindex_membership` (densify membership labels).
2//!
3//! Take any `membership[v] = c` vector with arbitrary cluster ids
4//! (including gaps and negatives in the C version's `int` typing —
5//! here, any `u32`) and produce a new, densified `0..k-1` labelling
6//! such that the *partition* is identical. Optionally also report the
7//! old cluster id that each new id replaced, in encounter order.
8//!
9//! Mirrors `igraph_reindex_membership` /
10//! `igraph_i_reindex_membership_large` in
11//! `references/igraph/src/community/community_misc.c`.
12//!
13//! The C implementation has a fast path when every id is already in
14//! `0..n`, and a sort-based fallback otherwise. Because Rust's
15//! `Vec<u32>` already has cheap key-set operations via `HashMap`, we
16//! use a single `BTreeMap`-free first-occurrence scan: O(n) when the
17//! id space fits in a `Vec` (length `max_id + 1` is bounded by `n`),
18//! and `O(n log k)` via `BTreeMap` otherwise. Both branches preserve
19//! the *encounter order* — the new id of an old id is the index where
20//! it was first seen.
21
22use std::collections::BTreeMap;
23
24use crate::core::error::IgraphResult;
25
26/// Result of [`reindex_membership`]: a densified membership vector
27/// `[0, k)`, the old cluster id behind each new id (so `new_to_old[i]`
28/// is the original id that now maps to `i`), and the number of
29/// distinct clusters `k = new_to_old.len()`.
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct ReindexMembershipResult {
32    /// `membership[v] ∈ [0, k)` — the densified cluster id of vertex
33    /// `v`.
34    pub membership: Vec<u32>,
35    /// `new_to_old[i]` — the original cluster id that vertex
36    /// encounters with this id now map to. `nb_clusters = new_to_old.len()`.
37    pub new_to_old: Vec<u32>,
38}
39
40impl ReindexMembershipResult {
41    /// Number of distinct clusters in the densified membership.
42    ///
43    /// # Examples
44    ///
45    /// ```
46    /// use rust_igraph::reindex_membership;
47    ///
48    /// let r = reindex_membership(&[5, 5, 10, 10, 5]).unwrap();
49    /// assert_eq!(r.nb_clusters(), 2);
50    /// ```
51    #[must_use]
52    pub fn nb_clusters(&self) -> u32 {
53        // `new_to_old` cannot exceed `membership.len()`, which is a
54        // `Vec` length bounded by `isize::MAX`. The cast is safe for
55        // any practical input.
56        u32::try_from(self.new_to_old.len()).unwrap_or(u32::MAX)
57    }
58}
59
60/// Relabel a membership vector so cluster ids run contiguously over
61/// `0..k`, where `k` is the number of distinct clusters. The *partition*
62/// is unchanged: two vertices share a cluster in the output iff they
63/// shared one in the input. New ids are assigned in *first-occurrence
64/// order* — the first cluster id you encounter when sweeping left to
65/// right becomes `0`, the next new one becomes `1`, and so on.
66///
67/// # Examples
68/// ```
69/// use rust_igraph::reindex_membership;
70///
71/// // Gaps and out-of-order ids are densified by first occurrence.
72/// let r = reindex_membership(&[7, 7, 3, 7, 9, 3]).unwrap();
73/// assert_eq!(r.membership, vec![0, 0, 1, 0, 2, 1]);
74/// assert_eq!(r.new_to_old, vec![7, 3, 9]);
75/// assert_eq!(r.nb_clusters(), 3);
76/// ```
77///
78/// # Errors
79/// Currently infallible — returns `IgraphResult` for API symmetry with
80/// other community helpers and to allow adding fallible checks later
81/// without a breaking change.
82pub fn reindex_membership(membership: &[u32]) -> IgraphResult<ReindexMembershipResult> {
83    let n = membership.len();
84
85    // Empty input is the trivial 0-cluster case.
86    if n == 0 {
87        return Ok(ReindexMembershipResult {
88            membership: Vec::new(),
89            new_to_old: Vec::new(),
90        });
91    }
92
93    // Mirror the C implementation: fast path when every id is in
94    // `0..n` — we can use a flat `Vec<u32>` of size `n` as the lookup,
95    // where `lookup[old] = new + 1` and 0 means "not yet seen".
96    let mut max_id: u32 = 0;
97    for &c in membership {
98        if c > max_id {
99            max_id = c;
100        }
101    }
102
103    if (max_id as usize) < n {
104        let mut lookup: Vec<u32> = vec![0; n];
105        let mut new_to_old: Vec<u32> = Vec::new();
106        let mut out: Vec<u32> = vec![0; n];
107        let mut next_new: u32 = 0;
108
109        for (i, &old) in membership.iter().enumerate() {
110            let slot = &mut lookup[old as usize];
111            if *slot == 0 {
112                *slot = next_new + 1;
113                new_to_old.push(old);
114                next_new = next_new.saturating_add(1);
115            }
116            out[i] = *slot - 1;
117        }
118
119        return Ok(ReindexMembershipResult {
120            membership: out,
121            new_to_old,
122        });
123    }
124
125    // Sparse / large-id fallback: BTreeMap keyed by old cluster id.
126    let mut lookup: BTreeMap<u32, u32> = BTreeMap::new();
127    let mut new_to_old: Vec<u32> = Vec::new();
128    let mut out: Vec<u32> = vec![0; n];
129    let mut next_new: u32 = 0;
130
131    for (i, &old) in membership.iter().enumerate() {
132        let new_id = if let Some(&nid) = lookup.get(&old) {
133            nid
134        } else {
135            let nid = next_new;
136            lookup.insert(old, nid);
137            new_to_old.push(old);
138            next_new = next_new.saturating_add(1);
139            nid
140        };
141        out[i] = new_id;
142    }
143
144    Ok(ReindexMembershipResult {
145        membership: out,
146        new_to_old,
147    })
148}
149
150#[cfg(test)]
151mod tests {
152    use super::*;
153
154    #[test]
155    fn empty_membership() {
156        let r = reindex_membership(&[]).unwrap();
157        assert!(r.membership.is_empty());
158        assert!(r.new_to_old.is_empty());
159        assert_eq!(r.nb_clusters(), 0);
160    }
161
162    #[test]
163    fn already_densified_identity() {
164        let r = reindex_membership(&[0, 1, 2, 0, 1, 2]).unwrap();
165        assert_eq!(r.membership, vec![0, 1, 2, 0, 1, 2]);
166        assert_eq!(r.new_to_old, vec![0, 1, 2]);
167    }
168
169    #[test]
170    fn singleton_cluster_collapses_to_zero() {
171        let r = reindex_membership(&[42, 42, 42, 42]).unwrap();
172        assert_eq!(r.membership, vec![0, 0, 0, 0]);
173        assert_eq!(r.new_to_old, vec![42]);
174        assert_eq!(r.nb_clusters(), 1);
175    }
176
177    #[test]
178    fn gaps_are_compressed() {
179        // Old ids: 5, 5, 10, 5, 0 -> first-encounter: 5 -> 0, 10 -> 1, 0 -> 2.
180        let r = reindex_membership(&[5, 5, 10, 5, 0]).unwrap();
181        assert_eq!(r.membership, vec![0, 0, 1, 0, 2]);
182        assert_eq!(r.new_to_old, vec![5, 10, 0]);
183    }
184
185    #[test]
186    fn reverse_order_relabels_to_descending_in_new_to_old() {
187        // 3 vertices, ids 2, 1, 0. New ids assigned in first-encounter:
188        // 2 -> 0, 1 -> 1, 0 -> 2.
189        let r = reindex_membership(&[2, 1, 0]).unwrap();
190        assert_eq!(r.membership, vec![0, 1, 2]);
191        assert_eq!(r.new_to_old, vec![2, 1, 0]);
192    }
193
194    #[test]
195    fn singletons() {
196        let r = reindex_membership(&[7, 8, 9, 10]).unwrap();
197        assert_eq!(r.membership, vec![0, 1, 2, 3]);
198        assert_eq!(r.new_to_old, vec![7, 8, 9, 10]);
199    }
200
201    #[test]
202    fn large_id_takes_btreemap_fallback() {
203        // max_id (1_000_000) >> n (4), forces the sparse branch.
204        let r = reindex_membership(&[1_000_000, 7, 1_000_000, 7]).unwrap();
205        assert_eq!(r.membership, vec![0, 1, 0, 1]);
206        assert_eq!(r.new_to_old, vec![1_000_000, 7]);
207    }
208
209    #[test]
210    fn fast_path_max_id_equals_n_minus_1() {
211        // Edge of the fast-path branch: max_id == n - 1.
212        let r = reindex_membership(&[3, 0, 1, 2]).unwrap();
213        assert_eq!(r.membership, vec![0, 1, 2, 3]);
214        assert_eq!(r.new_to_old, vec![3, 0, 1, 2]);
215    }
216
217    #[test]
218    fn fast_path_max_id_equals_n_takes_sparse_branch() {
219        // max_id (4) == n (4), so max_id < n is false -> sparse path.
220        // The output is still correct.
221        let r = reindex_membership(&[4, 0, 1, 2]).unwrap();
222        assert_eq!(r.membership, vec![0, 1, 2, 3]);
223        assert_eq!(r.new_to_old, vec![4, 0, 1, 2]);
224    }
225
226    #[test]
227    fn preserves_partition_under_relabel() {
228        // Two vertices share a cluster in the input iff they share one
229        // in the output, regardless of label values.
230        let inputs: &[&[u32]] = &[
231            &[0, 0, 1, 2, 1, 2, 0],
232            &[5, 5, 1_000, 7, 1_000, 7, 5],
233            &[u32::MAX, 0, u32::MAX, 1, 0],
234        ];
235        for input in inputs {
236            let r = reindex_membership(input).unwrap();
237            for i in 0..input.len() {
238                for j in 0..input.len() {
239                    assert_eq!(
240                        input[i] == input[j],
241                        r.membership[i] == r.membership[j],
242                        "partition diverged at ({i}, {j}) for input {input:?}",
243                    );
244                }
245            }
246            // Densified labels really do run 0..k.
247            let k = r.nb_clusters();
248            for &c in &r.membership {
249                assert!(c < k);
250            }
251            for c in 0..k {
252                assert!(r.membership.contains(&c));
253            }
254            assert_eq!(r.new_to_old.len(), k as usize);
255        }
256    }
257
258    #[test]
259    fn fast_path_matches_sparse_path_for_in_range_ids() {
260        // Same input under both branches should yield the same result.
261        // The fast path triggers when max_id < n. We can force the
262        // sparse path by appending an out-of-range tail value, then
263        // truncating the result, and comparing the in-range prefix.
264        let input: Vec<u32> = vec![3, 0, 3, 1, 2, 0, 1];
265        let fast = reindex_membership(&input).unwrap();
266        let len_u32 = u32::try_from(input.len()).expect("test input length fits u32");
267        assert!(input.iter().copied().max().unwrap_or(0) < len_u32);
268        // Build a "sparse-branch" check by manually constructing the
269        // expected first-occurrence mapping and comparing.
270        let mut expected_new_to_old: Vec<u32> = Vec::new();
271        let mut expected: Vec<u32> = Vec::with_capacity(input.len());
272        for &old in &input {
273            let pos = expected_new_to_old
274                .iter()
275                .position(|&x| x == old)
276                .unwrap_or_else(|| {
277                    expected_new_to_old.push(old);
278                    expected_new_to_old.len() - 1
279                });
280            expected.push(u32::try_from(pos).expect("test bookkeeping fits u32"));
281        }
282        assert_eq!(fast.membership, expected);
283        assert_eq!(fast.new_to_old, expected_new_to_old);
284    }
285
286    #[cfg(all(test, feature = "proptest-harness"))]
287    mod prop {
288        use super::*;
289        use proptest::prelude::*;
290
291        prop_compose! {
292            fn arb_membership()(
293                n in 1usize..=64,
294                seed in any::<u64>(),
295                max_id_kind in 0u32..3,
296            ) -> Vec<u32> {
297                let mut rng: u64 = seed.wrapping_add(0xA5A5_5A5A_DEAD_BEEF);
298                let max_id: u32 = match max_id_kind {
299                    0 => (n as u32).saturating_sub(1).max(1),       // fast path
300                    1 => (n as u32).saturating_mul(4).max(1),       // straddles boundary
301                    _ => 1_000_000,                                  // sparse path
302                };
303                (0..n).map(|_| {
304                    rng = rng.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1);
305                    (rng >> 32) as u32 % (max_id + 1)
306                }).collect()
307            }
308        }
309
310        proptest! {
311            #![proptest_config(ProptestConfig { cases: 80, ..ProptestConfig::default() })]
312
313            #[test]
314            fn partition_preserved(input in arb_membership()) {
315                let r = reindex_membership(&input).unwrap();
316                prop_assert_eq!(r.membership.len(), input.len());
317                for i in 0..input.len() {
318                    for j in (i + 1)..input.len() {
319                        prop_assert_eq!(
320                            input[i] == input[j],
321                            r.membership[i] == r.membership[j],
322                        );
323                    }
324                }
325            }
326
327            #[test]
328            fn ids_are_contiguous(input in arb_membership()) {
329                let r = reindex_membership(&input).unwrap();
330                let k = r.nb_clusters();
331                for &c in &r.membership {
332                    prop_assert!(c < k);
333                }
334                // Every id in 0..k appears at least once.
335                for c in 0..k {
336                    prop_assert!(r.membership.contains(&c));
337                }
338            }
339
340            #[test]
341            fn new_to_old_round_trips(input in arb_membership()) {
342                let r = reindex_membership(&input).unwrap();
343                prop_assert_eq!(r.new_to_old.len(), r.nb_clusters() as usize);
344                for (i, &old) in input.iter().enumerate() {
345                    let new = r.membership[i] as usize;
346                    prop_assert_eq!(r.new_to_old[new], old);
347                }
348            }
349
350            #[test]
351            fn idempotent_when_already_dense(input in arb_membership()) {
352                let r1 = reindex_membership(&input).unwrap();
353                let r2 = reindex_membership(&r1.membership).unwrap();
354                prop_assert_eq!(r1.membership.clone(), r2.membership);
355                // After one pass, new_to_old of the second call is just
356                // (0..k) since the input is already 0..k in first-occurrence
357                // order.
358                let expected_n2o: Vec<u32> = (0..r1.nb_clusters()).collect();
359                prop_assert_eq!(r2.new_to_old, expected_n2o);
360            }
361        }
362    }
363}