rust_igraph/algorithms/community/
reindex_membership.rs1use std::collections::BTreeMap;
23
24use crate::core::error::IgraphResult;
25
26#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct ReindexMembershipResult {
32 pub membership: Vec<u32>,
35 pub new_to_old: Vec<u32>,
38}
39
40impl ReindexMembershipResult {
41 #[must_use]
52 pub fn nb_clusters(&self) -> u32 {
53 u32::try_from(self.new_to_old.len()).unwrap_or(u32::MAX)
57 }
58}
59
60pub fn reindex_membership(membership: &[u32]) -> IgraphResult<ReindexMembershipResult> {
83 let n = membership.len();
84
85 if n == 0 {
87 return Ok(ReindexMembershipResult {
88 membership: Vec::new(),
89 new_to_old: Vec::new(),
90 });
91 }
92
93 let mut max_id: u32 = 0;
97 for &c in membership {
98 if c > max_id {
99 max_id = c;
100 }
101 }
102
103 if (max_id as usize) < n {
104 let mut lookup: Vec<u32> = vec![0; n];
105 let mut new_to_old: Vec<u32> = Vec::new();
106 let mut out: Vec<u32> = vec![0; n];
107 let mut next_new: u32 = 0;
108
109 for (i, &old) in membership.iter().enumerate() {
110 let slot = &mut lookup[old as usize];
111 if *slot == 0 {
112 *slot = next_new + 1;
113 new_to_old.push(old);
114 next_new = next_new.saturating_add(1);
115 }
116 out[i] = *slot - 1;
117 }
118
119 return Ok(ReindexMembershipResult {
120 membership: out,
121 new_to_old,
122 });
123 }
124
125 let mut lookup: BTreeMap<u32, u32> = BTreeMap::new();
127 let mut new_to_old: Vec<u32> = Vec::new();
128 let mut out: Vec<u32> = vec![0; n];
129 let mut next_new: u32 = 0;
130
131 for (i, &old) in membership.iter().enumerate() {
132 let new_id = if let Some(&nid) = lookup.get(&old) {
133 nid
134 } else {
135 let nid = next_new;
136 lookup.insert(old, nid);
137 new_to_old.push(old);
138 next_new = next_new.saturating_add(1);
139 nid
140 };
141 out[i] = new_id;
142 }
143
144 Ok(ReindexMembershipResult {
145 membership: out,
146 new_to_old,
147 })
148}
149
150#[cfg(test)]
151mod tests {
152 use super::*;
153
154 #[test]
155 fn empty_membership() {
156 let r = reindex_membership(&[]).unwrap();
157 assert!(r.membership.is_empty());
158 assert!(r.new_to_old.is_empty());
159 assert_eq!(r.nb_clusters(), 0);
160 }
161
162 #[test]
163 fn already_densified_identity() {
164 let r = reindex_membership(&[0, 1, 2, 0, 1, 2]).unwrap();
165 assert_eq!(r.membership, vec![0, 1, 2, 0, 1, 2]);
166 assert_eq!(r.new_to_old, vec![0, 1, 2]);
167 }
168
169 #[test]
170 fn singleton_cluster_collapses_to_zero() {
171 let r = reindex_membership(&[42, 42, 42, 42]).unwrap();
172 assert_eq!(r.membership, vec![0, 0, 0, 0]);
173 assert_eq!(r.new_to_old, vec![42]);
174 assert_eq!(r.nb_clusters(), 1);
175 }
176
177 #[test]
178 fn gaps_are_compressed() {
179 let r = reindex_membership(&[5, 5, 10, 5, 0]).unwrap();
181 assert_eq!(r.membership, vec![0, 0, 1, 0, 2]);
182 assert_eq!(r.new_to_old, vec![5, 10, 0]);
183 }
184
185 #[test]
186 fn reverse_order_relabels_to_descending_in_new_to_old() {
187 let r = reindex_membership(&[2, 1, 0]).unwrap();
190 assert_eq!(r.membership, vec![0, 1, 2]);
191 assert_eq!(r.new_to_old, vec![2, 1, 0]);
192 }
193
194 #[test]
195 fn singletons() {
196 let r = reindex_membership(&[7, 8, 9, 10]).unwrap();
197 assert_eq!(r.membership, vec![0, 1, 2, 3]);
198 assert_eq!(r.new_to_old, vec![7, 8, 9, 10]);
199 }
200
201 #[test]
202 fn large_id_takes_btreemap_fallback() {
203 let r = reindex_membership(&[1_000_000, 7, 1_000_000, 7]).unwrap();
205 assert_eq!(r.membership, vec![0, 1, 0, 1]);
206 assert_eq!(r.new_to_old, vec![1_000_000, 7]);
207 }
208
209 #[test]
210 fn fast_path_max_id_equals_n_minus_1() {
211 let r = reindex_membership(&[3, 0, 1, 2]).unwrap();
213 assert_eq!(r.membership, vec![0, 1, 2, 3]);
214 assert_eq!(r.new_to_old, vec![3, 0, 1, 2]);
215 }
216
217 #[test]
218 fn fast_path_max_id_equals_n_takes_sparse_branch() {
219 let r = reindex_membership(&[4, 0, 1, 2]).unwrap();
222 assert_eq!(r.membership, vec![0, 1, 2, 3]);
223 assert_eq!(r.new_to_old, vec![4, 0, 1, 2]);
224 }
225
226 #[test]
227 fn preserves_partition_under_relabel() {
228 let inputs: &[&[u32]] = &[
231 &[0, 0, 1, 2, 1, 2, 0],
232 &[5, 5, 1_000, 7, 1_000, 7, 5],
233 &[u32::MAX, 0, u32::MAX, 1, 0],
234 ];
235 for input in inputs {
236 let r = reindex_membership(input).unwrap();
237 for i in 0..input.len() {
238 for j in 0..input.len() {
239 assert_eq!(
240 input[i] == input[j],
241 r.membership[i] == r.membership[j],
242 "partition diverged at ({i}, {j}) for input {input:?}",
243 );
244 }
245 }
246 let k = r.nb_clusters();
248 for &c in &r.membership {
249 assert!(c < k);
250 }
251 for c in 0..k {
252 assert!(r.membership.contains(&c));
253 }
254 assert_eq!(r.new_to_old.len(), k as usize);
255 }
256 }
257
258 #[test]
259 fn fast_path_matches_sparse_path_for_in_range_ids() {
260 let input: Vec<u32> = vec![3, 0, 3, 1, 2, 0, 1];
265 let fast = reindex_membership(&input).unwrap();
266 let len_u32 = u32::try_from(input.len()).expect("test input length fits u32");
267 assert!(input.iter().copied().max().unwrap_or(0) < len_u32);
268 let mut expected_new_to_old: Vec<u32> = Vec::new();
271 let mut expected: Vec<u32> = Vec::with_capacity(input.len());
272 for &old in &input {
273 let pos = expected_new_to_old
274 .iter()
275 .position(|&x| x == old)
276 .unwrap_or_else(|| {
277 expected_new_to_old.push(old);
278 expected_new_to_old.len() - 1
279 });
280 expected.push(u32::try_from(pos).expect("test bookkeeping fits u32"));
281 }
282 assert_eq!(fast.membership, expected);
283 assert_eq!(fast.new_to_old, expected_new_to_old);
284 }
285
286 #[cfg(all(test, feature = "proptest-harness"))]
287 mod prop {
288 use super::*;
289 use proptest::prelude::*;
290
291 prop_compose! {
292 fn arb_membership()(
293 n in 1usize..=64,
294 seed in any::<u64>(),
295 max_id_kind in 0u32..3,
296 ) -> Vec<u32> {
297 let mut rng: u64 = seed.wrapping_add(0xA5A5_5A5A_DEAD_BEEF);
298 let max_id: u32 = match max_id_kind {
299 0 => (n as u32).saturating_sub(1).max(1), 1 => (n as u32).saturating_mul(4).max(1), _ => 1_000_000, };
303 (0..n).map(|_| {
304 rng = rng.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1);
305 (rng >> 32) as u32 % (max_id + 1)
306 }).collect()
307 }
308 }
309
310 proptest! {
311 #![proptest_config(ProptestConfig { cases: 80, ..ProptestConfig::default() })]
312
313 #[test]
314 fn partition_preserved(input in arb_membership()) {
315 let r = reindex_membership(&input).unwrap();
316 prop_assert_eq!(r.membership.len(), input.len());
317 for i in 0..input.len() {
318 for j in (i + 1)..input.len() {
319 prop_assert_eq!(
320 input[i] == input[j],
321 r.membership[i] == r.membership[j],
322 );
323 }
324 }
325 }
326
327 #[test]
328 fn ids_are_contiguous(input in arb_membership()) {
329 let r = reindex_membership(&input).unwrap();
330 let k = r.nb_clusters();
331 for &c in &r.membership {
332 prop_assert!(c < k);
333 }
334 for c in 0..k {
336 prop_assert!(r.membership.contains(&c));
337 }
338 }
339
340 #[test]
341 fn new_to_old_round_trips(input in arb_membership()) {
342 let r = reindex_membership(&input).unwrap();
343 prop_assert_eq!(r.new_to_old.len(), r.nb_clusters() as usize);
344 for (i, &old) in input.iter().enumerate() {
345 let new = r.membership[i] as usize;
346 prop_assert_eq!(r.new_to_old[new], old);
347 }
348 }
349
350 #[test]
351 fn idempotent_when_already_dense(input in arb_membership()) {
352 let r1 = reindex_membership(&input).unwrap();
353 let r2 = reindex_membership(&r1.membership).unwrap();
354 prop_assert_eq!(r1.membership.clone(), r2.membership);
355 let expected_n2o: Vec<u32> = (0..r1.nb_clusters()).collect();
359 prop_assert_eq!(r2.new_to_old, expected_n2o);
360 }
361 }
362 }
363}