Skip to main content

rust_igraph/algorithms/community/
split_join_distance.rs

1//! ALGO-CM-016 — `split_join_distance` (asymmetric projection distances).
2//!
3//! Given two membership vectors over the same vertex set, returns the
4//! **pair** of projection distances `(d12, d21)` where:
5//!
6//! - `d12 = n − Σ_i max_j |R_i ∩ C_j|` (project `comm1` onto `comm2`)
7//! - `d21 = n − Σ_j max_i |R_i ∩ C_j|` (project `comm2` onto `comm1`)
8//!
9//! The total split-join distance (van Dongen 2000) is `d12 + d21`. One
10//! component being zero means one partition is a *sub-partition* of the
11//! other; both being zero means the partitions are identical.
12//!
13//! Mirrors `igraph_split_join_distance` in
14//! `references/igraph/src/community/community_misc.c`. The C reference
15//! exposes the asymmetric pair specifically so callers can detect the
16//! sub-partition relationship — [`crate::compare_communities`] with
17//! [`crate::CommunityComparison::SplitJoin`] returns the sum.
18//!
19//! Reuses the densification (via [`crate::reindex_membership`]) and
20//! sparse-confusion-matrix machinery from CM-015 — both inputs are
21//! reindexed once, then a single `HashMap<(u32, u32), u32>` pass yields
22//! the row- and column-max sums.
23
24#![allow(
25    clippy::cast_precision_loss,
26    clippy::cast_possible_truncation,
27    clippy::cast_sign_loss
28)]
29
30use crate::core::error::{IgraphError, IgraphResult};
31
32use super::compare_communities::split_join_distances;
33use super::reindex_membership::reindex_membership;
34
35/// Asymmetric split-join distances between two community partitions.
36///
37/// `d12` is the projection distance of `comm1` from `comm2`; `d21` is
38/// the projection distance in the other direction. The (symmetric)
39/// split-join distance is [`SplitJoinDistance::total`] = `d12 + d21`.
40#[derive(Debug, Copy, Clone, Eq, PartialEq)]
41pub struct SplitJoinDistance {
42    /// Projection distance of `comm1` from `comm2`.
43    pub d12: u64,
44    /// Projection distance of `comm2` from `comm1`.
45    pub d21: u64,
46}
47
48impl SplitJoinDistance {
49    /// Total (symmetric) split-join distance, `d12 + d21`.
50    ///
51    /// # Examples
52    ///
53    /// ```
54    /// use rust_igraph::split_join_distance;
55    ///
56    /// let r = split_join_distance(&[0, 0, 1, 1], &[0, 1, 1, 1]).unwrap();
57    /// assert_eq!(r.total(), r.d12 + r.d21);
58    /// ```
59    #[must_use]
60    pub fn total(self) -> u64 {
61        self.d12 + self.d21
62    }
63}
64
65/// Compute the two asymmetric projection distances between `comm1` and
66/// `comm2`.
67///
68/// Both vectors must have the same length. Cluster ids do not have to
69/// be densified — they are reindexed internally via
70/// [`crate::reindex_membership`]. Empty inputs return `(0, 0)`.
71///
72/// # Examples
73/// ```
74/// use rust_igraph::split_join_distance;
75///
76/// // comm1 is a sub-partition of comm2: every cluster in comm1 fits
77/// // entirely inside some cluster of comm2, so d12 == 0.
78/// let r = split_join_distance(&[0, 0, 1, 2], &[0, 0, 0, 1]).unwrap();
79/// assert_eq!(r.d12, 0);
80/// assert_eq!(r.d21, 1);
81/// assert_eq!(r.total(), 1);
82///
83/// // Identical partitions: both distances are 0.
84/// let r = split_join_distance(&[0, 0, 1, 1], &[5, 5, 7, 7]).unwrap();
85/// assert_eq!(r.d12, 0);
86/// assert_eq!(r.d21, 0);
87/// ```
88///
89/// # Errors
90/// - [`IgraphError::InvalidArgument`] if `comm1.len() != comm2.len()`.
91pub fn split_join_distance(comm1: &[u32], comm2: &[u32]) -> IgraphResult<SplitJoinDistance> {
92    if comm1.len() != comm2.len() {
93        return Err(IgraphError::InvalidArgument(format!(
94            "community membership vectors have different lengths: {} and {}",
95            comm1.len(),
96            comm2.len(),
97        )));
98    }
99    let n = comm1.len();
100    if n == 0 {
101        return Ok(SplitJoinDistance { d12: 0, d21: 0 });
102    }
103    let c1 = reindex_membership(comm1)?;
104    let c2 = reindex_membership(comm2)?;
105    let (d12, d21) = split_join_distances(&c1.membership, &c2.membership, n);
106    Ok(SplitJoinDistance { d12, d21 })
107}
108
109#[cfg(test)]
110mod tests {
111    use super::*;
112
113    #[test]
114    fn empty_inputs_yield_zero_zero() {
115        let r = split_join_distance(&[], &[]).unwrap();
116        assert_eq!(r.d12, 0);
117        assert_eq!(r.d21, 0);
118        assert_eq!(r.total(), 0);
119    }
120
121    #[test]
122    fn length_mismatch_errors() {
123        let err = split_join_distance(&[0, 0], &[0, 0, 0]).unwrap_err();
124        match err {
125            IgraphError::InvalidArgument(_) => (),
126            other => panic!("expected InvalidArgument, got {other:?}"),
127        }
128    }
129
130    #[test]
131    fn identical_partition_is_zero() {
132        let r = split_join_distance(&[0, 0, 1, 1, 2, 2], &[3, 3, 5, 5, 7, 7]).unwrap();
133        assert_eq!(r.d12, 0);
134        assert_eq!(r.d21, 0);
135    }
136
137    #[test]
138    fn subpartition_asymmetry() {
139        // comm1 = {{0,1},{2},{3}} is a sub-partition of comm2 = {{0,1,2},{3}}.
140        // Every comm1 cluster fits inside a comm2 cluster ⇒ d12 = 0.
141        // In the reverse direction: comm2[0]={0,1,2} overlaps comm1
142        // clusters with sizes (2, 1) → max=2 → contributes 2; comm2[1]={3}
143        // → contributes 1; sum = 3, n = 4 ⇒ d21 = 1.
144        let r = split_join_distance(&[0, 0, 1, 2], &[0, 0, 0, 1]).unwrap();
145        assert_eq!(r.d12, 0);
146        assert_eq!(r.d21, 1);
147        assert_eq!(r.total(), 1);
148    }
149
150    #[test]
151    fn full_disagreement_2x2() {
152        // Canonical worst case for n=4: [0,0,1,1] vs [0,1,0,1].
153        // Each comm1 cluster overlaps comm2 clusters by (1, 1) → max=1
154        // each → row sum = 2 → d12 = 4 - 2 = 2. Same for d21 by symmetry.
155        // Total = 4 (the known SplitJoin from CM-015 unit tests).
156        let r = split_join_distance(&[0, 0, 1, 1], &[0, 1, 0, 1]).unwrap();
157        assert_eq!(r.d12, 2);
158        assert_eq!(r.d21, 2);
159        assert_eq!(r.total(), 4);
160    }
161
162    #[test]
163    fn matches_compare_communities_split_join_sum() {
164        use crate::{CommunityComparison, compare_communities};
165        let a = [0u32, 0, 1, 2, 2, 3, 3, 3];
166        let b = [0u32, 1, 1, 2, 3, 3, 0, 2];
167        let r = split_join_distance(&a, &b).unwrap();
168        let total = compare_communities(&a, &b, CommunityComparison::SplitJoin).unwrap();
169        assert!((total - (r.total() as f64)).abs() < 1e-12);
170    }
171
172    #[test]
173    fn relabel_invariant() {
174        // Distances depend only on the partition, not the cluster ids.
175        let a = [0u32, 0, 1, 1, 2, 2];
176        let b = [0u32, 1, 0, 1, 0, 1];
177        let r1 = split_join_distance(&a, &b).unwrap();
178        // Relabel a: 0 -> 17, 1 -> 4, 2 -> 999. Relabel b: 0 -> 1_000_000, 1 -> 5.
179        let a2: Vec<u32> = a
180            .iter()
181            .map(|&c| match c {
182                0 => 17,
183                1 => 4,
184                _ => 999,
185            })
186            .collect();
187        let b2: Vec<u32> = b
188            .iter()
189            .map(|&c| if c == 0 { 1_000_000 } else { 5 })
190            .collect();
191        let r2 = split_join_distance(&a2, &b2).unwrap();
192        assert_eq!(r1, r2);
193    }
194
195    #[test]
196    fn single_singleton_block() {
197        // n=5, both partitions all singletons. Every cluster maps 1:1 ⇒
198        // d12 = d21 = 0.
199        let v: Vec<u32> = (0..5).collect();
200        let r = split_join_distance(&v, &v).unwrap();
201        assert_eq!(r.d12, 0);
202        assert_eq!(r.d21, 0);
203    }
204}
205
206#[cfg(all(test, feature = "proptest-harness"))]
207mod proptests {
208    use super::*;
209    use proptest::collection::vec;
210    use proptest::prelude::*;
211
212    proptest! {
213        #[test]
214        fn non_negative_components(
215            (a, b) in (1usize..40).prop_flat_map(|n| (vec(0u32..5, n), vec(0u32..5, n)))
216        ) {
217            let r = split_join_distance(&a, &b).unwrap();
218            // d12, d21 ∈ [0, n] always.
219            let n = a.len() as u64;
220            prop_assert!(r.d12 <= n);
221            prop_assert!(r.d21 <= n);
222        }
223
224        #[test]
225        fn total_equals_compare_communities_split_join(
226            (a, b) in (1usize..40).prop_flat_map(|n| (vec(0u32..5, n), vec(0u32..5, n)))
227        ) {
228            use crate::{CommunityComparison, compare_communities};
229            let r = split_join_distance(&a, &b).unwrap();
230            let total = compare_communities(&a, &b, CommunityComparison::SplitJoin).unwrap();
231            prop_assert!((total - (r.total() as f64)).abs() < 1e-12);
232        }
233
234        #[test]
235        fn relabel_invariant_prop(
236            (a, b) in (2usize..30).prop_flat_map(|n| (vec(0u32..6, n), vec(0u32..6, n))),
237            offset in 1u32..1000,
238        ) {
239            let r1 = split_join_distance(&a, &b).unwrap();
240            let a2: Vec<u32> = a.iter().map(|&c| c.wrapping_add(offset).wrapping_mul(31)).collect();
241            let b2: Vec<u32> = b.iter().map(|&c| c.wrapping_add(offset.wrapping_mul(7))).collect();
242            let r2 = split_join_distance(&a2, &b2).unwrap();
243            prop_assert_eq!(r1, r2);
244        }
245
246        #[test]
247        fn identical_partitions_are_zero(
248            a in vec(0u32..7, 1..40),
249        ) {
250            let r = split_join_distance(&a, &a).unwrap();
251            prop_assert_eq!(r.d12, 0);
252            prop_assert_eq!(r.d21, 0);
253        }
254
255        #[test]
256        fn subpartition_one_side_is_zero(
257            a in vec(0u32..4, 2..25),
258        ) {
259            // Coarsen `a` into `b` by collapsing every cluster c -> c/2.
260            // The result `b` is strictly coarser than `a`, so `a` is a
261            // sub-partition of `b` and d12 = 0 (project a -> b: every
262            // a-cluster fits entirely in one b-cluster).
263            let b: Vec<u32> = a.iter().map(|&c| c / 2).collect();
264            let r = split_join_distance(&a, &b).unwrap();
265            prop_assert_eq!(r.d12, 0);
266        }
267    }
268}