Skip to main content

rust_igraph/algorithms/properties/
homophily.rs

1//! Graph homophily metrics (ALGO-TR-015).
2//!
3//! Measures the tendency of edges to connect vertices with the same label.
4//! These metrics are central to understanding when message-passing GNNs
5//! will succeed (high homophily) vs. struggle (low homophily / heterophily).
6//!
7//! Implements three standard variants from the GNN literature:
8//! - Edge homophily ratio (Zhu et al., 2020)
9//! - Node homophily ratio (Pei et al., 2020)
10//! - Class homophily ratio (Lim et al., 2021)
11
12#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
13
14use crate::core::{Graph, IgraphError, IgraphResult, VertexId};
15
16/// Edge homophily ratio: fraction of edges connecting same-label vertices.
17///
18/// `h_edge = |{(u,v) ∈ E : y_u = y_v}| / |E|`
19///
20/// Range: [0, 1]. Higher values indicate stronger homophily.
21/// This is the simplest and most common homophily metric in GNN papers.
22///
23/// # Examples
24///
25/// ```
26/// use rust_igraph::{Graph, edge_homophily};
27///
28/// // Triangle with uniform labels: all edges connect same label
29/// let g = Graph::from_edges(&[(0,1),(1,2),(0,2)], false, Some(3)).unwrap();
30/// let h = edge_homophily(&g, &[0, 0, 0]).unwrap();
31/// assert!((h - 1.0).abs() < 1e-10);
32/// ```
33pub fn edge_homophily(graph: &Graph, labels: &[u32]) -> IgraphResult<f64> {
34    validate_labels(graph, labels)?;
35
36    let ne = graph.ecount();
37    if ne == 0 {
38        return Ok(0.0);
39    }
40
41    let mut same_count = 0usize;
42    for (u, v) in graph.edges() {
43        if labels[u as usize] == labels[v as usize] {
44            same_count += 1;
45        }
46    }
47
48    Ok(same_count as f64 / ne as f64)
49}
50
51/// Node homophily ratio: average proportion of same-label neighbors.
52///
53/// `h_node = (1/|V|) Σ_v |{u ∈ N(v) : y_u = y_v}| / |N(v)|`
54///
55/// Isolated vertices (degree 0) are excluded from the average.
56/// Range: [0, 1]. This per-node definition better captures local structure
57/// than the edge-level metric.
58///
59/// # Examples
60///
61/// ```
62/// use rust_igraph::{Graph, node_homophily};
63///
64/// // Path 0-1-2 with labels [0, 0, 1]:
65/// // Node 0: 1 neighbor (1, same label) → 1.0
66/// // Node 1: 2 neighbors (0=same, 2=diff) → 0.5
67/// // Node 2: 1 neighbor (1, diff label) → 0.0
68/// // Average = (1.0 + 0.5 + 0.0) / 3 = 0.5
69/// let g = Graph::from_edges(&[(0,1),(1,2)], false, Some(3)).unwrap();
70/// let h = node_homophily(&g, &[0, 0, 1]).unwrap();
71/// assert!((h - 0.5).abs() < 1e-10);
72/// ```
73pub fn node_homophily(graph: &Graph, labels: &[u32]) -> IgraphResult<f64> {
74    validate_labels(graph, labels)?;
75
76    let nv = graph.vcount() as usize;
77    let mut sum = 0.0;
78    let mut count = 0usize;
79
80    for v in 0..nv {
81        let neighbors = graph.neighbors(v as VertexId)?;
82        let deg = neighbors.len();
83        if deg == 0 {
84            continue;
85        }
86        let same = neighbors
87            .iter()
88            .filter(|&&u| labels[u as usize] == labels[v])
89            .count();
90        sum += same as f64 / deg as f64;
91        count += 1;
92    }
93
94    if count == 0 {
95        return Ok(0.0);
96    }
97
98    Ok(sum / count as f64)
99}
100
101/// Class-balanced homophily ratio (adjusted for class imbalance).
102///
103/// `h_class = (1/C) Σ_k [ h_k - |C_k|/|V| ] / (1 - |C_k|/|V|)`
104///
105/// where `h_k` is the proportion of edges from class-k vertices that
106/// connect to other class-k vertices, and `|C_k|/|V|` is the class
107/// proportion (baseline under random connectivity).
108///
109/// Range: approximately [-1, 1]. Values near 0 indicate behavior
110/// consistent with random label assignment. Negative values indicate
111/// heterophily beyond random expectation.
112///
113/// # Examples
114///
115/// ```
116/// use rust_igraph::{Graph, class_homophily};
117///
118/// // Complete bipartite K_{2,2} with labels [0,0,1,1]
119/// // All edges cross classes → strong heterophily
120/// let g = Graph::from_edges(&[(0,2),(0,3),(1,2),(1,3)], false, Some(4)).unwrap();
121/// let h = class_homophily(&g, &[0, 0, 1, 1]).unwrap();
122/// assert!(h < 0.0);
123/// ```
124pub fn class_homophily(graph: &Graph, labels: &[u32]) -> IgraphResult<f64> {
125    validate_labels(graph, labels)?;
126
127    let nv = graph.vcount() as usize;
128    if nv == 0 {
129        return Ok(0.0);
130    }
131
132    let num_classes = labels.iter().max().map_or(0, |&m| m + 1) as usize;
133    if num_classes == 0 {
134        return Ok(0.0);
135    }
136
137    // Count class sizes
138    let mut class_size = vec![0usize; num_classes];
139    for &l in labels {
140        class_size[l as usize] += 1;
141    }
142
143    // For each class k, count edges from class-k vertices to same class
144    let mut intra_edges = vec![0usize; num_classes];
145    let mut total_from_class = vec![0usize; num_classes];
146
147    for v in 0..nv {
148        let label_v = labels[v] as usize;
149        let neighbors = graph.neighbors(v as VertexId)?;
150        total_from_class[label_v] += neighbors.len();
151        for &u in &neighbors {
152            if labels[u as usize] == labels[v] {
153                intra_edges[label_v] += 1;
154            }
155        }
156    }
157
158    // For undirected graphs, each edge is counted twice in the above
159    // but the ratio remains the same since both numerator and denominator
160    // are doubled.
161
162    let mut sum = 0.0;
163    let mut valid_classes = 0usize;
164
165    for k in 0..num_classes {
166        let proportion = class_size[k] as f64 / nv as f64;
167        if (1.0 - proportion).abs() < 1e-15 || total_from_class[k] == 0 {
168            continue;
169        }
170        let h_k = intra_edges[k] as f64 / total_from_class[k] as f64;
171        sum += (h_k - proportion) / (1.0 - proportion);
172        valid_classes += 1;
173    }
174
175    if valid_classes == 0 {
176        return Ok(0.0);
177    }
178
179    Ok(sum / valid_classes as f64)
180}
181
182/// Heterophily ratio: `1 - edge_homophily`.
183///
184/// Convenience function returning the complement of edge homophily.
185/// Higher values indicate edges predominantly connect different-label vertices.
186///
187/// # Examples
188///
189/// ```
190/// use rust_igraph::{Graph, edge_heterophily};
191///
192/// let g = Graph::from_edges(&[(0,1),(1,2)], false, Some(3)).unwrap();
193/// let h = edge_heterophily(&g, &[0, 1, 0]).unwrap();
194/// // Both edges cross labels → heterophily = 1.0
195/// assert!((h - 1.0).abs() < 1e-10);
196/// ```
197pub fn edge_heterophily(graph: &Graph, labels: &[u32]) -> IgraphResult<f64> {
198    Ok(1.0 - edge_homophily(graph, labels)?)
199}
200
201// --- Internal helpers ---
202
203fn validate_labels(graph: &Graph, labels: &[u32]) -> IgraphResult<()> {
204    let nv = graph.vcount() as usize;
205    if labels.len() != nv {
206        return Err(IgraphError::InvalidArgument(format!(
207            "labels length {} does not match vcount {}",
208            labels.len(),
209            nv
210        )));
211    }
212    Ok(())
213}
214
215#[cfg(test)]
216mod tests {
217    use super::*;
218
219    fn path3() -> Graph {
220        Graph::from_edges(&[(0, 1), (1, 2)], false, Some(3)).unwrap()
221    }
222
223    fn triangle() -> Graph {
224        Graph::from_edges(&[(0, 1), (1, 2), (0, 2)], false, Some(3)).unwrap()
225    }
226
227    fn bipartite_k22() -> Graph {
228        Graph::from_edges(&[(0, 2), (0, 3), (1, 2), (1, 3)], false, Some(4)).unwrap()
229    }
230
231    // --- edge_homophily tests ---
232
233    #[test]
234    fn edge_homo_all_same() {
235        let g = triangle();
236        let h = edge_homophily(&g, &[0, 0, 0]).unwrap();
237        assert!((h - 1.0).abs() < 1e-10);
238    }
239
240    #[test]
241    fn edge_homo_all_different() {
242        let g = triangle();
243        let h = edge_homophily(&g, &[0, 1, 2]).unwrap();
244        assert!(h.abs() < 1e-10);
245    }
246
247    #[test]
248    fn edge_homo_mixed() {
249        let g = path3();
250        // Labels [0, 0, 1]: edge(0,1)=same, edge(1,2)=diff → 0.5
251        let h = edge_homophily(&g, &[0, 0, 1]).unwrap();
252        assert!((h - 0.5).abs() < 1e-10);
253    }
254
255    #[test]
256    fn edge_homo_empty_graph() {
257        let g = Graph::with_vertices(3);
258        let h = edge_homophily(&g, &[0, 1, 2]).unwrap();
259        assert!(h.abs() < 1e-10);
260    }
261
262    #[test]
263    fn edge_homo_invalid_labels() {
264        let g = triangle();
265        assert!(edge_homophily(&g, &[0, 1]).is_err());
266    }
267
268    // --- node_homophily tests ---
269
270    #[test]
271    fn node_homo_path() {
272        let g = path3();
273        // Labels [0, 0, 1]:
274        // Node 0: N={1}, same=1 → 1.0
275        // Node 1: N={0,2}, same=1 → 0.5
276        // Node 2: N={1}, same=0 → 0.0
277        // Average = (1.0 + 0.5 + 0.0) / 3 = 0.5
278        let h = node_homophily(&g, &[0, 0, 1]).unwrap();
279        assert!((h - 0.5).abs() < 1e-10);
280    }
281
282    #[test]
283    fn node_homo_all_same() {
284        let g = triangle();
285        let h = node_homophily(&g, &[0, 0, 0]).unwrap();
286        assert!((h - 1.0).abs() < 1e-10);
287    }
288
289    #[test]
290    fn node_homo_all_different() {
291        let g = triangle();
292        let h = node_homophily(&g, &[0, 1, 2]).unwrap();
293        assert!(h.abs() < 1e-10);
294    }
295
296    #[test]
297    fn node_homo_isolated() {
298        let g = Graph::from_edges(&[(0, 1)], false, Some(3)).unwrap();
299        // Vertex 2 is isolated, excluded from average
300        let h = node_homophily(&g, &[0, 0, 1]).unwrap();
301        // Node 0: N={1}, same=1 → 1.0
302        // Node 1: N={0}, same=1 → 1.0
303        // Average = (1.0 + 1.0) / 2 = 1.0
304        assert!((h - 1.0).abs() < 1e-10);
305    }
306
307    #[test]
308    fn node_homo_empty() {
309        let g = Graph::with_vertices(3);
310        let h = node_homophily(&g, &[0, 1, 2]).unwrap();
311        assert!(h.abs() < 1e-10);
312    }
313
314    // --- class_homophily tests ---
315
316    #[test]
317    fn class_homo_perfect_homophily() {
318        // Two components, each with same label
319        let g = Graph::from_edges(&[(0, 1), (2, 3)], false, Some(4)).unwrap();
320        let h = class_homophily(&g, &[0, 0, 1, 1]).unwrap();
321        // Each class has h_k=1.0, proportion=0.5
322        // adjusted = (1.0 - 0.5) / (1.0 - 0.5) = 1.0
323        assert!((h - 1.0).abs() < 1e-10);
324    }
325
326    #[test]
327    fn class_homo_perfect_heterophily() {
328        let g = bipartite_k22();
329        let h = class_homophily(&g, &[0, 0, 1, 1]).unwrap();
330        // Each class has h_k=0.0, proportion=0.5
331        // adjusted = (0.0 - 0.5) / (1.0 - 0.5) = -1.0
332        assert!((h - (-1.0)).abs() < 1e-10);
333    }
334
335    #[test]
336    fn class_homo_random_like() {
337        // Diamond graph where labels match class proportions
338        let g = Graph::from_edges(
339            &[(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)],
340            false,
341            Some(4),
342        )
343        .unwrap();
344        // K4 with labels [0,0,1,1]: 6 edges, intra={0-1, 2-3}=2, cross=4
345        let h = class_homophily(&g, &[0, 0, 1, 1]).unwrap();
346        // h_k = 2/6 = 1/3 for each class (each has 6 total edges from its vertices,
347        // actually: vertex 0 has 3 edges, vertex 1 has 3 edges → total_from_class[0]=6,
348        // intra[0]=2 (0→1 and 1→0) → h_0 = 2/6 = 1/3
349        // proportion = 0.5, adjusted = (1/3 - 0.5) / (1 - 0.5) = -1/3
350        assert!((h - (-1.0 / 3.0)).abs() < 1e-10);
351    }
352
353    #[test]
354    fn class_homo_single_class() {
355        let g = triangle();
356        let h = class_homophily(&g, &[0, 0, 0]).unwrap();
357        // Only one class with proportion=1.0 → skipped → 0.0
358        assert!(h.abs() < 1e-10);
359    }
360
361    #[test]
362    fn class_homo_empty() {
363        let g = Graph::with_vertices(0);
364        let h = class_homophily(&g, &[]).unwrap();
365        assert!(h.abs() < 1e-10);
366    }
367
368    // --- edge_heterophily tests ---
369
370    #[test]
371    fn heterophily_complement() {
372        let g = path3();
373        let homo = edge_homophily(&g, &[0, 1, 0]).unwrap();
374        let hetero = edge_heterophily(&g, &[0, 1, 0]).unwrap();
375        assert!((homo + hetero - 1.0).abs() < 1e-10);
376    }
377
378    #[test]
379    fn heterophily_all_cross() {
380        let g = Graph::from_edges(&[(0, 1), (1, 2)], false, Some(3)).unwrap();
381        let h = edge_heterophily(&g, &[0, 1, 0]).unwrap();
382        // Both edges cross: heterophily = 1.0
383        assert!((h - 1.0).abs() < 1e-10);
384    }
385}