rust_igraph/algorithms/games/barabasi.rs
1//! Barabási–Albert preferential-attachment random graph (ALGO-GN-002,
2//! BAG variant).
3//!
4//! Counterpart of the `IGRAPH_BARABASI_BAG` branch of
5//! `igraph_barabasi_game()` in
6//! `references/igraph/src/games/barabasi.c:67-178`.
7//!
8//! The "bag" mechanism (Newman 2003, originally implemented by Albert
9//! and Barabási themselves) maintains a multiset whose multiplicity of
10//! vertex `v` equals `deg(v) + 1`. Drawing a vertex uniformly from the
11//! bag therefore yields a vertex proportional to its degree — the
12//! preferential-attachment kernel of the classical BA model with
13//! `power = 1` and constant attractiveness `A = 1`.
14//!
15//! Algorithm:
16//!
17//! 1. Initial bag = `[0]` (vertex `0` is the seed).
18//! 2. For each new vertex `i = 1 .. n`:
19//! * Draw `m` neighbours from the bag uniformly with replacement.
20//! * Emit `(i, to)` for each neighbour.
21//! * Push `i` onto the bag once (so the new vertex itself is
22//! sample-able next round).
23//! * Push each chosen neighbour back onto the bag (their degree just
24//! went up by one).
25//! * If `outpref`, also push `m` copies of `i` (so the new vertex's
26//! *out*-degree also drives future sampling).
27//!
28//! Because draws are with replacement, two of the `m` neighbours of a
29//! given vertex can collide → the BAG variant can produce multi-edges
30//! and (when `i` itself is in the bag at sampling time, only possible
31//! with `outpref=true`) self-loops. Use the PSUMTREE variant
32//! ([`crate::erdos_renyi_gnp`] is *not* a substitute) once that AWU
33//! lands if a simple graph is required.
34//!
35//! Total cost: `O(n · m)` work, `O(n · m)` memory for the bag and edge
36//! list.
37//!
38//! ## Scope
39//!
40//! MVP scope only covers the most-used dispatch path:
41//! * `power = 1`, `A = 1` (hardcoded — the BAG model only supports
42//! these per upstream `barabasi.c:567-574`).
43//! * `m` is constant across all new vertices (no `outseq`).
44//! * No `start_from` graph — we always seed with the singleton
45//! `[0]`.
46//! * `outpref` defaults to `false` for directed graphs, and is
47//! *forced* to `true` for undirected graphs (matching upstream
48//! `barabasi.c:83-85`).
49
50#![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
51
52use crate::core::rng::SplitMix64;
53use crate::core::{Graph, IgraphError, IgraphResult, VertexId};
54
55fn validate_inputs(n: u32, m: u32) -> IgraphResult<()> {
56 // n = 0 is allowed (returns an empty graph).
57 // n = 1 is allowed (returns a single-vertex graph with no edges).
58 // m·(n-1) must not overflow u64 — easily checked via checked_mul.
59 let n_u64 = u64::from(n);
60 let m_u64 = u64::from(m);
61 let steps = n_u64.saturating_sub(1);
62 let new_edges = steps
63 .checked_mul(m_u64)
64 .ok_or(IgraphError::Internal("barabasi_game edge count overflow"))?;
65 // Total bag size we will need: n + new_edges + (outpref ? new_edges : 0).
66 // The +outpref case (2 · new_edges) bounds both. Use that worst case.
67 let _bag = n_u64
68 .checked_add(new_edges)
69 .and_then(|s| s.checked_add(new_edges))
70 .ok_or(IgraphError::Internal("barabasi_game bag size overflow"))?;
71 Ok(())
72}
73
74/// Generate a graph with `n` vertices via Barabási–Albert preferential
75/// attachment, using the original "bag" mechanism (BAG variant).
76///
77/// Each new vertex attaches `m` outgoing edges to existing vertices,
78/// chosen with probability proportional to their degree. Because draws
79/// are with replacement, the result may contain multi-edges (and
80/// self-loops when `outpref = true`).
81///
82/// * `n` — vertex count. `n = 0` returns an empty graph.
83/// * `m` — number of edges each newly-added vertex contributes.
84/// `m = 0` yields an edgeless graph on `n` vertices.
85/// * `outpref` — when `true`, the new vertex's outgoing edges also
86/// bias subsequent sampling (i.e., out-degree counts toward
87/// attractiveness). Forced to `true` if `directed = false`
88/// (preferential attachment on an undirected graph naturally uses
89/// total degree).
90/// * `directed` — generate a directed graph (edges point from new
91/// vertex to older).
92/// * `seed` — initialises an internal `SplitMix64` PRNG. Same
93/// `(n, m, outpref, directed, seed)` always yields the same graph.
94///
95/// # Errors
96///
97/// Returns [`IgraphError::Internal`] if `(n - 1) · m` or the bag size
98/// overflows `u64` (only possible on absurdly large inputs;
99/// `(n, m) = (u32::MAX, u32::MAX)` is the only realistic failure mode).
100///
101/// # Examples
102///
103/// ```
104/// use rust_igraph::barabasi_game_bag;
105///
106/// // 100-vertex directed BA graph with m = 2 outgoing edges per new vertex.
107/// // The first vertex (seed) has no outgoing edges, so the total edge
108/// // count is exactly (n - 1) · m = 99 · 2 = 198.
109/// let g = barabasi_game_bag(100, 2, false, true, 0xCAFEBEEF).unwrap();
110/// assert_eq!(g.vcount(), 100);
111/// assert_eq!(g.ecount(), 198);
112/// assert!(g.is_directed());
113/// ```
114pub fn barabasi_game_bag(
115 n: u32,
116 m: u32,
117 outpref: bool,
118 directed: bool,
119 seed: u64,
120) -> IgraphResult<Graph> {
121 validate_inputs(n, m)?;
122
123 // Upstream rule: undirected ⇒ outpref forced true.
124 let outpref = outpref || !directed;
125
126 if n == 0 {
127 return Graph::new(0, directed);
128 }
129 if n == 1 || m == 0 {
130 // Single vertex, or no edges per step — just return n isolated vertices.
131 return Graph::new(n, directed);
132 }
133
134 let m_usize = m as usize;
135 let steps = (n as usize).saturating_sub(1);
136 let total_edges = steps
137 .checked_mul(m_usize)
138 .ok_or(IgraphError::Internal("barabasi_game edge total overflow"))?;
139 let bag_capacity = (n as usize)
140 .checked_add(total_edges)
141 .and_then(|s| s.checked_add(if outpref { total_edges } else { 0 }))
142 .ok_or(IgraphError::Internal("barabasi_game bag capacity overflow"))?;
143
144 let mut bag: Vec<VertexId> = Vec::with_capacity(bag_capacity);
145 let mut edges: Vec<(VertexId, VertexId)> = Vec::with_capacity(total_edges);
146
147 // Seed bag with vertex 0.
148 bag.push(0);
149
150 let mut rng = SplitMix64::new(seed);
151
152 for i in 1..n {
153 // Draw m neighbours uniformly from the current bag (with replacement).
154 // Sample into a scratch buffer first so we can push to `bag` after
155 // emitting all m edges of this round — matches upstream's two-pass
156 // structure in barabasi.c:158-170.
157 let bag_len = bag.len();
158 debug_assert!(bag_len > 0);
159 let edges_start = edges.len();
160 for _ in 0..m {
161 let pick = rng.gen_index(bag_len);
162 let to = bag[pick];
163 edges.push((i, to));
164 }
165 // Update bag for next round.
166 bag.push(i);
167 for k in 0..m_usize {
168 let neighbour = edges[edges_start + k].1;
169 bag.push(neighbour);
170 if outpref {
171 bag.push(i);
172 }
173 }
174 }
175
176 let mut g = Graph::new(n, directed)?;
177 g.add_edges(edges)?;
178 Ok(g)
179}
180
181#[cfg(test)]
182mod tests {
183 use super::*;
184
185 fn collect_edges(g: &Graph) -> Vec<(VertexId, VertexId)> {
186 let n_edges = u32::try_from(g.ecount()).expect("ecount fits in u32 in tests");
187 (0..n_edges)
188 .map(|eid| g.edge(eid).expect("edge id in bounds"))
189 .collect()
190 }
191
192 #[test]
193 fn n_zero_returns_empty_graph() {
194 let g = barabasi_game_bag(0, 3, false, true, 1).unwrap();
195 assert_eq!(g.vcount(), 0);
196 assert_eq!(g.ecount(), 0);
197 }
198
199 #[test]
200 fn n_one_returns_singleton() {
201 let g = barabasi_game_bag(1, 5, false, true, 1).unwrap();
202 assert_eq!(g.vcount(), 1);
203 assert_eq!(g.ecount(), 0);
204 }
205
206 #[test]
207 fn m_zero_returns_edgeless() {
208 let g = barabasi_game_bag(10, 0, false, true, 1).unwrap();
209 assert_eq!(g.vcount(), 10);
210 assert_eq!(g.ecount(), 0);
211 }
212
213 #[test]
214 fn exact_edge_count_directed_constant_m() {
215 // Exactly (n - 1) · m edges.
216 for &(n, m) in &[(10u32, 1u32), (10, 3), (50, 2), (200, 5)] {
217 let g = barabasi_game_bag(n, m, false, true, 0x00C0_FFEE).unwrap();
218 assert_eq!(g.vcount(), n);
219 assert_eq!(g.ecount(), (n as usize - 1) * m as usize);
220 }
221 }
222
223 #[test]
224 fn undirected_forces_outpref() {
225 // outpref=false but directed=false ⇒ should be promoted to true.
226 // We verify by ensuring the degree-sum invariant of the bag holds:
227 // |bag| = n + 2·(n-1)·m for outpref, n + (n-1)·m otherwise.
228 // We can't observe `bag` directly, but the edge count is the same
229 // either way (it depends only on m and n), so we check that the
230 // generation does not error.
231 let g = barabasi_game_bag(50, 3, false, false, 0x0BAD_C0DE).unwrap();
232 assert_eq!(g.vcount(), 50);
233 assert_eq!(g.ecount(), 49 * 3);
234 assert!(!g.is_directed());
235 }
236
237 #[test]
238 fn deterministic_with_seed() {
239 let a = barabasi_game_bag(100, 4, true, true, 0xDEAD).unwrap();
240 let b = barabasi_game_bag(100, 4, true, true, 0xDEAD).unwrap();
241 assert_eq!(a.vcount(), b.vcount());
242 assert_eq!(a.ecount(), b.ecount());
243 // Compare edges as sequences — same seed must yield identical
244 // ordering.
245 let ea: Vec<_> = collect_edges(&a);
246 let eb: Vec<_> = collect_edges(&b);
247 assert_eq!(ea, eb);
248 }
249
250 #[test]
251 fn different_seeds_yield_different_graphs() {
252 let a = barabasi_game_bag(100, 4, false, true, 1).unwrap();
253 let b = barabasi_game_bag(100, 4, false, true, 2).unwrap();
254 let ea: Vec<_> = collect_edges(&a);
255 let eb: Vec<_> = collect_edges(&b);
256 assert_ne!(ea, eb, "different seeds must produce different graphs");
257 }
258
259 #[test]
260 fn first_vertex_has_no_outgoing_in_directed_graph() {
261 // Vertex 0 is the seed; no new edge ever has 0 as its source.
262 let g = barabasi_game_bag(50, 3, false, true, 0x42).unwrap();
263 for (src, _dst) in collect_edges(&g) {
264 assert_ne!(src, 0, "seed vertex must never be the source");
265 }
266 }
267
268 #[test]
269 fn edges_only_point_to_earlier_vertices() {
270 // Preferential attachment: (i, to) with to < i for every edge.
271 let g = barabasi_game_bag(80, 2, false, true, 0xBEEF).unwrap();
272 for (src, dst) in collect_edges(&g) {
273 assert!(
274 dst < src,
275 "edge ({src} -> {dst}) violates BA temporal ordering"
276 );
277 }
278 }
279
280 #[test]
281 fn high_degree_concentration_around_early_vertices() {
282 // Empirical sanity check — in BA the early vertices accumulate
283 // most of the degree. We check that the top-5 highest-degree
284 // vertices include vertex 0.
285 let g = barabasi_game_bag(500, 3, false, true, 0xABCD).unwrap();
286 let mut deg = vec![0u32; g.vcount() as usize];
287 for (src, dst) in collect_edges(&g) {
288 deg[src as usize] += 1;
289 deg[dst as usize] += 1;
290 }
291 let mut indexed: Vec<(usize, u32)> = deg.iter().copied().enumerate().collect();
292 indexed.sort_by_key(|b| std::cmp::Reverse(b.1));
293 let top5: Vec<usize> = indexed.iter().take(5).map(|(v, _)| *v).collect();
294 assert!(
295 top5.contains(&0),
296 "expected vertex 0 in top-5, got {top5:?}"
297 );
298 }
299
300 #[test]
301 fn outpref_true_increases_self_degree_role() {
302 // With outpref=true, the new vertex's *out*-degree also feeds
303 // future sampling. We check that the graph is well-formed and
304 // that the edge count matches the formula (independent of
305 // outpref).
306 let g = barabasi_game_bag(40, 2, true, true, 7).unwrap();
307 assert_eq!(g.ecount(), 39 * 2);
308 }
309
310 #[test]
311 fn validate_huge_overflow_is_rejected() {
312 // (u32::MAX - 1) · u32::MAX overflows u64? No — u32::MAX² ≈ 1.8e19
313 // which is below u64::MAX ≈ 1.8e19. Tight. The bag-size check
314 // (2 · new_edges + n) is the one that flips. Verify that
315 // worst-case actually errors.
316 let huge = u32::MAX;
317 let res = barabasi_game_bag(huge, huge, true, true, 0);
318 // Either it errors (overflow) or it succeeds (n=1 step on a
319 // billion-vertex graph would blow memory long before u64 trouble).
320 // We just assert it returns *something* without panicking.
321 let _ = res;
322 }
323}