Skip to main content

rust_igraph/algorithms/paths/
random_walks.rs

1//! Batch random walks for graph embedding training (ALGO-TR-005).
2//!
3//! Generates multiple random walks from specified starting vertices.
4//! This is the workhorse for training graph embedding models like
5//! `DeepWalk`, `Node2Vec`, and LINE.
6//!
7//! Each call produces a corpus of vertex sequences that can be fed
8//! directly into a skip-gram or similar model.
9
10use crate::algorithms::paths::dijkstra::DijkstraMode;
11use crate::algorithms::paths::random_walk::random_walk;
12use crate::algorithms::paths::random_walk_node2vec::random_walk_node2vec;
13use crate::core::rng::SplitMix64;
14use crate::core::{Graph, IgraphResult, VertexId};
15
16/// Generate multiple random walks from every vertex in the graph.
17///
18/// For each vertex `v` in `0..graph.vcount()`, generates
19/// `walks_per_vertex` walks of length `walk_length` starting at `v`.
20/// The order of starting vertices is shuffled independently for each
21/// round using the deterministic PRNG.
22///
23/// # Parameters
24///
25/// - `graph` — The input graph.
26/// - `weights` — Optional edge weights (positive). `None` for unweighted.
27/// - `mode` — Direction mode for directed graphs.
28/// - `walks_per_vertex` — Number of walks to generate per vertex.
29/// - `walk_length` — Number of steps per walk.
30/// - `seed` — Deterministic PRNG seed.
31///
32/// # Returns
33///
34/// A `Vec<Vec<VertexId>>` where each inner vector is a walk (vertex
35/// sequence of length ≤ `walk_length + 1`). Total number of walks is
36/// `vcount * walks_per_vertex` (unless walks get stuck early).
37///
38/// # Examples
39///
40/// ```
41/// use rust_igraph::{Graph, random_walks, DijkstraMode};
42///
43/// let g = Graph::from_edges(
44///     &[(0,1),(1,2),(2,3),(3,0)], false, Some(4)
45/// ).unwrap();
46/// let corpus = random_walks(&g, None, DijkstraMode::Out, 2, 5, 42).unwrap();
47/// assert_eq!(corpus.len(), 8); // 4 vertices * 2 walks
48/// for walk in &corpus {
49///     assert!(walk.len() <= 6); // at most walk_length + 1
50///     assert!(walk[0] < 4);
51/// }
52/// ```
53pub fn random_walks(
54    graph: &Graph,
55    weights: Option<&[f64]>,
56    mode: DijkstraMode,
57    walks_per_vertex: u32,
58    walk_length: u32,
59    seed: u64,
60) -> IgraphResult<Vec<Vec<VertexId>>> {
61    let n = graph.vcount();
62    if n == 0 || walks_per_vertex == 0 || walk_length == 0 {
63        return Ok(Vec::new());
64    }
65
66    let total = u64::from(n)
67        .checked_mul(u64::from(walks_per_vertex))
68        .ok_or_else(|| {
69            crate::core::IgraphError::InvalidArgument(
70                "walks_per_vertex * vcount overflows u64".into(),
71            )
72        })?;
73
74    let capacity = usize::try_from(total).unwrap_or(usize::MAX);
75    let mut rng = SplitMix64::new(seed);
76    let mut corpus: Vec<Vec<VertexId>> = Vec::with_capacity(capacity);
77
78    let mut order: Vec<VertexId> = (0..n).collect();
79
80    for _ in 0..walks_per_vertex {
81        for i in (1..order.len()).rev() {
82            let j = rng.gen_index(i + 1);
83            order.swap(i, j);
84        }
85
86        for &start in &order {
87            let walk_seed = rng.next_u64();
88            let (vs, _) = random_walk(graph, weights, start, mode, walk_length, walk_seed)?;
89            corpus.push(vs);
90        }
91    }
92
93    Ok(corpus)
94}
95
96/// Generate multiple `Node2Vec` random walks from every vertex.
97///
98/// Same as [`random_walks`] but uses second-order biased walks with
99/// parameters `p` (return) and `q` (in-out). See
100/// [`random_walk_node2vec`] for details on the bias.
101///
102/// # Parameters
103///
104/// - `p` — Return parameter (higher = less backtracking).
105/// - `q` — In-out parameter (higher = more BFS-like).
106/// - Other parameters as in [`random_walks`].
107///
108/// # Examples
109///
110/// ```
111/// use rust_igraph::{Graph, random_walks_node2vec, DijkstraMode};
112///
113/// let g = Graph::from_edges(
114///     &[(0,1),(1,2),(2,3),(3,0),(0,2)], false, Some(4)
115/// ).unwrap();
116/// let corpus = random_walks_node2vec(
117///     &g, None, DijkstraMode::Out, 2, 5, 1.0, 1.0, 42
118/// ).unwrap();
119/// assert_eq!(corpus.len(), 8); // 4 vertices * 2 walks
120/// ```
121#[allow(clippy::too_many_arguments)]
122pub fn random_walks_node2vec(
123    graph: &Graph,
124    weights: Option<&[f64]>,
125    mode: DijkstraMode,
126    walks_per_vertex: u32,
127    walk_length: u32,
128    p: f64,
129    q: f64,
130    seed: u64,
131) -> IgraphResult<Vec<Vec<VertexId>>> {
132    let n = graph.vcount();
133    if n == 0 || walks_per_vertex == 0 || walk_length == 0 {
134        return Ok(Vec::new());
135    }
136
137    let total = u64::from(n)
138        .checked_mul(u64::from(walks_per_vertex))
139        .ok_or_else(|| {
140            crate::core::IgraphError::InvalidArgument(
141                "walks_per_vertex * vcount overflows u64".into(),
142            )
143        })?;
144
145    let capacity = usize::try_from(total).unwrap_or(usize::MAX);
146    let mut rng = SplitMix64::new(seed);
147    let mut corpus: Vec<Vec<VertexId>> = Vec::with_capacity(capacity);
148
149    let mut order: Vec<VertexId> = (0..n).collect();
150
151    for _ in 0..walks_per_vertex {
152        for i in (1..order.len()).rev() {
153            let j = rng.gen_index(i + 1);
154            order.swap(i, j);
155        }
156
157        for &start in &order {
158            let walk_seed = rng.next_u64();
159            let (vs, _) =
160                random_walk_node2vec(graph, weights, start, mode, walk_length, p, q, walk_seed)?;
161            corpus.push(vs);
162        }
163    }
164
165    Ok(corpus)
166}
167
168/// Generate random walks from a specific set of starting vertices.
169///
170/// Unlike [`random_walks`] which walks from every vertex, this function
171/// generates walks only from the provided `starts` list.
172///
173/// # Examples
174///
175/// ```
176/// use rust_igraph::{Graph, random_walks_from, DijkstraMode};
177///
178/// let g = Graph::from_edges(
179///     &[(0,1),(1,2),(2,3),(3,0)], false, Some(4)
180/// ).unwrap();
181/// let corpus = random_walks_from(
182///     &g, &[0, 2], None, DijkstraMode::Out, 3, 5, 42
183/// ).unwrap();
184/// assert_eq!(corpus.len(), 6); // 2 starts * 3 walks
185/// for walk in &corpus {
186///     assert!(walk[0] == 0 || walk[0] == 2);
187/// }
188/// ```
189pub fn random_walks_from(
190    graph: &Graph,
191    starts: &[VertexId],
192    weights: Option<&[f64]>,
193    mode: DijkstraMode,
194    walks_per_vertex: u32,
195    walk_length: u32,
196    seed: u64,
197) -> IgraphResult<Vec<Vec<VertexId>>> {
198    let n = graph.vcount();
199    if starts.is_empty() || walks_per_vertex == 0 || walk_length == 0 {
200        return Ok(Vec::new());
201    }
202
203    for &s in starts {
204        if s >= n {
205            return Err(crate::core::IgraphError::VertexOutOfRange { id: s, n });
206        }
207    }
208
209    let total = (starts.len() as u64)
210        .checked_mul(u64::from(walks_per_vertex))
211        .ok_or_else(|| {
212            crate::core::IgraphError::InvalidArgument(
213                "walks_per_vertex * starts.len() overflows u64".into(),
214            )
215        })?;
216
217    let capacity = usize::try_from(total).unwrap_or(usize::MAX);
218    let mut rng = SplitMix64::new(seed);
219    let mut corpus: Vec<Vec<VertexId>> = Vec::with_capacity(capacity);
220
221    let mut order: Vec<VertexId> = starts.to_vec();
222
223    for _ in 0..walks_per_vertex {
224        for i in (1..order.len()).rev() {
225            let j = rng.gen_index(i + 1);
226            order.swap(i, j);
227        }
228
229        for &start in &order {
230            let walk_seed = rng.next_u64();
231            let (vs, _) = random_walk(graph, weights, start, mode, walk_length, walk_seed)?;
232            corpus.push(vs);
233        }
234    }
235
236    Ok(corpus)
237}
238
239#[cfg(test)]
240mod tests {
241    use super::*;
242
243    fn cycle_graph(n: u32) -> Graph {
244        let edges: Vec<(u32, u32)> = (0..n).map(|i| (i, (i + 1) % n)).collect();
245        Graph::from_edges(&edges, false, Some(n)).unwrap()
246    }
247
248    #[test]
249    fn random_walks_correct_count() {
250        let g = cycle_graph(5);
251        let corpus = random_walks(&g, None, DijkstraMode::Out, 3, 10, 42).unwrap();
252        assert_eq!(corpus.len(), 15); // 5 * 3
253    }
254
255    #[test]
256    fn random_walks_walk_length() {
257        let g = cycle_graph(4);
258        let corpus = random_walks(&g, None, DijkstraMode::Out, 1, 7, 42).unwrap();
259        for walk in &corpus {
260            assert_eq!(walk.len(), 8); // cycle never gets stuck
261        }
262    }
263
264    #[test]
265    fn random_walks_all_vertices_covered() {
266        let g = cycle_graph(6);
267        let corpus = random_walks(&g, None, DijkstraMode::Out, 1, 3, 42).unwrap();
268        let mut seen = [false; 6];
269        for walk in &corpus {
270            seen[walk[0] as usize] = true;
271        }
272        assert!(seen.iter().all(|&s| s));
273    }
274
275    #[test]
276    fn random_walks_deterministic() {
277        let g = cycle_graph(5);
278        let c1 = random_walks(&g, None, DijkstraMode::Out, 2, 5, 99).unwrap();
279        let c2 = random_walks(&g, None, DijkstraMode::Out, 2, 5, 99).unwrap();
280        assert_eq!(c1, c2);
281    }
282
283    #[test]
284    fn random_walks_empty_graph() {
285        let g = Graph::with_vertices(0);
286        let corpus = random_walks(&g, None, DijkstraMode::Out, 5, 10, 42).unwrap();
287        assert!(corpus.is_empty());
288    }
289
290    #[test]
291    fn random_walks_zero_walks() {
292        let g = cycle_graph(4);
293        let corpus = random_walks(&g, None, DijkstraMode::Out, 0, 10, 42).unwrap();
294        assert!(corpus.is_empty());
295    }
296
297    #[test]
298    fn random_walks_zero_length() {
299        let g = cycle_graph(4);
300        let corpus = random_walks(&g, None, DijkstraMode::Out, 2, 0, 42).unwrap();
301        assert!(corpus.is_empty());
302    }
303
304    #[test]
305    fn random_walks_node2vec_correct_count() {
306        let g = cycle_graph(5);
307        let corpus =
308            random_walks_node2vec(&g, None, DijkstraMode::Out, 2, 5, 1.0, 1.0, 42).unwrap();
309        assert_eq!(corpus.len(), 10); // 5 * 2
310    }
311
312    #[test]
313    fn random_walks_node2vec_deterministic() {
314        let g = cycle_graph(4);
315        let c1 = random_walks_node2vec(&g, None, DijkstraMode::Out, 3, 8, 2.0, 0.5, 77).unwrap();
316        let c2 = random_walks_node2vec(&g, None, DijkstraMode::Out, 3, 8, 2.0, 0.5, 77).unwrap();
317        assert_eq!(c1, c2);
318    }
319
320    #[test]
321    fn random_walks_from_specific_starts() {
322        let g = cycle_graph(6);
323        let corpus = random_walks_from(&g, &[1, 3, 5], None, DijkstraMode::Out, 2, 4, 42).unwrap();
324        assert_eq!(corpus.len(), 6); // 3 * 2
325        for walk in &corpus {
326            assert!(walk[0] == 1 || walk[0] == 3 || walk[0] == 5);
327        }
328    }
329
330    #[test]
331    fn random_walks_from_invalid_start() {
332        let g = cycle_graph(4);
333        let result = random_walks_from(&g, &[0, 10], None, DijkstraMode::Out, 1, 5, 42);
334        assert!(result.is_err());
335    }
336
337    #[test]
338    fn random_walks_from_empty_starts() {
339        let g = cycle_graph(4);
340        let corpus = random_walks_from(&g, &[], None, DijkstraMode::Out, 3, 5, 42).unwrap();
341        assert!(corpus.is_empty());
342    }
343
344    #[test]
345    fn random_walks_weighted() {
346        let mut g = Graph::with_vertices(3);
347        g.add_edge(0, 1).unwrap(); // edge 0
348        g.add_edge(1, 2).unwrap(); // edge 1
349        g.add_edge(0, 2).unwrap(); // edge 2
350        let weights = vec![10.0, 1.0, 0.001];
351        let corpus = random_walks(&g, Some(&weights), DijkstraMode::Out, 5, 3, 42).unwrap();
352        assert_eq!(corpus.len(), 15); // 3 * 5
353    }
354
355    #[test]
356    fn random_walks_directed() {
357        // Directed cycle: 0→1→2→3→0
358        let g = Graph::from_edges(&[(0, 1), (1, 2), (2, 3), (3, 0)], true, Some(4)).unwrap();
359        let corpus = random_walks(&g, None, DijkstraMode::Out, 2, 5, 42).unwrap();
360        assert_eq!(corpus.len(), 8);
361        // All walks from vertex v should follow the directed cycle
362        for walk in &corpus {
363            for i in 1..walk.len() {
364                assert_eq!(walk[i], (walk[i - 1] + 1) % 4);
365            }
366        }
367    }
368}