Skip to main content

rust_igraph/algorithms/io/
pajek.rs

1//! Pajek (.net) I/O (ALGO-IO-005).
2//!
3//! Reads and writes graphs in a subset of the Pajek `.net` format.
4//! The format has a `*Vertices N` header followed by optional vertex
5//! labels, then `*Edges` (undirected) or `*Arcs` (directed) sections
6//! listing edges with optional weights. Vertex labels are stored as the
7//! `"name"` vertex attribute and edge weights as the `"weight"` edge
8//! attribute via the [`Graph`] attribute system.
9//!
10//! ```text
11//! *Vertices 4
12//! 1 "Alice"
13//! 2 "Bob"
14//! 3 "Carol"
15//! 4 "Dave"
16//! *Edges
17//! 1 2 1.0
18//! 2 3
19//! 3 4 2.5
20//! ```
21//!
22//! Limitations:
23//! - Only `.net` files (not `.paj` project files)
24//! - No temporal networks
25//! - Mixed directed/undirected not supported
26//! - Bipartite mode not supported (vertex types ignored)
27//!
28//! Counterpart of `igraph_read_graph_pajek` / `igraph_write_graph_pajek`.
29
30use std::io::{BufRead, BufReader, Read, Write};
31
32use crate::core::attributes::AttributeValue;
33use crate::core::{Graph, IgraphError, IgraphResult};
34
35/// Result of reading a Pajek file.
36#[derive(Debug, Clone)]
37pub struct PajekGraph {
38    /// The parsed graph.
39    pub graph: Graph,
40    /// Vertex labels (one per vertex, in index order). `None` if no
41    /// vertex had a label.
42    pub labels: Option<Vec<String>>,
43    /// Edge weights (one per edge, in edge order). `None` if no edge
44    /// had a weight.
45    pub weights: Option<Vec<f64>>,
46}
47
48/// Read a graph from Pajek (`.net`) format.
49///
50/// Supports `*Vertices`, `*Edges` (undirected), and `*Arcs` (directed)
51/// sections. Vertex IDs in the file are 1-based.
52///
53/// # Examples
54///
55/// ```
56/// use rust_igraph::read_pajek;
57///
58/// let pajek = b"*Vertices 3\n1 \"A\"\n2 \"B\"\n3 \"C\"\n*Edges\n1 2\n2 3\n1 3\n";
59/// let result = read_pajek(&pajek[..]).unwrap();
60/// assert_eq!(result.graph.vcount(), 3);
61/// assert_eq!(result.graph.ecount(), 3);
62/// assert!(!result.graph.is_directed());
63/// ```
64#[allow(clippy::too_many_lines)]
65pub fn read_pajek<R: Read>(input: R) -> IgraphResult<PajekGraph> {
66    #[derive(PartialEq)]
67    enum Section {
68        None,
69        Vertices,
70        Edges,
71    }
72
73    let reader = BufReader::new(input);
74
75    let mut n_vertices: Option<u32> = None;
76    let mut directed = false;
77    let mut labels: Vec<String> = Vec::new();
78    let mut has_any_label = false;
79    let mut edges: Vec<(u32, u32)> = Vec::new();
80    let mut weights: Vec<f64> = Vec::new();
81    let mut has_any_weight = false;
82    let mut section = Section::None;
83
84    for (line_idx, line_result) in reader.lines().enumerate() {
85        let line = line_result?;
86        let trimmed = line.trim();
87
88        if trimmed.is_empty() || trimmed.starts_with('%') {
89            continue;
90        }
91
92        let lower = trimmed.to_ascii_lowercase();
93
94        // Section headers
95        if lower.starts_with("*vertices") {
96            let parts: Vec<&str> = trimmed.split_whitespace().collect();
97            if parts.len() < 2 {
98                return Err(IgraphError::Parse {
99                    line: line_idx.wrapping_add(1),
100                    message: "*Vertices line needs vertex count".into(),
101                });
102            }
103            let n: u32 = parts[1].parse().map_err(|e| IgraphError::Parse {
104                line: line_idx.wrapping_add(1),
105                message: format!("invalid vertex count: {e}"),
106            })?;
107            n_vertices = Some(n);
108            labels = vec![String::new(); n as usize];
109            section = Section::Vertices;
110            continue;
111        }
112
113        if lower.starts_with("*edges") || lower.starts_with("*edgeslist") {
114            directed = false;
115            section = Section::Edges;
116            continue;
117        }
118
119        if lower.starts_with("*arcs") || lower.starts_with("*arcslist") {
120            directed = true;
121            section = Section::Edges;
122            continue;
123        }
124
125        // Skip other section headers we don't handle
126        if lower.starts_with('*') {
127            section = Section::None;
128            continue;
129        }
130
131        match section {
132            Section::Vertices => {
133                // Format: ID "label" [x y z] [other attrs...]
134                // We only extract the ID and label
135                let (vid, label) = parse_vertex_line(trimmed, line_idx)?;
136                if let Some(n) = n_vertices {
137                    if vid == 0 || vid > n {
138                        return Err(IgraphError::Parse {
139                            line: line_idx.wrapping_add(1),
140                            message: format!("vertex id {vid} out of range [1, {n}]"),
141                        });
142                    }
143                }
144                if let Some(lbl) = label {
145                    has_any_label = true;
146                    let idx = (vid.wrapping_sub(1)) as usize;
147                    if idx < labels.len() {
148                        labels[idx] = lbl;
149                    }
150                }
151            }
152            Section::Edges => {
153                // Format: FROM TO [weight] [other attrs...]
154                let parts: Vec<&str> = trimmed.split_whitespace().collect();
155                if parts.len() < 2 {
156                    return Err(IgraphError::Parse {
157                        line: line_idx.wrapping_add(1),
158                        message: "edge line needs at least: FROM TO".into(),
159                    });
160                }
161                let from: u32 = parts[0].parse().map_err(|e| IgraphError::Parse {
162                    line: line_idx.wrapping_add(1),
163                    message: format!("invalid source id: {e}"),
164                })?;
165                let to: u32 = parts[1].parse().map_err(|e| IgraphError::Parse {
166                    line: line_idx.wrapping_add(1),
167                    message: format!("invalid target id: {e}"),
168                })?;
169
170                if from == 0 || to == 0 {
171                    return Err(IgraphError::Parse {
172                        line: line_idx.wrapping_add(1),
173                        message: "vertex IDs are 1-based in Pajek format".into(),
174                    });
175                }
176
177                edges.push((from.wrapping_sub(1), to.wrapping_sub(1)));
178
179                let weight = if parts.len() >= 3 {
180                    match parts[2].parse::<f64>() {
181                        Ok(w) => {
182                            has_any_weight = true;
183                            w
184                        }
185                        Err(_) => 0.0,
186                    }
187                } else {
188                    0.0
189                };
190                weights.push(weight);
191            }
192            Section::None => {
193                // Ignore lines in unknown sections
194            }
195        }
196    }
197
198    let n = n_vertices.ok_or_else(|| IgraphError::Parse {
199        line: 0,
200        message: "no *Vertices line found".into(),
201    })?;
202
203    let mut graph = Graph::new(n, directed)?;
204    graph.add_edges(edges)?;
205
206    // Store labels and weights as graph attributes for I/O interop
207    if has_any_label {
208        graph.set_vertex_attribute_all(
209            "name",
210            labels
211                .iter()
212                .map(|l| AttributeValue::String(l.clone()))
213                .collect(),
214        )?;
215    }
216    if has_any_weight {
217        graph.set_edge_attribute_all(
218            "weight",
219            weights
220                .iter()
221                .map(|&w| AttributeValue::Numeric(w))
222                .collect(),
223        )?;
224    }
225
226    Ok(PajekGraph {
227        graph,
228        labels: if has_any_label { Some(labels) } else { None },
229        weights: if has_any_weight { Some(weights) } else { None },
230    })
231}
232
233/// Write a graph in Pajek (`.net`) format.
234///
235/// Writes `*Vertices` section (with optional labels), then `*Edges`
236/// (undirected) or `*Arcs` (directed) section with optional weights.
237///
238/// When `labels` is `None`, falls back to the `"name"` vertex attribute
239/// if present. When `weights` is `None`, falls back to the `"weight"`
240/// edge attribute if present.
241///
242/// # Examples
243///
244/// ```
245/// use rust_igraph::{Graph, write_pajek, AttributeValue};
246///
247/// let mut g = Graph::with_vertices(3);
248/// g.add_edge(0, 1).unwrap();
249/// g.add_edge(1, 2).unwrap();
250/// g.set_vertex_attribute_all("name", vec![
251///     AttributeValue::String("A".into()),
252///     AttributeValue::String("B".into()),
253///     AttributeValue::String("C".into()),
254/// ]).unwrap();
255///
256/// let mut buf = Vec::new();
257/// write_pajek(&g, None, None, &mut buf).unwrap();
258/// let s = String::from_utf8(buf).unwrap();
259/// assert!(s.contains("*Vertices 3"));
260/// assert!(s.contains("\"A\""));
261/// assert!(s.contains("*Edges"));
262/// ```
263pub fn write_pajek<W: Write>(
264    graph: &Graph,
265    labels: Option<&[String]>,
266    weights: Option<&[f64]>,
267    writer: &mut W,
268) -> IgraphResult<()> {
269    if let Some(l) = labels {
270        if l.len() != graph.vcount() as usize {
271            return Err(IgraphError::InvalidArgument(format!(
272                "labels length {} does not match vcount {}",
273                l.len(),
274                graph.vcount()
275            )));
276        }
277    }
278    if let Some(w) = weights {
279        if w.len() != graph.ecount() {
280            return Err(IgraphError::InvalidArgument(format!(
281                "weights length {} does not match ecount {}",
282                w.len(),
283                graph.ecount()
284            )));
285        }
286    }
287
288    writeln!(writer, "*Vertices {}", graph.vcount())?;
289    for v in 0..graph.vcount() {
290        let label = if let Some(l) = labels {
291            format!("\"{}\"", escape_pajek_string(&l[v as usize]))
292        } else if let Some(name) = graph
293            .vertex_attribute("name", v)
294            .and_then(AttributeValue::as_str)
295        {
296            format!("\"{}\"", escape_pajek_string(name))
297        } else {
298            format!("\"{}\"", v + 1)
299        };
300        writeln!(writer, "{} {label}", v + 1)?;
301    }
302
303    if graph.is_directed() {
304        writeln!(writer, "*Arcs")?;
305    } else {
306        writeln!(writer, "*Edges")?;
307    }
308
309    for eid in 0..graph.ecount() {
310        #[allow(clippy::cast_possible_truncation)]
311        let eid_u32 = eid as u32;
312        let (from, to) = graph.edge(eid_u32)?;
313        if let Some(w) = weights {
314            writeln!(writer, "{} {} {}", from + 1, to + 1, w[eid])?;
315        } else if let Some(w) = graph
316            .edge_attribute("weight", eid_u32)
317            .and_then(AttributeValue::as_f64)
318        {
319            writeln!(writer, "{} {} {w}", from + 1, to + 1)?;
320        } else {
321            writeln!(writer, "{} {}", from + 1, to + 1)?;
322        }
323    }
324
325    Ok(())
326}
327
328fn parse_vertex_line(line: &str, line_idx: usize) -> IgraphResult<(u32, Option<String>)> {
329    let mut chars = line.chars().peekable();
330
331    // Skip leading whitespace
332    while chars.peek().is_some_and(|c| c.is_whitespace()) {
333        chars.next();
334    }
335
336    // Read vertex ID (digits)
337    let mut id_str = String::new();
338    while chars.peek().is_some_and(char::is_ascii_digit) {
339        let Some(c) = chars.next() else { break };
340        id_str.push(c);
341    }
342
343    if id_str.is_empty() {
344        return Err(IgraphError::Parse {
345            line: line_idx.wrapping_add(1),
346            message: "expected vertex ID".into(),
347        });
348    }
349
350    let vid: u32 = id_str.parse().map_err(|e| IgraphError::Parse {
351        line: line_idx.wrapping_add(1),
352        message: format!("invalid vertex id: {e}"),
353    })?;
354
355    // Skip whitespace
356    while chars.peek().is_some_and(|c| c.is_whitespace()) {
357        chars.next();
358    }
359
360    // Try to read a quoted label
361    let label = if chars.peek() == Some(&'"') {
362        chars.next(); // consume opening quote
363        let mut lbl = String::new();
364        loop {
365            match chars.next() {
366                Some('"') | None => break,
367                Some('\\') => {
368                    if let Some(c) = chars.next() {
369                        lbl.push(c);
370                    }
371                }
372                Some(c) => lbl.push(c),
373            }
374        }
375        Some(lbl)
376    } else {
377        // Unquoted label: take next non-whitespace token
378        let mut lbl = String::new();
379        while chars.peek().is_some_and(|c| !c.is_whitespace()) {
380            let Some(c) = chars.next() else { break };
381            lbl.push(c);
382        }
383        if lbl.is_empty() { None } else { Some(lbl) }
384    };
385
386    Ok((vid, label))
387}
388
389fn escape_pajek_string(s: &str) -> String {
390    let mut out = String::with_capacity(s.len());
391    for c in s.chars() {
392        match c {
393            '"' => out.push_str("\\\""),
394            '\\' => out.push_str("\\\\"),
395            _ => out.push(c),
396        }
397    }
398    out
399}
400
401#[cfg(test)]
402mod tests {
403    use super::*;
404
405    #[test]
406    fn test_basic_undirected() {
407        let input = b"*Vertices 3\n1 \"A\"\n2 \"B\"\n3 \"C\"\n*Edges\n1 2\n2 3\n1 3\n";
408        let result = read_pajek(&input[..]).unwrap();
409        assert_eq!(result.graph.vcount(), 3);
410        assert_eq!(result.graph.ecount(), 3);
411        assert!(!result.graph.is_directed());
412        let labels = result.labels.unwrap();
413        assert_eq!(labels, vec!["A", "B", "C"]);
414    }
415
416    #[test]
417    fn test_directed_arcs() {
418        let input = b"*Vertices 2\n*Arcs\n1 2\n2 1\n";
419        let result = read_pajek(&input[..]).unwrap();
420        assert!(result.graph.is_directed());
421        assert_eq!(result.graph.ecount(), 2);
422    }
423
424    #[test]
425    fn test_weighted_edges() {
426        let input = b"*Vertices 3\n*Edges\n1 2 1.5\n2 3 2.5\n";
427        let result = read_pajek(&input[..]).unwrap();
428        let w = result.weights.unwrap();
429        assert!((w[0] - 1.5).abs() < 1e-10);
430        assert!((w[1] - 2.5).abs() < 1e-10);
431    }
432
433    #[test]
434    fn test_no_labels() {
435        let input = b"*Vertices 2\n*Edges\n1 2\n";
436        let result = read_pajek(&input[..]).unwrap();
437        assert!(result.labels.is_none());
438    }
439
440    #[test]
441    fn test_no_weights() {
442        let input = b"*Vertices 2\n*Edges\n1 2\n";
443        let result = read_pajek(&input[..]).unwrap();
444        assert!(result.weights.is_none());
445    }
446
447    #[test]
448    fn test_mixed_weights() {
449        let input = b"*Vertices 3\n*Edges\n1 2 3.0\n2 3\n";
450        let result = read_pajek(&input[..]).unwrap();
451        let w = result.weights.unwrap();
452        assert!((w[0] - 3.0).abs() < 1e-10);
453        assert!((w[1] - 0.0).abs() < 1e-10);
454    }
455
456    #[test]
457    fn test_comment_lines_skipped() {
458        let input = b"% comment\n*Vertices 2\n% another\n*Edges\n1 2\n";
459        let result = read_pajek(&input[..]).unwrap();
460        assert_eq!(result.graph.vcount(), 2);
461        assert_eq!(result.graph.ecount(), 1);
462    }
463
464    #[test]
465    fn test_case_insensitive_sections() {
466        let input = b"*vertices 2\n*edges\n1 2\n";
467        let result = read_pajek(&input[..]).unwrap();
468        assert_eq!(result.graph.vcount(), 2);
469    }
470
471    #[test]
472    fn test_no_vertices_error() {
473        let input = b"*Edges\n1 2\n";
474        let result = read_pajek(&input[..]);
475        assert!(result.is_err());
476    }
477
478    #[test]
479    fn test_zero_vertex_id_error() {
480        let input = b"*Vertices 2\n*Edges\n0 1\n";
481        let result = read_pajek(&input[..]);
482        assert!(result.is_err());
483    }
484
485    #[test]
486    fn test_vertex_id_out_of_range_error() {
487        let input = b"*Vertices 2\n1 \"A\"\n5 \"E\"\n*Edges\n1 2\n";
488        let result = read_pajek(&input[..]);
489        assert!(result.is_err());
490    }
491
492    #[test]
493    fn test_self_loop() {
494        let input = b"*Vertices 2\n*Edges\n1 1\n";
495        let result = read_pajek(&input[..]).unwrap();
496        assert_eq!(result.graph.ecount(), 1);
497    }
498
499    #[test]
500    fn test_empty_graph() {
501        let input = b"*Vertices 5\n*Edges\n";
502        let result = read_pajek(&input[..]).unwrap();
503        assert_eq!(result.graph.vcount(), 5);
504        assert_eq!(result.graph.ecount(), 0);
505    }
506
507    #[test]
508    fn test_quoted_label_with_escape() {
509        let input = b"*Vertices 1\n1 \"hello \\\"world\\\"\"\n*Edges\n";
510        let result = read_pajek(&input[..]).unwrap();
511        let labels = result.labels.unwrap();
512        assert_eq!(labels[0], "hello \"world\"");
513    }
514
515    #[test]
516    fn test_write_undirected() {
517        let mut g = Graph::with_vertices(3);
518        g.add_edge(0, 1).unwrap();
519        g.add_edge(1, 2).unwrap();
520
521        let labels = vec!["A".to_string(), "B".to_string(), "C".to_string()];
522        let mut buf = Vec::new();
523        write_pajek(&g, Some(&labels), None, &mut buf).unwrap();
524        let s = String::from_utf8(buf).unwrap();
525        assert!(s.contains("*Vertices 3"));
526        assert!(s.contains("1 \"A\""));
527        assert!(s.contains("2 \"B\""));
528        assert!(s.contains("3 \"C\""));
529        assert!(s.contains("*Edges"));
530        assert!(s.contains("1 2\n"));
531        assert!(s.contains("2 3\n"));
532    }
533
534    #[test]
535    fn test_write_directed() {
536        let mut g = Graph::new(2, true).unwrap();
537        g.add_edge(0, 1).unwrap();
538
539        let mut buf = Vec::new();
540        write_pajek(&g, None, None, &mut buf).unwrap();
541        let s = String::from_utf8(buf).unwrap();
542        assert!(s.contains("*Arcs"));
543    }
544
545    #[test]
546    fn test_write_weighted() {
547        let mut g = Graph::with_vertices(2);
548        g.add_edge(0, 1).unwrap();
549
550        let weights = vec![4.5];
551        let mut buf = Vec::new();
552        write_pajek(&g, None, Some(&weights), &mut buf).unwrap();
553        let s = String::from_utf8(buf).unwrap();
554        assert!(s.contains("1 2 4.5"));
555    }
556
557    #[test]
558    fn test_write_labels_mismatch_error() {
559        let g = Graph::with_vertices(3);
560        let labels = vec!["A".to_string()];
561        let mut buf = Vec::new();
562        assert!(write_pajek(&g, Some(&labels), None, &mut buf).is_err());
563    }
564
565    #[test]
566    fn test_write_weights_mismatch_error() {
567        let mut g = Graph::with_vertices(2);
568        g.add_edge(0, 1).unwrap();
569        let weights = vec![1.0, 2.0];
570        let mut buf = Vec::new();
571        assert!(write_pajek(&g, None, Some(&weights), &mut buf).is_err());
572    }
573
574    #[test]
575    fn test_round_trip() {
576        let input = b"*Vertices 4\n1 \"Alice\"\n2 \"Bob\"\n3 \"Carol\"\n4 \"Dave\"\n*Edges\n1 2 1.0\n2 3 2.0\n3 4 3.0\n";
577        let result = read_pajek(&input[..]).unwrap();
578
579        let mut buf = Vec::new();
580        write_pajek(
581            &result.graph,
582            result.labels.as_deref(),
583            result.weights.as_deref(),
584            &mut buf,
585        )
586        .unwrap();
587
588        let result2 = read_pajek(&buf[..]).unwrap();
589        assert_eq!(result2.graph.vcount(), result.graph.vcount());
590        assert_eq!(result2.graph.ecount(), result.graph.ecount());
591        assert_eq!(result2.labels, result.labels);
592    }
593
594    #[test]
595    fn test_blank_lines_skipped() {
596        let input = b"\n*Vertices 2\n\n1 \"X\"\n\n*Edges\n\n1 2\n\n";
597        let result = read_pajek(&input[..]).unwrap();
598        assert_eq!(result.graph.vcount(), 2);
599        assert_eq!(result.graph.ecount(), 1);
600    }
601
602    // --- attribute integration tests ---
603
604    #[test]
605    fn read_stores_name_attribute() {
606        let input = b"*Vertices 3\n1 \"Alice\"\n2 \"Bob\"\n3 \"Carol\"\n*Edges\n1 2\n";
607        let result = read_pajek(&input[..]).unwrap();
608        assert_eq!(
609            result
610                .graph
611                .vertex_attribute("name", 0)
612                .and_then(AttributeValue::as_str),
613            Some("Alice"),
614        );
615        assert_eq!(
616            result
617                .graph
618                .vertex_attribute("name", 2)
619                .and_then(AttributeValue::as_str),
620            Some("Carol"),
621        );
622    }
623
624    #[test]
625    fn read_stores_weight_attribute() {
626        let input = b"*Vertices 3\n*Edges\n1 2 1.5\n2 3 2.5\n";
627        let result = read_pajek(&input[..]).unwrap();
628        assert_eq!(
629            result
630                .graph
631                .edge_attribute("weight", 0)
632                .and_then(AttributeValue::as_f64),
633            Some(1.5),
634        );
635        assert_eq!(
636            result
637                .graph
638                .edge_attribute("weight", 1)
639                .and_then(AttributeValue::as_f64),
640            Some(2.5),
641        );
642    }
643
644    #[test]
645    fn read_no_labels_no_name_attribute() {
646        let input = b"*Vertices 2\n*Edges\n1 2\n";
647        let result = read_pajek(&input[..]).unwrap();
648        assert!(!result.graph.has_vertex_attribute("name"));
649    }
650
651    #[test]
652    fn write_uses_name_attribute_fallback() {
653        let mut g = Graph::with_vertices(2);
654        g.add_edge(0, 1).unwrap();
655        g.set_vertex_attribute_all(
656            "name",
657            vec![
658                AttributeValue::String("X".into()),
659                AttributeValue::String("Y".into()),
660            ],
661        )
662        .unwrap();
663
664        let mut buf = Vec::new();
665        write_pajek(&g, None, None, &mut buf).unwrap();
666        let s = String::from_utf8(buf).unwrap();
667        assert!(s.contains("\"X\""));
668        assert!(s.contains("\"Y\""));
669    }
670
671    #[test]
672    fn write_uses_weight_attribute_fallback() {
673        let mut g = Graph::with_vertices(2);
674        g.add_edge(0, 1).unwrap();
675        g.set_edge_attribute_all("weight", vec![AttributeValue::Numeric(4.25)])
676            .unwrap();
677
678        let mut buf = Vec::new();
679        write_pajek(&g, None, None, &mut buf).unwrap();
680        let s = String::from_utf8(buf).unwrap();
681        assert!(s.contains("1 2 4.25"));
682    }
683
684    #[test]
685    fn roundtrip_with_attributes() {
686        let input = b"*Vertices 3\n1 \"Alice\"\n2 \"Bob\"\n3 \"Carol\"\n*Edges\n1 2 1.5\n2 3 2.0\n";
687        let result = read_pajek(&input[..]).unwrap();
688
689        // Write using attribute fallback (no explicit labels/weights)
690        let mut buf = Vec::new();
691        write_pajek(&result.graph, None, None, &mut buf).unwrap();
692
693        let result2 = read_pajek(&buf[..]).unwrap();
694        assert_eq!(result2.graph.vcount(), 3);
695        assert_eq!(result2.graph.ecount(), 2);
696        assert_eq!(
697            result2
698                .graph
699                .vertex_attribute("name", 0)
700                .and_then(AttributeValue::as_str),
701            Some("Alice"),
702        );
703        assert_eq!(
704            result2
705                .graph
706                .edge_attribute("weight", 0)
707                .and_then(AttributeValue::as_f64),
708            Some(1.5),
709        );
710    }
711}