1use std::collections::HashMap;
2
3use anyhow::{anyhow, Result};
4use log::*;
5
6use crate::layout::Config;
7
8use super::sources::*;
9use super::{BookID, IdGraph, IdNode};
10use polars::prelude::*;
11
12type NodeMap = HashMap<i32, IdNode>;
13
14struct GraphBuilder {
15 graph: IdGraph,
16 nodes: NodeMap,
17}
18
19impl GraphBuilder {
20 fn add_vertices<R: NodeRead>(&mut self, src: R) -> Result<()> {
21 info!("scanning vertices from {:?}", src);
22 let node_df = src.read_node_ids()?;
23 debug!("node schema: {:?}", node_df.schema());
24 let mut node_df = node_df.collect()?;
25 let ninit = self.nodes.len();
26
27 let code_s = node_df.drop_in_place("code")?;
29 let code_s = code_s.cast(&DataType::Int32)?;
30 let codes = code_s.i32()?;
31 let labels = node_df.column("label").ok().map(|c| c.str()).transpose()?;
32 for i in 0..codes.len() {
33 let code = codes.get(i).unwrap();
34 let label = labels.map(|c| c.get(i)).flatten();
35 let label = label.map(|s| s.to_string());
36 let entry = self.nodes.entry(code);
37 entry.or_insert_with(|| {
38 self.graph.add_node(BookID {
39 code,
40 label,
41 cluster: 0,
42 })
43 });
44 }
45
46 info!(
47 "loaded {} new vertices from {:?}",
48 self.nodes.len() - ninit,
49 src
50 );
51
52 Ok(())
53 }
54
55 fn add_edges<R: EdgeRead>(&mut self, src: R) -> Result<()> {
56 info!("scanning edges from {:?}", src);
57 let edge_df = src.read_edges()?;
58 debug!("edge schema: {:?}", edge_df.schema());
59 let edge_df = edge_df.collect()?;
60 let src_s = edge_df.column("src")?.cast(&DataType::Int32)?;
61 let srcs = src_s.i32()?;
62 let dst_s = edge_df.column("dst")?.cast(&DataType::Int32)?;
63 let dsts = dst_s.i32()?;
64
65 let iter = srcs.into_iter().zip(dsts.into_iter());
66 let mut n = 0;
67
68 for pair in iter {
69 if let (Some(sn), Some(dn)) = pair {
70 let sid = self
71 .nodes
72 .get(&sn)
73 .ok_or_else(|| anyhow!("unknown source node {}", sn))?;
74 let did = self
75 .nodes
76 .get(&dn)
77 .ok_or_else(|| anyhow!("unknown destination node {}", sn))?;
78 self.graph.add_edge(*sid, *did, ());
79 n += 1;
80 }
81 }
82
83 info!("added {} edges from {:?}", n, src);
84
85 Ok(())
86 }
87}
88
89pub fn construct_graph(cfg: &Config) -> Result<IdGraph> {
90 let graph = IdGraph::new_undirected();
91 let nodes = NodeMap::new();
92 let mut gb = GraphBuilder { graph, nodes };
93
94 info!("loading nodes");
95 gb.add_vertices(ISBN)?;
96 gb.add_vertices(LOC)?;
97 gb.add_vertices(OLEditions)?;
98 gb.add_vertices(OLWorks)?;
99 if cfg.goodreads.enabled {
100 gb.add_vertices(GRBooks)?;
101 gb.add_vertices(GRWorks)?;
102 }
103
104 info!("loading edges");
105 gb.add_edges(LOC)?;
106 gb.add_edges(OLEditions)?;
107 gb.add_edges(OLWorks)?;
108 if cfg.goodreads.enabled {
109 gb.add_edges(GRBooks)?;
110 gb.add_edges(GRWorks)?;
111 }
112
113 let graph = gb.graph;
114 info!(
115 "graph has {} nodes, {} edges",
116 graph.node_count(),
117 graph.edge_count()
118 );
119 Ok(graph)
120}