bookdata/graph/
load.rs

1use std::collections::HashMap;
2
3use anyhow::{anyhow, Result};
4use log::*;
5
6use crate::layout::Config;
7
8use super::sources::*;
9use super::{BookID, IdGraph, IdNode};
10use polars::prelude::*;
11
12type NodeMap = HashMap<i32, IdNode>;
13
14struct GraphBuilder {
15    graph: IdGraph,
16    nodes: NodeMap,
17}
18
19impl GraphBuilder {
20    fn add_vertices<R: NodeRead>(&mut self, src: R) -> Result<()> {
21        info!("scanning vertices from {:?}", src);
22        let node_df = src.read_node_ids()?;
23        debug!("node schema: {:?}", node_df.schema());
24        let mut node_df = node_df.collect()?;
25        let ninit = self.nodes.len();
26
27        // pull out the column to reduce memory
28        let code_s = node_df.drop_in_place("code")?;
29        let code_s = code_s.cast(&DataType::Int32)?;
30        let codes = code_s.i32()?;
31        let labels = node_df.column("label").ok().map(|c| c.str()).transpose()?;
32        for i in 0..codes.len() {
33            let code = codes.get(i).unwrap();
34            let label = labels.map(|c| c.get(i)).flatten();
35            let label = label.map(|s| s.to_string());
36            let entry = self.nodes.entry(code);
37            entry.or_insert_with(|| {
38                self.graph.add_node(BookID {
39                    code,
40                    label,
41                    cluster: 0,
42                })
43            });
44        }
45
46        info!(
47            "loaded {} new vertices from {:?}",
48            self.nodes.len() - ninit,
49            src
50        );
51
52        Ok(())
53    }
54
55    fn add_edges<R: EdgeRead>(&mut self, src: R) -> Result<()> {
56        info!("scanning edges from {:?}", src);
57        let edge_df = src.read_edges()?;
58        debug!("edge schema: {:?}", edge_df.schema());
59        let edge_df = edge_df.collect()?;
60        let src_s = edge_df.column("src")?.cast(&DataType::Int32)?;
61        let srcs = src_s.i32()?;
62        let dst_s = edge_df.column("dst")?.cast(&DataType::Int32)?;
63        let dsts = dst_s.i32()?;
64
65        let iter = srcs.into_iter().zip(dsts.into_iter());
66        let mut n = 0;
67
68        for pair in iter {
69            if let (Some(sn), Some(dn)) = pair {
70                let sid = self
71                    .nodes
72                    .get(&sn)
73                    .ok_or_else(|| anyhow!("unknown source node {}", sn))?;
74                let did = self
75                    .nodes
76                    .get(&dn)
77                    .ok_or_else(|| anyhow!("unknown destination node {}", sn))?;
78                self.graph.add_edge(*sid, *did, ());
79                n += 1;
80            }
81        }
82
83        info!("added {} edges from {:?}", n, src);
84
85        Ok(())
86    }
87}
88
89pub fn construct_graph(cfg: &Config) -> Result<IdGraph> {
90    let graph = IdGraph::new_undirected();
91    let nodes = NodeMap::new();
92    let mut gb = GraphBuilder { graph, nodes };
93
94    info!("loading nodes");
95    gb.add_vertices(ISBN)?;
96    gb.add_vertices(LOC)?;
97    gb.add_vertices(OLEditions)?;
98    gb.add_vertices(OLWorks)?;
99    if cfg.goodreads.enabled {
100        gb.add_vertices(GRBooks)?;
101        gb.add_vertices(GRWorks)?;
102    }
103
104    info!("loading edges");
105    gb.add_edges(LOC)?;
106    gb.add_edges(OLEditions)?;
107    gb.add_edges(OLWorks)?;
108    if cfg.goodreads.enabled {
109        gb.add_edges(GRBooks)?;
110        gb.add_edges(GRWorks)?;
111    }
112
113    let graph = gb.graph;
114    info!(
115        "graph has {} nodes, {} edges",
116        graph.node_count(),
117        graph.edge_count()
118    );
119    Ok(graph)
120}