bookdata/cli/goodreads/
scan.rs1use crate::goodreads::*;
2use crate::io::object::{ChunkWriter, ThreadObjectWriter, UnchunkWriter};
3use crate::prelude::*;
4use crate::util::logging::data_progress;
5use serde::de::DeserializeOwned;
6
7#[derive(clap::Subcommand, Debug)]
8pub enum GRScan {
9 Works(ScanInput),
11 Books(ScanInput),
13 Genres(ScanInput),
15 Authors(ScanInput),
17 Interactions(ScanInput),
19 Reviews(ScanInput),
21}
22
23#[derive(Args, Debug)]
24pub struct ScanInput {
25 #[arg(name = "INPUT")]
27 infile: PathBuf,
28}
29
30fn scan_gr<R, W>(path: &Path, proc: W) -> Result<()>
31where
32 W: ObjectWriter<R> + DataSink + Send + Sync + 'static,
33 R: DeserializeOwned + Send + Sync + 'static,
34{
35 let outs: Vec<_> = proc.output_files();
36
37 info!("reading data from {}", path.display());
38 let pb = data_progress(0);
39 let read = LineProcessor::open_gzip(path, pb.clone())?;
40 let proc = ChunkWriter::new(proc);
41 let writer = ThreadObjectWriter::wrap(proc).with_name("output").spawn();
42 let mut writer = UnchunkWriter::new(writer);
43 read.process_json(&mut writer)?;
44 pb.finish_and_clear();
45
46 writer.finish()?;
47
48 for out in outs {
49 let outf = out.as_path();
50 info!(
51 "output {} is {}",
52 outf.display(),
53 friendly::bytes(file_size(outf)?)
54 );
55 }
56
57 Ok(())
58}
59
60impl GRScan {
61 pub fn exec(&self) -> Result<()> {
62 match self {
63 GRScan::Works(opts) => {
64 info!("scanning GoodReads works");
65 scan_gr(&opts.infile, work::WorkWriter::open()?)?;
66 }
67 GRScan::Books(opts) => {
68 info!("scanning GoodReads books");
69 scan_gr(&opts.infile, book::BookWriter::open()?)?;
70 }
71 GRScan::Genres(opts) => {
72 info!("scanning GoodReads book genres");
73 scan_gr(&opts.infile, genres::BookGenreWriter::open()?)?;
74 }
75 GRScan::Authors(opts) => {
76 info!("scanning GoodReads book genres");
77 scan_gr(&opts.infile, author::AuthorWriter::open()?)?;
78 }
79 GRScan::Interactions(opts) => {
80 info!("scanning GoodReads interactions");
81 scan_gr(&opts.infile, interaction::IntWriter::open()?)?;
82 }
83 GRScan::Reviews(opts) => {
84 info!("scanning GoodReads reviews");
85 scan_gr(&opts.infile, review::ReviewWriter::open()?)?;
86 }
87 };
88
89 Ok(())
90 }
91}