bookdata/cli/goodreads/
scan.rs

1use crate::goodreads::*;
2use crate::io::object::{ChunkWriter, ThreadObjectWriter, UnchunkWriter};
3use crate::prelude::*;
4use crate::util::logging::data_progress;
5use serde::de::DeserializeOwned;
6
// Subcommands for scanning the individual GoodReads input files.
//
// Every variant carries the same `ScanInput` options (the path of the file to
// read); `GRScan::exec` selects the matching record writer for each variant.
//
// NOTE: the `///` comments on the variants below are picked up by clap's
// derive macro as the subcommands' help text, so rewording them changes the
// CLI output.
#[derive(clap::Subcommand, Debug)]
pub enum GRScan {
    /// Scan GoodReads works.
    Works(ScanInput),
    /// Scan GoodReads books.
    Books(ScanInput),
    /// Scan GoodReads genres.
    Genres(ScanInput),
    /// Scan GoodReads authors.
    Authors(ScanInput),
    /// Scan GoodReads interactions.
    Interactions(ScanInput),
    /// Scan GoodReads reviews.
    Reviews(ScanInput),
}
22
// Command-line options shared by every `GRScan` subcommand.
#[derive(Args, Debug)]
pub struct ScanInput {
    /// Input file
    // Declared without a short/long flag and named `INPUT` for clap; this is
    // the path that `scan_gr` opens with `LineProcessor::open_gzip`.
    #[arg(name = "INPUT")]
    infile: PathBuf,
}
29
30fn scan_gr<R, W>(path: &Path, proc: W) -> Result<()>
31where
32    W: ObjectWriter<R> + DataSink + Send + Sync + 'static,
33    R: DeserializeOwned + Send + Sync + 'static,
34{
35    let outs: Vec<_> = proc.output_files();
36
37    info!("reading data from {}", path.display());
38    let pb = data_progress(0);
39    let read = LineProcessor::open_gzip(path, pb.clone())?;
40    let proc = ChunkWriter::new(proc);
41    let writer = ThreadObjectWriter::wrap(proc).with_name("output").spawn();
42    let mut writer = UnchunkWriter::new(writer);
43    read.process_json(&mut writer)?;
44    pb.finish_and_clear();
45
46    writer.finish()?;
47
48    for out in outs {
49        let outf = out.as_path();
50        info!(
51            "output {} is {}",
52            outf.display(),
53            friendly::bytes(file_size(outf)?)
54        );
55    }
56
57    Ok(())
58}
59
60impl GRScan {
61    pub fn exec(&self) -> Result<()> {
62        match self {
63            GRScan::Works(opts) => {
64                info!("scanning GoodReads works");
65                scan_gr(&opts.infile, work::WorkWriter::open()?)?;
66            }
67            GRScan::Books(opts) => {
68                info!("scanning GoodReads books");
69                scan_gr(&opts.infile, book::BookWriter::open()?)?;
70            }
71            GRScan::Genres(opts) => {
72                info!("scanning GoodReads book genres");
73                scan_gr(&opts.infile, genres::BookGenreWriter::open()?)?;
74            }
75            GRScan::Authors(opts) => {
76                info!("scanning GoodReads book genres");
77                scan_gr(&opts.infile, author::AuthorWriter::open()?)?;
78            }
79            GRScan::Interactions(opts) => {
80                info!("scanning GoodReads interactions");
81                scan_gr(&opts.infile, interaction::IntWriter::open()?)?;
82            }
83            GRScan::Reviews(opts) => {
84                info!("scanning GoodReads reviews");
85                scan_gr(&opts.infile, review::ReviewWriter::open()?)?;
86            }
87        };
88
89        Ok(())
90    }
91}