// bookdata/cli/goodreads/scan.rs

1use crate::goodreads::*;
2use crate::io::object::{ChunkWriter, ThreadObjectWriter, UnchunkWriter};
3use crate::prelude::*;
4use crate::util::logging::data_progress;
5use serde::de::DeserializeOwned;
6
7#[derive(clap::Subcommand, Debug)]
8pub enum GRScan {
9    /// Scan GoodReads works.
10    Works(ScanInput),
11    /// Scan GoodReads books.
12    Books(ScanInput),
13    /// Scan GoodReads genres.
14    Genres(ScanInput),
15    /// Scan GoodReads authors.
16    Authors(ScanInput),
17    /// Scan GoodReads interactions.
18    Interactions(ScanInput),
19    /// Scan GoodReads reviews.
20    Reviews(ScanInput),
21}
22
23#[derive(Args, Debug)]
24pub struct ScanInput {
25    /// Input file
26    #[arg(name = "INPUT")]
27    infile: PathBuf,
28}
29
30fn scan_gr<R, W>(path: &Path, proc: W) -> Result<()>
31where
32    W: ObjectWriter<R> + Send + Sync + 'static,
33    R: DeserializeOwned + Send + Sync + 'static,
34{
35    info!("reading data from {}", path.display());
36    let pb = data_progress(0);
37    let read = LineProcessor::open_gzip(path, pb.clone())?;
38    let proc = ChunkWriter::new(proc);
39    let writer = ThreadObjectWriter::wrap(proc).with_name("output").spawn();
40    let mut writer = UnchunkWriter::new(writer);
41    read.process_json(&mut writer)?;
42    pb.finish_and_clear();
43
44    writer.finish_objects()?;
45
46    Ok(())
47}
48
49impl GRScan {
50    pub fn exec(&self) -> Result<()> {
51        match self {
52            GRScan::Works(opts) => {
53                info!("scanning GoodReads works");
54                scan_gr(&opts.infile, work::WorkWriter::open()?)?;
55            }
56            GRScan::Books(opts) => {
57                info!("scanning GoodReads books");
58                scan_gr(&opts.infile, book::BookWriter::open()?)?;
59            }
60            GRScan::Genres(opts) => {
61                info!("scanning GoodReads book genres");
62                scan_gr(&opts.infile, genres::BookGenreWriter::open()?)?;
63            }
64            GRScan::Authors(opts) => {
65                info!("scanning GoodReads book genres");
66                scan_gr(&opts.infile, author::AuthorWriter::open()?)?;
67            }
68            GRScan::Interactions(opts) => {
69                info!("scanning GoodReads interactions");
70                scan_gr(&opts.infile, interaction::IntWriter::open()?)?;
71            }
72            GRScan::Reviews(opts) => {
73                info!("scanning GoodReads reviews");
74                scan_gr(&opts.infile, review::ReviewWriter::open()?)?;
75            }
76        };
77
78        Ok(())
79    }
80}