bookdata/cli/goodreads/
scan.rs1use crate::goodreads::*;
2use crate::io::object::{ChunkWriter, ThreadObjectWriter, UnchunkWriter};
3use crate::prelude::*;
4use crate::util::logging::data_progress;
5use serde::de::DeserializeOwned;
6
7#[derive(clap::Subcommand, Debug)]
8pub enum GRScan {
9 Works(ScanInput),
11 Books(ScanInput),
13 Genres(ScanInput),
15 Authors(ScanInput),
17 Interactions(ScanInput),
19 Reviews(ScanInput),
21}
22
23#[derive(Args, Debug)]
24pub struct ScanInput {
25 #[arg(name = "INPUT")]
27 infile: PathBuf,
28}
29
30fn scan_gr<R, W>(path: &Path, proc: W) -> Result<()>
31where
32 W: ObjectWriter<R> + Send + Sync + 'static,
33 R: DeserializeOwned + Send + Sync + 'static,
34{
35 info!("reading data from {}", path.display());
36 let pb = data_progress(0);
37 let read = LineProcessor::open_gzip(path, pb.clone())?;
38 let proc = ChunkWriter::new(proc);
39 let writer = ThreadObjectWriter::wrap(proc).with_name("output").spawn();
40 let mut writer = UnchunkWriter::new(writer);
41 read.process_json(&mut writer)?;
42 pb.finish_and_clear();
43
44 writer.finish_objects()?;
45
46 Ok(())
47}
48
49impl GRScan {
50 pub fn exec(&self) -> Result<()> {
51 match self {
52 GRScan::Works(opts) => {
53 info!("scanning GoodReads works");
54 scan_gr(&opts.infile, work::WorkWriter::open()?)?;
55 }
56 GRScan::Books(opts) => {
57 info!("scanning GoodReads books");
58 scan_gr(&opts.infile, book::BookWriter::open()?)?;
59 }
60 GRScan::Genres(opts) => {
61 info!("scanning GoodReads book genres");
62 scan_gr(&opts.infile, genres::BookGenreWriter::open()?)?;
63 }
64 GRScan::Authors(opts) => {
65 info!("scanning GoodReads book genres");
66 scan_gr(&opts.infile, author::AuthorWriter::open()?)?;
67 }
68 GRScan::Interactions(opts) => {
69 info!("scanning GoodReads interactions");
70 scan_gr(&opts.infile, interaction::IntWriter::open()?)?;
71 }
72 GRScan::Reviews(opts) => {
73 info!("scanning GoodReads reviews");
74 scan_gr(&opts.infile, review::ReviewWriter::open()?)?;
75 }
76 };
77
78 Ok(())
79 }
80}