bookdata/cli/
mod.rs

//! Command line interface for book data.
//!
//! The book data tools are implemented as a single monolithic executable (to reduce
//! compilation time and disk space in common configurations), with different tools
//! implemented as subcommands.  Each subcommand implements the [Command] trait, which
//! exposes the command line arguments and invocation.
7pub mod amazon;
8pub mod bx;
9pub mod cluster;
10pub mod cluster_books;
11pub mod collect_isbns;
12pub mod extract_graph;
13pub mod filter_marc;
14pub mod goodreads;
15pub mod index_names;
16pub mod kcore;
17pub mod link_isbns;
18pub mod openlib;
19pub mod pqinfo;
20pub mod scan_marc;
21pub mod stats;
22
23use anyhow::Result;
24use clap::{Parser, Subcommand};
25use cpu_time::ProcessTime;
26use enum_dispatch::enum_dispatch;
27use happylog::clap::LogOpts;
28use log::*;
29use paste::paste;
30
31use crate::util::process;
32use crate::util::Timer;
33
/// Generate a clap wrapper struct around a subcommand enum.
///
/// Some subcommands exist only to group further subcommands.  For an enum
/// `Foo`, `wrap_subcommands!(Foo)` defines a public `FooWrapper` struct that
/// holds the enum as its clap subcommand and implements [Command] by
/// forwarding `exec` to it.
macro_rules! wrap_subcommands {
    ($cmd:ty) => {
        paste! {
            #[derive(clap::Args, Debug)]
            pub struct [<$cmd Wrapper>] {
                // The nested subcommand chosen on the command line.
                #[command(subcommand)]
                command: $cmd,
            }

            impl Command for [<$cmd Wrapper>] {
                fn exec(&self) -> Result<()> {
                    // Delegate to the wrapped enum's own `Command` implementation.
                    Command::exec(&self.command)
                }
            }
        }
    };
}
55
56/// Trait implemented by book data commands.
57#[enum_dispatch]
58pub trait Command {
59    /// Run the command with options
60    fn exec(&self) -> Result<()>;
61}
62
63/// Enum to collect and dispatch CLI commands.
64#[enum_dispatch(Command)]
65#[derive(Subcommand, Debug)]
66pub enum RootCommand {
67    ScanMARC(scan_marc::ScanMARC),
68    FilterMARC(filter_marc::FilterMARC),
69    ClusterBooks(cluster_books::ClusterBooks),
70    IndexNames(index_names::IndexNames),
71    ExtractGraph(extract_graph::ExtractGraph),
72    CollectISBNS(collect_isbns::CollectISBNs),
73    LinkISBNIds(link_isbns::LinkISBNIds),
74    /// Commands for processing Amazon data.
75    Amazon(AmazonCommandWrapper),
76    /// Commands for processing OpenLibrary data.
77    Openlib(openlib::OpenLib),
78    /// Commands for processing BookCrossing data.
79    BX(BXCommandWrapper),
80    /// Commands for processing GoodReads data.
81    Goodreads(goodreads::Goodreads),
82    /// Commands for working with clusters.
83    Cluster(ClusterCommandWrapper),
84    Kcore(kcore::Kcore),
85    PQInfo(pqinfo::PQInfo),
86    IntegrationStats(stats::IntegrationStats),
87}
88
89wrap_subcommands!(AmazonCommand);
90wrap_subcommands!(BXCommand);
91wrap_subcommands!(ClusterCommand);
92
93#[enum_dispatch(Command)]
94#[derive(Subcommand, Debug)]
95enum AmazonCommand {
96    ScanRatings(amazon::ScanRatings),
97    ScanReviews(amazon::ScanReviews),
98    ClusterRatings(amazon::ClusterRatings),
99}
100
101#[enum_dispatch(Command)]
102#[derive(Subcommand, Debug)]
103enum BXCommand {
104    /// Extract BX from source data and clean.
105    Extract(bx::Extract),
106    /// Match BX interactions with clusters.
107    ClusterActions(bx::Cluster),
108}
109
110#[enum_dispatch(Command)]
111#[derive(Subcommand, Debug)]
112pub enum ClusterCommand {
113    Hash(cluster::hash::HashCmd),
114    ExtractBooks(cluster::books::ExtractBooks),
115    ExtractAuthors(cluster::authors::ClusterAuthors),
116    ExtractAuthorGender(cluster::author_gender::AuthorGender),
117}
118
119/// Entry point for the Book Data Tools.
120///
121/// This program runs the various book data tools, exposed as subcommands.
122#[derive(Parser, Debug)]
123#[command(name = "bookdata")]
124pub struct CLI {
125    #[command(flatten)]
126    logging: LogOpts,
127
128    #[command(subcommand)]
129    command: RootCommand,
130}
131
132impl CLI {
133    pub fn exec(self) -> Result<()> {
134        self.logging.init()?;
135
136        process::maybe_exit_early()?;
137
138        let timer = Timer::new();
139
140        let res = self.command.exec();
141        if let Err(e) = &res {
142            error!("command failed: {}", e);
143            return res;
144        }
145
146        info!("work completed in {}", timer.human_elapsed());
147        match ProcessTime::try_now() {
148            Ok(pt) => info!("used {} CPU time", friendly::duration(pt.as_duration())),
149            Err(e) => error!("error fetching CPU time: {}", e),
150        };
151
152        process::log_process_stats();
153        res
154    }
155}