bookdata/cli/bx/
cluster.rs

1//! BookCrossing interaction clustering.
2use std::path::PathBuf;
3
4use crate::prelude::*;
5use polars::prelude::*;
6
// Command-line options for the BookCrossing interaction clustering command,
// registered under the name `cluster-actions`.
//
// NOTE(review): the `///` comments on the fields are consumed by the derive
// macro (presumably as CLI help text), so maintainer notes here use plain `//`
// comments to avoid altering what the program prints.
#[derive(Args, Debug)]
#[command(name = "cluster-actions")]
pub struct Cluster {
    /// Cluster ratings.
    // When set, `exec` keeps only rows with `rating > 0` and writes a median
    // `rating` plus an `nratings` count per group. This flag takes precedence
    // over `add_actions` if both are given.
    #[arg(long = "ratings")]
    ratings: bool,

    /// Cluster actions (implicit feedback).
    // When set (and `ratings` is not), `exec` keeps every row and writes an
    // `nactions` count per group.
    #[arg(long = "add-actions")]
    add_actions: bool,

    /// The output file.
    // Path handed to `save_df_parquet` once the aggregation has been collected.
    #[arg(short = 'o', long = "output", name = "FILE")]
    outfile: PathBuf,
}
22
23impl Command for Cluster {
24    fn exec(&self) -> Result<()> {
25        if !self.ratings && !self.add_actions {
26            error!("one of --ratings or --add-actions must be specified");
27            return Err(anyhow!("no mode specified"));
28        }
29        require_working_dir("bx")?;
30
31        let isbns = LazyFrame::scan_parquet("../book-links/isbn-clusters.parquet", default())?;
32        let isbns = isbns.select(&[col("isbn"), col("cluster")]);
33
34        let ratings = LazyCsvReader::new("cleaned-ratings.csv")
35            .has_header(true)
36            .finish()?;
37        let ratings = if self.ratings {
38            ratings.filter(col("rating").gt(0))
39        } else {
40            ratings
41        };
42        let joined = ratings.join(
43            isbns,
44            &[col("isbn")],
45            &[col("isbn")],
46            JoinType::Inner.into(),
47        );
48        let grouped = joined.group_by(&[col("user_id"), col("cluster").alias("item_id")]);
49        let agg = if self.ratings {
50            grouped.agg(&[
51                col("rating").median().alias("rating"),
52                col("cluster").count().alias("nratings"),
53            ])
54        } else {
55            grouped.agg(&[col("cluster").count().alias("nactions")])
56        };
57
58        info!("collecting results");
59        let results = agg.collect()?;
60
61        info!("writing to {:?}", &self.outfile);
62        save_df_parquet(results, &self.outfile)?;
63
64        Ok(())
65    }
66}