bookdata/goodreads/
work.rs1use parquet_derive::ParquetRecordWriter;
3use serde::Deserialize;
4
5use crate::arrow::*;
6use crate::parsing::*;
7use crate::prelude::*;
8
/// Name of the Parquet file that `WorkWriter::open` hands to `TableWriter::open`.
const OUT_FILE: &str = "gr-work-info.parquet";
10
11#[derive(Deserialize)]
13pub struct RawWork {
14 pub work_id: String,
15 #[serde(default)]
16 pub original_title: String,
17 #[serde(default)]
18 pub original_publication_year: String,
19 #[serde(default)]
20 pub original_publication_month: String,
21 #[serde(default)]
22 #[allow(unused)]
23 pub original_publication_day: String,
24}
25
26#[derive(ParquetRecordWriter)]
28pub struct WorkRecord {
29 pub work_id: i32,
30 pub title: Option<String>,
31 pub pub_year: Option<i16>,
32 pub pub_month: Option<u8>,
33}
34
35pub struct WorkWriter {
37 writer: TableWriter<WorkRecord>,
38 n_recs: usize,
39}
40
41impl WorkWriter {
42 pub fn open() -> Result<WorkWriter> {
44 let writer = TableWriter::open(OUT_FILE)?;
45 Ok(WorkWriter { writer, n_recs: 0 })
46 }
47}
48
49impl ObjectWriter<RawWork> for WorkWriter {
50 fn write_object(&mut self, row: RawWork) -> Result<()> {
51 let work_id: i32 = row.work_id.parse()?;
52
53 let pub_year = parse_opt(&row.original_publication_year)?;
54 let pub_month = parse_opt(&row.original_publication_month)?;
55
56 self.writer.write_object(WorkRecord {
57 work_id,
58 title: trim_owned(&row.original_title),
59 pub_year,
60 pub_month,
61 })?;
62 self.n_recs += 1;
63 Ok(())
64 }
65
66 fn finish_objects(self) -> Result<usize> {
67 self.writer.finish_objects()?;
68 Ok(self.n_recs)
69 }
70}