bookdata/goodreads/
author.rs

1//! GoodReads author schemas and record processing.
2use parquet_derive::ParquetRecordWriter;
3use serde::Deserialize;
4
5use crate::arrow::*;
6use crate::parsing::*;
7use crate::prelude::*;
8
9const OUT_FILE: &'static str = "gr-author-info.parquet";
10
11/// Author records as parsed from JSON.
#[derive(Deserialize)]
pub struct RawAuthor {
    /// Author identifier as a string in the source JSON (parsed to an
    /// integer downstream in `AuthorWriter::write_object`).
    pub author_id: String,
    /// The author's name as it appears in the source data.
    pub name: String,
}
17
18/// Rows in the processed author Parquet table.
#[derive(ParquetRecordWriter)]
pub struct AuthorRecord {
    /// Numeric author identifier (parsed from the raw string ID).
    pub author_id: i32,
    /// Trimmed author name; presumably `None` when the trimmed name is
    /// empty — confirm against `trim_owned`'s contract.
    pub name: Option<String>,
}
24
25/// Object writer to transform and write GoodReads authors.
pub struct AuthorWriter {
    /// Underlying Parquet table writer for processed author records.
    writer: TableWriter<AuthorRecord>,
    /// Count of records written so far.
    n_recs: usize,
}
30
31impl AuthorWriter {
    /// Open a writer targeting the standard author output file.
    pub fn open() -> Result<AuthorWriter> {
        Ok(AuthorWriter {
            writer: TableWriter::open(OUT_FILE)?,
            n_recs: 0,
        })
    }
}
38
39impl ObjectWriter<RawAuthor> for AuthorWriter {
    /// Transform one raw author record and write it to the Parquet table.
    ///
    /// Fails if the raw string `author_id` does not parse as an `i32`.
    fn write_object(&mut self, obj: RawAuthor) -> Result<()> {
        let record = AuthorRecord {
            author_id: obj.author_id.parse()?,
            name: trim_owned(&obj.name),
        };
        self.writer.write_object(record)?;
        self.n_recs += 1;
        Ok(())
    }

    /// Flush and close the underlying writer, returning the number of
    /// records written.
    fn finish_objects(self) -> Result<usize> {
        let count = self.n_recs;
        self.writer.finish_objects()?;
        Ok(count)
    }
}