bookdata/openlib/
author.rs1use parquet_derive::ParquetRecordWriter;
3
4use crate::arrow::*;
5use crate::cleaning::names::clean_name;
6use crate::cleaning::strings::norm_unicode;
7use crate::prelude::*;
8
9use super::key::{parse_ol_key, KS_AUTHOR};
10pub use super::source::OLAuthorSource;
11use super::source::Row;
12
13#[derive(ParquetRecordWriter)]
15pub struct AuthorRec {
16 pub id: u32,
17 pub key: String,
18 pub name: Option<String>,
19}
20
21#[derive(ParquetRecordWriter)]
23pub struct AuthorNameRec {
24 pub id: u32,
25 pub source: u8,
26 pub name: String,
27}
28
29pub fn author_name_records(src: &OLAuthorSource, id: u32) -> Vec<AuthorNameRec> {
31 let mut names = Vec::new();
32
33 if let Some(n) = &src.name {
34 names.push(AuthorNameRec {
35 id,
36 source: b'n',
37 name: clean_name(&n),
38 });
39 }
40
41 if let Some(n) = &src.personal_name {
42 names.push(AuthorNameRec {
43 id,
44 source: b'p',
45 name: clean_name(&n),
46 });
47 }
48
49 for n in &src.alternate_names {
50 names.push(AuthorNameRec {
51 id,
52 source: b'a',
53 name: clean_name(&n),
54 });
55 }
56
57 names
58}
59
60pub struct AuthorProcessor {
62 rec_writer: TableWriter<AuthorRec>,
63 name_writer: TableWriter<AuthorNameRec>,
64}
65
66impl AuthorProcessor {
67 pub fn new() -> Result<AuthorProcessor> {
68 Ok(AuthorProcessor {
69 rec_writer: TableWriter::open("authors.parquet")?,
70 name_writer: TableWriter::open("author-names.parquet")?,
71 })
72 }
73}
74
75impl ObjectWriter<Row<OLAuthorSource>> for AuthorProcessor {
76 fn write_object(&mut self, row: Row<OLAuthorSource>) -> Result<()> {
77 let id = parse_ol_key(&row.key, KS_AUTHOR)?;
78
79 self.rec_writer.write_object(AuthorRec {
80 id,
81 key: row.key,
82 name: row
83 .record
84 .name
85 .as_ref()
86 .map(|s| norm_unicode(s).into_owned()),
87 })?;
88
89 for name in author_name_records(&row.record, id) {
90 self.name_writer.write_object(name)?;
91 }
92
93 Ok(())
94 }
95
96 fn finish(self) -> Result<usize> {
97 let nr = self.rec_writer.finish()?;
98 self.name_writer.finish()?;
99 Ok(nr)
100 }
101}