bookdata/marc/
flat_fields.rs

1use std::path::Path;
2
3use anyhow::Result;
4use parquet_derive::{ParquetRecordReader, ParquetRecordWriter};
5
6use super::record::*;
7use crate::arrow::*;
8use crate::io::*;
9
10/// Flat MARC field record.
11#[derive(ParquetRecordWriter, ParquetRecordReader, Debug, Default)]
12pub struct FieldRecord {
13    pub rec_id: u32,
14    pub fld_no: u32,
15    pub tag: i16,
16    pub ind1: u8,
17    pub ind2: u8,
18    pub sf_code: u8,
19    pub contents: String,
20}
21
22/// Output for writing flat MARC fields to Parquet.
23pub struct FieldOutput {
24    rec_count: u32,
25    writer: TableWriter<FieldRecord>,
26}
27
28impl FieldOutput {
29    /// Create a new output.
30    pub fn new(writer: TableWriter<FieldRecord>) -> FieldOutput {
31        FieldOutput {
32            rec_count: 0,
33            writer,
34        }
35    }
36
37    /// Open a field output going to a file.
38    pub fn open<P: AsRef<Path>>(path: P) -> Result<FieldOutput> {
39        let writer = TableWriter::open(path)?;
40        Ok(Self::new(writer))
41    }
42}
43
44impl DataSink for FieldOutput {
45    fn output_files(&self) -> Vec<std::path::PathBuf> {
46        self.writer.output_files()
47    }
48}
49
50impl ObjectWriter<MARCRecord> for FieldOutput {
51    fn write_object(&mut self, rec: MARCRecord) -> Result<()> {
52        self.rec_count += 1;
53        let rec_id = self.rec_count;
54        let mut fld_no = 0;
55
56        // write the leader
57        self.writer.write_object(FieldRecord {
58            rec_id,
59            fld_no,
60            tag: -1,
61            ind1: 0.into(),
62            ind2: 0.into(),
63            sf_code: 0.into(),
64            contents: rec.leader,
65        })?;
66
67        // write the control fields
68        for cf in rec.control {
69            fld_no += 1;
70            self.writer.write_object(FieldRecord {
71                rec_id,
72                fld_no,
73                tag: cf.tag.into(),
74                ind1: 0.into(),
75                ind2: 0.into(),
76                sf_code: 0.into(),
77                contents: cf.content,
78            })?;
79        }
80
81        // write the data fields
82        for df in rec.fields {
83            for sf in df.subfields {
84                fld_no += 1;
85                self.writer.write_object(FieldRecord {
86                    rec_id,
87                    fld_no,
88                    tag: df.tag,
89                    ind1: df.ind1.into(),
90                    ind2: df.ind2.into(),
91                    sf_code: sf.code.into(),
92                    contents: sf.content,
93                })?;
94            }
95        }
96
97        Ok(())
98    }
99
100    fn finish(self) -> Result<usize> {
101        self.writer.finish()
102    }
103}