bookdata/marc/
flat_fields.rs

1use std::path::Path;
2
3use anyhow::Result;
4use parquet_derive::{ParquetRecordReader, ParquetRecordWriter};
5
6use super::record::*;
7use crate::arrow::*;
8use crate::io::*;
9
10/// Flat MARC field record.
11#[derive(ParquetRecordWriter, ParquetRecordReader, Debug, Default)]
12pub struct FieldRecord {
13    pub rec_id: u32,
14    pub fld_no: u32,
15    pub tag: i16,
16    pub ind1: u8,
17    pub ind2: u8,
18    pub sf_code: u8,
19    pub contents: String,
20}
21
22/// Output for writing flat MARC fields to Parquet.
23pub struct FieldOutput {
24    rec_count: u32,
25    writer: TableWriter<FieldRecord>,
26}
27
28impl FieldOutput {
29    /// Create a new output.
30    pub fn new(writer: TableWriter<FieldRecord>) -> FieldOutput {
31        FieldOutput {
32            rec_count: 0,
33            writer,
34        }
35    }
36
37    /// Open a field output going to a file.
38    pub fn open<P: AsRef<Path>>(path: P) -> Result<FieldOutput> {
39        let writer = TableWriter::open(path)?;
40        Ok(Self::new(writer))
41    }
42}
43
44impl ObjectWriter<MARCRecord> for FieldOutput {
45    fn write_object(&mut self, rec: MARCRecord) -> Result<()> {
46        self.rec_count += 1;
47        let rec_id = self.rec_count;
48        let mut fld_no = 0;
49
50        // write the leader
51        self.writer.write_object(FieldRecord {
52            rec_id,
53            fld_no,
54            tag: -1,
55            ind1: 0.into(),
56            ind2: 0.into(),
57            sf_code: 0.into(),
58            contents: rec.leader,
59        })?;
60
61        // write the control fields
62        for cf in rec.control {
63            fld_no += 1;
64            self.writer.write_object(FieldRecord {
65                rec_id,
66                fld_no,
67                tag: cf.tag.into(),
68                ind1: 0.into(),
69                ind2: 0.into(),
70                sf_code: 0.into(),
71                contents: cf.content,
72            })?;
73        }
74
75        // write the data fields
76        for df in rec.fields {
77            for sf in df.subfields {
78                fld_no += 1;
79                self.writer.write_object(FieldRecord {
80                    rec_id,
81                    fld_no,
82                    tag: df.tag,
83                    ind1: df.ind1.into(),
84                    ind2: df.ind2.into(),
85                    sf_code: sf.code.into(),
86                    contents: sf.content,
87                })?;
88            }
89        }
90
91        Ok(())
92    }
93
94    fn finish_objects(self) -> Result<usize> {
95        self.writer.finish_objects()
96    }
97}