bookdata/cleaning/names/
parse.rs

1//! PEG parser for name variants.
2
3use super::types::*;
4
5peg::parser! {
6    grammar name_parser() for str {
7        rule space() = quiet!{[' ' | '\n' | '\r' | '\t']}
8        rule digit() = quiet!{['0'..='9']}
9
10        rule trailing_junk() = [',' | '.'] space()* ![_]
11
12        rule year_range() -> String
13        = range:$(digit()+ "-" digit()*) { range.to_owned() }
14
15        rule year_tag() -> String
16        = (space()* ",")? space()* "("? y:year_range() ")"? {
17            y
18        }
19
20        rule ending() -> Option<String>
21        = trailing_junk() { None }
22        / y:year_tag() { Some(y) }
23
24        rule cs_name() -> NameFmt
25        = last:$([^',']*) "," space()* rest:$(([_] !ending())* [^',']?) {
26            if rest.trim().is_empty() {
27                NameFmt::Single(last.trim().to_owned())
28            } else {
29                NameFmt::TwoPart(last.trim().to_owned(), rest.trim().to_owned())
30            }
31        }
32
33        rule single_name() -> NameFmt
34        = name:$(([_] !ending())* [^',' | '.']?) { NameFmt::Single(name.trim().to_owned()) }
35
36        rule name() -> NameFmt = ("!!!" ['a'..='z' | 'A'..='Z']+ "!!!" space()*)? n:(cs_name() / single_name()) { n }
37
38        #[no_eof]
39        pub rule name_entry() -> NameEntry
40        = year:year_tag() { NameEntry { name:NameFmt::Empty, year: Some(year) } }
41        / name:name() year:ending()? { NameEntry { name, year: year.flatten() } }
42    }
43}
44
45pub fn parse_name_entry(name: &str) -> Result<NameEntry, NameError> {
46    let res = name_parser::name_entry(name)?;
47    Ok(res)
48}