bookdata/util/unicode/
mod.rs

// Range tables consumed through `TableSet` below (documented there as
// ucd-generate output).
// NOTE(review): `dead_code` is presumably allowed because the generated module
// defines tables that this crate never references — confirm.
#[allow(dead_code)]
mod tables;
3
/// Character set backed by `tables::NONSPACING_MARK`.
///
/// Presumably the Unicode `Nonspacing_Mark` (Mn) general category — confirm
/// against the generated table.
pub static NONSPACING_MARK: TableSet<'static> = TableSet::from_table(tables::NONSPACING_MARK);
5
/// Character set backed by `tables::UPPERCASE_LETTER`.
///
/// Only compiled in test builds; the unit tests in this file use it as the
/// fixture for exercising `TableSet::contains`.
#[cfg(test)]
pub static UPPERCASE_LETTER: TableSet<'static> = TableSet::from_table(tables::UPPERCASE_LETTER);
8
/// A set of characters backed by a ucd-generate range table.
///
/// The table is a slice of inclusive `(lower, upper)` character ranges.
/// Membership is decided by binary search, so the ranges must be sorted in
/// ascending order and must not overlap; on a table that violates this the
/// result of [`TableSet::contains`] is unspecified (but it never panics).
#[derive(Debug, Clone, Copy)]
pub struct TableSet<'a> {
    char_seqs: &'a [(char, char)],
}

impl<'a> TableSet<'a> {
    /// Wraps a range table without copying it.
    ///
    /// `seqs` must be sorted, non-overlapping inclusive ranges (see the type
    /// documentation). `const` so that sets can be built in `static` items.
    const fn from_table(seqs: &'a [(char, char)]) -> TableSet<'a> {
        TableSet { char_seqs: seqs }
    }

    /// Returns `true` if `c` lies within any inclusive range of the table.
    ///
    /// Runs a single binary search over the ranges. An empty table contains
    /// no characters.
    pub fn contains(&self, c: char) -> bool {
        use std::cmp::Ordering;

        // Order each range relative to `c`: a range that ends before `c` is
        // "less", one that starts after `c` is "greater", and a range that
        // covers `c` is the match we are looking for.
        self.char_seqs
            .binary_search_by(|&(lb, ub)| {
                if ub < c {
                    Ordering::Less
                } else if lb > c {
                    Ordering::Greater
                } else {
                    Ordering::Equal
                }
            })
            .is_ok()
    }
}
36
/// `'A'` is expected to sit exactly on a range's lower bound and be a member.
#[test]
pub fn test_contains_lb() {
    let lower_bound = 'A';
    assert!(UPPERCASE_LETTER.contains(lower_bound));
}
41
/// `'Z'` is expected to sit on a range's inclusive upper bound and be a member.
#[test]
pub fn test_contains_ub() {
    let upper_bound = 'Z';
    assert!(UPPERCASE_LETTER.contains(upper_bound));
}
46
/// A character strictly between `'A'` and `'Z'` is a member.
#[test]
pub fn test_contains_mid() {
    let interior = 'Q';
    assert!(UPPERCASE_LETTER.contains(interior));
}
51
/// A space, which sorts before `'A'`, is not a member.
#[test]
pub fn test_does_not_contain_early() {
    let before_first_letter = ' ';
    assert!(!UPPERCASE_LETTER.contains(before_first_letter));
}
56
/// Non-ASCII capital letters are members too.
#[test]
pub fn test_contains_higher_unicode() {
    let is_uppercase = |c: char| UPPERCASE_LETTER.contains(c);

    assert!(is_uppercase('𝔸'));
    assert!(is_uppercase('ℚ'));
    assert!(is_uppercase('𝑍'));
}
63
/// A non-ASCII character that the uppercase set is expected to exclude is not
/// reported as a member.
#[test]
pub fn test_omits_higher_unicode() {
    let non_member = 'ᏺ';
    assert!(!UPPERCASE_LETTER.contains(non_member));
}