bookdata/cleaning/
strings.rs

1//! Utilities for cleaning strings.
2use std::iter::FromIterator;
3
4use std::borrow::Cow;
5use unicode_normalization::*;
6
7/// Normalize Unicode character representations in a string.
8pub fn norm_unicode<'a>(s: &'a str) -> Cow<'a, str> {
9    if is_nfd_quick(s.chars()) == IsNormalized::Yes {
10        s.into()
11    } else {
12        String::from_iter(s.nfd()).into()
13    }
14}
15
/// The empty string comes back from `norm_unicode` as the empty string.
#[test]
fn test_nu_empty() {
    let normalized = norm_unicode("");
    assert_eq!(&*normalized, "");
}
22
/// Plain ASCII text comes back from `norm_unicode` unchanged.
#[test]
fn test_nu_basic() {
    let normalized = norm_unicode("foo");
    assert_eq!(&*normalized, "foo");
}
29
/// An accented letter is split into its base letter followed by the
/// combining diaeresis (U+0308).
#[test]
fn test_nu_metal() {
    // NOTE(review): the input literal presumably holds the precomposed "ä";
    // confirm the file's encoding keeps it that way.
    let normalized = norm_unicode("metäl");
    assert_eq!(&*normalized, "meta\u{0308}l");
}