diff --git a/matcher/src/pattern.rs b/matcher/src/pattern.rs index 0c554ba..b5dd55f 100644 --- a/matcher/src/pattern.rs +++ b/matcher/src/pattern.rs @@ -122,17 +122,30 @@ impl Atom { normalize = false; } let needle = if needle.is_ascii() { - let mut needle = if escape_whitespace { - if let Some((start, rem)) = needle.split_once("\\ ") { - let mut needle = start.to_owned(); - for rem in rem.split("\\ ") { - needle.push(' '); - needle.push_str(rem); + let mut needle_string = if escape_whitespace { + let mut needle_bytes = Vec::with_capacity(needle.len()); + let mut saw_backslash = false; + for c in needle.bytes() { + if saw_backslash { + if c.is_ascii_whitespace() { + needle_bytes.push(c); + saw_backslash = false; + continue; + } else { + needle_bytes.push(b'\\'); + } } - needle - } else { - needle.to_owned() + saw_backslash = c == b'\\'; + if !saw_backslash { + needle_bytes.push(c); + } + } + // push the potentially trailing backslash + if saw_backslash { + needle_bytes.push(b'\\'); } + // SAFETY: we just checked that needle is ascii, so each `c` is a valid ASCII byte + unsafe { String::from_utf8_unchecked(needle_bytes) } } else { needle.to_owned() }; @@ -141,18 +154,19 @@ impl Atom { #[cfg(feature = "unicode-casefold")] CaseMatching::Ignore => { ignore_case = true; - needle.make_ascii_lowercase() + needle_string.make_ascii_lowercase() } #[cfg(feature = "unicode-casefold")] CaseMatching::Smart => { - ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase()) + ignore_case = !needle_string.bytes().any(|b| b.is_ascii_uppercase()) } CaseMatching::Respect => ignore_case = false, } + if append_dollar { - needle.push('$'); + needle_string.push('$'); } - Utf32String::Ascii(needle.into_boxed_str()) + Utf32String::Ascii(needle_string.into_boxed_str()) } else { let mut needle_ = Vec::with_capacity(needle.len()); #[cfg(feature = "unicode-casefold")] @@ -171,8 +185,8 @@ impl Atom { let mut saw_backslash = false; for mut c in chars::graphemes(needle) { if saw_backslash { - if c == ' ' { - needle_.push(' '); + if c.is_whitespace() { + needle_.push(c); saw_backslash = false; continue; } else { @@ -180,23 +194,29 @@ impl Atom { } } saw_backslash = c == '\\'; - match case { - #[cfg(feature = "unicode-casefold")] - CaseMatching::Ignore => c = chars::to_lower_case(c), - #[cfg(feature = "unicode-casefold")] - CaseMatching::Smart => { - ignore_case = ignore_case && !chars::is_upper_case(c) + if !saw_backslash { + match case { + #[cfg(feature = "unicode-casefold")] + CaseMatching::Ignore => c = chars::to_lower_case(c), + #[cfg(feature = "unicode-casefold")] + CaseMatching::Smart => { + ignore_case = ignore_case && !chars::is_upper_case(c) + } + CaseMatching::Respect => (), } - CaseMatching::Respect => (), - } - match normalization { - #[cfg(feature = "unicode-normalization")] - Normalization::Smart => { - normalize = normalize && chars::normalize(c) == c; + match normalization { + #[cfg(feature = "unicode-normalization")] + Normalization::Smart => { + normalize = normalize && chars::normalize(c) == c; + } + Normalization::Never => (), } - Normalization::Never => (), + needle_.push(c); } - needle_.push(c); + } + // push the potentially trailing backslash + if saw_backslash { + needle_.push('\\'); } } else { let chars = chars::graphemes(needle).map(|mut c| { diff --git a/matcher/src/pattern/tests.rs b/matcher/src/pattern/tests.rs index 88880ba..f74dbd4 100644 --- a/matcher/src/pattern/tests.rs +++ b/matcher/src/pattern/tests.rs @@ -85,8 +85,28 @@ fn case_matching() { #[test] fn escape() { + // escapes only impact whitespace let pat = Atom::parse("foo\\ bar", CaseMatching::Smart, Normalization::Smart); assert_eq!(pat.needle.to_string(), "foo bar"); + let pat = Atom::parse("foo\\\tbar", CaseMatching::Smart, Normalization::Smart); + assert_eq!(pat.needle.to_string(), "foo\tbar"); + let pat = Atom::parse("\\", CaseMatching::Smart, Normalization::Smart); + assert_eq!(pat.needle.to_string(), "\\"); + let pat = Atom::parse("\\\\", CaseMatching::Smart, Normalization::Smart); + assert_eq!(pat.needle.to_string(), "\\\\"); + + // some unicode checks + let pat = Atom::parse("foö\\ bar", CaseMatching::Smart, Normalization::Smart); + assert_eq!(pat.needle.to_string(), "foö bar"); + let pat = Atom::parse("foö\\\\ bar", CaseMatching::Smart, Normalization::Smart); + assert_eq!(pat.needle.to_string(), "foö\\ bar"); + let pat = Atom::parse("foo\\ bar", CaseMatching::Smart, Normalization::Smart); + assert_eq!(pat.needle.to_string(), "foo bar"); // double-width IDEOGRAPHIC SPACE + let pat = Atom::parse("ö\\b", CaseMatching::Smart, Normalization::Smart); + assert_eq!(pat.needle.to_string(), "ö\\b"); + let pat = Atom::parse("ö\\\\", CaseMatching::Smart, Normalization::Smart); + assert_eq!(pat.needle.to_string(), "ö\\\\"); + let pat = Atom::parse("\\!foo", CaseMatching::Smart, Normalization::Smart); assert_eq!(pat.needle.to_string(), "!foo"); assert_eq!(pat.kind, AtomKind::Fuzzy);