From fbd2537a58008c1045bc063b30a3e1f130742aac Mon Sep 17 00:00:00 2001 From: Giacomo Stevanato Date: Fri, 19 Jan 2024 22:03:37 +0100 Subject: [PATCH] safety: guard in Input::new against incorrect AsRef implementations Before this commit, Input::new calls haystack.as_ref() twice, once to get the actual haystack slice and the second time to get its length. It makes the assumption that the second call will return the same slice, but malicious implementations of AsRef can return different slices and thus different lengths. This is important because there's unsafe code relying on the Input's span being inbounds with respect to the haystack, but if the second call to .as_ref() returns a bigger slice this won't be true. For example, this snippet causes Miri to report UB on an unchecked slice access in find_fwd_imp (though it will also panic sometime later when run normally, but at that point the UB already happened): use regex_automata::{Input, meta::{Builder, Config}}; use std::cell::Cell; struct Bad(Cell); impl AsRef<[u8]> for Bad { fn as_ref(&self) -> &[u8] { if self.0.replace(false) { &[] } else { &[0; 1000] } } } let bad = Bad(Cell::new(true)); let input = Input::new(&bad); let regex = Builder::new() // Not setting this causes some checked access to occur before // the unchecked ones, avoiding the UB .configure(Config::new().auto_prefilter(false)) .build("a+") .unwrap(); regex.find(input); This commit fixes the problem by just calling .as_ref() once and use the length of the returned slice as the span's end value. A regression test has also been added. Closes #1154 --- CHANGELOG.md | 4 +++- regex-automata/src/util/search.rs | 28 ++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38da512ed..527950518 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,14 @@ 1.10.3 (TBD) ============ This is a new patch release that fixes the feature configuration of optional -dependencies. +dependencies, and fixes an unsound use of bounds check elision. Bug fixes: * [BUG #1147](https://github.com/rust-lang/regex/issues/1147): Set `default-features=false` for the `memchr` and `aho-corasick` dependencies. +* [BUG #1154](https://github.com/rust-lang/regex/pull/1154): +Fix unsound bounds check elision. 1.10.2 (2023-10-16) diff --git a/regex-automata/src/util/search.rs b/regex-automata/src/util/search.rs index 39aec522b..05b1cff54 100644 --- a/regex-automata/src/util/search.rs +++ b/regex-automata/src/util/search.rs @@ -110,9 +110,14 @@ impl<'h> Input<'h> { /// Create a new search configuration for the given haystack. #[inline] pub fn new>(haystack: &'h H) -> Input<'h> { + // Perform only one call to `haystack.as_ref()` to protect from incorrect + // implementations that return different values from multiple calls. + // This is important because there's code that relies on `span` not being + // out of bounds with respect to the stored `haystack`. + let haystack = haystack.as_ref(); Input { - haystack: haystack.as_ref(), - span: Span { start: 0, end: haystack.as_ref().len() }, + haystack, + span: Span { start: 0, end: haystack.len() }, anchored: Anchored::No, earliest: false, } @@ -1966,4 +1971,23 @@ mod tests { let expected_size = 3 * core::mem::size_of::(); assert_eq!(expected_size, core::mem::size_of::()); } + + #[test] + fn incorrect_asref_guard() { + struct Bad(std::cell::Cell); + + impl AsRef<[u8]> for Bad { + fn as_ref(&self) -> &[u8] { + if self.0.replace(false) { + &[] + } else { + &[0; 1000] + } + } + } + + let bad = Bad(std::cell::Cell::new(true)); + let input = Input::new(&bad); + assert!(input.end() <= input.haystack().len()); + } }