From 028026b9b30f1254b4b965a36989e82906df3a58 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sun, 23 Jun 2024 16:12:39 +1000 Subject: [PATCH 1/3] Remove `__rust_force_expr`. This was added (with a different name) to improve an error message. It is no longer needed -- removing it changes the error message, but overall I think the new message is no worse: - the mention of `#` in the first line is a little worse, - but the extra context makes it very clear what the problem is, perhaps even clearer than the old message, - and the removal of the note about the `expr` fragment (an internal detail of `__rust_force_expr`) is an improvement. Overall I think the error is quite clear and still far better than the old message that prompted #61933, which didn't even mention patterns. The motivation for this is #124141, which will cause pasted metavariables to be tokenized and reparsed instead of the AST node being cached. This change in behaviour occasionally has a non-zero perf cost, and `__rust_force_expr` causes the tokenize/reparse step to occur twice. Removing `__rust_force_expr` greatly reduces the extra overhead for the `deep-vector` benchmark. --- alloc/src/macros.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/alloc/src/macros.rs b/alloc/src/macros.rs index 0f767df6063a3..d5ca5c4ed27a2 100644 --- a/alloc/src/macros.rs +++ b/alloc/src/macros.rs @@ -41,18 +41,18 @@ #[allow_internal_unstable(rustc_attrs, liballoc_internals)] macro_rules! vec { () => ( - $crate::__rust_force_expr!($crate::vec::Vec::new()) + $crate::vec::Vec::new() ); ($elem:expr; $n:expr) => ( - $crate::__rust_force_expr!($crate::vec::from_elem($elem, $n)) + $crate::vec::from_elem($elem, $n) ); ($($x:expr),+ $(,)?) => ( - $crate::__rust_force_expr!(<[_]>::into_vec( + <[_]>::into_vec( // This rustc_box is not required, but it produces a dramatic improvement in compile // time when constructing arrays with many elements. #[rustc_box] $crate::boxed::Box::new([$($x),+]) - )) + ) ); } @@ -126,13 +126,3 @@ macro_rules! format { res }} } - -/// Force AST node to an expression to improve diagnostics in pattern position. -#[doc(hidden)] -#[macro_export] -#[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] -macro_rules! __rust_force_expr { - ($e:expr) => { - $e - }; -} From 57c2de81f2b53dc7cb93f8ba0d3e24f80cd61425 Mon Sep 17 00:00:00 2001 From: ash <97464181+Borgerr@users.noreply.github.com> Date: Tue, 25 Jun 2024 23:58:43 -0600 Subject: [PATCH 2/3] set self.is_known_utf8 to false in extend_from_slice --- std/src/sys_common/wtf8.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/std/src/sys_common/wtf8.rs b/std/src/sys_common/wtf8.rs index 708f62f476e73..117a3e23044ea 100644 --- a/std/src/sys_common/wtf8.rs +++ b/std/src/sys_common/wtf8.rs @@ -480,7 +480,7 @@ impl Wtf8Buf { #[inline] pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { self.bytes.extend_from_slice(other); - self.is_known_utf8 = self.is_known_utf8 || self.next_surrogate(0).is_none(); + self.is_known_utf8 = false; } } From 4788a93eee24a3e90becec001fd921e5565330db Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 25 Jun 2024 22:29:37 -0700 Subject: [PATCH 3/3] std: test a variety of ways to extend a Wtf8Buf --- std/src/sys_common/wtf8/tests.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/std/src/sys_common/wtf8/tests.rs b/std/src/sys_common/wtf8/tests.rs index 6a1cc41a8fb04..b57c99a8452a1 100644 --- a/std/src/sys_common/wtf8/tests.rs +++ b/std/src/sys_common/wtf8/tests.rs @@ -725,3 +725,27 @@ fn wtf8_utf8_boundary_between_surrogates() { string.push(CodePoint::from_u32(0xD800).unwrap()); check_utf8_boundary(&string, 3); } + +#[test] +fn wobbled_wtf8_plus_bytes_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.extend_from_slice(b"some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wobbled_wtf8_plus_str_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.push_str("some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn unwobbly_wtf8_plus_utf8_is_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.push_str("some utf-8"); + assert!(string.is_known_utf8); +}