Rollup merge of #118523 - okaneco:trim_ascii, r=Mark-Simulacrum

Add ASCII whitespace trimming functions to `&str` - Add `trim_ascii_start`, `trim_ascii_end`, and `trim_ascii` functions to `&str` for trimming ASCII whitespace - Add `#[inline]` to `[u8]` `trim_ascii` functions These functions are feature-gated by `#![feature(byte_slice_trim_ascii)]` #94035
rust-lang · Dec 16, 2023 · 15e84eb · 15e84eb
2 parents 5c927ab + e4808af
commit 15e84eb
Show file tree

Hide file tree

Showing 2 changed files with 82 additions and 0 deletions.
diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
@@ -125,6 +125,7 @@ impl [u8] {
     /// assert_eq!(b"".trim_ascii_start(), b"");
     /// ```
     #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[inline]
     pub const fn trim_ascii_start(&self) -> &[u8] {
         let mut bytes = self;
         // Note: A pattern matching based approach (instead of indexing) allows
@@ -154,6 +155,7 @@ impl [u8] {
     /// assert_eq!(b"".trim_ascii_end(), b"");
     /// ```
     #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[inline]
     pub const fn trim_ascii_end(&self) -> &[u8] {
         let mut bytes = self;
         // Note: A pattern matching based approach (instead of indexing) allows
@@ -184,6 +186,7 @@ impl [u8] {
     /// assert_eq!(b"".trim_ascii(), b"");
     /// ```
     #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[inline]
     pub const fn trim_ascii(&self) -> &[u8] {
         self.trim_ascii_start().trim_ascii_end()
     }

diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
@@ -2423,6 +2423,85 @@ impl str {
         me.make_ascii_lowercase()
     }
 
+    /// Returns a string slice with leading ASCII whitespace removed.
+    ///
+    /// 'Whitespace' refers to the definition used by
+    /// [`u8::is_ascii_whitespace`].
+    ///
+    /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(byte_slice_trim_ascii)]
+    ///
+    /// assert_eq!(" \t \u{3000}hello world\n".trim_ascii_start(), "\u{3000}hello world\n");
+    /// assert_eq!("  ".trim_ascii_start(), "");
+    /// assert_eq!("".trim_ascii_start(), "");
+    /// ```
+    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[must_use = "this returns the trimmed string as a new slice, \
+                  without modifying the original"]
+    #[inline]
+    pub const fn trim_ascii_start(&self) -> &str {
+        // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+        // UTF-8.
+        unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii_start()) }
+    }
+
+    /// Returns a string slice with trailing ASCII whitespace removed.
+    ///
+    /// 'Whitespace' refers to the definition used by
+    /// [`u8::is_ascii_whitespace`].
+    ///
+    /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(byte_slice_trim_ascii)]
+    ///
+    /// assert_eq!("\r hello world\u{3000}\n ".trim_ascii_end(), "\r hello world\u{3000}");
+    /// assert_eq!("  ".trim_ascii_end(), "");
+    /// assert_eq!("".trim_ascii_end(), "");
+    /// ```
+    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[must_use = "this returns the trimmed string as a new slice, \
+                  without modifying the original"]
+    #[inline]
+    pub const fn trim_ascii_end(&self) -> &str {
+        // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+        // UTF-8.
+        unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii_end()) }
+    }
+
+    /// Returns a string slice with leading and trailing ASCII whitespace
+    /// removed.
+    ///
+    /// 'Whitespace' refers to the definition used by
+    /// [`u8::is_ascii_whitespace`].
+    ///
+    /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(byte_slice_trim_ascii)]
+    ///
+    /// assert_eq!("\r hello world\n ".trim_ascii(), "hello world");
+    /// assert_eq!("  ".trim_ascii(), "");
+    /// assert_eq!("".trim_ascii(), "");
+    /// ```
+    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
+    #[must_use = "this returns the trimmed string as a new slice, \
+                  without modifying the original"]
+    #[inline]
+    pub const fn trim_ascii(&self) -> &str {
+        // SAFETY: Removing ASCII characters from a `&str` does not invalidate
+        // UTF-8.
+        unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii()) }
+    }
+
     /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
     ///
     /// Note: only extended grapheme codepoints that begin the string will be