From 465ffc9ca781af9ac0a89643c9971ac807da5766 Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Thu, 26 Oct 2023 20:23:56 -0400 Subject: [PATCH] Refactor some `char`, `u8` ascii functions to be branchless Decompose singular `matches!` with or-patterns to individual `matches!` statements to enable branchless code output. The following functions were changed: - `is_ascii_alphanumeric` - `is_ascii_hexdigit` - `is_ascii_punctuation` Add codegen tests Co-authored-by: George Bateman Co-authored-by: scottmcm --- library/core/src/char/methods.rs | 9 +++-- library/core/src/num/mod.rs | 9 +++-- tests/codegen/char-ascii-branchless.rs | 47 ++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 6 deletions(-) create mode 100644 tests/codegen/char-ascii-branchless.rs diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 4ac956e7b76dc..7ce33bdd41159 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1450,7 +1450,7 @@ impl char { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_alphanumeric(&self) -> bool { - matches!(*self, '0'..='9' | 'A'..='Z' | 'a'..='z') + matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z') } /// Checks if the value is an ASCII decimal digit: @@ -1553,7 +1553,7 @@ impl char { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { - matches!(*self, '0'..='9' | 'A'..='F' | 'a'..='f') + matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f') } /// Checks if the value is an ASCII punctuation character: @@ -1591,7 +1591,10 @@ impl char { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_punctuation(&self) -> bool { - matches!(*self, '!'..='/' | ':'..='@' | '['..='`' | '{'..='~') + matches!(*self, '!'..='/') + | matches!(*self, ':'..='@') + | matches!(*self, '['..='`') + | matches!(*self, '{'..='~') } /// Checks if the value is an ASCII graphic character: diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index a6c1adfac6551..2a0b31404f035 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -791,7 +791,7 @@ impl u8 { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_alphanumeric(&self) -> bool { - matches!(*self, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') + matches!(*self, b'0'..=b'9') | matches!(*self, b'A'..=b'Z') | matches!(*self, b'a'..=b'z') } /// Checks if the value is an ASCII decimal digit: @@ -894,7 +894,7 @@ impl u8 { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { - matches!(*self, b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f') + matches!(*self, b'0'..=b'9') | matches!(*self, b'A'..=b'F') | matches!(*self, b'a'..=b'f') } /// Checks if the value is an ASCII punctuation character: @@ -932,7 +932,10 @@ impl u8 { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_punctuation(&self) -> bool { - matches!(*self, b'!'..=b'/' | b':'..=b'@' | b'['..=b'`' | b'{'..=b'~') + matches!(*self, b'!'..=b'/') + | matches!(*self, b':'..=b'@') + | matches!(*self, b'['..=b'`') + | matches!(*self, b'{'..=b'~') } /// Checks if the value is an ASCII graphic character: diff --git a/tests/codegen/char-ascii-branchless.rs b/tests/codegen/char-ascii-branchless.rs new file mode 100644 index 0000000000000..b612b24c7c73b --- /dev/null +++ b/tests/codegen/char-ascii-branchless.rs @@ -0,0 +1,47 @@ +// Checks that these functions are branchless. +// +// compile-flags: -O + +#![crate_type = "lib"] + +// CHECK-LABEL: @is_ascii_alphanumeric_char +#[no_mangle] +pub fn is_ascii_alphanumeric_char(x: char) -> bool { + // CHECK-NOT: br + x.is_ascii_alphanumeric() +} + +// CHECK-LABEL: @is_ascii_alphanumeric_u8 +#[no_mangle] +pub fn is_ascii_alphanumeric_u8(x: u8) -> bool { + // CHECK-NOT: br + x.is_ascii_alphanumeric() +} + +// CHECK-LABEL: @is_ascii_hexdigit_char +#[no_mangle] +pub fn is_ascii_hexdigit_char(x: char) -> bool { + // CHECK-NOT: br + x.is_ascii_hexdigit() +} + +// CHECK-LABEL: @is_ascii_hexdigit_u8 +#[no_mangle] +pub fn is_ascii_hexdigit_u8(x: u8) -> bool { + // CHECK-NOT: br + x.is_ascii_hexdigit() +} + +// CHECK-LABEL: @is_ascii_punctuation_char +#[no_mangle] +pub fn is_ascii_punctuation_char(x: char) -> bool { + // CHECK-NOT: br + x.is_ascii_punctuation() +} + +// CHECK-LABEL: @is_ascii_punctuation_u8 +#[no_mangle] +pub fn is_ascii_punctuation_u8(x: u8) -> bool { + // CHECK-NOT: br + x.is_ascii_punctuation() +}