BurntSushi · redzic · Jul 2, 2022 · Jul 9, 2022 · Jul 9, 2022
diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ memchr links to the standard library by default, but you can disable the
 memchr = { version = "2", default-features = false }
 ```
 
-On x86 platforms, when the `std` feature is disabled, the SSE2 accelerated
+On x86-64 platforms, when the `std` feature is disabled, the SSE2 accelerated
 implementations will be used. When `std` is enabled, AVX accelerated
 implementations will be used if the CPU is determined to support it at runtime.
 

diff --git a/build.rs b/build.rs
@@ -10,7 +10,7 @@ fn main() {
 // This can be disabled with RUSTFLAGS="--cfg memchr_disable_auto_simd", but
 // this is generally only intended for testing.
 //
-// On targets which don't feature SSE2, this is disabled, as LLVM wouln't know
+// On targets which don't feature SSE2, this is disabled, as LLVM wouldn't know
 // how to work with SSE2 operands. Enabling SSE4.2 and AVX on SSE2-only targets
 // is not a problem. In that case, the fastest option will be chosen at
 // runtime.
@@ -29,6 +29,12 @@ fn enable_simd_optimizations() {
             println!("cargo:rustc-cfg=memchr_runtime_sse42");
             println!("cargo:rustc-cfg=memchr_runtime_avx");
         }
+        "aarch64" => {
+            if !target_has_feature("neon") {
+                return;
+            }
+            println!("cargo:rustc-cfg=memchr_runtime_neon");
+        }
         "wasm32" | "wasm64" => {
             if !target_has_feature("simd128") {
                 return;

diff --git a/src/lib.rs b/src/lib.rs
@@ -133,9 +133,9 @@ library haven't quite been worked out yet.
 
 **NOTE:** Currently, only `x86_64` targets have highly accelerated
 implementations of substring search. For `memchr`, all targets have
-somewhat-accelerated implementations, while only `x86_64` targets have highly
-accelerated implementations. This limitation is expected to be lifted once the
-standard library exposes a platform independent SIMD API.
+somewhat-accelerated implementations, while `x86_64` and `aarch64` targets
+have highly accelerated implementations. This limitation is expected to be
+lifted once the standard library exposes a platform independent SIMD API.
 
 # Crate features
 
@@ -144,7 +144,7 @@ standard library exposes a platform independent SIMD API.
   from the standard library is runtime SIMD CPU feature detection. This means
   that this feature must be enabled to get AVX accelerated routines. When
   `std` is not enabled, this crate will still attempt to use SSE2 accelerated
-  routines on `x86_64`.
+  routines on `x86_64` and NEON accelerated routines on `aarch64`.
 * **libc** - When enabled (**not** the default), this library will use your
   platform's libc implementation of `memchr` (and `memrchr` on Linux). This
   can be useful on non-`x86_64` targets where the fallback implementation in

diff --git a/src/memchr/aarch64/mod.rs b/src/memchr/aarch64/mod.rs
@@ -0,0 +1,79 @@
+use super::fallback;
+
+mod neon;
+
+/// Selects between the NEON and the fallback implementation of a routine.
+///
+/// AArch64 is the 64-bit architecture introduced with ARMv8, and NEON is
+/// required in all standard ARMv8 implementations. The choice is therefore
+/// made with `cfg!(memchr_runtime_neon)` alone; no runtime CPU feature
+/// detection takes place.
+///
+/// At the invocation site the needles are listed *after* the haystack, but
+/// they are passed *before* the haystack to the selected routine. The leading
+/// function type argument is accepted but is not used by this definition of
+/// the macro.
+///
+/// # Safety
+///
+/// This definition of the macro places no safety requirements on the caller.
+/// It only ever expands to a call to the fallback routine or to the NEON
+/// routine, and the latter is always safe to call on AArch64 for the reason
+/// given above.
+macro_rules! unsafe_ifunc {
+    ($sig:ty, $routine:ident, $hay:ident, $($byte:ident),+) => {{
+        if cfg!(memchr_runtime_neon) {
+            // SAFETY: all standard ARMv8 implementations provide NEON, so
+            // the NEON routine can be called without a runtime check.
+            unsafe { neon::$routine($($byte),+, $hay) }
+        } else {
+            fallback::$routine($($byte),+, $hay)
+        }
+    }}
+}
+
+/// AArch64 entry point for `memchr`.
+///
+/// Forwards `n1` and `haystack` to `neon::memchr` when the
+/// `memchr_runtime_neon` cfg is set and to `fallback::memchr` otherwise,
+/// returning the selected routine's result unchanged.
+#[inline(always)]
+pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+    // The haystack is named before the needle here; the macro passes the
+    // needle first when it calls the routine.
+    unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
+}
+
+/// AArch64 entry point for `memchr2`.
+///
+/// Forwards `n1`, `n2` and `haystack` to `neon::memchr2` when the
+/// `memchr_runtime_neon` cfg is set and to `fallback::memchr2` otherwise,
+/// returning the selected routine's result unchanged.
+#[inline(always)]
+pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+    // The haystack is named before the needles here; the macro passes the
+    // needles first when it calls the routine.
+    unsafe_ifunc!(
+        fn(u8, u8, &[u8]) -> Option<usize>,
+        memchr2,
+        haystack,
+        n1,
+        n2
+    )
+}
+
+/// AArch64 entry point for `memchr3`.
+///
+/// Forwards `n1`, `n2`, `n3` and `haystack` to `neon::memchr3` when the
+/// `memchr_runtime_neon` cfg is set and to `fallback::memchr3` otherwise,
+/// returning the selected routine's result unchanged.
+#[inline(always)]
+pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+    // The haystack is named before the needles here; the macro passes the
+    // needles first when it calls the routine.
+    unsafe_ifunc!(
+        fn(u8, u8, u8, &[u8]) -> Option<usize>,
+        memchr3,
+        haystack,
+        n1,
+        n2,
+        n3
+    )
+}
+
+/// AArch64 entry point for `memrchr`.
+///
+/// Forwards `n1` and `haystack` to `neon::memrchr` when the
+/// `memchr_runtime_neon` cfg is set and to `fallback::memrchr` otherwise,
+/// returning the selected routine's result unchanged.
+#[inline(always)]
+pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+    // The haystack is named before the needle here; the macro passes the
+    // needle first when it calls the routine.
+    unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
+}
+
+/// AArch64 entry point for `memrchr2`.
+///
+/// Forwards `n1`, `n2` and `haystack` to `neon::memrchr2` when the
+/// `memchr_runtime_neon` cfg is set and to `fallback::memrchr2` otherwise,
+/// returning the selected routine's result unchanged.
+#[inline(always)]
+pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
+    // The haystack is named before the needles here; the macro passes the
+    // needles first when it calls the routine.
+    unsafe_ifunc!(
+        fn(u8, u8, &[u8]) -> Option<usize>,
+        memrchr2,
+        haystack,
+        n1,
+        n2
+    )
+}
+
+/// AArch64 entry point for `memrchr3`.
+///
+/// Forwards `n1`, `n2`, `n3` and `haystack` to `neon::memrchr3` when the
+/// `memchr_runtime_neon` cfg is set and to `fallback::memrchr3` otherwise,
+/// returning the selected routine's result unchanged.
+#[inline(always)]
+pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
+    // The haystack is named before the needles here; the macro passes the
+    // needles first when it calls the routine.
+    unsafe_ifunc!(
+        fn(u8, u8, u8, &[u8]) -> Option<usize>,
+        memrchr3,
+        haystack,
+        n1,
+        n2,
+        n3
+    )
+}