From 95b7080acd5bde4a87533669beed57fdeac18903 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 08:40:24 -0400 Subject: [PATCH 001/166] Initial commit --- .gitignore | 2 + Cargo.toml | 7 ++ src/lib.rs | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 266 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 src/lib.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..96ef6c0b9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..197471fea --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "xx-renu" +version = "0.1.0" +edition = "2021" + +[dev-dependencies] +proptest = "1.4.0" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000..ca11d623b --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,257 @@ +#![deny(rust_2018_idioms)] + +use core::mem; + +const PRIME64_1: u64 = 0x9E3779B185EBCA87; +const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; +const PRIME64_3: u64 = 0x165667B19E3779F9; +const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; +const PRIME64_5: u64 = 0x27D4EB2F165667C5; + +#[derive(Default)] +#[repr(align(32))] +struct AlignedData([u8; 32]); + +impl AlignedData { + fn as_u64s(&self) -> &[u64; 4] { + // SAFETY: We are guaranteed to be aligned + unsafe { mem::transmute(&self.0) } + } +} + +#[derive(Default)] +struct Buffer { + offset: usize, + data: AlignedData, +} + +impl Buffer { + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { + if self.offset == 0 { + return (None, data); + }; + + let (_filled, empty) = self.data.0.split_at_mut(self.offset); // todo unchecked? 
+ let n_to_copy = usize::min(empty.len(), data.len()); + + let dst = &mut empty[..n_to_copy]; + let (src, rest) = data.split_at(n_to_copy); + + dst.copy_from_slice(src); + self.offset += n_to_copy; + + if self.offset == self.data.0.len() { + (Some(self.data.as_u64s()), rest) + } else { + (None, rest) + } + } + + fn set(&mut self, data: &[u8]) { + let n_to_copy = data.len(); + debug_assert!(n_to_copy < self.data.0.len()); + self.data.0[..n_to_copy].copy_from_slice(data); + self.offset = data.len(); + } + + fn remaining(&self) -> &[u8] { + &self.data.0[..self.offset] + } +} + +pub struct XxHash64 { + seed: u64, + accumulators: [u64; 4], + buffer: Buffer, + length: u64, +} + +impl XxHash64 { + pub fn with_seed(seed: u64) -> Self { + // Step 1. Initialize internal accumulators + let accumulators = [ + seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), + seed.wrapping_add(PRIME64_2), + seed, + seed.wrapping_sub(PRIME64_1), + ]; + + Self { + seed, + accumulators, + buffer: Buffer::default(), + length: 0, + } + } + + pub fn write(&mut self, data: &[u8]) { + let len = data.len(); + + // Step 2. Process stripes + // todo: dereference? 
+ let [acc1, acc2, acc3, acc4] = &mut self.accumulators; + + let (check, data) = self.buffer.extend(data); + + if let Some(&[lane1, lane2, lane3, lane4]) = check { + // todo: little-endian transform + + *acc1 = Self::round(*acc1, lane1); + *acc2 = Self::round(*acc2, lane2); + *acc3 = Self::round(*acc3, lane3); + *acc4 = Self::round(*acc4, lane4); + } + + let mut data = data; + while let Some((chunk, rest)) = data.split_first_chunk::<32>() { + let [lane1, lane2, lane3, lane4] = + unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + // todo: little-endian transform + + *acc1 = Self::round(*acc1, lane1); + *acc2 = Self::round(*acc2, lane2); + *acc3 = Self::round(*acc3, lane3); + *acc4 = Self::round(*acc4, lane4); + + data = rest; + } + let data = data; + + self.buffer.set(data); + + self.length += len.into_u64(); + } + + pub fn finish(&mut self) -> u64 { + // Step 3. Accumulator convergence + let mut acc = if self.length < 32 { + self.seed.wrapping_add(PRIME64_5) + } else { + let [acc1, acc2, acc3, acc4] = self.accumulators; + + let mut acc = { + let acc1 = acc1.rotate_left(1); + let acc2 = acc2.rotate_left(7); + let acc3 = acc3.rotate_left(12); + let acc4 = acc4.rotate_left(18); + + acc1.wrapping_add(acc2) + .wrapping_add(acc3) + .wrapping_add(acc4) + }; + + acc = Self::merge_accumulator(acc, acc1); + acc = Self::merge_accumulator(acc, acc2); + acc = Self::merge_accumulator(acc, acc3); + acc = Self::merge_accumulator(acc, acc4); + + acc + }; + + // Step 4. Add input length + acc += self.length; + + // Step 5. 
Consume remaining input + let mut remaining = self.buffer.remaining(); + + while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { + let lane = u64::from_ne_bytes(*chunk); + // todo: little-endian + + acc ^= Self::round(0, lane); + acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); + acc = acc.wrapping_add(PRIME64_4); + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { + let lane = u32::from_ne_bytes(*chunk).into_u64(); + // todo: little-endian + + acc ^= lane.wrapping_mul(PRIME64_1); + acc = acc.rotate_left(23).wrapping_mul(PRIME64_2); + acc = acc.wrapping_add(PRIME64_3); + + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<1>() { + let lane = chunk[0].into_u64(); + + acc ^= lane.wrapping_mul(PRIME64_5); + acc = acc.rotate_left(11).wrapping_mul(PRIME64_1); + + remaining = rest; + } + + // Step 6. Final mix (avalanche) + acc ^= acc >> 33; + acc = acc.wrapping_mul(PRIME64_2); + acc ^= acc >> 29; + acc = acc.wrapping_mul(PRIME64_3); + acc ^= acc >> 32; + + acc + } + + fn round(mut acc: u64, lane: u64) -> u64 { + acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); + acc = acc.rotate_left(31); + acc.wrapping_mul(PRIME64_1) + } + + fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { + acc ^= Self::round(0, acc_n); + acc = acc.wrapping_mul(PRIME64_1); + acc.wrapping_add(PRIME64_4) + } +} + +trait IntoU64 { + fn into_u64(self) -> u64; +} + +impl IntoU64 for u8 { + fn into_u64(self) -> u64 { + self.into() + } +} + +impl IntoU64 for u32 { + fn into_u64(self) -> u64 { + self.into() + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl IntoU64 for usize { + fn into_u64(self) -> u64 { + self as u64 + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn hash_of_nothing_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); + } + + #[test] + 
fn hash_of_single_byte_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[42]); + assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); + } + + #[test] + fn hash_of_exactly_32_bytes() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[0; 32]); + assert_eq!(hasher.finish(), 0xf6e9_be5d_7063_2cf5); + } +} From ccb57232a31e88f0165657f3007a7733c4d2485e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 09:49:18 -0400 Subject: [PATCH 002/166] Add comparison to native implementation --- .gitignore | 2 +- .gitmodules | 6 ++++ compare/.gitignore | 2 ++ compare/Cargo.toml | 9 ++++++ compare/src/lib.rs | 39 +++++++++++++++++++++++ compare/xx_hash-sys/.gitignore | 2 ++ compare/xx_hash-sys/Cargo.toml | 8 +++++ compare/xx_hash-sys/build.rs | 10 ++++++ compare/xx_hash-sys/src/lib.rs | 58 ++++++++++++++++++++++++++++++++++ compare/xx_hash-sys/xxHash | 1 + 10 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 .gitmodules create mode 100644 compare/.gitignore create mode 100644 compare/Cargo.toml create mode 100644 compare/src/lib.rs create mode 100644 compare/xx_hash-sys/.gitignore create mode 100644 compare/xx_hash-sys/Cargo.toml create mode 100644 compare/xx_hash-sys/build.rs create mode 100644 compare/xx_hash-sys/src/lib.rs create mode 160000 compare/xx_hash-sys/xxHash diff --git a/.gitignore b/.gitignore index 96ef6c0b9..1b72444ae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ +/Cargo.lock /target -Cargo.lock diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..52476fd22 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "xxHash"] + path = compare/xxHash + url = git@github.com:Cyan4973/xxHash.git +[submodule "compare/xx_hash-sys/xxHash"] + path = compare/xx_hash-sys/xxHash + url = git@github.com:Cyan4973/xxHash.git diff --git a/compare/.gitignore b/compare/.gitignore new file mode 100644 index 000000000..1b72444ae --- /dev/null +++ 
b/compare/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/compare/Cargo.toml b/compare/Cargo.toml new file mode 100644 index 000000000..ac6bb09cb --- /dev/null +++ b/compare/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "compare" +version = "0.1.0" +edition = "2021" + +[dependencies] +proptest = "1.5.0" +xx-renu = { path = ".." } +xx_hash-sys = { path = "xx_hash-sys" } diff --git a/compare/src/lib.rs b/compare/src/lib.rs new file mode 100644 index 000000000..02947ab45 --- /dev/null +++ b/compare/src/lib.rs @@ -0,0 +1,39 @@ +#![cfg(test)] + +use proptest::{num, prelude::*, test_runner::TestCaseResult}; + +proptest! { + #[test] + fn it_works(seed: u64, data: Vec) { + it_works_impl(seed, &data)?; + } + + #[test] + fn it_works_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + it_works_impl(seed, &data[offset..])?; + } +} + +fn it_works_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = xx_hash_sys::Stream::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = xx_renu::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) +} + +fn vec_and_index() -> impl Strategy, usize)> { + prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| { + let len = vec.len(); + (Just(vec), 0..len) + }) +} diff --git a/compare/xx_hash-sys/.gitignore b/compare/xx_hash-sys/.gitignore new file mode 100644 index 000000000..1b72444ae --- /dev/null +++ b/compare/xx_hash-sys/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/compare/xx_hash-sys/Cargo.toml b/compare/xx_hash-sys/Cargo.toml new file mode 100644 index 000000000..d385daf66 --- /dev/null +++ b/compare/xx_hash-sys/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "xx_hash-sys" +version = "0.0.0" +edition = "2021" +publish = false + +[dependencies] +libc = { version = "0.2.155", default-features = false } diff --git a/compare/xx_hash-sys/build.rs 
b/compare/xx_hash-sys/build.rs new file mode 100644 index 000000000..cdc31eb97 --- /dev/null +++ b/compare/xx_hash-sys/build.rs @@ -0,0 +1,10 @@ +use std::{env, path::PathBuf}; + +fn main() { + let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); + let base: PathBuf = base.into(); + let xxhash = base.join("xxHash"); + + println!("cargo::rustc-link-lib=static=xxhash"); + println!("cargo::rustc-link-search={}", xxhash.display()); +} diff --git a/compare/xx_hash-sys/src/lib.rs b/compare/xx_hash-sys/src/lib.rs new file mode 100644 index 000000000..6728d92b8 --- /dev/null +++ b/compare/xx_hash-sys/src/lib.rs @@ -0,0 +1,58 @@ +#![allow(non_camel_case_types)] + +type XXH64_hash_t = u64; +#[repr(C)] +pub struct XXH64_state_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +type XXH_errorcode = libc::c_int; +const XXH_OK: XXH_errorcode = 0; + +extern "C" { + fn XXH64(input: *const libc::c_void, length: libc::size_t, seed: XXH64_hash_t) -> XXH64_hash_t; + + fn XXH64_createState() -> *mut XXH64_state_t; + fn XXH64_reset(state: *mut XXH64_state_t, seed: XXH64_hash_t) -> XXH_errorcode; + fn XXH64_update( + state: *mut XXH64_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn XXH64_digest(state: *mut XXH64_state_t) -> XXH64_hash_t; + fn XXH64_freeState(state: *mut XXH64_state_t); +} + +pub fn oneshot(seed: u64, data: &[u8]) -> u64 { + unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } +} + +pub struct Stream(*mut XXH64_state_t); + +impl Stream { + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = XXH64_createState(); + XXH64_reset(state, seed); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { XXH64_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { XXH64_digest(self.0) } + } +} + +impl Drop for Stream { + fn drop(&mut 
self) { + unsafe { XXH64_freeState(self.0) } + } +} diff --git a/compare/xx_hash-sys/xxHash b/compare/xx_hash-sys/xxHash new file mode 160000 index 000000000..805c00b68 --- /dev/null +++ b/compare/xx_hash-sys/xxHash @@ -0,0 +1 @@ +Subproject commit 805c00b68fa754200ada0c207ffeaa7a4409377c From e61bdb7617c5ae97a7d9fe16984e23f4b7261828 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 14:20:55 -0400 Subject: [PATCH 003/166] Add benchmarks --- compare/Cargo.toml | 6 +++ compare/benches/benchmark.rs | 95 ++++++++++++++++++++++++++++++++++ compare/xx_hash-sys/src/lib.rs | 8 +-- src/lib.rs | 6 +++ 4 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 compare/benches/benchmark.rs diff --git a/compare/Cargo.toml b/compare/Cargo.toml index ac6bb09cb..cb14ba84f 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -3,7 +3,13 @@ name = "compare" version = "0.1.0" edition = "2021" +[[bench]] +name = "benchmark" +harness = false + [dependencies] +criterion = "0.5.1" proptest = "1.5.0" +rand = "0.8.5" xx-renu = { path = ".." 
} xx_hash-sys = { path = "xx_hash-sys" } diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs new file mode 100644 index 000000000..58fce0ebc --- /dev/null +++ b/compare/benches/benchmark.rs @@ -0,0 +1,95 @@ +use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +use rand::{Rng, RngCore, SeedableRng}; +use std::{hint::black_box, iter}; +use xx_hash_sys::Stream; +use xx_renu::XxHash64; + +fn oneshot(c: &mut Criterion) { + let (seed, data) = gen_data(); + let mut g = c.benchmark_group("oneshot"); + + for size in half_sizes(&data).take(10) { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("xxHash/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = Stream::oneshot(seed, &data); + black_box(hash); + }) + }); + + let id = format!("renu/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = XxHash64::oneshot(seed, &data); + black_box(hash); + }) + }); + } + + g.finish(); +} + +fn streaming_one_chunk(c: &mut Criterion) { + let (seed, data) = gen_data(); + let mut g = c.benchmark_group("streaming_one_chunk"); + + for size in half_sizes(&data).take(10) { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("xxHash/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = Stream::with_seed(seed); + hasher.write(&data); + let hash = hasher.finish(); + black_box(hash); + }) + }); + + let id = format!("renu/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = XxHash64::with_seed(seed); + hasher.write(&data); + let hash = hasher.finish(); + black_box(hash); + }) + }); + } + + g.finish(); +} + +const SEED: u64 = 0xc651_4843_1995_363f; +const DATA_SIZE: usize = 100 * 1024 * 1024; + +fn gen_data() -> (u64, Vec) { + let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); + + let seed = rng.gen(); + + let mut data = vec![0; DATA_SIZE]; + rng.fill_bytes(&mut data); + + (seed, data) +} + +fn 
half_sizes(data: &[u8]) -> impl Iterator { + iter::successors( + Some(data.len()), + |&v| { + if v == 1 { + None + } else { + Some(v / 2) + } + }, + ) +} + +criterion_group!(benches, oneshot, streaming_one_chunk); +criterion_main!(benches); diff --git a/compare/xx_hash-sys/src/lib.rs b/compare/xx_hash-sys/src/lib.rs index 6728d92b8..2c3ed6e8a 100644 --- a/compare/xx_hash-sys/src/lib.rs +++ b/compare/xx_hash-sys/src/lib.rs @@ -24,13 +24,13 @@ extern "C" { fn XXH64_freeState(state: *mut XXH64_state_t); } -pub fn oneshot(seed: u64, data: &[u8]) -> u64 { - unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } -} - pub struct Stream(*mut XXH64_state_t); impl Stream { + pub fn oneshot(seed: u64, data: &[u8]) -> u64 { + unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } + } + pub fn with_seed(seed: u64) -> Self { let state = unsafe { let state = XXH64_createState(); diff --git a/src/lib.rs b/src/lib.rs index ca11d623b..f3e904706 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,6 +67,12 @@ pub struct XxHash64 { } impl XxHash64 { + pub fn oneshot(seed: u64, data: &[u8]) -> u64 { + let mut this = Self::with_seed(seed); + this.write(data); + this.finish() + } + pub fn with_seed(seed: u64) -> Self { // Step 1. 
Initialize internal accumulators let accumulators = [ From c7eed0e8280e960933d6c9567b6fac6dd2ed9161 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 21:24:45 -0400 Subject: [PATCH 004/166] Add commandline sum tool --- renu-sum/.gitignore | 2 ++ renu-sum/Cargo.toml | 7 +++++++ renu-sum/src/main.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 renu-sum/.gitignore create mode 100644 renu-sum/Cargo.toml create mode 100644 renu-sum/src/main.rs diff --git a/renu-sum/.gitignore b/renu-sum/.gitignore new file mode 100644 index 000000000..1b72444ae --- /dev/null +++ b/renu-sum/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/renu-sum/Cargo.toml b/renu-sum/Cargo.toml new file mode 100644 index 000000000..2677f2ea2 --- /dev/null +++ b/renu-sum/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "renu-sum" +version = "0.1.0" +edition = "2021" + +[dependencies] +xx-renu = { path = ".." } diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs new file mode 100644 index 000000000..08d1e1413 --- /dev/null +++ b/renu-sum/src/main.rs @@ -0,0 +1,40 @@ +use std::{ + env, + fs::File, + io::Read, + path::{Path, PathBuf}, +}; +use xx_renu::XxHash64; + +type Result> = std::result::Result; + +fn main() -> Result<()> { + let mut buffer = vec![0; 32 * 1024 * 1024]; + + for path in env::args_os().skip(1) { + let path = PathBuf::from(path); + let hash = hash_one_file(&path, &mut buffer)?; + eprintln!("{hash:x} {}", path.display()); + } + + Ok(()) +} + +fn hash_one_file(path: &Path, buffer: &mut [u8]) -> Result { + let mut file = File::open(path)?; + let mut hasher = XxHash64::with_seed(0); + + loop { + let n_bytes = file.read(buffer)?; + if n_bytes == 0 { + break; + } + + let valid = &buffer[..n_bytes]; + + hasher.write(valid); + } + + let hash = hasher.finish(); + Ok(hash) +} From c5150c9e9bbcb00d67a86351721563d0c88793c2 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 21:30:18 -0400 Subject: 
[PATCH 005/166] Move to a workspace --- .gitmodules | 2 +- Cargo.toml | 8 ++++++-- compare/.gitignore | 1 - compare/Cargo.toml | 2 +- renu-sum/.gitignore | 1 - {compare/xx_hash-sys => xx_hash-sys}/.gitignore | 1 - {compare/xx_hash-sys => xx_hash-sys}/Cargo.toml | 0 {compare/xx_hash-sys => xx_hash-sys}/build.rs | 0 {compare/xx_hash-sys => xx_hash-sys}/src/lib.rs | 0 {compare/xx_hash-sys => xx_hash-sys}/xxHash | 0 10 files changed, 8 insertions(+), 7 deletions(-) rename {compare/xx_hash-sys => xx_hash-sys}/.gitignore (60%) rename {compare/xx_hash-sys => xx_hash-sys}/Cargo.toml (100%) rename {compare/xx_hash-sys => xx_hash-sys}/build.rs (100%) rename {compare/xx_hash-sys => xx_hash-sys}/src/lib.rs (100%) rename {compare/xx_hash-sys => xx_hash-sys}/xxHash (100%) diff --git a/.gitmodules b/.gitmodules index 52476fd22..5a68a159b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,5 +2,5 @@ path = compare/xxHash url = git@github.com:Cyan4973/xxHash.git [submodule "compare/xx_hash-sys/xxHash"] - path = compare/xx_hash-sys/xxHash + path = xx_hash-sys/xxHash url = git@github.com:Cyan4973/xxHash.git diff --git a/Cargo.toml b/Cargo.toml index 197471fea..09a2d4f94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,5 +3,9 @@ name = "xx-renu" version = "0.1.0" edition = "2021" -[dev-dependencies] -proptest = "1.4.0" +[workspace] +members = [ + "compare", + "renu-sum", + "xx_hash-sys", +] diff --git a/compare/.gitignore b/compare/.gitignore index 1b72444ae..5a44eef09 100644 --- a/compare/.gitignore +++ b/compare/.gitignore @@ -1,2 +1 @@ /Cargo.lock -/target diff --git a/compare/Cargo.toml b/compare/Cargo.toml index cb14ba84f..f91e12f2a 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -12,4 +12,4 @@ criterion = "0.5.1" proptest = "1.5.0" rand = "0.8.5" xx-renu = { path = ".." 
} -xx_hash-sys = { path = "xx_hash-sys" } +xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/renu-sum/.gitignore b/renu-sum/.gitignore index 1b72444ae..5a44eef09 100644 --- a/renu-sum/.gitignore +++ b/renu-sum/.gitignore @@ -1,2 +1 @@ /Cargo.lock -/target diff --git a/compare/xx_hash-sys/.gitignore b/xx_hash-sys/.gitignore similarity index 60% rename from compare/xx_hash-sys/.gitignore rename to xx_hash-sys/.gitignore index 1b72444ae..5a44eef09 100644 --- a/compare/xx_hash-sys/.gitignore +++ b/xx_hash-sys/.gitignore @@ -1,2 +1 @@ /Cargo.lock -/target diff --git a/compare/xx_hash-sys/Cargo.toml b/xx_hash-sys/Cargo.toml similarity index 100% rename from compare/xx_hash-sys/Cargo.toml rename to xx_hash-sys/Cargo.toml diff --git a/compare/xx_hash-sys/build.rs b/xx_hash-sys/build.rs similarity index 100% rename from compare/xx_hash-sys/build.rs rename to xx_hash-sys/build.rs diff --git a/compare/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs similarity index 100% rename from compare/xx_hash-sys/src/lib.rs rename to xx_hash-sys/src/lib.rs diff --git a/compare/xx_hash-sys/xxHash b/xx_hash-sys/xxHash similarity index 100% rename from compare/xx_hash-sys/xxHash rename to xx_hash-sys/xxHash From e9bc13664f304a2ebf11a51c328364e3ef87d115 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 08:11:24 -0400 Subject: [PATCH 006/166] Adjust submodule --- .gitmodules | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.gitmodules b/.gitmodules index 5a68a159b..451faf0f9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "xxHash"] - path = compare/xxHash - url = git@github.com:Cyan4973/xxHash.git -[submodule "compare/xx_hash-sys/xxHash"] +[submodule "xx_hash-sys/xxHash"] path = xx_hash-sys/xxHash - url = git@github.com:Cyan4973/xxHash.git + url = https://github.com/Cyan4973/xxHash.git From 198fe0b8fb5ce2fbe3477cb2be86134fe542822d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 
11:01:07 -0400 Subject: [PATCH 007/166] Extract a helper --- src/lib.rs | 129 ++++++++++++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f3e904706..eadf5c4c8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ const PRIME64_5: u64 = 0x27D4EB2F165667C5; struct AlignedData([u8; 32]); impl AlignedData { - fn as_u64s(&self) -> &[u64; 4] { + const fn as_u64s(&self) -> &[u64; 4] { // SAFETY: We are guaranteed to be aligned unsafe { mem::transmute(&self.0) } } @@ -59,9 +59,61 @@ impl Buffer { } } +struct Accumulators([u64; 4]); + +impl Accumulators { + const fn new(seed: u64) -> Self { + Self([ + seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), + seed.wrapping_add(PRIME64_2), + seed, + seed.wrapping_sub(PRIME64_1), + ]) + } + + fn write(&mut self, lanes: [u64; 4]) { + let [acc1, acc2, acc3, acc4] = &mut self.0; + let [lane1, lane2, lane3, lane4] = lanes; + // todo: little-endian transform + + *acc1 = round(*acc1, lane1); + *acc2 = round(*acc2, lane2); + *acc3 = round(*acc3, lane3); + *acc4 = round(*acc4, lane4); + } + + const fn finish(&self) -> u64 { + let [acc1, acc2, acc3, acc4] = self.0; + + let mut acc = { + let acc1 = acc1.rotate_left(1); + let acc2 = acc2.rotate_left(7); + let acc3 = acc3.rotate_left(12); + let acc4 = acc4.rotate_left(18); + + acc1.wrapping_add(acc2) + .wrapping_add(acc3) + .wrapping_add(acc4) + }; + + acc = Self::merge_accumulator(acc, acc1); + acc = Self::merge_accumulator(acc, acc2); + acc = Self::merge_accumulator(acc, acc3); + acc = Self::merge_accumulator(acc, acc4); + + acc + } + + const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { + acc ^= round(0, acc_n); + acc = acc.wrapping_mul(PRIME64_1); + acc.wrapping_add(PRIME64_4) + } +} + pub struct XxHash64 { seed: u64, - accumulators: [u64; 4], + accumulators: Accumulators, buffer: Buffer, length: u64, } @@ -75,16 +127,10 @@ impl XxHash64 { pub fn with_seed(seed: u64) -> Self { // Step 
1. Initialize internal accumulators - let accumulators = [ - seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), - seed.wrapping_add(PRIME64_2), - seed, - seed.wrapping_sub(PRIME64_1), - ]; Self { seed, - accumulators, + accumulators: Accumulators::new(seed), buffer: Buffer::default(), length: 0, } @@ -94,31 +140,18 @@ impl XxHash64 { let len = data.len(); // Step 2. Process stripes - // todo: dereference? - let [acc1, acc2, acc3, acc4] = &mut self.accumulators; - - let (check, data) = self.buffer.extend(data); - - if let Some(&[lane1, lane2, lane3, lane4]) = check { - // todo: little-endian transform + let (buffered_lanes, data) = self.buffer.extend(data); - *acc1 = Self::round(*acc1, lane1); - *acc2 = Self::round(*acc2, lane2); - *acc3 = Self::round(*acc3, lane3); - *acc4 = Self::round(*acc4, lane4); + if let Some(&lanes) = buffered_lanes { + self.accumulators.write(lanes); } let mut data = data; while let Some((chunk, rest)) = data.split_first_chunk::<32>() { - let [lane1, lane2, lane3, lane4] = - unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; - // todo: little-endian transform - - *acc1 = Self::round(*acc1, lane1); - *acc2 = Self::round(*acc2, lane2); - *acc3 = Self::round(*acc3, lane3); - *acc4 = Self::round(*acc4, lane4); - + // SAFETY: We have the right number of bytes and are + // handling the unaligned case. 
+ let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + self.accumulators.write(lanes); data = rest; } let data = data; @@ -133,25 +166,7 @@ impl XxHash64 { let mut acc = if self.length < 32 { self.seed.wrapping_add(PRIME64_5) } else { - let [acc1, acc2, acc3, acc4] = self.accumulators; - - let mut acc = { - let acc1 = acc1.rotate_left(1); - let acc2 = acc2.rotate_left(7); - let acc3 = acc3.rotate_left(12); - let acc4 = acc4.rotate_left(18); - - acc1.wrapping_add(acc2) - .wrapping_add(acc3) - .wrapping_add(acc4) - }; - - acc = Self::merge_accumulator(acc, acc1); - acc = Self::merge_accumulator(acc, acc2); - acc = Self::merge_accumulator(acc, acc3); - acc = Self::merge_accumulator(acc, acc4); - - acc + self.accumulators.finish() }; // Step 4. Add input length @@ -164,7 +179,7 @@ impl XxHash64 { let lane = u64::from_ne_bytes(*chunk); // todo: little-endian - acc ^= Self::round(0, lane); + acc ^= round(0, lane); acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); acc = acc.wrapping_add(PRIME64_4); remaining = rest; @@ -199,18 +214,12 @@ impl XxHash64 { acc } +} - fn round(mut acc: u64, lane: u64) -> u64 { - acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); - acc = acc.rotate_left(31); - acc.wrapping_mul(PRIME64_1) - } - - fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { - acc ^= Self::round(0, acc_n); - acc = acc.wrapping_mul(PRIME64_1); - acc.wrapping_add(PRIME64_4) - } +const fn round(mut acc: u64, lane: u64) -> u64 { + acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); + acc = acc.rotate_left(31); + acc.wrapping_mul(PRIME64_1) } trait IntoU64 { From f9ff61c0bdd12f8ef452a09758b76550ee0bf66f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 11:01:30 -0400 Subject: [PATCH 008/166] Use threads for the CLI --- renu-sum/src/main.rs | 67 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 13 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 08d1e1413..1fe00770e 
100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -3,38 +3,79 @@ use std::{ fs::File, io::Read, path::{Path, PathBuf}, + sync::mpsc, + thread, }; use xx_renu::XxHash64; -type Result> = std::result::Result; +type Error = Box; +type Result = std::result::Result; -fn main() -> Result<()> { - let mut buffer = vec![0; 32 * 1024 * 1024]; +const BUFFER_SIZE: usize = 16 * 1024; +const BUFFER_COUNT: usize = 8; +fn main() -> Result<()> { for path in env::args_os().skip(1) { let path = PathBuf::from(path); - let hash = hash_one_file(&path, &mut buffer)?; + let hash = hash_one_file(&path)?; eprintln!("{hash:x} {}", path.display()); } Ok(()) } -fn hash_one_file(path: &Path, buffer: &mut [u8]) -> Result { +fn hash_one_file(path: &Path) -> Result { let mut file = File::open(path)?; let mut hasher = XxHash64::with_seed(0); - loop { - let n_bytes = file.read(buffer)?; - if n_bytes == 0 { - break; - } + let (tx, rx) = mpsc::sync_channel(BUFFER_COUNT); + let (tx2, rx2) = mpsc::sync_channel(BUFFER_COUNT); - let valid = &buffer[..n_bytes]; - - hasher.write(valid); + for _ in 0..BUFFER_COUNT { + tx.send(vec![0; BUFFER_SIZE]) + .expect("Must be able to populate initial buffers"); } + thread::scope(|scope| { + let t1 = scope.spawn(move || { + while let Ok(mut buffer) = rx.recv() { + let n_bytes = file.read(&mut buffer)?; + + if n_bytes == 0 { + break; + } + + tx2.send((buffer, n_bytes))?; + } + + Ok::<_, Error>(()) + }); + + let t2 = scope.spawn({ + let hasher = &mut hasher; + move || { + while let Ok((buffer, n_bytes)) = rx2.recv() { + let valid = &buffer[..n_bytes]; + + hasher.write(valid); + + if tx.send(buffer).is_err() { + // The reading thread has exited and there's + // nowhere to return this buffer to. 
+ continue; + } + } + + Ok::<_, Error>(()) + } + }); + + t1.join().unwrap()?; + t2.join().unwrap()?; + + Ok::<_, Error>(()) + })?; + let hash = hasher.finish(); Ok(hash) } From 7fa13e66413f161abeab0edc8baa7d0d7d460f03 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 11:36:47 -0400 Subject: [PATCH 009/166] Parameterize and tune the buffer size and count --- renu-sum/src/main.rs | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 1fe00770e..54d0d699c 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -11,28 +11,54 @@ use xx_renu::XxHash64; type Error = Box; type Result = std::result::Result; -const BUFFER_SIZE: usize = 16 * 1024; +const BUFFER_SIZE: usize = 128 * 1024; const BUFFER_COUNT: usize = 8; +struct Config { + buffer_size: usize, + buffer_count: usize, +} + +impl Config { + fn from_env() -> Self { + let buffer_size = env::var("BUFFER_SIZE") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(BUFFER_SIZE); + + let buffer_count = env::var("BUFFER_COUNT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(BUFFER_COUNT); + + Self { + buffer_size, + buffer_count, + } + } +} + fn main() -> Result<()> { + let config = Config::from_env(); + for path in env::args_os().skip(1) { let path = PathBuf::from(path); - let hash = hash_one_file(&path)?; + let hash = hash_one_file(&config, &path)?; eprintln!("{hash:x} {}", path.display()); } Ok(()) } -fn hash_one_file(path: &Path) -> Result { +fn hash_one_file(config: &Config, path: &Path) -> Result { let mut file = File::open(path)?; let mut hasher = XxHash64::with_seed(0); - let (tx, rx) = mpsc::sync_channel(BUFFER_COUNT); - let (tx2, rx2) = mpsc::sync_channel(BUFFER_COUNT); + let (tx, rx) = mpsc::sync_channel(config.buffer_count); + let (tx2, rx2) = mpsc::sync_channel(config.buffer_count); - for _ in 0..BUFFER_COUNT { - tx.send(vec![0; BUFFER_SIZE]) + for _ in 0..config.buffer_count { + 
tx.send(vec![0; config.buffer_size]) .expect("Must be able to populate initial buffers"); } From 51beb3f12b790b828de796e9f7b0fe8e1df95148 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 12:04:12 -0400 Subject: [PATCH 010/166] Reduce allocation count --- renu-sum/src/main.rs | 55 +++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 54d0d699c..409ee35a7 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -3,7 +3,7 @@ use std::{ fs::File, io::Read, path::{Path, PathBuf}, - sync::mpsc, + sync::mpsc::{self, SendError}, thread, }; use xx_renu::XxHash64; @@ -41,63 +41,66 @@ impl Config { fn main() -> Result<()> { let config = Config::from_env(); + let mut buffer = vec![0; config.buffer_count * config.buffer_size]; + for path in env::args_os().skip(1) { let path = PathBuf::from(path); - let hash = hash_one_file(&config, &path)?; + let hash = hash_one_file(&config, &path, &mut buffer)?; eprintln!("{hash:x} {}", path.display()); } Ok(()) } -fn hash_one_file(config: &Config, path: &Path) -> Result { +fn hash_one_file(config: &Config, path: &Path, buffer: &mut [u8]) -> Result { let mut file = File::open(path)?; let mut hasher = XxHash64::with_seed(0); - let (tx, rx) = mpsc::sync_channel(config.buffer_count); - let (tx2, rx2) = mpsc::sync_channel(config.buffer_count); + let (tx_empty, rx_empty) = mpsc::channel(); + let (tx_filled, rx_filled) = mpsc::channel(); - for _ in 0..config.buffer_count { - tx.send(vec![0; config.buffer_size]) + for buffer in buffer.chunks_mut(config.buffer_size) { + tx_empty + .send(buffer) .expect("Must be able to populate initial buffers"); } thread::scope(|scope| { - let t1 = scope.spawn(move || { - while let Ok(mut buffer) = rx.recv() { - let n_bytes = file.read(&mut buffer)?; + let thread_reader = scope.spawn(move || { + while let Ok(buffer) = rx_empty.recv() { + let n_bytes = file.read(buffer)?; if n_bytes == 0 { 
break; } - tx2.send((buffer, n_bytes))?; + tx_filled + .send((buffer, n_bytes)) + .map_err(|_| SendError(()))?; } Ok::<_, Error>(()) }); - let t2 = scope.spawn({ - let hasher = &mut hasher; - move || { - while let Ok((buffer, n_bytes)) = rx2.recv() { - let valid = &buffer[..n_bytes]; + let hasher = &mut hasher; + let thread_hasher = scope.spawn(move || { + while let Ok((buffer, n_bytes)) = rx_filled.recv() { + let valid = &buffer[..n_bytes]; - hasher.write(valid); + hasher.write(valid); - if tx.send(buffer).is_err() { - // The reading thread has exited and there's - // nowhere to return this buffer to. - continue; - } + if tx_empty.send(buffer).is_err() { + // The reading thread has exited and there's + // nowhere to return this buffer to. + continue; } - - Ok::<_, Error>(()) } + + Ok::<_, Error>(()) }); - t1.join().unwrap()?; - t2.join().unwrap()?; + thread_reader.join().unwrap()?; + thread_hasher.join().unwrap()?; Ok::<_, Error>(()) })?; From 41b899c7855c0dd94f03e2544457344ef942a60d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 13:08:47 -0400 Subject: [PATCH 011/166] twox-hash bench --- compare/Cargo.toml | 1 + compare/benches/benchmark.rs | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/compare/Cargo.toml b/compare/Cargo.toml index f91e12f2a..44ef771ee 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -11,5 +11,6 @@ harness = false criterion = "0.5.1" proptest = "1.5.0" rand = "0.8.5" +twox-hash = "1.6.3" xx-renu = { path = ".." 
} xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 58fce0ebc..ae0ea4964 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,6 +1,8 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use rand::{Rng, RngCore, SeedableRng}; +use std::hash::Hasher; use std::{hint::black_box, iter}; +use twox_hash::XxHash64 as Old; use xx_hash_sys::Stream; use xx_renu::XxHash64; @@ -59,6 +61,16 @@ fn streaming_one_chunk(c: &mut Criterion) { black_box(hash); }) }); + + let id = format!("twox-hash/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = Old::with_seed(seed); + hasher.write(&data); + let hash = hasher.finish(); + black_box(hash); + }) + }); } g.finish(); From 19848d0bb76dc04a95ea664a60d90aea16558962 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 13:09:03 -0400 Subject: [PATCH 012/166] const it more --- src/lib.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index eadf5c4c8..fd939ba67 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![no_std] #![deny(rust_2018_idioms)] use core::mem; @@ -8,24 +9,33 @@ const PRIME64_3: u64 = 0x165667B19E3779F9; const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; -#[derive(Default)] #[repr(align(32))] struct AlignedData([u8; 32]); impl AlignedData { + const fn new() -> Self { + Self([0; 32]) + } + const fn as_u64s(&self) -> &[u64; 4] { // SAFETY: We are guaranteed to be aligned unsafe { mem::transmute(&self.0) } } } -#[derive(Default)] struct Buffer { offset: usize, data: AlignedData, } impl Buffer { + const fn new() -> Self { + Self { + offset: 0, + data: AlignedData::new(), + } + } + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { if self.offset == 0 { return (None, data); @@ -49,7 +59,9 @@ impl Buffer { fn set(&mut self, data: &[u8]) { let 
n_to_copy = data.len(); + debug_assert!(n_to_copy < self.data.0.len()); + self.data.0[..n_to_copy].copy_from_slice(data); self.offset = data.len(); } @@ -125,13 +137,13 @@ impl XxHash64 { this.finish() } - pub fn with_seed(seed: u64) -> Self { + pub const fn with_seed(seed: u64) -> Self { // Step 1. Initialize internal accumulators Self { seed, accumulators: Accumulators::new(seed), - buffer: Buffer::default(), + buffer: Buffer::new(), length: 0, } } From c4fdb7f7dc8a4149302110d2b4a5dc697dd804e4 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 14:54:49 -0400 Subject: [PATCH 013/166] Document unchecked decision --- src/lib.rs | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fd939ba67..1235ea1c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,11 +10,17 @@ const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; #[repr(align(32))] -struct AlignedData([u8; 32]); +struct AlignedData([u8; Self::LEN]); impl AlignedData { + const LEN: usize = 32; + const fn new() -> Self { - Self([0; 32]) + Self([0; Self::LEN]) + } + + const fn len(&self) -> usize { + Self::LEN } const fn as_u64s(&self) -> &[u64; 4] { @@ -37,20 +43,34 @@ impl Buffer { } fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { + // Most of the slice methods we use here have `_unchecked` variants, but + // + // 1. this method is called one time per `XxHash64::write` call + // 2. this method early exits if we don't have anything in the buffer + // + // Because of this, removing the panics via `unsafe` doesn't + // have much benefit other than reducing code size by a tiny + // bit. + + debug_assert!(self.offset <= self.data.len()); + if self.offset == 0 { return (None, data); }; - let (_filled, empty) = self.data.0.split_at_mut(self.offset); // todo unchecked? 
+ let empty = &mut self.data.0[self.offset..]; let n_to_copy = usize::min(empty.len(), data.len()); let dst = &mut empty[..n_to_copy]; + let (src, rest) = data.split_at(n_to_copy); dst.copy_from_slice(src); self.offset += n_to_copy; - if self.offset == self.data.0.len() { + debug_assert!(self.offset <= self.data.len()); + + if self.offset == self.data.len() { (Some(self.data.as_u64s()), rest) } else { (None, rest) @@ -58,9 +78,11 @@ impl Buffer { } fn set(&mut self, data: &[u8]) { + debug_assert!([0, self.data.len()].contains(&self.offset)); + let n_to_copy = data.len(); - debug_assert!(n_to_copy < self.data.0.len()); + debug_assert!(n_to_copy < self.data.len()); self.data.0[..n_to_copy].copy_from_slice(data); self.offset = data.len(); From ebebd1b0eb7a694254c4a97f1b07bc8d19d590f1 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 16:45:43 -0400 Subject: [PATCH 014/166] cleaning --- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 1235ea1c6..8479acb0d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ use core::mem; +// Keeping these constants in this form to match the C code. const PRIME64_1: u64 = 0x9E3779B185EBCA87; const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; const PRIME64_3: u64 = 0x165667B19E3779F9; @@ -153,12 +154,14 @@ pub struct XxHash64 { } impl XxHash64 { + #[must_use] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { let mut this = Self::with_seed(seed); this.write(data); this.finish() } + #[must_use] pub const fn with_seed(seed: u64) -> Self { // Step 1. 
Initialize internal accumulators From bd9f19246fe1129a02674f51388ebf1bb0fbd524 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 16:49:06 -0400 Subject: [PATCH 015/166] Proptest oneshot methods --- compare/src/lib.rs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 02947ab45..be6a7c411 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -4,17 +4,35 @@ use proptest::{num, prelude::*, test_runner::TestCaseResult}; proptest! { #[test] - fn it_works(seed: u64, data: Vec) { - it_works_impl(seed, &data)?; + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; } #[test] - fn it_works_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - it_works_impl(seed, &data[offset..])?; + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; } + + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } + + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } +} + +fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = xx_hash_sys::Stream::oneshot(seed, data); + let rust = xx_renu::XxHash64::oneshot(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) } -fn it_works_impl(seed: u64, data: &[u8]) -> TestCaseResult { +fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = { let mut hasher = xx_hash_sys::Stream::with_seed(seed); hasher.write(data); From addb9acfd242dd6800e3d7bbaec686db543d8a4d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 21:08:26 -0400 Subject: [PATCH 016/166] moar tests --- src/lib.rs | 87 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8479acb0d..c21b48e57 
100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,10 @@ #![no_std] #![deny(rust_2018_idioms)] -use core::mem; +#[cfg(test)] +extern crate std; + +use core::{fmt, mem, hash::Hasher}; // Keeping these constants in this form to match the C code. const PRIME64_1: u64 = 0x9E3779B185EBCA87; @@ -30,6 +33,13 @@ impl AlignedData { } } +impl fmt::Debug for AlignedData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.0.iter()).finish() + } +} + +#[derive(Debug)] struct Buffer { offset: usize, data: AlignedData, @@ -72,6 +82,7 @@ impl Buffer { debug_assert!(self.offset <= self.data.len()); if self.offset == self.data.len() { + self.offset = 0; (Some(self.data.as_u64s()), rest) } else { (None, rest) @@ -79,7 +90,11 @@ impl Buffer { } fn set(&mut self, data: &[u8]) { - debug_assert!([0, self.data.len()].contains(&self.offset)); + if data.is_empty() { + return; + } + + debug_assert_eq!(self.offset, 0); let n_to_copy = data.len(); @@ -146,6 +161,19 @@ impl Accumulators { } } +impl fmt::Debug for Accumulators { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let [acc1, acc2, acc3, acc4] = self.0; + f.debug_struct("Accumulators") + .field("acc1", &acc1) + .field("acc2", &acc2) + .field("acc3", &acc3) + .field("acc4", &acc4) + .finish() + } +} + +#[derive(Debug)] pub struct XxHash64 { seed: u64, accumulators: Accumulators, @@ -172,8 +200,10 @@ impl XxHash64 { length: 0, } } +} - pub fn write(&mut self, data: &[u8]) { +impl Hasher for XxHash64 { + fn write(&mut self, data: &[u8]) { let len = data.len(); // Step 2. Process stripes @@ -198,7 +228,8 @@ impl XxHash64 { self.length += len.into_u64(); } - pub fn finish(&mut self) -> u64 { + #[must_use] + fn finish(&self) -> u64 { // Step 3. 
Accumulator convergence let mut acc = if self.length < 32 { self.seed.wrapping_add(PRIME64_5) @@ -284,8 +315,27 @@ impl IntoU64 for usize { #[cfg(test)] mod test { + use core::array; + use super::*; + #[test] + fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { + let bytes = [0x9c; 32]; + + let mut byte_by_byte = XxHash64::with_seed(0); + for byte in bytes.chunks(1) { + byte_by_byte.write(byte); + } + let byte_by_byte = byte_by_byte.finish(); + + let mut one_chunk = XxHash64::with_seed(0); + one_chunk.write(&bytes); + let one_chunk = one_chunk.finish(); + + assert_eq!(byte_by_byte, one_chunk); + } + #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = XxHash64::with_seed(0); @@ -301,9 +351,32 @@ mod test { } #[test] - fn hash_of_exactly_32_bytes() { + fn hash_of_multiple_bytes_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); + } + + #[test] + fn hash_of_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); let mut hasher = XxHash64::with_seed(0); - hasher.write(&[0; 32]); - assert_eq!(hasher.finish(), 0xf6e9_be5d_7063_2cf5); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); + } + + #[test] + fn hash_with_different_seed_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); + } + + #[test] + fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } } From 537f5f81d296ca70a6a2ded7d6da10524ad161fd Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 15:06:07 -0400 Subject: [PATCH 017/166] Simplify oneshot --- 
compare/src/lib.rs | 23 +++++++++++ src/lib.rs | 98 ++++++++++++++++++++++++++++------------------ 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index be6a7c411..3f5654e91 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -1,8 +1,19 @@ #![cfg(test)] use proptest::{num, prelude::*, test_runner::TestCaseResult}; +use std::hash::Hasher; proptest! { + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + #[test] fn oneshot(seed: u64, data: Vec) { oneshot_impl(seed, &data)?; @@ -24,6 +35,18 @@ proptest! { } } +fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = xx_renu::XxHash64::oneshot(seed, data); + let one_chunk = { + let mut hasher = xx_renu::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) +} + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = xx_hash_sys::Stream::oneshot(seed, data); let rust = xx_renu::XxHash64::oneshot(seed, data); diff --git a/src/lib.rs b/src/lib.rs index c21b48e57..2fbda2761 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ #[cfg(test)] extern crate std; -use core::{fmt, mem, hash::Hasher}; +use core::{fmt, hash::Hasher, mem}; // Keeping these constants in this form to match the C code. const PRIME64_1: u64 = 0x9E3779B185EBCA87; @@ -132,6 +132,17 @@ impl Accumulators { *acc4 = round(*acc4, lane4); } + fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { + while let Some((chunk, rest)) = data.split_first_chunk::<32>() { + // SAFETY: We have the right number of bytes and are + // handling the unaligned case. 
+ let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + self.write(lanes); + data = rest; + } + data + } + const fn finish(&self) -> u64 { let [acc1, acc2, acc3, acc4] = self.0; @@ -181,12 +192,22 @@ pub struct XxHash64 { length: u64, } +impl Default for XxHash64 { + fn default() -> Self { + Self::with_seed(0) + } +} + impl XxHash64 { #[must_use] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { - let mut this = Self::with_seed(seed); - this.write(data); - this.finish() + let len = data.len(); + + let mut accumulators = Accumulators::new(seed); + + let data = accumulators.write_many(data); + + Self::finish_with(seed, len.into_u64(), &accumulators, data) } #[must_use] @@ -200,49 +221,19 @@ impl XxHash64 { length: 0, } } -} -impl Hasher for XxHash64 { - fn write(&mut self, data: &[u8]) { - let len = data.len(); - - // Step 2. Process stripes - let (buffered_lanes, data) = self.buffer.extend(data); - - if let Some(&lanes) = buffered_lanes { - self.accumulators.write(lanes); - } - - let mut data = data; - while let Some((chunk, rest)) = data.split_first_chunk::<32>() { - // SAFETY: We have the right number of bytes and are - // handling the unaligned case. - let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; - self.accumulators.write(lanes); - data = rest; - } - let data = data; - - self.buffer.set(data); - - self.length += len.into_u64(); - } - - #[must_use] - fn finish(&self) -> u64 { + fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. Accumulator convergence - let mut acc = if self.length < 32 { - self.seed.wrapping_add(PRIME64_5) + let mut acc = if len < 32 { + seed.wrapping_add(PRIME64_5) } else { - self.accumulators.finish() + accumulators.finish() }; // Step 4. Add input length - acc += self.length; + acc += len; // Step 5. 
Consume remaining input - let mut remaining = self.buffer.remaining(); - while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { let lane = u64::from_ne_bytes(*chunk); // todo: little-endian @@ -284,6 +275,35 @@ impl Hasher for XxHash64 { } } +impl Hasher for XxHash64 { + fn write(&mut self, data: &[u8]) { + let len = data.len(); + + // Step 2. Process stripes + let (buffered_lanes, data) = self.buffer.extend(data); + + if let Some(&lanes) = buffered_lanes { + self.accumulators.write(lanes); + } + + let data = self.accumulators.write_many(data); + + self.buffer.set(data); + + self.length += len.into_u64(); + } + + #[must_use] + fn finish(&self) -> u64 { + Self::finish_with( + self.seed, + self.length, + &self.accumulators, + self.buffer.remaining(), + ) + } +} + const fn round(mut acc: u64, lane: u64) -> u64 { acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); acc = acc.rotate_left(31); From 8012bef79f47fed1bb748d9b0b9036f44c2721ea Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 16:05:17 -0400 Subject: [PATCH 018/166] inline it --- compare/benches/benchmark.rs | 76 ++++++++++++++++++++++++++++++------ src/lib.rs | 9 ++++- 2 files changed, 73 insertions(+), 12 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index ae0ea4964..6cb801573 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -6,8 +6,63 @@ use twox_hash::XxHash64 as Old; use xx_hash_sys::Stream; use xx_renu::XxHash64; +const TINY_DATA_SIZE: usize = 32; +const BIG_DATA_SIZE: usize = 100 * 1024 * 1024; + +fn tiny_data(c: &mut Criterion) { + let (seed, data) = gen_data(TINY_DATA_SIZE); + let mut g = c.benchmark_group("tiny_data"); + + for size in 0..=data.len() { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("xxHash/oneshot/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = Stream::oneshot(seed, data); + black_box(hash); + }) + }); + + let 
id = format!("xxHash/streaming/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = { + let mut hasher = Stream::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + black_box(hash); + }) + }); + + let id = format!("renu/oneshot/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = XxHash64::oneshot(seed, data); + black_box(hash); + }) + }); + + let id = format!("renu/streaming/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = { + let mut hasher = XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + black_box(hash); + }) + }); + } + + g.finish(); +} + fn oneshot(c: &mut Criterion) { - let (seed, data) = gen_data(); + let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("oneshot"); for size in half_sizes(&data).take(10) { @@ -17,7 +72,7 @@ fn oneshot(c: &mut Criterion) { let id = format!("xxHash/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = Stream::oneshot(seed, &data); + let hash = Stream::oneshot(seed, data); black_box(hash); }) }); @@ -25,7 +80,7 @@ fn oneshot(c: &mut Criterion) { let id = format!("renu/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = XxHash64::oneshot(seed, &data); + let hash = XxHash64::oneshot(seed, data); black_box(hash); }) }); @@ -35,7 +90,7 @@ fn oneshot(c: &mut Criterion) { } fn streaming_one_chunk(c: &mut Criterion) { - let (seed, data) = gen_data(); + let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("streaming_one_chunk"); for size in half_sizes(&data).take(10) { @@ -46,7 +101,7 @@ fn streaming_one_chunk(c: &mut Criterion) { g.bench_function(id, |b| { b.iter(|| { let mut hasher = Stream::with_seed(seed); - hasher.write(&data); + hasher.write(data); let hash = hasher.finish(); black_box(hash); }) @@ -56,7 +111,7 @@ fn streaming_one_chunk(c: &mut Criterion) { g.bench_function(id, |b| { b.iter(|| { let mut hasher = XxHash64::with_seed(seed); - hasher.write(&data); + hasher.write(data); let hash 
= hasher.finish(); black_box(hash); }) @@ -66,7 +121,7 @@ fn streaming_one_chunk(c: &mut Criterion) { g.bench_function(id, |b| { b.iter(|| { let mut hasher = Old::with_seed(seed); - hasher.write(&data); + hasher.write(data); let hash = hasher.finish(); black_box(hash); }) @@ -77,14 +132,13 @@ fn streaming_one_chunk(c: &mut Criterion) { } const SEED: u64 = 0xc651_4843_1995_363f; -const DATA_SIZE: usize = 100 * 1024 * 1024; -fn gen_data() -> (u64, Vec) { +fn gen_data(length: usize) -> (u64, Vec) { let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); let seed = rng.gen(); - let mut data = vec![0; DATA_SIZE]; + let mut data = vec![0; length]; rng.fill_bytes(&mut data); (seed, data) @@ -103,5 +157,5 @@ fn half_sizes(data: &[u8]) -> impl Iterator { ) } -criterion_group!(benches, oneshot, streaming_one_chunk); +criterion_group!(benches, tiny_data, oneshot, streaming_one_chunk); criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index 2fbda2761..9386c68e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -199,10 +199,16 @@ impl Default for XxHash64 { } impl XxHash64 { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. #[must_use] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { let len = data.len(); + // Notably, since we know that there's no more data coming, we + // don't need to construct the intermediate buffers or copy + // data to / from them. + let mut accumulators = Accumulators::new(seed); let data = accumulators.write_many(data); @@ -213,7 +219,6 @@ impl XxHash64 { #[must_use] pub const fn with_seed(seed: u64) -> Self { // Step 1. Initialize internal accumulators - Self { seed, accumulators: Accumulators::new(seed), @@ -222,6 +227,8 @@ impl XxHash64 { } } + #[must_use] + #[inline(always)] fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. 
Accumulator convergence let mut acc = if len < 32 { From 8233b3669bf6c49ada4697d16ec5901e94f3ce37 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 16:07:12 -0400 Subject: [PATCH 019/166] add little endian --- src/lib.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9386c68e0..c89c430bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -124,12 +124,11 @@ impl Accumulators { fn write(&mut self, lanes: [u64; 4]) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; - // todo: little-endian transform - *acc1 = round(*acc1, lane1); - *acc2 = round(*acc2, lane2); - *acc3 = round(*acc3, lane3); - *acc4 = round(*acc4, lane4); + *acc1 = round(*acc1, lane1.to_le()); + *acc2 = round(*acc2, lane2.to_le()); + *acc3 = round(*acc3, lane3.to_le()); + *acc4 = round(*acc4, lane4.to_le()); } fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { @@ -242,8 +241,7 @@ impl XxHash64 { // Step 5. 
Consume remaining input while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { - let lane = u64::from_ne_bytes(*chunk); - // todo: little-endian + let lane = u64::from_ne_bytes(*chunk).to_le(); acc ^= round(0, lane); acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); @@ -252,8 +250,7 @@ impl XxHash64 { } while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { - let lane = u32::from_ne_bytes(*chunk).into_u64(); - // todo: little-endian + let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); acc ^= lane.wrapping_mul(PRIME64_1); acc = acc.rotate_left(23).wrapping_mul(PRIME64_2); From 542a9cf06f734eb42ac13dbc7c0c1ae75f9aff2d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 16:11:00 -0400 Subject: [PATCH 020/166] std and serialize impls --- Cargo.toml | 14 ++++ src/lib.rs | 183 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 195 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 09a2d4f94..818b3be61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,3 +9,17 @@ members = [ "renu-sum", "xx_hash-sys", ] + +[features] +default = ["std"] + +std = ["dep:rand"] + +serialize = ["dep:serde"] + +[dependencies] +rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } +serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } + +[dev-dependencies] +serde_json = "1.0.117" diff --git a/src/lib.rs b/src/lib.rs index c89c430bf..8a9ee551d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ const PRIME64_3: u64 = 0x165667B19E3779F9; const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; +#[derive(PartialEq)] #[repr(align(32))] struct AlignedData([u8; Self::LEN]); @@ -39,7 +40,7 @@ impl fmt::Debug for AlignedData { } } -#[derive(Debug)] +#[derive(Debug, PartialEq)] struct Buffer { offset: usize, data: AlignedData, @@ -109,6 +110,7 @@ impl Buffer { } } +#[derive(PartialEq)] 
struct Accumulators([u64; 4]); impl Accumulators { @@ -183,7 +185,7 @@ impl fmt::Debug for Accumulators { } } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct XxHash64 { seed: u64, accumulators: Accumulators, @@ -404,3 +406,180 @@ mod test { assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } } + +#[cfg(feature = "std")] +mod std_impl { + use core::hash::BuildHasher; + + use super::*; + + pub struct RandomXxHashBuilder64(u64); + + impl Default for RandomXxHashBuilder64 { + fn default() -> Self { + Self::new() + } + } + + impl RandomXxHashBuilder64 { + fn new() -> Self { + Self(rand::random()) + } + } + + impl BuildHasher for RandomXxHashBuilder64 { + type Hasher = XxHash64; + + fn build_hasher(&self) -> Self::Hasher { + XxHash64::with_seed(self.0) + } + } + + #[cfg(test)] + mod test { + use core::hash::BuildHasherDefault; + use std::collections::HashMap; + + use super::*; + + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_random_seed() { + let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + } +} + +#[cfg(feature = "serialize")] +mod serialize_impl { + use serde::{Deserialize, Serialize}; + + use super::*; + + impl<'de> Deserialize<'de> for XxHash64 { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let shim = Deserialize::deserialize(deserializer)?; + + let Shim { + total_len, + seed, + core, + buffer, + buffer_usage, + } = shim; + let Core { v1, v2, v3, v4 } = core; + + Ok(XxHash64 { + seed, + accumulators: Accumulators([v1, v2, v3, v4]), + buffer: Buffer { + offset: buffer_usage, + data: AlignedData(buffer), + }, + length: total_len, + }) + } + } + + impl Serialize for XxHash64 { + fn 
serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let XxHash64 { + seed, + ref accumulators, + ref buffer, + length, + } = *self; + let [v1, v2, v3, v4] = accumulators.0; + let Buffer { offset, ref data } = *buffer; + + let shim = Shim { + total_len: length, + seed, + core: Core { v1, v2, v3, v4 }, + buffer: data.0, + buffer_usage: offset, + }; + + shim.serialize(serializer) + } + } + + #[derive(Serialize, Deserialize)] + struct Shim { + total_len: u64, + seed: u64, + core: Core, + buffer: [u8; 32], + buffer_usage: usize, + } + + #[derive(Serialize, Deserialize)] + struct Core { + v1: u64, + v2: u64, + v3: u64, + v4: u64, + } + + #[cfg(test)] + mod test { + use super::*; + + type Result = core::result::Result; + + #[test] + fn test_serialization_cycle() -> Result { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = serde_json::to_string(&hasher)?; + let unserialized: XxHash64 = serde_json::from_str(&serialized)?; + assert_eq!(hasher, unserialized); + Ok(()) + } + + #[test] + fn test_serialization_stability() -> Result { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = r#"{ + "total_len": 14, + "seed": 0, + "core": { + "v1": 6983438078262162902, + "v2": 14029467366897019727, + "v3": 0, + "v4": 7046029288634856825 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; + + let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); + assert_eq!(hasher, unserialized); + Ok(()) + } + } +} From 678f579feac3cbea8c0d0bd726b7ec9d1499679a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 16:49:08 -0400 Subject: [PATCH 021/166] moar --- src/lib.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 
8a9ee551d..a0b6cd48f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; #[derive(PartialEq)] -#[repr(align(32))] +#[repr(align(8))] struct AlignedData([u8; Self::LEN]); impl AlignedData { @@ -203,12 +203,13 @@ impl XxHash64 { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] + #[inline] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { let len = data.len(); - // Notably, since we know that there's no more data coming, we - // don't need to construct the intermediate buffers or copy - // data to / from them. + // Since we know that there's no more data coming, we don't + // need to construct the intermediate buffers or copy data to + // or from the buffers. let mut accumulators = Accumulators::new(seed); @@ -458,6 +459,9 @@ mod std_impl { } } +#[cfg(feature = "std")] +pub use std_impl::RandomXxHashBuilder64; + #[cfg(feature = "serialize")] mod serialize_impl { use serde::{Deserialize, Serialize}; From 006bd68c0ff5c4ef2543c4eac748a7cc7ed7f529 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 21:11:55 -0400 Subject: [PATCH 022/166] tweaks --- compare/src/lib.rs | 26 +++++++++++++++++++++++ src/lib.rs | 53 ++++++++++++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 3f5654e91..c717eff7b 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -14,6 +14,11 @@ proptest! 
{ oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; } + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + #[test] fn oneshot(seed: u64, data: Vec) { oneshot_impl(seed, &data)?; @@ -47,6 +52,20 @@ fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { Ok(()) } +fn oneshot_same_as_many_chunks_impl(seed: u64, data: &[u8], chunks: &[Vec]) -> TestCaseResult { + let oneshot = xx_renu::XxHash64::oneshot(seed, data); + let many_chunks = { + let mut hasher = xx_renu::XxHash64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) +} + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = xx_hash_sys::Stream::oneshot(seed, data); let rust = xx_renu::XxHash64::oneshot(seed, data); @@ -78,3 +97,10 @@ fn vec_and_index() -> impl Strategy, usize)> { (Just(vec), 0..len) }) } + +fn data_and_chunks() -> impl Strategy, Vec>)> { + prop::collection::vec(prop::collection::vec(num::u8::ANY, 90..=100), 90..=100).prop_map(|vs| { + let data = vs.iter().flatten().copied().collect(); + (data, vs) + }) +} diff --git a/src/lib.rs b/src/lib.rs index a0b6cd48f..d0eb2ec73 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,27 +14,27 @@ const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; #[derive(PartialEq)] -#[repr(align(8))] -struct AlignedData([u8; Self::LEN]); - -impl AlignedData { - const LEN: usize = 32; +struct BufferData([u64; 4]); +impl BufferData { const fn new() -> Self { - Self([0; Self::LEN]) + Self([0; 4]) } - const fn len(&self) -> usize { - Self::LEN + fn bytes(&self) -> &[u8; 32] { + const { assert!(mem::align_of::() <= mem::align_of::()) } + // SAFETY[bytes]: The alignment of `u64` is at least that of + // `u8` and all the values are initialized. 
+ unsafe { &*self.0.as_ptr().cast() } } - const fn as_u64s(&self) -> &[u64; 4] { - // SAFETY: We are guaranteed to be aligned - unsafe { mem::transmute(&self.0) } + fn bytes_mut(&mut self) -> &mut [u8; 32] { + // SAFETY: See SAFETY[bytes] + unsafe { &mut *self.0.as_mut_ptr().cast() } } } -impl fmt::Debug for AlignedData { +impl fmt::Debug for BufferData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.0.iter()).finish() } @@ -43,14 +43,14 @@ impl fmt::Debug for AlignedData { #[derive(Debug, PartialEq)] struct Buffer { offset: usize, - data: AlignedData, + data: BufferData, } impl Buffer { const fn new() -> Self { Self { offset: 0, - data: AlignedData::new(), + data: BufferData::new(), } } @@ -64,13 +64,14 @@ impl Buffer { // have much benefit other than reducing code size by a tiny // bit. - debug_assert!(self.offset <= self.data.len()); - if self.offset == 0 { return (None, data); }; - let empty = &mut self.data.0[self.offset..]; + let bytes = self.data.bytes_mut(); + debug_assert!(self.offset <= bytes.len()); + + let empty = &mut bytes[self.offset..]; let n_to_copy = usize::min(empty.len(), data.len()); let dst = &mut empty[..n_to_copy]; @@ -80,11 +81,11 @@ impl Buffer { dst.copy_from_slice(src); self.offset += n_to_copy; - debug_assert!(self.offset <= self.data.len()); + debug_assert!(self.offset <= bytes.len()); - if self.offset == self.data.len() { + if self.offset == bytes.len() { self.offset = 0; - (Some(self.data.as_u64s()), rest) + (Some(&self.data.0), rest) } else { (None, rest) } @@ -99,14 +100,15 @@ impl Buffer { let n_to_copy = data.len(); - debug_assert!(n_to_copy < self.data.len()); + let bytes = self.data.bytes_mut(); + debug_assert!(n_to_copy < bytes.len()); - self.data.0[..n_to_copy].copy_from_slice(data); + bytes[..n_to_copy].copy_from_slice(data); self.offset = data.len(); } fn remaining(&self) -> &[u8] { - &self.data.0[..self.offset] + &self.data.bytes()[..self.offset] } } @@ -203,6 +205,10 @@ impl 
XxHash64 { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] + // RATIONALE[inline]: In one case [1], this `inline` helps unlock a + // speedup from ~900µs to ~200µs. + // + // [1]: https://github.com/apache/datafusion-comet/pull/575 #[inline] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { let len = data.len(); @@ -230,6 +236,7 @@ impl XxHash64 { } #[must_use] + // RATIONALE: See RATIONALE[inline] #[inline(always)] fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. Accumulator convergence From efd13bd29d1dd0e42fc05a0b522fcb2ba24b6358 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 21:13:08 -0400 Subject: [PATCH 023/166] simpelr --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index d0eb2ec73..db413b9bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,7 @@ impl BufferData { } fn bytes(&self) -> &[u8; 32] { - const { assert!(mem::align_of::() <= mem::align_of::()) } + const _: () = assert!(mem::align_of::() <= mem::align_of::()); // SAFETY[bytes]: The alignment of `u64` is at least that of // `u8` and all the values are initialized. 
unsafe { &*self.0.as_ptr().cast() } From 3f722df12e64d06521a147b0f488ba9a52b5b88e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 14:25:08 -0400 Subject: [PATCH 024/166] to-test --- README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..1eca71ca2 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +cargo test # unit tests +cargo test -p comparison # proptests +cargo miri test # unsafe +cargo miri test --target s390x-unknown-linux-gnu # big-endian From ae7b3884443c584dc0a84806f773fea09c0c004e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 14:30:52 -0400 Subject: [PATCH 025/166] moar --- README.md | 6 ++++++ compare/src/lib.rs | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1eca71ca2..f2f88dd86 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,9 @@ cargo test # unit tests cargo test -p comparison # proptests cargo miri test # unsafe cargo miri test --target s390x-unknown-linux-gnu # big-endian + +minimal versions +no-features +all-features + +features for 32 / 64 / xx3 diff --git a/compare/src/lib.rs b/compare/src/lib.rs index c717eff7b..6e1a4e8be 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -99,7 +99,7 @@ fn vec_and_index() -> impl Strategy, usize)> { } fn data_and_chunks() -> impl Strategy, Vec>)> { - prop::collection::vec(prop::collection::vec(num::u8::ANY, 90..=100), 90..=100).prop_map(|vs| { + prop::collection::vec(prop::collection::vec(num::u8::ANY, 0..100), 0..100).prop_map(|vs| { let data = vs.iter().flatten().copied().collect(); (data, vs) }) From 5d455ff31a0ff398923d7cc40c0cf317ef3c6db9 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 18:38:39 -0400 Subject: [PATCH 026/166] move to new file --- Cargo.toml | 4 +- src/lib.rs | 593 +---------------------------------------------- src/xxhash64.rs | 594 ++++++++++++++++++++++++++++++++++++++++++++++++ 
3 files changed, 601 insertions(+), 590 deletions(-) create mode 100644 src/xxhash64.rs diff --git a/Cargo.toml b/Cargo.toml index 818b3be61..4e91fc7cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,12 +11,14 @@ members = [ ] [features] -default = ["std"] +default = ["std", "xxhash64"] std = ["dep:rand"] serialize = ["dep:serde"] +xxhash64 = [] + [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } diff --git a/src/lib.rs b/src/lib.rs index db413b9bd..fde3a8e96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,593 +4,8 @@ #[cfg(test)] extern crate std; -use core::{fmt, hash::Hasher, mem}; +#[cfg(feature = "xxhash64")] +mod xxhash64; -// Keeping these constants in this form to match the C code. -const PRIME64_1: u64 = 0x9E3779B185EBCA87; -const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; -const PRIME64_3: u64 = 0x165667B19E3779F9; -const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; -const PRIME64_5: u64 = 0x27D4EB2F165667C5; - -#[derive(PartialEq)] -struct BufferData([u64; 4]); - -impl BufferData { - const fn new() -> Self { - Self([0; 4]) - } - - fn bytes(&self) -> &[u8; 32] { - const _: () = assert!(mem::align_of::() <= mem::align_of::()); - // SAFETY[bytes]: The alignment of `u64` is at least that of - // `u8` and all the values are initialized. 
- unsafe { &*self.0.as_ptr().cast() } - } - - fn bytes_mut(&mut self) -> &mut [u8; 32] { - // SAFETY: See SAFETY[bytes] - unsafe { &mut *self.0.as_mut_ptr().cast() } - } -} - -impl fmt::Debug for BufferData { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.0.iter()).finish() - } -} - -#[derive(Debug, PartialEq)] -struct Buffer { - offset: usize, - data: BufferData, -} - -impl Buffer { - const fn new() -> Self { - Self { - offset: 0, - data: BufferData::new(), - } - } - - fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { - // Most of the slice methods we use here have `_unchecked` variants, but - // - // 1. this method is called one time per `XxHash64::write` call - // 2. this method early exits if we don't have anything in the buffer - // - // Because of this, removing the panics via `unsafe` doesn't - // have much benefit other than reducing code size by a tiny - // bit. - - if self.offset == 0 { - return (None, data); - }; - - let bytes = self.data.bytes_mut(); - debug_assert!(self.offset <= bytes.len()); - - let empty = &mut bytes[self.offset..]; - let n_to_copy = usize::min(empty.len(), data.len()); - - let dst = &mut empty[..n_to_copy]; - - let (src, rest) = data.split_at(n_to_copy); - - dst.copy_from_slice(src); - self.offset += n_to_copy; - - debug_assert!(self.offset <= bytes.len()); - - if self.offset == bytes.len() { - self.offset = 0; - (Some(&self.data.0), rest) - } else { - (None, rest) - } - } - - fn set(&mut self, data: &[u8]) { - if data.is_empty() { - return; - } - - debug_assert_eq!(self.offset, 0); - - let n_to_copy = data.len(); - - let bytes = self.data.bytes_mut(); - debug_assert!(n_to_copy < bytes.len()); - - bytes[..n_to_copy].copy_from_slice(data); - self.offset = data.len(); - } - - fn remaining(&self) -> &[u8] { - &self.data.bytes()[..self.offset] - } -} - -#[derive(PartialEq)] -struct Accumulators([u64; 4]); - -impl Accumulators { - const fn new(seed: u64) -> Self { - 
Self([ - seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), - seed.wrapping_add(PRIME64_2), - seed, - seed.wrapping_sub(PRIME64_1), - ]) - } - - fn write(&mut self, lanes: [u64; 4]) { - let [acc1, acc2, acc3, acc4] = &mut self.0; - let [lane1, lane2, lane3, lane4] = lanes; - - *acc1 = round(*acc1, lane1.to_le()); - *acc2 = round(*acc2, lane2.to_le()); - *acc3 = round(*acc3, lane3.to_le()); - *acc4 = round(*acc4, lane4.to_le()); - } - - fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { - while let Some((chunk, rest)) = data.split_first_chunk::<32>() { - // SAFETY: We have the right number of bytes and are - // handling the unaligned case. - let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; - self.write(lanes); - data = rest; - } - data - } - - const fn finish(&self) -> u64 { - let [acc1, acc2, acc3, acc4] = self.0; - - let mut acc = { - let acc1 = acc1.rotate_left(1); - let acc2 = acc2.rotate_left(7); - let acc3 = acc3.rotate_left(12); - let acc4 = acc4.rotate_left(18); - - acc1.wrapping_add(acc2) - .wrapping_add(acc3) - .wrapping_add(acc4) - }; - - acc = Self::merge_accumulator(acc, acc1); - acc = Self::merge_accumulator(acc, acc2); - acc = Self::merge_accumulator(acc, acc3); - acc = Self::merge_accumulator(acc, acc4); - - acc - } - - const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { - acc ^= round(0, acc_n); - acc = acc.wrapping_mul(PRIME64_1); - acc.wrapping_add(PRIME64_4) - } -} - -impl fmt::Debug for Accumulators { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let [acc1, acc2, acc3, acc4] = self.0; - f.debug_struct("Accumulators") - .field("acc1", &acc1) - .field("acc2", &acc2) - .field("acc3", &acc3) - .field("acc4", &acc4) - .finish() - } -} - -#[derive(Debug, PartialEq)] -pub struct XxHash64 { - seed: u64, - accumulators: Accumulators, - buffer: Buffer, - length: u64, -} - -impl Default for XxHash64 { - fn default() -> Self { - Self::with_seed(0) - } -} - -impl XxHash64 { - /// Hash all 
data at once. If you can use this function, you may - /// see noticable speed gains for certain types of input. - #[must_use] - // RATIONALE[inline]: In one case [1], this `inline` helps unlock a - // speedup from ~900µs to ~200µs. - // - // [1]: https://github.com/apache/datafusion-comet/pull/575 - #[inline] - pub fn oneshot(seed: u64, data: &[u8]) -> u64 { - let len = data.len(); - - // Since we know that there's no more data coming, we don't - // need to construct the intermediate buffers or copy data to - // or from the buffers. - - let mut accumulators = Accumulators::new(seed); - - let data = accumulators.write_many(data); - - Self::finish_with(seed, len.into_u64(), &accumulators, data) - } - - #[must_use] - pub const fn with_seed(seed: u64) -> Self { - // Step 1. Initialize internal accumulators - Self { - seed, - accumulators: Accumulators::new(seed), - buffer: Buffer::new(), - length: 0, - } - } - - #[must_use] - // RATIONALE: See RATIONALE[inline] - #[inline(always)] - fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { - // Step 3. Accumulator convergence - let mut acc = if len < 32 { - seed.wrapping_add(PRIME64_5) - } else { - accumulators.finish() - }; - - // Step 4. Add input length - acc += len; - - // Step 5. 
Consume remaining input - while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { - let lane = u64::from_ne_bytes(*chunk).to_le(); - - acc ^= round(0, lane); - acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); - acc = acc.wrapping_add(PRIME64_4); - remaining = rest; - } - - while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { - let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); - - acc ^= lane.wrapping_mul(PRIME64_1); - acc = acc.rotate_left(23).wrapping_mul(PRIME64_2); - acc = acc.wrapping_add(PRIME64_3); - - remaining = rest; - } - - while let Some((chunk, rest)) = remaining.split_first_chunk::<1>() { - let lane = chunk[0].into_u64(); - - acc ^= lane.wrapping_mul(PRIME64_5); - acc = acc.rotate_left(11).wrapping_mul(PRIME64_1); - - remaining = rest; - } - - // Step 6. Final mix (avalanche) - acc ^= acc >> 33; - acc = acc.wrapping_mul(PRIME64_2); - acc ^= acc >> 29; - acc = acc.wrapping_mul(PRIME64_3); - acc ^= acc >> 32; - - acc - } -} - -impl Hasher for XxHash64 { - fn write(&mut self, data: &[u8]) { - let len = data.len(); - - // Step 2. 
Process stripes - let (buffered_lanes, data) = self.buffer.extend(data); - - if let Some(&lanes) = buffered_lanes { - self.accumulators.write(lanes); - } - - let data = self.accumulators.write_many(data); - - self.buffer.set(data); - - self.length += len.into_u64(); - } - - #[must_use] - fn finish(&self) -> u64 { - Self::finish_with( - self.seed, - self.length, - &self.accumulators, - self.buffer.remaining(), - ) - } -} - -const fn round(mut acc: u64, lane: u64) -> u64 { - acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); - acc = acc.rotate_left(31); - acc.wrapping_mul(PRIME64_1) -} - -trait IntoU64 { - fn into_u64(self) -> u64; -} - -impl IntoU64 for u8 { - fn into_u64(self) -> u64 { - self.into() - } -} - -impl IntoU64 for u32 { - fn into_u64(self) -> u64 { - self.into() - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -impl IntoU64 for usize { - fn into_u64(self) -> u64 { - self as u64 - } -} - -#[cfg(test)] -mod test { - use core::array; - - use super::*; - - #[test] - fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { - let bytes = [0x9c; 32]; - - let mut byte_by_byte = XxHash64::with_seed(0); - for byte in bytes.chunks(1) { - byte_by_byte.write(byte); - } - let byte_by_byte = byte_by_byte.finish(); - - let mut one_chunk = XxHash64::with_seed(0); - one_chunk.write(&bytes); - let one_chunk = one_chunk.finish(); - - assert_eq!(byte_by_byte, one_chunk); - } - - #[test] - fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); - } - - #[test] - fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(&[42]); - assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); - } - - #[test] - fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - assert_eq!(hasher.finish(), 
0x7b06_c531_ea43_e89f); - } - - #[test] - fn hash_of_multiple_chunks_matches_c_implementation() { - let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash64::with_seed(0); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); - } - - #[test] - fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); - } - - #[test] - fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { - let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); - } -} - -#[cfg(feature = "std")] -mod std_impl { - use core::hash::BuildHasher; - - use super::*; - - pub struct RandomXxHashBuilder64(u64); - - impl Default for RandomXxHashBuilder64 { - fn default() -> Self { - Self::new() - } - } - - impl RandomXxHashBuilder64 { - fn new() -> Self { - Self(rand::random()) - } - } - - impl BuildHasher for RandomXxHashBuilder64 { - type Hasher = XxHash64; - - fn build_hasher(&self) -> Self::Hasher { - XxHash64::with_seed(self.0) - } - } - - #[cfg(test)] - mod test { - use core::hash::BuildHasherDefault; - use std::collections::HashMap; - - use super::*; - - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - } -} - -#[cfg(feature = "std")] -pub use std_impl::RandomXxHashBuilder64; - -#[cfg(feature = "serialize")] -mod serialize_impl { - use serde::{Deserialize, Serialize}; - - 
use super::*; - - impl<'de> Deserialize<'de> for XxHash64 { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let shim = Deserialize::deserialize(deserializer)?; - - let Shim { - total_len, - seed, - core, - buffer, - buffer_usage, - } = shim; - let Core { v1, v2, v3, v4 } = core; - - Ok(XxHash64 { - seed, - accumulators: Accumulators([v1, v2, v3, v4]), - buffer: Buffer { - offset: buffer_usage, - data: AlignedData(buffer), - }, - length: total_len, - }) - } - } - - impl Serialize for XxHash64 { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let XxHash64 { - seed, - ref accumulators, - ref buffer, - length, - } = *self; - let [v1, v2, v3, v4] = accumulators.0; - let Buffer { offset, ref data } = *buffer; - - let shim = Shim { - total_len: length, - seed, - core: Core { v1, v2, v3, v4 }, - buffer: data.0, - buffer_usage: offset, - }; - - shim.serialize(serializer) - } - } - - #[derive(Serialize, Deserialize)] - struct Shim { - total_len: u64, - seed: u64, - core: Core, - buffer: [u8; 32], - buffer_usage: usize, - } - - #[derive(Serialize, Deserialize)] - struct Core { - v1: u64, - v2: u64, - v3: u64, - v4: u64, - } - - #[cfg(test)] - mod test { - use super::*; - - type Result = core::result::Result; - - #[test] - fn test_serialization_cycle() -> Result { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash64 = serde_json::from_str(&serialized)?; - assert_eq!(hasher, unserialized); - Ok(()) - } - - #[test] - fn test_serialization_stability() -> Result { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = r#"{ - "total_len": 14, - "seed": 0, - "core": { - "v1": 6983438078262162902, - "v2": 14029467366897019727, - "v3": 0, - "v4": 7046029288634856825 - }, - "buffer": [ - 72, 101, 108, 108, 111, 44, 32, 
119, - 111, 114, 108, 100, 33, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - ], - "buffer_usage": 14 - }"#; - - let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); - assert_eq!(hasher, unserialized); - Ok(()) - } - } -} +#[cfg(feature = "xxhash64")] +pub use xxhash64::*; diff --git a/src/xxhash64.rs b/src/xxhash64.rs new file mode 100644 index 000000000..fbeaefc2e --- /dev/null +++ b/src/xxhash64.rs @@ -0,0 +1,594 @@ +use core::{fmt, hash::Hasher, mem}; + +// Keeping these constants in this form to match the C code. +const PRIME64_1: u64 = 0x9E3779B185EBCA87; +const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; +const PRIME64_3: u64 = 0x165667B19E3779F9; +const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; +const PRIME64_5: u64 = 0x27D4EB2F165667C5; + +#[derive(PartialEq)] +struct BufferData([u64; 4]); + +impl BufferData { + const fn new() -> Self { + Self([0; 4]) + } + + fn bytes(&self) -> &[u8; 32] { + const _: () = assert!(mem::align_of::() <= mem::align_of::()); + // SAFETY[bytes]: The alignment of `u64` is at least that of + // `u8` and all the values are initialized. + unsafe { &*self.0.as_ptr().cast() } + } + + fn bytes_mut(&mut self) -> &mut [u8; 32] { + // SAFETY: See SAFETY[bytes] + unsafe { &mut *self.0.as_mut_ptr().cast() } + } +} + +impl fmt::Debug for BufferData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.0.iter()).finish() + } +} + +#[derive(Debug, PartialEq)] +struct Buffer { + offset: usize, + data: BufferData, +} + +impl Buffer { + const fn new() -> Self { + Self { + offset: 0, + data: BufferData::new(), + } + } + + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { + // Most of the slice methods we use here have `_unchecked` variants, but + // + // 1. this method is called one time per `XxHash64::write` call + // 2. 
this method early exits if we don't have anything in the buffer + // + // Because of this, removing the panics via `unsafe` doesn't + // have much benefit other than reducing code size by a tiny + // bit. + + if self.offset == 0 { + return (None, data); + }; + + let bytes = self.data.bytes_mut(); + debug_assert!(self.offset <= bytes.len()); + + let empty = &mut bytes[self.offset..]; + let n_to_copy = usize::min(empty.len(), data.len()); + + let dst = &mut empty[..n_to_copy]; + + let (src, rest) = data.split_at(n_to_copy); + + dst.copy_from_slice(src); + self.offset += n_to_copy; + + debug_assert!(self.offset <= bytes.len()); + + if self.offset == bytes.len() { + self.offset = 0; + (Some(&self.data.0), rest) + } else { + (None, rest) + } + } + + fn set(&mut self, data: &[u8]) { + if data.is_empty() { + return; + } + + debug_assert_eq!(self.offset, 0); + + let n_to_copy = data.len(); + + let bytes = self.data.bytes_mut(); + debug_assert!(n_to_copy < bytes.len()); + + bytes[..n_to_copy].copy_from_slice(data); + self.offset = data.len(); + } + + fn remaining(&self) -> &[u8] { + &self.data.bytes()[..self.offset] + } +} + +#[derive(PartialEq)] +struct Accumulators([u64; 4]); + +impl Accumulators { + const fn new(seed: u64) -> Self { + Self([ + seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), + seed.wrapping_add(PRIME64_2), + seed, + seed.wrapping_sub(PRIME64_1), + ]) + } + + fn write(&mut self, lanes: [u64; 4]) { + let [acc1, acc2, acc3, acc4] = &mut self.0; + let [lane1, lane2, lane3, lane4] = lanes; + + *acc1 = round(*acc1, lane1.to_le()); + *acc2 = round(*acc2, lane2.to_le()); + *acc3 = round(*acc3, lane3.to_le()); + *acc4 = round(*acc4, lane4.to_le()); + } + + fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { + while let Some((chunk, rest)) = data.split_first_chunk::<32>() { + // SAFETY: We have the right number of bytes and are + // handling the unaligned case. 
+ let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + self.write(lanes); + data = rest; + } + data + } + + const fn finish(&self) -> u64 { + let [acc1, acc2, acc3, acc4] = self.0; + + let mut acc = { + let acc1 = acc1.rotate_left(1); + let acc2 = acc2.rotate_left(7); + let acc3 = acc3.rotate_left(12); + let acc4 = acc4.rotate_left(18); + + acc1.wrapping_add(acc2) + .wrapping_add(acc3) + .wrapping_add(acc4) + }; + + acc = Self::merge_accumulator(acc, acc1); + acc = Self::merge_accumulator(acc, acc2); + acc = Self::merge_accumulator(acc, acc3); + acc = Self::merge_accumulator(acc, acc4); + + acc + } + + const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { + acc ^= round(0, acc_n); + acc = acc.wrapping_mul(PRIME64_1); + acc.wrapping_add(PRIME64_4) + } +} + +impl fmt::Debug for Accumulators { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let [acc1, acc2, acc3, acc4] = self.0; + f.debug_struct("Accumulators") + .field("acc1", &acc1) + .field("acc2", &acc2) + .field("acc3", &acc3) + .field("acc4", &acc4) + .finish() + } +} + +#[derive(Debug, PartialEq)] +pub struct XxHash64 { + seed: u64, + accumulators: Accumulators, + buffer: Buffer, + length: u64, +} + +impl Default for XxHash64 { + fn default() -> Self { + Self::with_seed(0) + } +} + +impl XxHash64 { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. + #[must_use] + // RATIONALE[inline]: In one case [1], this `inline` helps unlock a + // speedup from ~900µs to ~200µs. + // + // [1]: https://github.com/apache/datafusion-comet/pull/575 + #[inline] + pub fn oneshot(seed: u64, data: &[u8]) -> u64 { + let len = data.len(); + + // Since we know that there's no more data coming, we don't + // need to construct the intermediate buffers or copy data to + // or from the buffers. 
+ + let mut accumulators = Accumulators::new(seed); + + let data = accumulators.write_many(data); + + Self::finish_with(seed, len.into_u64(), &accumulators, data) + } + + #[must_use] + pub const fn with_seed(seed: u64) -> Self { + // Step 1. Initialize internal accumulators + Self { + seed, + accumulators: Accumulators::new(seed), + buffer: Buffer::new(), + length: 0, + } + } + + #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline(always)] + fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { + // Step 3. Accumulator convergence + let mut acc = if len < 32 { + seed.wrapping_add(PRIME64_5) + } else { + accumulators.finish() + }; + + // Step 4. Add input length + acc += len; + + // Step 5. Consume remaining input + while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { + let lane = u64::from_ne_bytes(*chunk).to_le(); + + acc ^= round(0, lane); + acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); + acc = acc.wrapping_add(PRIME64_4); + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { + let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); + + acc ^= lane.wrapping_mul(PRIME64_1); + acc = acc.rotate_left(23).wrapping_mul(PRIME64_2); + acc = acc.wrapping_add(PRIME64_3); + + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<1>() { + let lane = chunk[0].into_u64(); + + acc ^= lane.wrapping_mul(PRIME64_5); + acc = acc.rotate_left(11).wrapping_mul(PRIME64_1); + + remaining = rest; + } + + // Step 6. Final mix (avalanche) + acc ^= acc >> 33; + acc = acc.wrapping_mul(PRIME64_2); + acc ^= acc >> 29; + acc = acc.wrapping_mul(PRIME64_3); + acc ^= acc >> 32; + + acc + } +} + +impl Hasher for XxHash64 { + fn write(&mut self, data: &[u8]) { + let len = data.len(); + + // Step 2. 
Process stripes + let (buffered_lanes, data) = self.buffer.extend(data); + + if let Some(&lanes) = buffered_lanes { + self.accumulators.write(lanes); + } + + let data = self.accumulators.write_many(data); + + self.buffer.set(data); + + self.length += len.into_u64(); + } + + #[must_use] + fn finish(&self) -> u64 { + Self::finish_with( + self.seed, + self.length, + &self.accumulators, + self.buffer.remaining(), + ) + } +} + +const fn round(mut acc: u64, lane: u64) -> u64 { + acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); + acc = acc.rotate_left(31); + acc.wrapping_mul(PRIME64_1) +} + +trait IntoU64 { + fn into_u64(self) -> u64; +} + +impl IntoU64 for u8 { + fn into_u64(self) -> u64 { + self.into() + } +} + +impl IntoU64 for u32 { + fn into_u64(self) -> u64 { + self.into() + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl IntoU64 for usize { + fn into_u64(self) -> u64 { + self as u64 + } +} + +#[cfg(test)] +mod test { + use core::array; + + use super::*; + + #[test] + fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { + let bytes = [0x9c; 32]; + + let mut byte_by_byte = XxHash64::with_seed(0); + for byte in bytes.chunks(1) { + byte_by_byte.write(byte); + } + let byte_by_byte = byte_by_byte.finish(); + + let mut one_chunk = XxHash64::with_seed(0); + one_chunk.write(&bytes); + let one_chunk = one_chunk.finish(); + + assert_eq!(byte_by_byte, one_chunk); + } + + #[test] + fn hash_of_nothing_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); + } + + #[test] + fn hash_of_single_byte_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[42]); + assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); + } + + #[test] + fn hash_of_multiple_bytes_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + assert_eq!(hasher.finish(), 
0x7b06_c531_ea43_e89f); + } + + #[test] + fn hash_of_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash64::with_seed(0); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); + } + + #[test] + fn hash_with_different_seed_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); + } + + #[test] + fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); + } +} + +#[cfg(feature = "std")] +mod std_impl { + use core::hash::BuildHasher; + + use super::*; + + pub struct RandomXxHashBuilder64(u64); + + impl Default for RandomXxHashBuilder64 { + fn default() -> Self { + Self::new() + } + } + + impl RandomXxHashBuilder64 { + fn new() -> Self { + Self(rand::random()) + } + } + + impl BuildHasher for RandomXxHashBuilder64 { + type Hasher = XxHash64; + + fn build_hasher(&self) -> Self::Hasher { + XxHash64::with_seed(self.0) + } + } + + #[cfg(test)] + mod test { + use core::hash::BuildHasherDefault; + use std::collections::HashMap; + + use super::*; + + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_random_seed() { + let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + } +} + +#[cfg(feature = "std")] +pub use std_impl::RandomXxHashBuilder64; + +#[cfg(feature = "serialize")] +mod serialize_impl { + use serde::{Deserialize, Serialize}; + + 
use super::*; + + impl<'de> Deserialize<'de> for XxHash64 { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let shim = Deserialize::deserialize(deserializer)?; + + let Shim { + total_len, + seed, + core, + buffer, + buffer_usage, + } = shim; + let Core { v1, v2, v3, v4 } = core; + + let mut buffer_data = BufferData::new(); + buffer_data.bytes_mut().copy_from_slice(&buffer); + + Ok(XxHash64 { + seed, + accumulators: Accumulators([v1, v2, v3, v4]), + buffer: Buffer { + offset: buffer_usage, + data: buffer_data, + }, + length: total_len, + }) + } + } + + impl Serialize for XxHash64 { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let XxHash64 { + seed, + ref accumulators, + ref buffer, + length, + } = *self; + let [v1, v2, v3, v4] = accumulators.0; + let Buffer { offset, ref data } = *buffer; + let buffer = *data.bytes(); + + let shim = Shim { + total_len: length, + seed, + core: Core { v1, v2, v3, v4 }, + buffer, + buffer_usage: offset, + }; + + shim.serialize(serializer) + } + } + + #[derive(Serialize, Deserialize)] + struct Shim { + total_len: u64, + seed: u64, + core: Core, + buffer: [u8; 32], + buffer_usage: usize, + } + + #[derive(Serialize, Deserialize)] + struct Core { + v1: u64, + v2: u64, + v3: u64, + v4: u64, + } + + #[cfg(test)] + mod test { + use super::*; + + type Result = core::result::Result; + + #[test] + fn test_serialization_cycle() -> Result { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = serde_json::to_string(&hasher)?; + let unserialized: XxHash64 = serde_json::from_str(&serialized)?; + assert_eq!(hasher, unserialized); + Ok(()) + } + + #[test] + fn test_serialization_stability() -> Result { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = r#"{ + "total_len": 14, + "seed": 0, + "core": { + "v1": 6983438078262162902, + "v2": 
14029467366897019727, + "v3": 0, + "v4": 7046029288634856825 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; + + let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); + assert_eq!(hasher, unserialized); + Ok(()) + } + } +} From 6e3961cb2f9974ea9c472d7a9b3c03062d433b08 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 21:19:03 -0400 Subject: [PATCH 027/166] 32-bit too --- Cargo.toml | 3 +- src/lib.rs | 39 ++++++ src/xxhash32.rs | 337 ++++++++++++++++++++++++++++++++++++++++++++++++ src/xxhash64.rs | 49 +++---- 4 files changed, 395 insertions(+), 33 deletions(-) create mode 100644 src/xxhash32.rs diff --git a/Cargo.toml b/Cargo.toml index 4e91fc7cd..2c44331ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,12 +11,13 @@ members = [ ] [features] -default = ["std", "xxhash64"] +default = ["std", "xxhash32", "xxhash64"] std = ["dep:rand"] serialize = ["dep:serde"] +xxhash32 = [] xxhash64 = [] [dependencies] diff --git a/src/lib.rs b/src/lib.rs index fde3a8e96..64a4d3f3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,8 +4,47 @@ #[cfg(test)] extern crate std; +#[cfg(feature = "xxhash32")] +mod xxhash32; + +#[cfg(feature = "xxhash32")] +pub use xxhash32::*; + #[cfg(feature = "xxhash64")] mod xxhash64; #[cfg(feature = "xxhash64")] pub use xxhash64::*; + +trait IntoU32 { + fn into_u32(self) -> u32; +} + +impl IntoU32 for u8 { + fn into_u32(self) -> u32 { + self.into() + } +} + +trait IntoU64 { + fn into_u64(self) -> u64; +} + +impl IntoU64 for u8 { + fn into_u64(self) -> u64 { + self.into() + } +} + +impl IntoU64 for u32 { + fn into_u64(self) -> u64 { + self.into() + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl IntoU64 for usize { + fn into_u64(self) -> u64 { + self as u64 + } +} diff --git a/src/xxhash32.rs b/src/xxhash32.rs new file mode 100644 index 000000000..41bf7f11d 
--- /dev/null +++ b/src/xxhash32.rs @@ -0,0 +1,337 @@ +use core::{fmt, hash::Hasher, mem}; + +use crate::{IntoU32, IntoU64}; + +// Keeping these constants in this form to match the C code. +const PRIME32_1: u32 = 0x9E3779B1; +const PRIME32_2: u32 = 0x85EBCA77; +const PRIME32_3: u32 = 0xC2B2AE3D; +const PRIME32_4: u32 = 0x27D4EB2F; +const PRIME32_5: u32 = 0x165667B1; + +type Lane = u32; +type Lanes = [Lane; 4]; +type Bytes = [u8; 16]; + +const BYTES_IN_LANE: usize = mem::size_of::(); + +#[derive(PartialEq)] +struct BufferData(Lanes); + +impl BufferData { + const fn new() -> Self { + Self([0; 4]) + } + + const fn bytes(&self) -> &Bytes { + const _: () = assert!(mem::align_of::() <= mem::align_of::()); + // SAFETY[bytes]: The alignment of `u32` is at least that of + // `u8` and all the values are initialized. + unsafe { &*self.0.as_ptr().cast() } + } + + fn bytes_mut(&mut self) -> &mut Bytes { + // SAFETY: See SAFETY[bytes] + unsafe { &mut *self.0.as_mut_ptr().cast() } + } +} + +impl fmt::Debug for BufferData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.0.iter()).finish() + } +} + +#[derive(Debug, PartialEq)] +struct Buffer { + offset: usize, + data: BufferData, +} + +impl Buffer { + const fn new() -> Self { + Self { + offset: 0, + data: BufferData::new(), + } + } + + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { + // Most of the slice methods we use here have `_unchecked` variants, but + // + // 1. this method is called one time per `XxHash64::write` call + // 2. this method early exits if we don't have anything in the buffer + // + // Because of this, removing the panics via `unsafe` doesn't + // have much benefit other than reducing code size by a tiny + // bit. 
+ + if self.offset == 0 { + return (None, data); + }; + + let bytes = self.data.bytes_mut(); + debug_assert!(self.offset <= bytes.len()); + + let empty = &mut bytes[self.offset..]; + let n_to_copy = usize::min(empty.len(), data.len()); + + let dst = &mut empty[..n_to_copy]; + + let (src, rest) = data.split_at(n_to_copy); + + dst.copy_from_slice(src); + self.offset += n_to_copy; + + debug_assert!(self.offset <= bytes.len()); + + if self.offset == bytes.len() { + self.offset = 0; + (Some(&self.data.0), rest) + } else { + (None, rest) + } + } + + fn set(&mut self, data: &[u8]) { + if data.is_empty() { + return; + } + + debug_assert_eq!(self.offset, 0); + + let n_to_copy = data.len(); + + let bytes = self.data.bytes_mut(); + debug_assert!(n_to_copy < bytes.len()); + + bytes[..n_to_copy].copy_from_slice(data); + self.offset = data.len(); + } + + fn remaining(&self) -> &[u8] { + &self.data.bytes()[..self.offset] + } +} + +struct Accumulators(Lanes); + +impl Accumulators { + const fn new(seed: u32) -> Self { + Self([ + seed.wrapping_add(PRIME32_1).wrapping_add(PRIME32_2), + seed.wrapping_add(PRIME32_2), + seed, + seed.wrapping_sub(PRIME32_1), + ]) + } + + fn write(&mut self, lanes: Lanes) { + let [acc1, acc2, acc3, acc4] = &mut self.0; + let [lane1, lane2, lane3, lane4] = lanes; + + *acc1 = round(*acc1, lane1.to_le()); + *acc2 = round(*acc2, lane2.to_le()); + *acc3 = round(*acc3, lane3.to_le()); + *acc4 = round(*acc4, lane4.to_le()); + } + + fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { + while let Some((chunk, rest)) = data.split_first_chunk::() { + // SAFETY: We have the right number of bytes and are + // handling the unaligned case. 
+ let lanes = unsafe { chunk.as_ptr().cast::().read_unaligned() }; + self.write(lanes); + data = rest; + } + data + } + + const fn finish(&self) -> u32 { + let [acc1, acc2, acc3, acc4] = self.0; + + let acc1 = acc1.rotate_left(1); + let acc2 = acc2.rotate_left(7); + let acc3 = acc3.rotate_left(12); + let acc4 = acc4.rotate_left(18); + + acc1.wrapping_add(acc2) + .wrapping_add(acc3) + .wrapping_add(acc4) + } +} + +pub struct XxHash32 { + seed: u32, + accumulators: Accumulators, + buffer: Buffer, + length: u64, +} + +impl XxHash32 { + pub const fn with_seed(seed: u32) -> Self { + // Step 1. Initialize internal accumulators + Self { + seed, + accumulators: Accumulators::new(seed), + buffer: Buffer::new(), + length: 0, + } + } + + #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline(always)] + fn finish_32(&self) -> u32 { + Self::finish_with( + self.seed, + self.length, + &self.accumulators, + self.buffer.remaining(), + ) + } + + #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline(always)] + fn finish_with(seed: u32, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u32 { + // Step 3. Accumulator convergence + let mut acc = if len < BYTES_IN_LANE.into_u64() { + seed.wrapping_add(PRIME32_5) + } else { + accumulators.finish() + }; + + // Step 4. Add input length + // + // "Note that, if input length is so large that it requires + // more than 32-bits, only the lower 32-bits are added to the + // accumulator." + acc += len as u32; + + // Step 5. 
Consume remaining input + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + let lane = u32::from_ne_bytes(*chunk).to_le(); + + acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_3)); + acc = acc.rotate_left(17).wrapping_mul(PRIME32_4); + + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + let lane = chunk[0].into_u32(); + + acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_5)); + acc = acc.rotate_left(11).wrapping_mul(PRIME32_1); + + remaining = rest; + } + + // Step 6. Final mix (avalanche) + acc ^= acc >> 15; + acc = acc.wrapping_mul(PRIME32_2); + acc ^= acc >> 13; + acc = acc.wrapping_mul(PRIME32_3); + acc ^= acc >> 16; + + acc + } +} + +impl Hasher for XxHash32 { + fn write(&mut self, data: &[u8]) { + let len = data.len(); + + // Step 2. Process stripes + let (buffered_lanes, data) = self.buffer.extend(data); + + if let Some(&lanes) = buffered_lanes { + self.accumulators.write(lanes); + } + + let data = self.accumulators.write_many(data); + + self.buffer.set(data); + + self.length += len.into_u64(); + } + + fn finish(&self) -> u64 { + XxHash32::finish_32(self).into() + } +} + +const fn round(mut acc: u32, lane: u32) -> u32 { + acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_2)); + acc = acc.rotate_left(13); + acc.wrapping_mul(PRIME32_1) +} + +#[cfg(test)] +mod test { + use core::array; + + use super::*; + + #[test] + fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { + let bytes = [0; 32]; + + let mut byte_by_byte = XxHash32::with_seed(0); + for byte in bytes.chunks(1) { + byte_by_byte.write(byte); + } + let byte_by_byte = byte_by_byte.finish_32(); + + let mut one_chunk = XxHash32::with_seed(0); + one_chunk.write(&bytes); + let one_chunk = one_chunk.finish_32(); + + assert_eq!(byte_by_byte, one_chunk); + } + + #[test] + fn hash_of_nothing_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(&[]); + 
assert_eq!(hasher.finish_32(), 0x02cc_5d05); + } + + #[test] + fn hash_of_single_byte_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(&[42]); + assert_eq!(hasher.finish_32(), 0xe0fe_705f); + } + + #[test] + fn hash_of_multiple_bytes_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + assert_eq!(hasher.finish_32(), 0x9e5e_7e93); + } + + #[test] + fn hash_of_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash32::with_seed(0); + hasher.write(&bytes); + assert_eq!(hasher.finish_32(), 0x7f89_ba44); + } + + #[test] + fn hash_with_different_seed_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0x42c9_1977); + hasher.write(&[]); + assert_eq!(hasher.finish_32(), 0xd6bf_8459); + } + + #[test] + fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash32::with_seed(0x42c9_1977); + hasher.write(&bytes); + assert_eq!(hasher.finish_32(), 0x6d2f_6c17); + } +} diff --git a/src/xxhash64.rs b/src/xxhash64.rs index fbeaefc2e..9be69096b 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -1,5 +1,7 @@ use core::{fmt, hash::Hasher, mem}; +use crate::IntoU64; + // Keeping these constants in this form to match the C code. 
const PRIME64_1: u64 = 0x9E3779B185EBCA87; const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; @@ -7,22 +9,28 @@ const PRIME64_3: u64 = 0x165667B19E3779F9; const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; +type Lane = u64; +type Lanes = [Lane; 4]; +type Bytes = [u8; 32]; + +const BYTES_IN_LANE: usize = mem::size_of::(); + #[derive(PartialEq)] -struct BufferData([u64; 4]); +struct BufferData(Lanes); impl BufferData { const fn new() -> Self { Self([0; 4]) } - fn bytes(&self) -> &[u8; 32] { - const _: () = assert!(mem::align_of::() <= mem::align_of::()); + fn bytes(&self) -> &Bytes { + const _: () = assert!(mem::align_of::() <= mem::align_of::()); // SAFETY[bytes]: The alignment of `u64` is at least that of // `u8` and all the values are initialized. unsafe { &*self.0.as_ptr().cast() } } - fn bytes_mut(&mut self) -> &mut [u8; 32] { + fn bytes_mut(&mut self) -> &mut Bytes { // SAFETY: See SAFETY[bytes] unsafe { &mut *self.0.as_mut_ptr().cast() } } @@ -48,7 +56,7 @@ impl Buffer { } } - fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // // 1. this method is called one time per `XxHash64::write` call @@ -107,7 +115,7 @@ impl Buffer { } #[derive(PartialEq)] -struct Accumulators([u64; 4]); +struct Accumulators(Lanes); impl Accumulators { const fn new(seed: u64) -> Self { @@ -119,7 +127,7 @@ impl Accumulators { ]) } - fn write(&mut self, lanes: [u64; 4]) { + fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; @@ -133,7 +141,7 @@ impl Accumulators { while let Some((chunk, rest)) = data.split_first_chunk::<32>() { // SAFETY: We have the right number of bytes and are // handling the unaligned case. 
- let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + let lanes = unsafe { chunk.as_ptr().cast::().read_unaligned() }; self.write(lanes); data = rest; } @@ -234,7 +242,7 @@ impl XxHash64 { #[inline(always)] fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. Accumulator convergence - let mut acc = if len < 32 { + let mut acc = if len < BYTES_IN_LANE.into_u64() { seed.wrapping_add(PRIME64_5) } else { accumulators.finish() @@ -318,29 +326,6 @@ const fn round(mut acc: u64, lane: u64) -> u64 { acc.wrapping_mul(PRIME64_1) } -trait IntoU64 { - fn into_u64(self) -> u64; -} - -impl IntoU64 for u8 { - fn into_u64(self) -> u64 { - self.into() - } -} - -impl IntoU64 for u32 { - fn into_u64(self) -> u64 { - self.into() - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -impl IntoU64 for usize { - fn into_u64(self) -> u64 { - self as u64 - } -} - #[cfg(test)] mod test { use core::array; From a4eb4bd7b1269e94cc4337182782b779699a6c21 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 21:33:42 -0400 Subject: [PATCH 028/166] align --- src/xxhash32.rs | 61 +++++++++++++++++++++++++++++++++++++++++-------- src/xxhash64.rs | 10 ++++---- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 41bf7f11d..65eeb9206 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -114,6 +114,7 @@ impl Buffer { } } +#[derive(PartialEq)] struct Accumulators(Lanes); impl Accumulators { @@ -161,6 +162,19 @@ impl Accumulators { } } +impl fmt::Debug for Accumulators { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let [acc1, acc2, acc3, acc4] = self.0; + f.debug_struct("Accumulators") + .field("acc1", &acc1) + .field("acc2", &acc2) + .field("acc3", &acc3) + .field("acc4", &acc4) + .finish() + } +} + +#[derive(Debug, PartialEq)] pub struct XxHash32 { seed: u32, accumulators: Accumulators, @@ -168,7 +182,35 @@ pub struct 
XxHash32 { length: u64, } +impl Default for XxHash32 { + fn default() -> Self { + Self::with_seed(0) + } +} + impl XxHash32 { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. + #[must_use] + // RATIONALE[inline]: Keeping parallel to the XxHash64 + // implementation, even though the performance gains for XxHash32 + // haven't been tested. + #[inline] + pub fn oneshot(seed: u32, data: &[u8]) -> u32 { + let len = data.len(); + + // Since we know that there's no more data coming, we don't + // need to construct the intermediate buffers or copy data to + // or from the buffers. + + let mut accumulators = Accumulators::new(seed); + + let data = accumulators.write_many(data); + + Self::finish_with(seed, len.into_u64(), &accumulators, data) + } + + #[must_use] pub const fn with_seed(seed: u32) -> Self { // Step 1. Initialize internal accumulators Self { @@ -182,7 +224,7 @@ impl XxHash32 { #[must_use] // RATIONALE: See RATIONALE[inline] #[inline(always)] - fn finish_32(&self) -> u32 { + pub fn finish_32(&self) -> u32 { Self::finish_with( self.seed, self.length, @@ -257,6 +299,7 @@ impl Hasher for XxHash32 { self.length += len.into_u64(); } + #[must_use] fn finish(&self) -> u64 { XxHash32::finish_32(self).into() } @@ -282,11 +325,11 @@ mod test { for byte in bytes.chunks(1) { byte_by_byte.write(byte); } - let byte_by_byte = byte_by_byte.finish_32(); + let byte_by_byte = byte_by_byte.finish(); let mut one_chunk = XxHash32::with_seed(0); one_chunk.write(&bytes); - let one_chunk = one_chunk.finish_32(); + let one_chunk = one_chunk.finish(); assert_eq!(byte_by_byte, one_chunk); } @@ -295,21 +338,21 @@ mod test { fn hash_of_nothing_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0); hasher.write(&[]); - assert_eq!(hasher.finish_32(), 0x02cc_5d05); + assert_eq!(hasher.finish(), 0x02cc_5d05); } #[test] fn hash_of_single_byte_matches_c_implementation() { let mut hasher = 
XxHash32::with_seed(0); hasher.write(&[42]); - assert_eq!(hasher.finish_32(), 0xe0fe_705f); + assert_eq!(hasher.finish(), 0xe0fe_705f); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0); hasher.write(b"Hello, world!\0"); - assert_eq!(hasher.finish_32(), 0x9e5e_7e93); + assert_eq!(hasher.finish(), 0x9e5e_7e93); } #[test] @@ -317,14 +360,14 @@ mod test { let bytes: [u8; 100] = array::from_fn(|i| i as u8); let mut hasher = XxHash32::with_seed(0); hasher.write(&bytes); - assert_eq!(hasher.finish_32(), 0x7f89_ba44); + assert_eq!(hasher.finish(), 0x7f89_ba44); } #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0x42c9_1977); hasher.write(&[]); - assert_eq!(hasher.finish_32(), 0xd6bf_8459); + assert_eq!(hasher.finish(), 0xd6bf_8459); } #[test] @@ -332,6 +375,6 @@ mod test { let bytes: [u8; 100] = array::from_fn(|i| i as u8); let mut hasher = XxHash32::with_seed(0x42c9_1977); hasher.write(&bytes); - assert_eq!(hasher.finish_32(), 0x6d2f_6c17); + assert_eq!(hasher.finish(), 0x6d2f_6c17); } } diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 9be69096b..cef2180f3 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -23,7 +23,7 @@ impl BufferData { Self([0; 4]) } - fn bytes(&self) -> &Bytes { + const fn bytes(&self) -> &Bytes { const _: () = assert!(mem::align_of::() <= mem::align_of::()); // SAFETY[bytes]: The alignment of `u64` is at least that of // `u8` and all the values are initialized. @@ -138,7 +138,7 @@ impl Accumulators { } fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { - while let Some((chunk, rest)) = data.split_first_chunk::<32>() { + while let Some((chunk, rest)) = data.split_first_chunk::() { // SAFETY: We have the right number of bytes and are // handling the unaligned case. let lanes = unsafe { chunk.as_ptr().cast::().read_unaligned() }; @@ -252,7 +252,7 @@ impl XxHash64 { acc += len; // Step 5. 
Consume remaining input - while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { let lane = u64::from_ne_bytes(*chunk).to_le(); acc ^= round(0, lane); @@ -261,7 +261,7 @@ impl XxHash64 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); acc ^= lane.wrapping_mul(PRIME64_1); @@ -271,7 +271,7 @@ impl XxHash64 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<1>() { + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { let lane = chunk[0].into_u64(); acc ^= lane.wrapping_mul(PRIME64_5); From f6156b35ac408a459f14ee58316a215aa6a34ce7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 27 Jun 2024 09:13:21 -0400 Subject: [PATCH 029/166] more --- compare/benches/benchmark.rs | 51 +++---- compare/src/lib.rs | 248 +++++++++++++++++++++++++---------- xx_hash-sys/src/lib.rs | 72 +++++++++- 3 files changed, 266 insertions(+), 105 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 6cb801573..685f2f907 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,10 +1,9 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use rand::{Rng, RngCore, SeedableRng}; -use std::hash::Hasher; -use std::{hint::black_box, iter}; -use twox_hash::XxHash64 as Old; -use xx_hash_sys::Stream; -use xx_renu::XxHash64; +use std::{hash::Hasher, hint::black_box, iter}; + +use xx_hash_sys as c; +use xx_renu as rust; const TINY_DATA_SIZE: usize = 32; const BIG_DATA_SIZE: usize = 100 * 1024 * 1024; @@ -17,19 +16,19 @@ fn tiny_data(c: &mut Criterion) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = 
format!("xxHash/oneshot/{size}"); + let id = format!("c/oneshot/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = Stream::oneshot(seed, data); + let hash = c::XxHash64::oneshot(seed, data); black_box(hash); }) }); - let id = format!("xxHash/streaming/{size}"); + let id = format!("c/streaming/{size}"); g.bench_function(id, |b| { b.iter(|| { let hash = { - let mut hasher = Stream::with_seed(seed); + let mut hasher = c::XxHash64::with_seed(seed); hasher.write(data); hasher.finish() }; @@ -37,19 +36,19 @@ fn tiny_data(c: &mut Criterion) { }) }); - let id = format!("renu/oneshot/{size}"); + let id = format!("rust/oneshot/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = XxHash64::oneshot(seed, data); + let hash = rust::XxHash64::oneshot(seed, data); black_box(hash); }) }); - let id = format!("renu/streaming/{size}"); + let id = format!("rust/streaming/{size}"); g.bench_function(id, |b| { b.iter(|| { let hash = { - let mut hasher = XxHash64::with_seed(seed); + let mut hasher = rust::XxHash64::with_seed(seed); hasher.write(data); hasher.finish() }; @@ -69,18 +68,18 @@ fn oneshot(c: &mut Criterion) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("xxHash/{size}"); + let id = format!("c/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = Stream::oneshot(seed, data); + let hash = c::XxHash64::oneshot(seed, data); black_box(hash); }) }); - let id = format!("renu/{size}"); + let id = format!("rust/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = XxHash64::oneshot(seed, data); + let hash = rust::XxHash64::oneshot(seed, data); black_box(hash); }) }); @@ -97,30 +96,20 @@ fn streaming_one_chunk(c: &mut Criterion) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("xxHash/{size}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = Stream::with_seed(seed); - hasher.write(data); - let hash = hasher.finish(); - black_box(hash); - }) - 
}); - - let id = format!("renu/{size}"); + let id = format!("c/{size}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = XxHash64::with_seed(seed); + let mut hasher = c::XxHash64::with_seed(seed); hasher.write(data); let hash = hasher.finish(); black_box(hash); }) }); - let id = format!("twox-hash/{size}"); + let id = format!("rust/{size}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = Old::with_seed(seed); + let mut hasher = rust::XxHash64::with_seed(seed); hasher.write(data); let hash = hasher.finish(); black_box(hash); diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 6e1a4e8be..f17554919 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -1,94 +1,206 @@ #![cfg(test)] -use proptest::{num, prelude::*, test_runner::TestCaseResult}; -use std::hash::Hasher; +use proptest::{num, prelude::*}; -proptest! { - #[test] - fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { - oneshot_same_as_one_chunk_impl(seed, &data)?; - } +use xx_hash_sys as c; +use xx_renu as rust; - #[test] - fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - } +mod xxhash32 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u32, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed: u32, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + + #[test] + fn oneshot(seed: u32, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_one_chunk(seed: u32, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } - #[test] - fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { - oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + #[test] + fn streaming_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } } - #[test] - fn oneshot(seed: u64, data: Vec) { - oneshot_impl(seed, &data)?; + fn oneshot_same_as_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash32::oneshot(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish_32() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) } - #[test] - fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_impl(seed, &data[offset..])?; + fn oneshot_same_as_many_chunks_impl( + seed: u32, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash32::oneshot(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash32::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish_32() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) } - #[test] - fn streaming_one_chunk(seed: u64, 
data: Vec) { - streaming_one_chunk_impl(seed, &data)?; + fn oneshot_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let native = c::XxHash32::oneshot(seed, data); + let rust = rust::XxHash32::oneshot(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) } - #[test] - fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - streaming_one_chunk_impl(seed, &data[offset..])?; + fn streaming_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish_32() + }; + + prop_assert_eq!(native, rust); + Ok(()) } } -fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let oneshot = xx_renu::XxHash64::oneshot(seed, data); - let one_chunk = { - let mut hasher = xx_renu::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; +mod xxhash64 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher; - prop_assert_eq!(oneshot, one_chunk); - Ok(()) -} + use super::*; -fn oneshot_same_as_many_chunks_impl(seed: u64, data: &[u8], chunks: &[Vec]) -> TestCaseResult { - let oneshot = xx_renu::XxHash64::oneshot(seed, data); - let many_chunks = { - let mut hasher = xx_renu::XxHash64::with_seed(seed); - for chunk in chunks { - hasher.write(chunk); + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; } - hasher.finish() - }; - prop_assert_eq!(oneshot, many_chunks); - Ok(()) -} + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } -fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = xx_hash_sys::Stream::oneshot(seed, data); - let rust = xx_renu::XxHash64::oneshot(seed, data); + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } - prop_assert_eq!(native, rust); - Ok(()) -} + #[test] + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } + + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } + } + + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash64::oneshot(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) + } + + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash64::oneshot(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) + } + + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash64::oneshot(seed, data); + 
let rust = rust::XxHash64::oneshot(seed, data); -fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = { - let mut hasher = xx_hash_sys::Stream::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - let rust = { - let mut hasher = xx_renu::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(native, rust); - Ok(()) + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } } fn vec_and_index() -> impl Strategy, usize)> { diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 2c3ed6e8a..0d691790f 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -1,15 +1,75 @@ #![allow(non_camel_case_types)] +type XXH_errorcode = libc::c_int; +const XXH_OK: XXH_errorcode = 0; + +// ---------- + +type XXH32_hash_t = u32; + +#[repr(C)] +pub struct XXH32_state_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +extern "C" { + fn XXH32(input: *const libc::c_void, length: libc::size_t, seed: XXH32_hash_t) -> XXH32_hash_t; + + fn XXH32_createState() -> *mut XXH32_state_t; + fn XXH32_reset(state: *mut XXH32_state_t, seed: XXH32_hash_t) -> XXH_errorcode; + fn XXH32_update( + state: *mut XXH32_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn XXH32_digest(state: *mut XXH32_state_t) -> XXH32_hash_t; + fn XXH32_freeState(state: *mut XXH32_state_t); +} + +pub struct XxHash32(*mut XXH32_state_t); + +impl XxHash32 { + pub fn oneshot(seed: u32, data: &[u8]) -> u32 { + unsafe { XXH32(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn with_seed(seed: u32) -> 
Self { + let state = unsafe { + let state = XXH32_createState(); + XXH32_reset(state, seed); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { XXH32_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u32 { + unsafe { XXH32_digest(self.0) } + } +} + +impl Drop for XxHash32 { + fn drop(&mut self) { + unsafe { XXH32_freeState(self.0) } + } +} + +// ---------- + type XXH64_hash_t = u64; + #[repr(C)] pub struct XXH64_state_t { _data: [u8; 0], _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } -type XXH_errorcode = libc::c_int; -const XXH_OK: XXH_errorcode = 0; - extern "C" { fn XXH64(input: *const libc::c_void, length: libc::size_t, seed: XXH64_hash_t) -> XXH64_hash_t; @@ -24,9 +84,9 @@ extern "C" { fn XXH64_freeState(state: *mut XXH64_state_t); } -pub struct Stream(*mut XXH64_state_t); +pub struct XxHash64(*mut XXH64_state_t); -impl Stream { +impl XxHash64 { pub fn oneshot(seed: u64, data: &[u8]) -> u64 { unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } } @@ -51,7 +111,7 @@ impl Stream { } } -impl Drop for Stream { +impl Drop for XxHash64 { fn drop(&mut self) { unsafe { XXH64_freeState(self.0) } } From c0fdd56a46145f5c213f4a4017938da9d164d102 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 27 Jun 2024 14:28:29 -0400 Subject: [PATCH 030/166] benchmark --- compare/benches/benchmark.rs | 114 ++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 47 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 685f2f907..2bce74e6c 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -6,7 +6,10 @@ use xx_hash_sys as c; use xx_renu as rust; const TINY_DATA_SIZE: usize = 32; -const BIG_DATA_SIZE: usize = 100 * 1024 * 1024; +const BIG_DATA_SIZE: usize = 4 * 1024 * 1024; +const MIN_BIG_DATA_SIZE: usize = 256 * 1024; +const MAX_CHUNKS: usize = 64; 
+const SEED: u64 = 0xc651_4843_1995_363f; fn tiny_data(c: &mut Criterion) { let (seed, data) = gen_data(TINY_DATA_SIZE); @@ -16,7 +19,7 @@ fn tiny_data(c: &mut Criterion) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("c/oneshot/{size}"); + let id = format!("impl-c/fn-oneshot/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { let hash = c::XxHash64::oneshot(seed, data); @@ -24,7 +27,7 @@ fn tiny_data(c: &mut Criterion) { }) }); - let id = format!("c/streaming/{size}"); + let id = format!("impl-c/fn-streaming/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { let hash = { @@ -36,7 +39,7 @@ fn tiny_data(c: &mut Criterion) { }) }); - let id = format!("rust/oneshot/{size}"); + let id = format!("impl-rust/fn-oneshot/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { let hash = rust::XxHash64::oneshot(seed, data); @@ -44,7 +47,7 @@ fn tiny_data(c: &mut Criterion) { }) }); - let id = format!("rust/streaming/{size}"); + let id = format!("impl-rust/fn-streaming/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { let hash = { @@ -64,11 +67,11 @@ fn oneshot(c: &mut Criterion) { let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("oneshot"); - for size in half_sizes(&data).take(10) { + for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("c/{size}"); + let id = format!("impl-c/size-{size:07}"); g.bench_function(id, |b| { b.iter(|| { let hash = c::XxHash64::oneshot(seed, data); @@ -76,7 +79,7 @@ fn oneshot(c: &mut Criterion) { }) }); - let id = format!("rust/{size}"); + let id = format!("impl-rust/size-{size:07}"); g.bench_function(id, |b| { b.iter(|| { let hash = rust::XxHash64::oneshot(seed, data); @@ -88,40 +91,43 @@ fn oneshot(c: &mut Criterion) { g.finish(); } -fn streaming_one_chunk(c: &mut Criterion) { - let (seed, data) = gen_data(BIG_DATA_SIZE); - let mut g = 
c.benchmark_group("streaming_one_chunk"); - - for size in half_sizes(&data).take(10) { - let data = &data[..size]; - g.throughput(Throughput::Bytes(data.len() as _)); +fn streaming(c: &mut Criterion) { + let mut g = c.benchmark_group("streaming_many_chunks"); - let id = format!("c/{size}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::XxHash64::with_seed(seed); - hasher.write(data); - let hash = hasher.finish(); - black_box(hash); - }) - }); + for size in half_sizes(BIG_DATA_SIZE).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { + for n_chunks in half_sizes(MAX_CHUNKS) { + let (seed, chunks) = gen_chunked_data(size, n_chunks); + g.throughput(Throughput::Bytes(size as _)); - let id = format!("rust/{size}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - let hash = hasher.finish(); - black_box(hash); - }) - }); + let id = format!("impl-c/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::XxHash64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + + let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = rust::XxHash64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + } } g.finish(); } -const SEED: u64 = 0xc651_4843_1995_363f; - fn gen_data(length: usize) -> (u64, Vec) { let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); @@ -133,18 +139,32 @@ fn gen_data(length: usize) -> (u64, Vec) { (seed, data) } -fn half_sizes(data: &[u8]) -> impl Iterator { - iter::successors( - Some(data.len()), - |&v| { - if v == 1 { - None - } else { - Some(v / 2) - } - }, - ) +fn gen_chunked_data(length: usize, n_chunks: usize) -> (u64, Vec>) { + assert!(length > n_chunks); + + let mut rng = 
rand::rngs::StdRng::seed_from_u64(SEED); + + let seed = rng.gen(); + + let chunk_size = length / n_chunks; + + let mut total = 0; + let mut chunks = Vec::with_capacity(2 * n_chunks); + + while total < length { + let mut data = vec![0; chunk_size]; + rng.fill_bytes(&mut data); + + total += data.len(); + chunks.push(data) + } + + (seed, chunks) +} + +fn half_sizes(max: usize) -> impl Iterator { + iter::successors(Some(max), |&v| if v == 1 { None } else { Some(v / 2) }) } -criterion_group!(benches, tiny_data, oneshot, streaming_one_chunk); +criterion_group!(benches, tiny_data, oneshot, streaming); criterion_main!(benches); From c1fc63c6dbce788086fb7cb4c7547ea204cb7f57 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 27 Jun 2024 14:47:00 -0400 Subject: [PATCH 031/166] faster --- src/xxhash64.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/xxhash64.rs b/src/xxhash64.rs index cef2180f3..7958a9c06 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -127,6 +127,11 @@ impl Accumulators { ]) } + // RATIONALE[inline2]: Inspecting the disassembly showed that + // these helper functions were not being inlined. Avoiding a few + // function calls wins us the tiniest performance increase, just + // enough so that we are neck-and-neck with the C code. 
+ #[inline] fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; @@ -137,6 +142,8 @@ impl Accumulators { *acc4 = round(*acc4, lane4.to_le()); } + // RATIONALE: See RATIONALE[inline2] + #[inline] fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { while let Some((chunk, rest)) = data.split_first_chunk::() { // SAFETY: We have the right number of bytes and are @@ -148,6 +155,8 @@ impl Accumulators { data } + // RATIONALE: See RATIONALE[inline2] + #[inline] const fn finish(&self) -> u64 { let [acc1, acc2, acc3, acc4] = self.0; @@ -170,6 +179,8 @@ impl Accumulators { acc } + // RATIONALE: See RATIONALE[inline2] + #[inline] const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { acc ^= round(0, acc_n); acc = acc.wrapping_mul(PRIME64_1); @@ -320,6 +331,8 @@ impl Hasher for XxHash64 { } } +// RATIONALE: See RATIONALE[inline2] +#[inline] const fn round(mut acc: u64, lane: u64) -> u64 { acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); acc = acc.rotate_left(31); From 926f257dda4d121922bbb3edcff79e50438beee7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 27 Jun 2024 14:47:24 -0400 Subject: [PATCH 032/166] more --- src/xxhash64.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 7958a9c06..28053d4f8 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -413,21 +413,21 @@ mod std_impl { use super::*; - pub struct RandomXxHashBuilder64(u64); + pub struct RandomXxHash64Builder(u64); - impl Default for RandomXxHashBuilder64 { + impl Default for RandomXxHash64Builder { fn default() -> Self { Self::new() } } - impl RandomXxHashBuilder64 { + impl RandomXxHash64Builder { fn new() -> Self { Self(rand::random()) } } - impl BuildHasher for RandomXxHashBuilder64 { + impl BuildHasher for RandomXxHash64Builder { type Hasher = XxHash64; fn build_hasher(&self) -> Self::Hasher { @@ -451,7 +451,7 @@ mod std_impl { 
#[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); + let mut hash: HashMap<_, _, RandomXxHash64Builder> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } @@ -459,7 +459,7 @@ mod std_impl { } #[cfg(feature = "std")] -pub use std_impl::RandomXxHashBuilder64; +pub use std_impl::RandomXxHash64Builder; #[cfg(feature = "serialize")] mod serialize_impl { @@ -566,7 +566,7 @@ mod serialize_impl { hasher.write(b"Hello, world!\0"); hasher.finish(); - let serialized = r#"{ + let expected_serialized = r#"{ "total_len": 14, "seed": 0, "core": { @@ -584,8 +584,13 @@ mod serialize_impl { "buffer_usage": 14 }"#; - let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); + let unserialized: XxHash64 = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); + + let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; + let actual_value = serde_json::to_value(&hasher)?; + assert_eq!(expected_value, actual_value); + Ok(()) } } From e568a2ed0bb8bdd5ba1f3059356f85a4780f7599 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 09:21:55 -0400 Subject: [PATCH 033/166] inline --- src/xxhash64.rs | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 28053d4f8..b35066407 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -56,6 +56,8 @@ impl Buffer { } } + // RATIONALE: See RATIONALE[inline] + #[inline] fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // @@ -93,6 +95,8 @@ impl Buffer { } } + // RATIONALE: See RATIONALE[inline] + #[inline] fn set(&mut self, data: &[u8]) { if data.is_empty() { return; @@ -109,6 +113,8 @@ impl Buffer { self.offset = data.len(); } + // RATIONALE: See 
RATIONALE[inline] + #[inline] fn remaining(&self) -> &[u8] { &self.data.bytes()[..self.offset] } @@ -127,10 +133,7 @@ impl Accumulators { ]) } - // RATIONALE[inline2]: Inspecting the disassembly showed that - // these helper functions were not being inlined. Avoiding a few - // function calls wins us the tiniest performance increase, just - // enough so that we are neck-and-neck with the C code. + // RATIONALE: See RATIONALE[inline] #[inline] fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; @@ -142,7 +145,7 @@ impl Accumulators { *acc4 = round(*acc4, lane4.to_le()); } - // RATIONALE: See RATIONALE[inline2] + // RATIONALE: See RATIONALE[inline] #[inline] fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { while let Some((chunk, rest)) = data.split_first_chunk::() { @@ -155,7 +158,7 @@ impl Accumulators { data } - // RATIONALE: See RATIONALE[inline2] + // RATIONALE: See RATIONALE[inline] #[inline] const fn finish(&self) -> u64 { let [acc1, acc2, acc3, acc4] = self.0; @@ -179,7 +182,7 @@ impl Accumulators { acc } - // RATIONALE: See RATIONALE[inline2] + // RATIONALE: See RATIONALE[inline] #[inline] const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { acc ^= round(0, acc_n); @@ -218,8 +221,19 @@ impl XxHash64 { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] - // RATIONALE[inline]: In one case [1], this `inline` helps unlock a - // speedup from ~900µs to ~200µs. + // RATIONALE[inline]: + // + // These `inline`s help unlock a speedup in one benchmark [1] from + // ~900µs to ~200µs. + // + // Further inspection of the disassembly showed that various + // helper functions were not being inlined. Avoiding these few + // function calls wins us the tiniest performance increase, just + // enough so that we are neck-and-neck with (or slightly faster + // than!) the C code. 
+ // + // This results in the entire hash computation being inlined at + // the call site. // // [1]: https://github.com/apache/datafusion-comet/pull/575 #[inline] @@ -250,7 +264,7 @@ impl XxHash64 { #[must_use] // RATIONALE: See RATIONALE[inline] - #[inline(always)] + #[inline] fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. Accumulator convergence let mut acc = if len < BYTES_IN_LANE.into_u64() { @@ -303,6 +317,8 @@ impl XxHash64 { } impl Hasher for XxHash64 { + // RATIONALE: See RATIONALE[inline] + #[inline] fn write(&mut self, data: &[u8]) { let len = data.len(); @@ -320,7 +336,8 @@ impl Hasher for XxHash64 { self.length += len.into_u64(); } - #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline] fn finish(&self) -> u64 { Self::finish_with( self.seed, @@ -331,7 +348,7 @@ impl Hasher for XxHash64 { } } -// RATIONALE: See RATIONALE[inline2] +// RATIONALE: See RATIONALE[inline] #[inline] const fn round(mut acc: u64, lane: u64) -> u64 { acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); From 84220308e3ff5f2e15ae5d9ecac7ddbaaebbab4d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 09:22:02 -0400 Subject: [PATCH 034/166] offset --- src/xxhash64.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/xxhash64.rs b/src/xxhash64.rs index b35066407..6bff23ede 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -422,6 +422,22 @@ mod test { hasher.write(&bytes); assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } + + #[test] + fn hashes_with_different_offsets_are_the_same() { + let bytes = [0x7c; 4096]; + let expected = XxHash64::oneshot(0, &[0x7c; 64]); + + let the_same = bytes + .windows(64) + .map(|w| { + let mut hasher = XxHash64::with_seed(0); + hasher.write(w); + hasher.finish() + }) + .all(|h| h == expected); + assert!(the_same); + } } #[cfg(feature = "std")] From 59836ed06c7b95f40a5be3bf0ec923ba1b229115 Mon Sep 17 00:00:00 2001 From: Jake Goulding 
Date: Fri, 28 Jun 2024 09:31:27 -0400 Subject: [PATCH 035/166] simpla --- src/xxhash32.rs | 45 +++++++++++++++++++++++++++++++++++++-------- src/xxhash64.rs | 10 ++++------ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 65eeb9206..fdd55c443 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -56,6 +56,8 @@ impl Buffer { } } + // RATIONALE: See RATIONALE[inline] + #[inline] fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // @@ -93,6 +95,8 @@ impl Buffer { } } + // RATIONALE: See RATIONALE[inline] + #[inline] fn set(&mut self, data: &[u8]) { if data.is_empty() { return; @@ -109,6 +113,8 @@ impl Buffer { self.offset = data.len(); } + // RATIONALE: See RATIONALE[inline] + #[inline] fn remaining(&self) -> &[u8] { &self.data.bytes()[..self.offset] } @@ -127,6 +133,8 @@ impl Accumulators { ]) } + // RATIONALE: See RATIONALE[inline] + #[inline] fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; @@ -137,6 +145,8 @@ impl Accumulators { *acc4 = round(*acc4, lane4.to_le()); } + // RATIONALE: See RATIONALE[inline] + #[inline] fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { while let Some((chunk, rest)) = data.split_first_chunk::() { // SAFETY: We have the right number of bytes and are @@ -148,6 +158,8 @@ impl Accumulators { data } + // RATIONALE: See RATIONALE[inline] + #[inline] const fn finish(&self) -> u32 { let [acc1, acc2, acc3, acc4] = self.0; @@ -223,7 +235,7 @@ impl XxHash32 { #[must_use] // RATIONALE: See RATIONALE[inline] - #[inline(always)] + #[inline] pub fn finish_32(&self) -> u32 { Self::finish_with( self.seed, @@ -235,7 +247,7 @@ impl XxHash32 { #[must_use] // RATIONALE: See RATIONALE[inline] - #[inline(always)] + #[inline] fn finish_with(seed: u32, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u32 { // 
Step 3. Accumulator convergence let mut acc = if len < BYTES_IN_LANE.into_u64() { @@ -252,7 +264,7 @@ impl XxHash32 { acc += len as u32; // Step 5. Consume remaining input - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + while let Some((chunk, rest)) = remaining.split_first_chunk() { let lane = u32::from_ne_bytes(*chunk).to_le(); acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_3)); @@ -261,13 +273,11 @@ impl XxHash32 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { - let lane = chunk[0].into_u32(); + for &byte in remaining { + let lane = byte.into_u32(); acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_5)); acc = acc.rotate_left(11).wrapping_mul(PRIME32_1); - - remaining = rest; } // Step 6. Final mix (avalanche) @@ -282,6 +292,8 @@ impl XxHash32 { } impl Hasher for XxHash32 { + // RATIONALE: See RATIONALE[inline] + #[inline] fn write(&mut self, data: &[u8]) { let len = data.len(); @@ -299,7 +311,8 @@ impl Hasher for XxHash32 { self.length += len.into_u64(); } - #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline] fn finish(&self) -> u64 { XxHash32::finish_32(self).into() } @@ -377,4 +390,20 @@ mod test { hasher.write(&bytes); assert_eq!(hasher.finish(), 0x6d2f_6c17); } + + #[test] + fn hashes_with_different_offsets_are_the_same() { + let bytes = [0x7c; 4096]; + let expected = XxHash32::oneshot(0, &[0x7c; 64]); + + let the_same = bytes + .windows(64) + .map(|w| { + let mut hasher = XxHash32::with_seed(0); + hasher.write(w); + hasher.finish_32() + }) + .all(|h| h == expected); + assert!(the_same); + } } diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 6bff23ede..251e63985 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -277,7 +277,7 @@ impl XxHash64 { acc += len; // Step 5. 
Consume remaining input - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + while let Some((chunk, rest)) = remaining.split_first_chunk() { let lane = u64::from_ne_bytes(*chunk).to_le(); acc ^= round(0, lane); @@ -286,7 +286,7 @@ impl XxHash64 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + while let Some((chunk, rest)) = remaining.split_first_chunk() { let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); acc ^= lane.wrapping_mul(PRIME64_1); @@ -296,13 +296,11 @@ impl XxHash64 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { - let lane = chunk[0].into_u64(); + for &byte in remaining { + let lane = byte.into_u64(); acc ^= lane.wrapping_mul(PRIME64_5); acc = acc.rotate_left(11).wrapping_mul(PRIME64_1); - - remaining = rest; } // Step 6. Final mix (avalanche) From 3e30866e62c8d03a4b34b3fea6ce7cdcb05fab28 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 10:18:37 -0400 Subject: [PATCH 036/166] inline --- src/xxhash32.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index fdd55c443..19147d4c2 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -318,6 +318,8 @@ impl Hasher for XxHash32 { } } +// RATIONALE: See RATIONALE[inline] +#[inline] const fn round(mut acc: u32, lane: u32) -> u32 { acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_2)); acc = acc.rotate_left(13); From 5c1f9771c8cebd94e120f7aa7f9847f9d5c80cb7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 10:36:55 -0400 Subject: [PATCH 037/166] moar tests --- src/xxhash32.rs | 210 ++++++++++++++++++++++++++++++++++++++++++++++++ src/xxhash64.rs | 34 ++++---- 2 files changed, 227 insertions(+), 17 deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 19147d4c2..1b90dd3f8 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -408,4 +408,214 @@ mod test { 
.all(|h| h == expected); assert!(the_same); } + + // This test validates wraparound/truncation behavior for very + // large inputs of a 32-bit hash, but runs very slowly in the + // normal "cargo test" build config since it hashes 4.3GB of + // data. It runs reasonably quick under "cargo test --release". + #[ignore] + #[test] + fn length_overflows_32bit() { + // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. + let bytes200: [u8; 200] = array::from_fn(|i| i as _); + + let mut hasher = XxHash32::with_seed(0); + for _ in 0..(4_300_000_000 / bytes200.len()) { + hasher.write(&bytes200); + } + + // assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00); + // assert_eq!(hasher.total_len(), 0x004c_cb00); + + // compared against the C implementation + assert_eq!(hasher.finish(), 0x1522_4ca7); + } +} + +#[cfg(feature = "std")] +mod std_impl { + use core::hash::BuildHasher; + + use super::*; + + pub struct RandomXxHash32Builder(u32); + + impl Default for RandomXxHash32Builder { + fn default() -> Self { + Self::new() + } + } + + impl RandomXxHash32Builder { + fn new() -> Self { + Self(rand::random()) + } + } + + impl BuildHasher for RandomXxHash32Builder { + type Hasher = XxHash32; + + fn build_hasher(&self) -> Self::Hasher { + XxHash32::with_seed(self.0) + } + } + + #[cfg(test)] + mod test { + use core::hash::BuildHasherDefault; + use std::collections::HashMap; + + use super::*; + + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_random_seed() { + let mut hash: HashMap<_, _, RandomXxHash32Builder> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + } +} + +#[cfg(feature = "std")] +pub use std_impl::*; + + +#[cfg(feature = "serialize")] +mod serialize_impl { + use 
serde::{Deserialize, Serialize}; + + use super::*; + + impl<'de> Deserialize<'de> for XxHash32 { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let shim = Deserialize::deserialize(deserializer)?; + + let Shim { + total_len, + seed, + core, + buffer, + buffer_usage, + } = shim; + let Core { v1, v2, v3, v4 } = core; + + let mut buffer_data = BufferData::new(); + buffer_data.bytes_mut().copy_from_slice(&buffer); + + Ok(XxHash32 { + seed, + accumulators: Accumulators([v1, v2, v3, v4]), + buffer: Buffer { + offset: buffer_usage, + data: buffer_data, + }, + length: total_len, + }) + } + } + + impl Serialize for XxHash32 { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let XxHash32 { + seed, + ref accumulators, + ref buffer, + length, + } = *self; + let [v1, v2, v3, v4] = accumulators.0; + let Buffer { offset, ref data } = *buffer; + let buffer = *data.bytes(); + + let shim = Shim { + total_len: length, + seed, + core: Core { v1, v2, v3, v4 }, + buffer, + buffer_usage: offset, + }; + + shim.serialize(serializer) + } + } + + #[derive(Serialize, Deserialize)] + struct Shim { + total_len: u64, + seed: u32, + core: Core, + buffer: [u8; 16], + buffer_usage: usize, + } + + #[derive(Serialize, Deserialize)] + struct Core { + v1: u32, + v2: u32, + v3: u32, + v4: u32, + } + + #[cfg(test)] + mod test { + use super::*; + + type Result = core::result::Result; + + #[test] + fn test_serialization_cycle() -> Result { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = serde_json::to_string(&hasher)?; + let unserialized: XxHash32 = serde_json::from_str(&serialized)?; + assert_eq!(hasher, unserialized); + Ok(()) + } + + #[test] + fn test_serialization_stability() -> Result { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let expected_serialized = r#"{ + "total_len": 14, + "seed": 0, + 
"core": { + "v1": 606290984, + "v2": 2246822519, + "v3": 0, + "v4": 1640531535 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; + + let unserialized: XxHash32 = serde_json::from_str(expected_serialized)?; + assert_eq!(hasher, unserialized); + + let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; + let actual_value = serde_json::to_value(&hasher)?; + assert_eq!(expected_value, actual_value); + + Ok(()) + } + } } diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 251e63985..fa2f63681 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -490,7 +490,7 @@ mod std_impl { } #[cfg(feature = "std")] -pub use std_impl::RandomXxHash64Builder; +pub use std_impl::*; #[cfg(feature = "serialize")] mod serialize_impl { @@ -598,22 +598,22 @@ mod serialize_impl { hasher.finish(); let expected_serialized = r#"{ - "total_len": 14, - "seed": 0, - "core": { - "v1": 6983438078262162902, - "v2": 14029467366897019727, - "v3": 0, - "v4": 7046029288634856825 - }, - "buffer": [ - 72, 101, 108, 108, 111, 44, 32, 119, - 111, 114, 108, 100, 33, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - ], - "buffer_usage": 14 - }"#; + "total_len": 14, + "seed": 0, + "core": { + "v1": 6983438078262162902, + "v2": 14029467366897019727, + "v3": 0, + "v4": 7046029288634856825 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; let unserialized: XxHash64 = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); From 63b17995baf143ddb3089319722c86c989e4405f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 11:03:38 -0400 Subject: [PATCH 038/166] dox --- src/lib.rs | 41 ++++++++++++++++++++++++++++++++++++++++- src/xxhash32.rs | 34 +++++++++++++++++++++++++++++++--- src/xxhash64.rs | 14 ++++++++++++++ 3 files changed, 85 
insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 64a4d3f3f..9e382cc68 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,46 @@ +//! A Rust implementation of the [XXHash][] algorithm. +//! +//! [XXHash]: https://github.com/Cyan4973/xxHash +//! +//! ## Hashing arbitrary data +//! +//! ```rust +//! use xx_renu::XxHash64; +//! +//! let seed = 1234; +//! let hash = XxHash64::oneshot(seed, b"some bytes"); +//! assert_eq!(0xeab5_5659_a496_d78b, hash); +//! ``` +//! +//! ## In a [`HashMap`](std::collections::HashMap) +//! +//! ### With a fixed seed +//! +//! ```rust +//! use std::{collections::HashMap, hash::BuildHasherDefault}; +//! use xx_renu::XxHash64; +//! +//! let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` +//! +//! ### With a random seed +//! +//! ```rust +//! use std::collections::HashMap; +//! use xx_renu::RandomXxHash64Builder; +//! +//! let mut hash: HashMap<_, _, RandomXxHash64Builder> = Default::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` + #![no_std] #![deny(rust_2018_idioms)] +#![deny(missing_docs)] -#[cfg(test)] +#[cfg(any(doc, test))] extern crate std; #[cfg(feature = "xxhash32")] diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 1b90dd3f8..d6ec93879 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -186,6 +186,12 @@ impl fmt::Debug for Accumulators { } } +/// Calculates the 32-bit hash. Care should be taken when using this +/// hash. +/// +/// Although this struct implements `Hasher`, it only calculates a +/// 32-bit number, leaving the upper bits as 0. This means it is +/// unlikely to be correct to use this in places like a `HashMap`. 
#[derive(Debug, PartialEq)] pub struct XxHash32 { seed: u32, @@ -222,6 +228,7 @@ impl XxHash32 { Self::finish_with(seed, len.into_u64(), &accumulators, data) } + /// Constructs the hasher with an initial seed. #[must_use] pub const fn with_seed(seed: u32) -> Self { // Step 1. Initialize internal accumulators @@ -233,6 +240,26 @@ impl XxHash32 { } } + /// The seed this hasher was created with. + pub const fn seed(&self) -> u32 { + self.seed + } + + /// The total number of bytes hashed. + pub const fn total_len(&self) -> u64 { + self.length + } + + /// The total number of bytes hashed, truncated to 32 bits. + /// + /// For the full 64-bit byte count, use [`total_len`](Self::total_len). + pub const fn total_len_32(&self) -> u64 { + self.length + } + + /// Returns the hash value for the values written so far. Unlike + /// [`Hasher::finish`][], this method returns the actual 32-bit + /// value calculated, not a 64-bit value. #[must_use] // RATIONALE: See RATIONALE[inline] #[inline] @@ -424,8 +451,8 @@ mod test { hasher.write(&bytes200); } - // assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00); - // assert_eq!(hasher.total_len(), 0x004c_cb00); + assert_eq!(hasher.total_len(), 0x0000_0001_004c_cb00); + assert_eq!(hasher.total_len_32(), 0x004c_cb00); // compared against the C implementation assert_eq!(hasher.finish(), 0x1522_4ca7); @@ -438,6 +465,8 @@ mod std_impl { use super::*; + /// Constructs a randomized seed and reuses it for multiple hasher + /// instances. See the usage warning on [`XxHash32`][]. pub struct RandomXxHash32Builder(u32); impl Default for RandomXxHash32Builder { @@ -486,7 +515,6 @@ mod std_impl { #[cfg(feature = "std")] pub use std_impl::*; - #[cfg(feature = "serialize")] mod serialize_impl { use serde::{Deserialize, Serialize}; diff --git a/src/xxhash64.rs b/src/xxhash64.rs index fa2f63681..7c988a098 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -203,6 +203,7 @@ impl fmt::Debug for Accumulators { } } +/// Calculates the 64-bit hash. 
#[derive(Debug, PartialEq)] pub struct XxHash64 { seed: u64, @@ -251,6 +252,7 @@ impl XxHash64 { Self::finish_with(seed, len.into_u64(), &accumulators, data) } + /// Constructs the hasher with an initial seed. #[must_use] pub const fn with_seed(seed: u64) -> Self { // Step 1. Initialize internal accumulators @@ -262,6 +264,16 @@ impl XxHash64 { } } + /// The seed this hasher was created with. + pub const fn seed(&self) -> u64 { + self.seed + } + + /// The total number of bytes hashed. + pub const fn total_len(&self) -> u64 { + self.length + } + #[must_use] // RATIONALE: See RATIONALE[inline] #[inline] @@ -444,6 +456,8 @@ mod std_impl { use super::*; + /// Constructs a randomized seed and reuses it for multiple hasher + /// instances. pub struct RandomXxHash64Builder(u64); impl Default for RandomXxHash64Builder { From 3dd65268a006cf448a4f3c3fda0ae3a2a45aa5ce Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 12:35:07 -0400 Subject: [PATCH 039/166] dox --- src/lib.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 9e382cc68..ae8755496 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,8 @@ //! //! ## Hashing arbitrary data //! +//! ### When all the data is available at once +//! //! ```rust //! use xx_renu::XxHash64; //! @@ -12,6 +14,21 @@ //! assert_eq!(0xeab5_5659_a496_d78b, hash); //! ``` //! +//! ### When the data is streaming +//! +//! ```rust +//! use std::hash::Hasher; +//! use xx_renu::XxHash64; +//! +//! let seed = 1234; +//! let mut hasher = XxHash64::with_seed(seed); +//! hasher.write(b"some"); +//! hasher.write(b" "); +//! hasher.write(b"bytes"); +//! let hash = hasher.finish(); +//! assert_eq!(0xeab5_5659_a496_d78b, hash); +//! ``` +//! //! ## In a [`HashMap`](std::collections::HashMap) //! //! 
### With a fixed seed From 424f847f693cbe4b657ebcf2fbd277a65c42ae77 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 29 Jun 2024 12:50:24 -0400 Subject: [PATCH 040/166] rename --- README.md | 4 ++ compare/src/lib.rs | 4 +- renu-sum/src/main.rs | 1 + src/lib.rs | 29 +++++++---- src/xxhash32.rs | 114 ++++++++++++++++++++++++++----------------- src/xxhash64.rs | 86 +++++++++++++++++++------------- 6 files changed, 148 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index f2f88dd86..e3e37f9c9 100644 --- a/README.md +++ b/README.md @@ -8,3 +8,7 @@ no-features all-features features for 32 / 64 / xx3 + + +rand feature instead of `std`? +remove digest as we aren't crypto? diff --git a/compare/src/lib.rs b/compare/src/lib.rs index f17554919..5be3f3f73 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -7,7 +7,7 @@ use xx_renu as rust; mod xxhash32 { use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher; + use std::hash::Hasher as _; use super::*; @@ -106,7 +106,7 @@ mod xxhash32 { mod xxhash64 { use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher; + use std::hash::Hasher as _; use super::*; diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 409ee35a7..172b164c3 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -2,6 +2,7 @@ use std::{ env, fs::File, io::Read, + hash::Hasher as _, path::{Path, PathBuf}, sync::mpsc::{self, SendError}, thread, diff --git a/src/lib.rs b/src/lib.rs index ae8755496..3d6713a41 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,7 @@ //! ### When the data is streaming //! //! ```rust -//! use std::hash::Hasher; +//! use std::hash::Hasher as _; //! use xx_renu::XxHash64; //! //! let seed = 1234; @@ -31,13 +31,13 @@ //! //! ## In a [`HashMap`](std::collections::HashMap) //! -//! ### With a fixed seed +//! ### With a default seed //! //! ```rust //! use std::{collections::HashMap, hash::BuildHasherDefault}; //! 
use xx_renu::XxHash64; //! -//! let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); +//! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` @@ -46,9 +46,20 @@ //! //! ```rust //! use std::collections::HashMap; -//! use xx_renu::RandomXxHash64Builder; +//! use xx_renu::xxhash64; +//! +//! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` +//! +//! ### With a fixed seed +//! +//! ```rust +//! use std::collections::HashMap; +//! use xx_renu::xxhash64; //! -//! let mut hash: HashMap<_, _, RandomXxHash64Builder> = Default::default(); +//! let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` @@ -61,16 +72,16 @@ extern crate std; #[cfg(feature = "xxhash32")] -mod xxhash32; +pub mod xxhash32; #[cfg(feature = "xxhash32")] -pub use xxhash32::*; +pub use xxhash32::Hasher as XxHash32; #[cfg(feature = "xxhash64")] -mod xxhash64; +pub mod xxhash64; #[cfg(feature = "xxhash64")] -pub use xxhash64::*; +pub use xxhash64::Hasher as XxHash64; trait IntoU32 { fn into_u32(self) -> u32; diff --git a/src/xxhash32.rs b/src/xxhash32.rs index d6ec93879..ad2b4e988 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -1,4 +1,6 @@ -use core::{fmt, hash::Hasher, mem}; +//! The implementation of XXH32. + +use core::{fmt, hash, mem}; use crate::{IntoU32, IntoU64}; @@ -61,7 +63,7 @@ impl Buffer { fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // - // 1. this method is called one time per `XxHash64::write` call + // 1. this method is called one time per `Hasher::write` call // 2. 
this method early exits if we don't have anything in the buffer // // Because of this, removing the panics via `unsafe` doesn't @@ -186,33 +188,34 @@ impl fmt::Debug for Accumulators { } } -/// Calculates the 32-bit hash. Care should be taken when using this -/// hash. +/// Calculates the 32-bit hash. +/// +/// ### Caution /// -/// Although this struct implements `Hasher`, it only calculates a +/// Although this struct implements [`hash::Hasher`][], it only calculates a /// 32-bit number, leaving the upper bits as 0. This means it is -/// unlikely to be correct to use this in places like a `HashMap`. +/// unlikely to be correct to use this in places like a [`HashMap`][std::collections::HashMap]. #[derive(Debug, PartialEq)] -pub struct XxHash32 { +pub struct Hasher { seed: u32, accumulators: Accumulators, buffer: Buffer, length: u64, } -impl Default for XxHash32 { +impl Default for Hasher { fn default() -> Self { Self::with_seed(0) } } -impl XxHash32 { +impl Hasher { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] - // RATIONALE[inline]: Keeping parallel to the XxHash64 - // implementation, even though the performance gains for XxHash32 - // haven't been tested. + // RATIONALE[inline]: Keeping parallel to the 64-bit + // implementation, even though the performance gains for the + // 32-bit version haven't been tested. #[inline] pub fn oneshot(seed: u32, data: &[u8]) -> u32 { let len = data.len(); @@ -253,12 +256,12 @@ impl XxHash32 { /// The total number of bytes hashed, truncated to 32 bits. /// /// For the full 64-bit byte count, use [`total_len`](Self::total_len). - pub const fn total_len_32(&self) -> u64 { - self.length + pub const fn total_len_32(&self) -> u32 { + self.length as u32 } /// Returns the hash value for the values written so far. 
Unlike - /// [`Hasher::finish`][], this method returns the actual 32-bit + /// [`hash::Hasher::finish`][], this method returns the actual 32-bit /// value calculated, not a 64-bit value. #[must_use] // RATIONALE: See RATIONALE[inline] @@ -318,7 +321,7 @@ impl XxHash32 { } } -impl Hasher for XxHash32 { +impl hash::Hasher for Hasher { // RATIONALE: See RATIONALE[inline] #[inline] fn write(&mut self, data: &[u8]) { @@ -341,7 +344,7 @@ impl Hasher for XxHash32 { // RATIONALE: See RATIONALE[inline] #[inline] fn finish(&self) -> u64 { - XxHash32::finish_32(self).into() + Hasher::finish_32(self).into() } } @@ -355,7 +358,7 @@ const fn round(mut acc: u32, lane: u32) -> u32 { #[cfg(test)] mod test { - use core::array; + use core::{array, hash::Hasher as _}; use super::*; @@ -363,13 +366,13 @@ mod test { fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0; 32]; - let mut byte_by_byte = XxHash32::with_seed(0); + let mut byte_by_byte = Hasher::with_seed(0); for byte in bytes.chunks(1) { byte_by_byte.write(byte); } let byte_by_byte = byte_by_byte.finish(); - let mut one_chunk = XxHash32::with_seed(0); + let mut one_chunk = Hasher::with_seed(0); one_chunk.write(&bytes); let one_chunk = one_chunk.finish(); @@ -378,21 +381,21 @@ mod test { #[test] fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&[]); assert_eq!(hasher.finish(), 0x02cc_5d05); } #[test] fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&[42]); assert_eq!(hasher.finish(), 0xe0fe_705f); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); assert_eq!(hasher.finish(), 0x9e5e_7e93); } @@ -400,14 +403,14 @@ mod test { #[test] fn hash_of_multiple_chunks_matches_c_implementation() 
{ let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x7f89_ba44); } #[test] fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); + let mut hasher = Hasher::with_seed(0x42c9_1977); hasher.write(&[]); assert_eq!(hasher.finish(), 0xd6bf_8459); } @@ -415,7 +418,7 @@ mod test { #[test] fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash32::with_seed(0x42c9_1977); + let mut hasher = Hasher::with_seed(0x42c9_1977); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x6d2f_6c17); } @@ -423,12 +426,12 @@ mod test { #[test] fn hashes_with_different_offsets_are_the_same() { let bytes = [0x7c; 4096]; - let expected = XxHash32::oneshot(0, &[0x7c; 64]); + let expected = Hasher::oneshot(0, &[0x7c; 64]); let the_same = bytes .windows(64) .map(|w| { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(w); hasher.finish_32() }) @@ -446,7 +449,7 @@ mod test { // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. let bytes200: [u8; 200] = array::from_fn(|i| i as _); - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); for _ in 0..(4_300_000_000 / bytes200.len()) { hasher.write(&bytes200); } @@ -465,27 +468,46 @@ mod std_impl { use super::*; + /// Constructs [`Hasher`][] for multiple hasher instances. See + /// the [usage warning][Hasher#caution]. + pub struct State(u32); + + impl State { + /// Constructs the hasher with an initial seed. 
+ pub fn with_seed(seed: u32) -> Self { + Self(seed) + } + } + + impl BuildHasher for State { + type Hasher = Hasher; + + fn build_hasher(&self) -> Self::Hasher { + Hasher::with_seed(self.0) + } + } + /// Constructs a randomized seed and reuses it for multiple hasher - /// instances. See the usage warning on [`XxHash32`][]. - pub struct RandomXxHash32Builder(u32); + /// instances. See the [usage warning][Hasher#caution]. + pub struct RandomState(u32); - impl Default for RandomXxHash32Builder { + impl Default for RandomState { fn default() -> Self { Self::new() } } - impl RandomXxHash32Builder { + impl RandomState { fn new() -> Self { Self(rand::random()) } } - impl BuildHasher for RandomXxHash32Builder { - type Hasher = XxHash32; + impl BuildHasher for RandomState { + type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { - XxHash32::with_seed(self.0) + Hasher::with_seed(self.0) } } @@ -498,14 +520,14 @@ mod std_impl { #[test] fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHash32Builder> = Default::default(); + let mut hash: HashMap<_, _, RandomState> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } @@ -521,7 +543,7 @@ mod serialize_impl { use super::*; - impl<'de> Deserialize<'de> for XxHash32 { + impl<'de> Deserialize<'de> for Hasher { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, @@ -540,7 +562,7 @@ mod serialize_impl { let mut buffer_data = BufferData::new(); buffer_data.bytes_mut().copy_from_slice(&buffer); - Ok(XxHash32 { + Ok(Hasher { seed, accumulators: Accumulators([v1, v2, v3, v4]), buffer: Buffer { @@ -552,12 +574,12 @@ mod serialize_impl { } 
} - impl Serialize for XxHash32 { + impl Serialize for Hasher { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - let XxHash32 { + let Hasher { seed, ref accumulators, ref buffer, @@ -604,19 +626,19 @@ mod serialize_impl { #[test] fn test_serialization_cycle() -> Result { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash32 = serde_json::from_str(&serialized)?; + let unserialized: Hasher = serde_json::from_str(&serialized)?; assert_eq!(hasher, unserialized); Ok(()) } #[test] fn test_serialization_stability() -> Result { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); @@ -636,7 +658,7 @@ mod serialize_impl { "buffer_usage": 14 }"#; - let unserialized: XxHash32 = serde_json::from_str(expected_serialized)?; + let unserialized: Hasher = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 7c988a098..2deb0d997 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -1,4 +1,6 @@ -use core::{fmt, hash::Hasher, mem}; +//! The implementation of XXH64. + +use core::{fmt, hash, mem}; use crate::IntoU64; @@ -61,7 +63,7 @@ impl Buffer { fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // - // 1. this method is called one time per `XxHash64::write` call + // 1. this method is called one time per `Hasher::write` call // 2. this method early exits if we don't have anything in the buffer // // Because of this, removing the panics via `unsafe` doesn't @@ -205,20 +207,20 @@ impl fmt::Debug for Accumulators { /// Calculates the 64-bit hash. 
#[derive(Debug, PartialEq)] -pub struct XxHash64 { +pub struct Hasher { seed: u64, accumulators: Accumulators, buffer: Buffer, length: u64, } -impl Default for XxHash64 { +impl Default for Hasher { fn default() -> Self { Self::with_seed(0) } } -impl XxHash64 { +impl Hasher { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] @@ -326,7 +328,7 @@ impl XxHash64 { } } -impl Hasher for XxHash64 { +impl hash::Hasher for Hasher { // RATIONALE: See RATIONALE[inline] #[inline] fn write(&mut self, data: &[u8]) { @@ -368,7 +370,7 @@ const fn round(mut acc: u64, lane: u64) -> u64 { #[cfg(test)] mod test { - use core::array; + use core::{array, hash::Hasher as _}; use super::*; @@ -376,13 +378,13 @@ mod test { fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0x9c; 32]; - let mut byte_by_byte = XxHash64::with_seed(0); + let mut byte_by_byte = Hasher::with_seed(0); for byte in bytes.chunks(1) { byte_by_byte.write(byte); } let byte_by_byte = byte_by_byte.finish(); - let mut one_chunk = XxHash64::with_seed(0); + let mut one_chunk = Hasher::with_seed(0); one_chunk.write(&bytes); let one_chunk = one_chunk.finish(); @@ -391,21 +393,21 @@ mod test { #[test] fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&[]); assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); } #[test] fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&[42]); assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); } @@ -413,14 +415,14 @@ mod test { #[test] fn 
hash_of_multiple_chunks_matches_c_implementation() { let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); } #[test] fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91); hasher.write(&[]); assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); } @@ -428,7 +430,7 @@ mod test { #[test] fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } @@ -436,12 +438,12 @@ mod test { #[test] fn hashes_with_different_offsets_are_the_same() { let bytes = [0x7c; 4096]; - let expected = XxHash64::oneshot(0, &[0x7c; 64]); + let expected = Hasher::oneshot(0, &[0x7c; 64]); let the_same = bytes .windows(64) .map(|w| { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(w); hasher.finish() }) @@ -456,27 +458,45 @@ mod std_impl { use super::*; + /// Constructs [`Hasher`][] for multiple hasher instances. + pub struct State(u64); + + impl State { + /// Constructs the hasher with an initial seed. + pub fn with_seed(seed: u64) -> Self { + Self(seed) + } + } + + impl BuildHasher for State { + type Hasher = Hasher; + + fn build_hasher(&self) -> Self::Hasher { + Hasher::with_seed(self.0) + } + } + /// Constructs a randomized seed and reuses it for multiple hasher /// instances. 
- pub struct RandomXxHash64Builder(u64); + pub struct RandomState(u64); - impl Default for RandomXxHash64Builder { + impl Default for RandomState { fn default() -> Self { Self::new() } } - impl RandomXxHash64Builder { + impl RandomState { fn new() -> Self { Self(rand::random()) } } - impl BuildHasher for RandomXxHash64Builder { - type Hasher = XxHash64; + impl BuildHasher for RandomState { + type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { - XxHash64::with_seed(self.0) + Hasher::with_seed(self.0) } } @@ -489,14 +509,14 @@ mod std_impl { #[test] fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHash64Builder> = Default::default(); + let mut hash: HashMap<_, _, RandomState> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } @@ -512,7 +532,7 @@ mod serialize_impl { use super::*; - impl<'de> Deserialize<'de> for XxHash64 { + impl<'de> Deserialize<'de> for Hasher { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, @@ -531,7 +551,7 @@ mod serialize_impl { let mut buffer_data = BufferData::new(); buffer_data.bytes_mut().copy_from_slice(&buffer); - Ok(XxHash64 { + Ok(Hasher { seed, accumulators: Accumulators([v1, v2, v3, v4]), buffer: Buffer { @@ -543,12 +563,12 @@ mod serialize_impl { } } - impl Serialize for XxHash64 { + impl Serialize for Hasher { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - let XxHash64 { + let Hasher { seed, ref accumulators, ref buffer, @@ -595,19 +615,19 @@ mod serialize_impl { #[test] fn test_serialization_cycle() -> Result { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); 
hasher.write(b"Hello, world!\0"); hasher.finish(); let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash64 = serde_json::from_str(&serialized)?; + let unserialized: Hasher = serde_json::from_str(&serialized)?; assert_eq!(hasher, unserialized); Ok(()) } #[test] fn test_serialization_stability() -> Result { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); @@ -629,7 +649,7 @@ mod serialize_impl { "buffer_usage": 14 }"#; - let unserialized: XxHash64 = serde_json::from_str(expected_serialized)?; + let unserialized: Hasher = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; From 7c0f281a4a4e324bd713926e58b6f76f05ee566e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 4 Jul 2024 08:31:43 -0400 Subject: [PATCH 041/166] error check --- xx_hash-sys/src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 0d691790f..6f915cbb7 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -24,7 +24,7 @@ extern "C" { length: libc::size_t, ) -> XXH_errorcode; fn XXH32_digest(state: *mut XXH32_state_t) -> XXH32_hash_t; - fn XXH32_freeState(state: *mut XXH32_state_t); + fn XXH32_freeState(state: *mut XXH32_state_t) -> XXH_errorcode; } pub struct XxHash32(*mut XXH32_state_t); @@ -56,7 +56,8 @@ impl XxHash32 { impl Drop for XxHash32 { fn drop(&mut self) { - unsafe { XXH32_freeState(self.0) } + let retval = unsafe { XXH32_freeState(self.0) }; + assert_eq!(retval, XXH_OK); } } @@ -81,7 +82,7 @@ extern "C" { length: libc::size_t, ) -> XXH_errorcode; fn XXH64_digest(state: *mut XXH64_state_t) -> XXH64_hash_t; - fn XXH64_freeState(state: *mut XXH64_state_t); + fn XXH64_freeState(state: *mut XXH64_state_t) -> XXH_errorcode; } pub struct XxHash64(*mut XXH64_state_t); @@ -113,6 
+114,7 @@ impl XxHash64 { impl Drop for XxHash64 { fn drop(&mut self) { - unsafe { XXH64_freeState(self.0) } + let retval = unsafe { XXH64_freeState(self.0) }; + assert_eq!(retval, XXH_OK); } } From b784c6181ce192fc94f27428022796b53beb61fa Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 4 Jul 2024 08:40:21 -0400 Subject: [PATCH 042/166] rename random --- Cargo.toml | 4 +-- src/xxhash32.rs | 77 +++++++++++++++++++++++++------------------------ src/xxhash64.rs | 72 ++++++++++++++++++++++----------------------- 3 files changed, 76 insertions(+), 77 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2c44331ff..99b104214 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,9 +11,9 @@ members = [ ] [features] -default = ["std", "xxhash32", "xxhash64"] +default = ["random", "xxhash32", "xxhash64"] -std = ["dep:rand"] +random = ["dep:rand"] serialize = ["dep:serde"] diff --git a/src/xxhash32.rs b/src/xxhash32.rs index ad2b4e988..725976b44 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -1,6 +1,6 @@ //! The implementation of XXH32. -use core::{fmt, hash, mem}; +use core::{fmt, hash::{self, BuildHasher}, mem}; use crate::{IntoU32, IntoU64}; @@ -356,9 +356,32 @@ const fn round(mut acc: u32, lane: u32) -> u32 { acc.wrapping_mul(PRIME32_1) } + +/// Constructs [`Hasher`][] for multiple hasher instances. See +/// the [usage warning][Hasher#caution]. +pub struct State(u32); + +impl State { + /// Constructs the hasher with an initial seed. 
+ pub fn with_seed(seed: u32) -> Self { + Self(seed) + } +} + +impl BuildHasher for State { + type Hasher = Hasher; + + fn build_hasher(&self) -> Self::Hasher { + Hasher::with_seed(self.0) + } +} + + + #[cfg(test)] mod test { - use core::{array, hash::Hasher as _}; + use core::{array, hash::{BuildHasherDefault, Hasher as _}}; + use std::collections::HashMap; use super::*; @@ -460,36 +483,22 @@ mod test { // compared against the C implementation assert_eq!(hasher.finish(), 0x1522_4ca7); } -} - -#[cfg(feature = "std")] -mod std_impl { - use core::hash::BuildHasher; - - use super::*; - /// Constructs [`Hasher`][] for multiple hasher instances. See - /// the [usage warning][Hasher#caution]. - pub struct State(u32); - - impl State { - /// Constructs the hasher with an initial seed. - pub fn with_seed(seed: u32) -> Self { - Self(seed) - } + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); } +} - impl BuildHasher for State { - type Hasher = Hasher; - - fn build_hasher(&self) -> Self::Hasher { - Hasher::with_seed(self.0) - } - } +#[cfg(feature = "random")] +mod random_impl { + use super::*; /// Constructs a randomized seed and reuses it for multiple hasher /// instances. See the [usage warning][Hasher#caution]. 
- pub struct RandomState(u32); + pub struct RandomState(State); impl Default for RandomState { fn default() -> Self { @@ -499,7 +508,7 @@ mod std_impl { impl RandomState { fn new() -> Self { - Self(rand::random()) + Self(State::with_seed(rand::random())) } } @@ -507,24 +516,16 @@ mod std_impl { type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { - Hasher::with_seed(self.0) + self.0.build_hasher() } } #[cfg(test)] mod test { - use core::hash::BuildHasherDefault; use std::collections::HashMap; use super::*; - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); @@ -534,8 +535,8 @@ mod std_impl { } } -#[cfg(feature = "std")] -pub use std_impl::*; +#[cfg(feature = "random")] +pub use random_impl::*; #[cfg(feature = "serialize")] mod serialize_impl { diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 2deb0d997..086a21d4e 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -1,6 +1,6 @@ //! The implementation of XXH64. -use core::{fmt, hash, mem}; +use core::{fmt, hash::{self, BuildHasher}, mem}; use crate::IntoU64; @@ -368,9 +368,28 @@ const fn round(mut acc: u64, lane: u64) -> u64 { acc.wrapping_mul(PRIME64_1) } +/// Constructs [`Hasher`][] for multiple hasher instances. +pub struct State(u64); + +impl State { + /// Constructs the hasher with an initial seed. 
+ pub fn with_seed(seed: u64) -> Self { + Self(seed) + } +} + +impl BuildHasher for State { + type Hasher = Hasher; + + fn build_hasher(&self) -> Self::Hasher { + Hasher::with_seed(self.0) + } +} + #[cfg(test)] mod test { - use core::{array, hash::Hasher as _}; + use core::{array, hash::{BuildHasherDefault, Hasher as _}}; + use std::collections::HashMap; use super::*; @@ -450,35 +469,22 @@ mod test { .all(|h| h == expected); assert!(the_same); } -} - -#[cfg(feature = "std")] -mod std_impl { - use core::hash::BuildHasher; - - use super::*; - - /// Constructs [`Hasher`][] for multiple hasher instances. - pub struct State(u64); - impl State { - /// Constructs the hasher with an initial seed. - pub fn with_seed(seed: u64) -> Self { - Self(seed) - } + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); } +} - impl BuildHasher for State { - type Hasher = Hasher; - - fn build_hasher(&self) -> Self::Hasher { - Hasher::with_seed(self.0) - } - } +#[cfg(feature = "random")] +mod random_impl { + use super::*; /// Constructs a randomized seed and reuses it for multiple hasher /// instances. 
- pub struct RandomState(u64); + pub struct RandomState(State); impl Default for RandomState { fn default() -> Self { @@ -488,7 +494,7 @@ mod std_impl { impl RandomState { fn new() -> Self { - Self(rand::random()) + Self(State::with_seed(rand::random())) } } @@ -496,24 +502,16 @@ mod std_impl { type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { - Hasher::with_seed(self.0) + self.0.build_hasher() } } #[cfg(test)] mod test { - use core::hash::BuildHasherDefault; use std::collections::HashMap; use super::*; - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); @@ -523,8 +521,8 @@ mod std_impl { } } -#[cfg(feature = "std")] -pub use std_impl::*; +#[cfg(feature = "random")] +pub use random_impl::*; #[cfg(feature = "serialize")] mod serialize_impl { From 16a5f73ea3eef9379e9a28fe61d042b3c301806f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 4 Jul 2024 11:50:46 -0400 Subject: [PATCH 043/166] junk --- renu-sum/src/main.rs | 2 +- src/xxhash32.rs | 14 +++++++++----- src/xxhash64.rs | 11 +++++++++-- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 172b164c3..7a1055778 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -1,8 +1,8 @@ use std::{ env, fs::File, - io::Read, hash::Hasher as _, + io::Read, path::{Path, PathBuf}, sync::mpsc::{self, SendError}, thread, diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 725976b44..d5f8a272e 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -1,6 +1,10 @@ //! The implementation of XXH32. 
-use core::{fmt, hash::{self, BuildHasher}, mem}; +use core::{ + fmt, + hash::{self, BuildHasher}, + mem, +}; use crate::{IntoU32, IntoU64}; @@ -356,7 +360,6 @@ const fn round(mut acc: u32, lane: u32) -> u32 { acc.wrapping_mul(PRIME32_1) } - /// Constructs [`Hasher`][] for multiple hasher instances. See /// the [usage warning][Hasher#caution]. pub struct State(u32); @@ -376,11 +379,12 @@ impl BuildHasher for State { } } - - #[cfg(test)] mod test { - use core::{array, hash::{BuildHasherDefault, Hasher as _}}; + use core::{ + array, + hash::{BuildHasherDefault, Hasher as _}, + }; use std::collections::HashMap; use super::*; diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 086a21d4e..fcfa55642 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -1,6 +1,10 @@ //! The implementation of XXH64. -use core::{fmt, hash::{self, BuildHasher}, mem}; +use core::{ + fmt, + hash::{self, BuildHasher}, + mem, +}; use crate::IntoU64; @@ -388,7 +392,10 @@ impl BuildHasher for State { #[cfg(test)] mod test { - use core::{array, hash::{BuildHasherDefault, Hasher as _}}; + use core::{ + array, + hash::{BuildHasherDefault, Hasher as _}, + }; use std::collections::HashMap; use super::*; From 7cf082695ce69c44690eb96d0f2ee19fe90d1c26 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 4 Jul 2024 11:50:59 -0400 Subject: [PATCH 044/166] xxh3 --- src/lib.rs | 12 ++ src/xxhash3_64.rs | 383 +++++++++++++++++++++++++++++++++++++++++ xx_hash-sys/src/lib.rs | 58 +++++++ 3 files changed, 453 insertions(+) create mode 100644 src/xxhash3_64.rs diff --git a/src/lib.rs b/src/lib.rs index 3d6713a41..130976999 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,6 +83,8 @@ pub mod xxhash64; #[cfg(feature = "xxhash64")] pub use xxhash64::Hasher as XxHash64; +pub mod xxhash3_64; + trait IntoU32 { fn into_u32(self) -> u32; } @@ -115,3 +117,13 @@ impl IntoU64 for usize { self as u64 } } + +trait IntoU128 { + fn into_u128(self) -> u128; +} + +impl IntoU128 for u64 { + fn into_u128(self) -> u128 { + 
u128::from(self) + } +} diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs new file mode 100644 index 000000000..26244fedd --- /dev/null +++ b/src/xxhash3_64.rs @@ -0,0 +1,383 @@ +#![allow(missing_docs, dead_code, non_snake_case)] + +use core::{mem, slice}; + +use crate::{IntoU128, IntoU32, IntoU64}; + +const PRIME32_1: u64 = 0x9E3779B1; +const PRIME32_2: u64 = 0x85EBCA77; +const PRIME32_3: u64 = 0xC2B2AE3D; +const PRIME64_1: u64 = 0x9E3779B185EBCA87; +const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; +const PRIME64_3: u64 = 0x165667B19E3779F9; +const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; +const PRIME64_5: u64 = 0x27D4EB2F165667C5; +const PRIME_MX1: u64 = 0x165667919E3779F9; +const PRIME_MX2: u64 = 0x9FB21C651E98DF25; + +const DEFAULT_SECRET: [u8; 192] = [ + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +]; + +pub struct XxHash3_64; + +impl XxHash3_64 { + #[inline] + 
pub fn oneshot(input: &[u8]) -> u64 { + let seed = 0; + let secret = DEFAULT_SECRET; + + match input.len() { + 0 => { + let secret_words = + unsafe { secret.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() }; + avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) + } + + 1..=3 => { + let input_length = input.len() as u8; // OK as we checked that the length fits + + let combined = input[input.len() - 1].into_u32() + | input_length.into_u32() << 8 + | input[0].into_u32() << 16 + | input[input.len() >> 1].into_u32() << 24; + + let secret_words = unsafe { secret.as_ptr().cast::<[u32; 2]>().read_unaligned() }; + let value = + ((secret_words[0] ^ secret_words[1]).into_u64() + seed) ^ combined.into_u64(); + + // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result." + avalanche_xxh64(value) + } + + 4..=8 => { + let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); + + let secret_words = + unsafe { secret.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() }; + let combined = input_last.into_u64() | (input_first.into_u64() << 32); + + let mut value = ((secret_words[0] ^ secret_words[1]) - modified_seed) ^ combined; + value ^= value.rotate_left(49) ^ value.rotate_left(24); + value = value.wrapping_mul(PRIME_MX2); + value ^= (value >> 35).wrapping_add(input.len().into_u64()); + value = value.wrapping_mul(PRIME_MX2); + value ^= value >> 28; + value + } + + 9..=16 => { + let inputFirst: u64 = unsafe { input.as_ptr().cast::().read_unaligned() }; + let inputLast: u64 = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + + let secretWords = + unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; + let low: u64 = ((secretWords[0] ^ 
secretWords[1]).wrapping_add(seed)) ^ inputFirst; + let high: u64 = ((secretWords[2] ^ secretWords[3]).wrapping_sub(seed)) ^ inputLast; + let mulResult: u128 = low.into_u128().wrapping_mul(high.into_u128()); + let value: u64 = input + .len() + .into_u64() + .wrapping_add(low.swap_bytes()) + .wrapping_add(high) + .wrapping_add(mulResult.lower_half() ^ mulResult.upper_half()); + + avalanche(value) + } + + 17..=128 => { + let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); + + let numRounds = ((input.len() - 1) >> 5) + 1; + + let mut ff = input; + let mut rr = input; + + for i in (0..numRounds).rev() { + let (ffc, ffn) = ff.split_first_chunk().unwrap(); + let (rrn, rrc) = rr.split_last_chunk().unwrap(); + + acc = acc.wrapping_add(mix_step(ffc, &secret, i * 32, seed)); + acc = acc.wrapping_add(mix_step(rrc, &secret, i * 32 + 16, seed)); + + ff = ffn; + rr = rrn; + } + + avalanche(acc) + } + + 129..=240 => { + let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); + + let (head, _tail) = input.bp_as_chunks(); + let mut head = head.into_iter(); + + for (i, chunk) in head.by_ref().take(8).enumerate() { + acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); + } + + acc = avalanche(acc); + + for (i, chunk) in head.enumerate() { + acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16 + 3, seed)); + } + + acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), &secret, 119, seed)); + + avalanche(acc) + } + + _ => todo!(), + } + } +} + +fn avalanche(mut x: u64) -> u64 { + x ^= x >> 37; + x = x.wrapping_mul(PRIME_MX1); + x ^= x >> 32; + x +} + +fn avalanche_xxh64(mut x: u64) -> u64 { + x ^= x >> 33; + x = x.wrapping_mul(PRIME64_2); + x ^= x >> 29; + x = x.wrapping_mul(PRIME64_3); + x ^= x >> 32; + x +} + +fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> u64 { + // TODO: Should these casts / reads happen outside this function? 
+ let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; + let secret_words = unsafe { + secret + .as_ptr() + .add(secret_offset) + .cast::<[u64; 2]>() + .read_unaligned() + }; + + let mul_result: u128 = { + let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); + let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); + + a.wrapping_mul(b) + }; + + mul_result.lower_half() ^ mul_result.upper_half() +} + +fn mixTwoChunks( + acc: &mut [u64; 2], + data1: &[u8; 16], + data2: &[u8; 16], + secret: &[u8], + secretOffset: usize, + seed: u64, +) { + // TODO: Should these casts / reads happen outside this function? + let dataWords1 = unsafe { data1.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion + let dataWords2 = unsafe { data2.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion + + acc[0] = acc[0] + mix_step(data1, secret, secretOffset, seed); + acc[1] = acc[1] + mix_step(data2, secret, secretOffset + 16, seed); + acc[0] = acc[0] ^ dataWords2[0].wrapping_add(dataWords2[1]); + acc[1] = acc[1] ^ dataWords1[0].wrapping_add(dataWords1[1]); +} + +trait Halves { + type Output; + + fn upper_half(self) -> Self::Output; + fn lower_half(self) -> Self::Output; +} + +impl Halves for u64 { + type Output = u32; + + #[inline] + fn upper_half(self) -> Self::Output { + (self >> 32) as _ + } + + #[inline] + fn lower_half(self) -> Self::Output { + self as _ + } +} + +impl Halves for u128 { + type Output = u64; + + #[inline] + fn upper_half(self) -> Self::Output { + (self >> 64) as _ + } + + #[inline] + fn lower_half(self) -> Self::Output { + self as _ + } +} + +trait SliceBackport { + fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); +} + +impl SliceBackport for [T] { + fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { + assert_ne!(N, 0); + let len = self.len() / N; + let (head, tail) = unsafe { self.split_at_unchecked(len) }; + let 
head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; + (head, tail) + } + + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { + assert_ne!(N, 0); + let len = self.len() / N; + let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; + (head, tail) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn hash_64bit_empty_matches_c_implementation() { + let hash = XxHash3_64::oneshot(&[]); + assert_eq!(hash, 0x2d06_8005_38d3_94c2); + } + + #[test] + fn hash_64bit_1_to_3_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[&[0; 1], &[0; 2], &[0; 3]]; + let expected = [ + 0xc44b_dff4_074e_ecdb, + 0x3325_230e_1f28_5505, + 0xeb5d_658b_b22f_286b, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } + + #[test] + fn hash_64bit_4_to_8_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[&[0; 4], &[0; 5], &[0; 6], &[0; 7], &[0; 8]]; + + let expected = [ + 0x48b2_c926_16fc_193d, + 0xe864_e589_3a27_3242, + 0x06df_7381_3892_fde7, + 0xa691_8fec_1ae6_5b70, + 0xc77b_3abb_6f87_acd9, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } + + #[test] + fn hash_64bit_9_to_16_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[ + &[0; 9], &[0; 10], &[0; 11], &[0; 12], &[0; 13], &[0; 14], &[0; 15], &[0; 16], + ]; + + let expected = [ + 0x3449_9569_f039_1857, + 0x4a9f_fcfb_2837_fbcc, + 0xae43_2800_a160_9968, + 0xc499_8f91_69c2_a4f0, + 0xdaef_f723_917d_5279, + 0xf146_5eb4_188c_41e7, + 0xba50_02d3_c3ed_6bc7, + 0xd0a6_6a65_c752_8968, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } + + #[test] + fn 
hash_64bit_17_to_128_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[ + &[0; 17], &[0; 18], &[0; 19], &[0; 126], &[0; 127], &[0; 128], + ]; + + let expected = [ + 0xc291_5ca0_df7a_d4c1, + 0xff78_21dd_f836_d020, + 0x8711_2824_6eb4_52b8, + 0x3133_805e_2401_c842, + 0x759e_ea08_c3b7_7cae, + 0x093c_29f2_7ecf_cf21, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } + + #[test] + fn hash_64bit_129_to_240_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[ + &[0; 129], &[0; 130], &[0; 131], &[0; 238], &[0; 239], &[0; 240], + ]; + + let expected = [ + 0x37f7_943e_b2f5_1359, + 0x9cc8_599a_c6e3_f7c5, + 0x9a3c_cf6f_257e_b24d, + 0xb980_bcaf_ae82_6b6a, + 0xf01b_b3be_cb26_4837, + 0x053f_0744_4f70_da08, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } +} diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 6f915cbb7..28b2298b8 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -118,3 +118,61 @@ impl Drop for XxHash64 { assert_eq!(retval, XXH_OK); } } + +// ---------- + +// type XXH_hash_t = u64; + +#[repr(C)] +pub struct XXH3_state_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +extern "C" { + fn XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + + fn XXH3_createState() -> *mut XXH3_state_t; + fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; +} + +pub struct XxHash3_64(*mut XXH3_state_t); + +impl XxHash3_64 { + pub fn oneshot(data: &[u8]) 
-> u64 { + unsafe { XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = XXH3_createState(); + XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { XXH3_64bits_digest(self.0) } + } +} + +impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } +} From 67eb967bfd9083db7fc68897de2c0e139de58045 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 5 Jul 2024 11:57:20 -0400 Subject: [PATCH 045/166] xxh3 --- src/xxhash3_64.rs | 191 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 157 insertions(+), 34 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 26244fedd..ccadcd12f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -87,8 +87,8 @@ impl XxHash3_64 { } 9..=16 => { - let inputFirst: u64 = unsafe { input.as_ptr().cast::().read_unaligned() }; - let inputLast: u64 = unsafe { + let input_first: u64 = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last: u64 = unsafe { input .as_ptr() .add(input.len()) @@ -97,17 +97,19 @@ impl XxHash3_64 { .read_unaligned() }; - let secretWords = + let secret_words = unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; - let low: u64 = ((secretWords[0] ^ secretWords[1]).wrapping_add(seed)) ^ inputFirst; - let high: u64 = ((secretWords[2] ^ secretWords[3]).wrapping_sub(seed)) ^ inputLast; - let mulResult: u128 = low.into_u128().wrapping_mul(high.into_u128()); + let low: u64 = + ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; + let high: u64 = + ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; + let mul_result: u128 = 
low.into_u128().wrapping_mul(high.into_u128()); let value: u64 = input .len() .into_u64() .wrapping_add(low.swap_bytes()) .wrapping_add(high) - .wrapping_add(mulResult.lower_half() ^ mulResult.upper_half()); + .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); avalanche(value) } @@ -115,12 +117,13 @@ impl XxHash3_64 { 17..=128 => { let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); - let numRounds = ((input.len() - 1) >> 5) + 1; + let num_rounds = ((input.len() - 1) >> 5) + 1; + // TODO: use some chunks let mut ff = input; let mut rr = input; - for i in (0..numRounds).rev() { + for i in (0..num_rounds).rev() { let (ffc, ffn) = ff.split_first_chunk().unwrap(); let (rrn, rrc) = rr.split_last_chunk().unwrap(); @@ -135,7 +138,7 @@ impl XxHash3_64 { } 129..=240 => { - let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); + let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _tail) = input.bp_as_chunks(); let mut head = head.into_iter(); @@ -155,7 +158,42 @@ impl XxHash3_64 { avalanche(acc) } - _ => todo!(), + _ => { + #[rustfmt::skip] + let mut acc = [ + PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, + ]; + + let secret_length = secret.len(); + let stripes_per_block = (secret_length - 64) / 8; + let block_size = 64 * stripes_per_block; + + let mut cc = input.chunks(block_size).fuse(); + + let last_block = cc.next_back().unwrap(); + + for block in cc { + round(&mut acc, block, &secret); + } + + let last_stripe = unsafe { + &*input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::<[u64; 8]>()) + .cast::<[u64; 8]>() + }; + + last_round(&mut acc, last_block, last_stripe, &secret); + + final_merge( + &mut acc, + input.len().into_u64().wrapping_mul(PRIME64_1), + &secret, + 11, + ) + } } } } @@ -187,7 +225,7 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> .read_unaligned() }; - let mul_result: u128 = { + let mul_result = { let a = 
(data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); @@ -197,22 +235,90 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> mul_result.lower_half() ^ mul_result.upper_half() } -fn mixTwoChunks( - acc: &mut [u64; 2], - data1: &[u8; 16], - data2: &[u8; 16], - secret: &[u8], - secretOffset: usize, - seed: u64, -) { +// fn mix_two_chunks( +// acc: &mut [u64; 2], +// data1: &[u8; 16], +// data2: &[u8; 16], +// secret: &[u8], +// secret_offset: usize, +// seed: u64, +// ) { +// // TODO: Should these casts / reads happen outside this function? +// let data_words1 = unsafe { data1.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion +// let data_words2 = unsafe { data2.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion + +// acc[0] = acc[0] + mix_step(data1, secret, secret_offset, seed); +// acc[1] = acc[1] + mix_step(data2, secret, secret_offset + 16, seed); +// acc[0] = acc[0] ^ data_words2[0].wrapping_add(data_words2[1]); +// acc[1] = acc[1] ^ data_words1[0].wrapping_add(data_words1[1]); +// } + +// Step 2-1. Process stripes in the block +fn accumulate(acc: &mut [u64; 8], stripe: &[u64; 8], secret: &[u8], secret_offset: usize) { // TODO: Should these casts / reads happen outside this function? 
- let dataWords1 = unsafe { data1.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion - let dataWords2 = unsafe { data2.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion + let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; + + for i in 0..8 { + let value = stripe[i] ^ secret_words[i]; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); + acc[i] = acc[i].wrapping_add( + value + .lower_half() + .into_u64() + .wrapping_mul(value.upper_half().into_u64()), + ); + } +} - acc[0] = acc[0] + mix_step(data1, secret, secretOffset, seed); - acc[1] = acc[1] + mix_step(data2, secret, secretOffset + 16, seed); - acc[0] = acc[0] ^ dataWords2[0].wrapping_add(dataWords2[1]); - acc[1] = acc[1] ^ dataWords1[0].wrapping_add(dataWords1[1]); +fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { + let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::<[u64; 8]>() }>(); + for (n, stripe) in stripes.iter().enumerate() { + let stripe = unsafe { &*stripe.as_ptr().cast() }; + accumulate(acc, stripe, secret, n * 8); + } +} + +fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + let secret_words = unsafe { + secret + .as_ptr() + .add(secret.len()) + .sub(mem::size_of::<[u64; 8]>()) + .cast::<[u64; 8]>() + .read_unaligned() + }; + + for i in 0..8 { + acc[i] ^= acc[i] >> 47; + acc[i] ^= secret_words[i]; + acc[i] = acc[i] * PRIME32_1; + } +} + +fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { + round_accumulate(acc, block, secret); + round_scramble(acc, secret); +} + +fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u64; 8], secret: &[u8]) { + let n_full_stripes: usize = (block.len() - 1) / 64; + for n in 0..n_full_stripes { + let stripe = unsafe { &*block.as_ptr().add(n * 64).cast::<[u64; 8]>() }; + accumulate(acc, stripe, secret, n * 8); + } + accumulate(acc, last_stripe, secret, secret.len() - 71); +} + +fn final_merge(acc: &mut [u64; 8], init_value: 
u64, secret: &[u8], secret_offset: usize) -> u64 { + let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; + let mut result: u64 = init_value; + for i in 0..4 { + // 64-bit by 64-bit multiplication to 128-bit full result + let mul_result: u128 = (acc[i * 2] ^ secret_words[i * 2]).into_u128() + * (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); + } + avalanche(result) } trait Halves { @@ -252,7 +358,7 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); + // fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } impl SliceBackport for [T] { @@ -264,13 +370,13 @@ impl SliceBackport for [T] { (head, tail) } - fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { - assert_ne!(N, 0); - let len = self.len() / N; - let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; - let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; - (head, tail) - } + // fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { + // assert_ne!(N, 0); + // let len = self.len() / N; + // let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + // let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; + // (head, tail) + // } } #[cfg(test)] @@ -380,4 +486,21 @@ mod test { assert_eq!(hash, expected, "input was {input:?}"); } } + + #[test] + fn hash_64bit_240_plus_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[&[0; 241], &[0; 242], &[0; 243], &[0; 244]]; + + let expected = [ + 0x5c5b_5d5d_40c5_9ce3, + 0xd619_7ac3_0eb7_e67b, + 0x6a04_3c8a_cf2e_dfe5, + 0x83cf_eefc_38e1_35af, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } } From 56a91e71545349b82afea9c93de6d1592c066cc9 Mon Sep 17 00:00:00 2001 From: Jake 
Goulding Date: Fri, 5 Jul 2024 13:30:19 -0400 Subject: [PATCH 046/166] xxh3 --- compare/src/lib.rs | 99 +++++++++++++++++++++++++++++++ src/lib.rs | 2 + src/xxhash3_64.rs | 143 +++++++++++++++++++++++++++------------------ 3 files changed, 187 insertions(+), 57 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 5be3f3f73..597a7254e 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -203,6 +203,105 @@ mod xxhash64 { } } +mod xxhash3_64 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher as _; + + use super::*; + + proptest! { + // #[test] + // fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + // oneshot_same_as_one_chunk_impl(seed, &data)?; + // } + + // #[test] + // fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + // oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + // } + + // #[test] + // fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + // oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + // } + + #[test] + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + // #[test] + // fn streaming_one_chunk(seed: u64, data: Vec) { + // streaming_one_chunk_impl(seed, &data)?; + // } + + // #[test] + // fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + // streaming_one_chunk_impl(seed, &data[offset..])?; + // } + } + + // fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + // let oneshot = rust::XxHash64::oneshot(seed, data); + // let one_chunk = { + // let mut hasher = rust::XxHash64::with_seed(seed); + // hasher.write(data); + // hasher.finish() + // }; + + // prop_assert_eq!(oneshot, one_chunk); + // Ok(()) + // } + + // fn oneshot_same_as_many_chunks_impl( + // seed: u64, + // data: &[u8], + // 
chunks: &[Vec], + // ) -> TestCaseResult { + // let oneshot = rust::XxHash64::oneshot(seed, data); + // let many_chunks = { + // let mut hasher = rust::XxHash64::with_seed(seed); + // for chunk in chunks { + // hasher.write(chunk); + // } + // hasher.finish() + // }; + + // prop_assert_eq!(oneshot, many_chunks); + // Ok(()) + // } + + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot(data); + let rust = rust::XxHash3_64::oneshot(data); + + prop_assert_eq!(native, rust); + Ok(()) + } + + // fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + // let native = { + // let mut hasher = c::XxHash64::with_seed(seed); + // hasher.write(data); + // hasher.finish() + // }; + + // let rust = { + // let mut hasher = rust::XxHash64::with_seed(seed); + // hasher.write(data); + // hasher.finish() + // }; + + // prop_assert_eq!(native, rust); + // Ok(()) + // } +} + fn vec_and_index() -> impl Strategy, usize)> { prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| { let len = vec.len(); diff --git a/src/lib.rs b/src/lib.rs index 130976999..f060aeb8d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -85,6 +85,8 @@ pub use xxhash64::Hasher as XxHash64; pub mod xxhash3_64; +pub use xxhash3_64::XxHash3_64; + trait IntoU32 { fn into_u32(self) -> u32; } diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ccadcd12f..e59b799b7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -123,7 +123,7 @@ impl XxHash3_64 { let mut ff = input; let mut rr = input; - for i in (0..num_rounds).rev() { + for i in 0..num_rounds { let (ffc, ffn) = ff.split_first_chunk().unwrap(); let (rrn, rrc) = rr.split_last_chunk().unwrap(); @@ -141,7 +141,7 @@ impl XxHash3_64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _tail) = input.bp_as_chunks(); - let mut head = head.into_iter(); + let mut head = head.iter(); for (i, chunk) in head.by_ref().take(8).enumerate() { acc = 
acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); @@ -291,7 +291,7 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { for i in 0..8 { acc[i] ^= acc[i] >> 47; acc[i] ^= secret_words[i]; - acc[i] = acc[i] * PRIME32_1; + acc[i] = acc[i].wrapping_mul(PRIME32_1); } } @@ -314,8 +314,11 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset let mut result: u64 = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result - let mul_result: u128 = (acc[i * 2] ^ secret_words[i * 2]).into_u128() - * (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + let mul_result: u128 = { + let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); + let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + a.wrapping_mul(b) + }; result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); } avalanche(result) @@ -381,21 +384,35 @@ impl SliceBackport for [T] { #[cfg(test)] mod test { + use std::array; + use super::*; + macro_rules! bytes { + ($($n: literal),* $(,)?) => { + &[$(&gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] + }; + } + + fn gen_bytes() -> [u8; N] { + // Picking 251 as it's a prime number, which will hopefully + // help avoid incidental power-of-two alignment. 
+ array::from_fn(|i| (i % 251) as u8) + } + #[test] - fn hash_64bit_empty_matches_c_implementation() { + fn hash_empty() { let hash = XxHash3_64::oneshot(&[]); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } #[test] - fn hash_64bit_1_to_3_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[&[0; 1], &[0; 2], &[0; 3]]; + fn hash_1_to_3_bytes() { + let inputs = bytes![1, 2, 3]; let expected = [ 0xc44b_dff4_074e_ecdb, - 0x3325_230e_1f28_5505, - 0xeb5d_658b_b22f_286b, + 0xd664_5fc3_051a_9457, + 0x5f42_99fc_161c_9cbb, ]; for (input, expected) in inputs.iter().zip(expected) { @@ -405,15 +422,15 @@ mod test { } #[test] - fn hash_64bit_4_to_8_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[&[0; 4], &[0; 5], &[0; 6], &[0; 7], &[0; 8]]; + fn hash_4_to_8_bytes() { + let inputs = bytes![4, 5, 6, 7, 8]; let expected = [ - 0x48b2_c926_16fc_193d, - 0xe864_e589_3a27_3242, - 0x06df_7381_3892_fde7, - 0xa691_8fec_1ae6_5b70, - 0xc77b_3abb_6f87_acd9, + 0x60da_b036_a582_11f2, + 0xb075_753a_84ca_0fbe, + 0xa658_4d1d_9a6a_e704, + 0x0cd2_084a_6240_6b69, + 0x3a1c_2d7c_85af_88f8, ]; for (input, expected) in inputs.iter().zip(expected) { @@ -423,20 +440,18 @@ mod test { } #[test] - fn hash_64bit_9_to_16_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[ - &[0; 9], &[0; 10], &[0; 11], &[0; 12], &[0; 13], &[0; 14], &[0; 15], &[0; 16], - ]; + fn hash_9_to_16_bytes() { + let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16]; let expected = [ - 0x3449_9569_f039_1857, - 0x4a9f_fcfb_2837_fbcc, - 0xae43_2800_a160_9968, - 0xc499_8f91_69c2_a4f0, - 0xdaef_f723_917d_5279, - 0xf146_5eb4_188c_41e7, - 0xba50_02d3_c3ed_6bc7, - 0xd0a6_6a65_c752_8968, + 0xe961_2598_145b_b9dc, + 0xab69_a08e_f83d_8f77, + 0x1cf3_96aa_4de6_198d, + 0x5ace_6a51_1c10_894b, + 0xb7a5_d8a8_309a_2cb9, + 0x4cf4_5c94_4a9a_2237, + 0x55ec_edc2_b87b_b042, + 0x8355_e3a6_f617_70db, ]; for (input, expected) in inputs.iter().zip(expected) { @@ -446,56 +461,70 @@ mod test { } #[test] - fn 
hash_64bit_17_to_128_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[ - &[0; 17], &[0; 18], &[0; 19], &[0; 126], &[0; 127], &[0; 128], - ]; + fn hash_17_to_128_bytes() { + let lower_boundary = bytes![17, 18, 19]; + let chunk_boundary = bytes![31, 32, 33]; + let upper_boundary = bytes![126, 127, 128]; + + let inputs = lower_boundary + .iter() + .chain(chunk_boundary) + .chain(upper_boundary); let expected = [ - 0xc291_5ca0_df7a_d4c1, - 0xff78_21dd_f836_d020, - 0x8711_2824_6eb4_52b8, - 0x3133_805e_2401_c842, - 0x759e_ea08_c3b7_7cae, - 0x093c_29f2_7ecf_cf21, + // lower_boundary + 0x9ef3_41a9_9de3_7328, + 0xf691_2490_d4c0_eed5, + 0x60e7_2614_3cf5_0312, + // chunk_boundary + 0x4f36_db8e_4df3_78fd, + 0x3523_581f_e96e_4c05, + 0xe68c_56ba_8899_1e58, + // upper_boundary + 0x6c2a_9eb7_459c_dc61, + 0x120b_9787_f842_5f2f, + 0x85c6_174c_7ff4_c46b, ]; - for (input, expected) in inputs.iter().zip(expected) { + for (input, expected) in inputs.zip(expected) { let hash = XxHash3_64::oneshot(input); assert_eq!(hash, expected, "input was {input:?}"); } } #[test] - fn hash_64bit_129_to_240_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[ - &[0; 129], &[0; 130], &[0; 131], &[0; 238], &[0; 239], &[0; 240], - ]; + fn hash_129_to_240_bytes() { + let lower_boundary = bytes![129, 130, 131]; + let upper_boundary = bytes![238, 239, 240]; + + let inputs = lower_boundary.iter().chain(upper_boundary); let expected = [ - 0x37f7_943e_b2f5_1359, - 0x9cc8_599a_c6e3_f7c5, - 0x9a3c_cf6f_257e_b24d, - 0xb980_bcaf_ae82_6b6a, - 0xf01b_b3be_cb26_4837, - 0x053f_0744_4f70_da08, + // lower_boundary + 0xec76_42b4_31ba_3e5a, + 0x4d32_24b1_0090_8a87, + 0xe57f_7ea6_741f_e3a0, + // upper_boundary + 0x3044_9a0b_4899_dee9, + 0x972b_14e3_c46f_214b, + 0x375a_384d_957f_e865, ]; - for (input, expected) in inputs.iter().zip(expected) { + for (input, expected) in inputs.zip(expected) { let hash = XxHash3_64::oneshot(input); assert_eq!(hash, expected, "input was {input:?}"); } } #[test] - fn 
hash_64bit_240_plus_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[&[0; 241], &[0; 242], &[0; 243], &[0; 244]]; + fn hash_240_plus_bytes() { + let inputs = bytes![241, 242, 243, 244]; let expected = [ - 0x5c5b_5d5d_40c5_9ce3, - 0xd619_7ac3_0eb7_e67b, - 0x6a04_3c8a_cf2e_dfe5, - 0x83cf_eefc_38e1_35af, + 0x02e8_cd95_421c_6d02, + 0xddcb_33c4_9405_1832, + 0x8835_f952_9193_e3dc, + 0xbc17_c91e_c3cf_8d7f, ]; for (input, expected) in inputs.iter().zip(expected) { From 4c8b99e5e5dd33d020f99da5a275854c1be82bb2 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 5 Jul 2024 13:33:01 -0400 Subject: [PATCH 047/166] xxh3 --- src/xxhash3_64.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index e59b799b7..52be0f04c 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -87,8 +87,8 @@ impl XxHash3_64 { } 9..=16 => { - let input_first: u64 = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last: u64 = unsafe { + let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last = unsafe { input .as_ptr() .add(input.len()) @@ -99,12 +99,10 @@ impl XxHash3_64 { let secret_words = unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; - let low: u64 = - ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; - let high: u64 = - ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; - let mul_result: u128 = low.into_u128().wrapping_mul(high.into_u128()); - let value: u64 = input + let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; + let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; + let mul_result = low.into_u128().wrapping_mul(high.into_u128()); + let value = input .len() .into_u64() .wrapping_add(low.swap_bytes()) @@ -115,7 +113,7 @@ impl XxHash3_64 { } 17..=128 => { - let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); 
+ let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let num_rounds = ((input.len() - 1) >> 5) + 1; @@ -301,7 +299,7 @@ fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { } fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u64; 8], secret: &[u8]) { - let n_full_stripes: usize = (block.len() - 1) / 64; + let n_full_stripes = (block.len() - 1) / 64; for n in 0..n_full_stripes { let stripe = unsafe { &*block.as_ptr().add(n * 64).cast::<[u64; 8]>() }; accumulate(acc, stripe, secret, n * 8); @@ -311,10 +309,10 @@ fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u64; 8], secret: fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; - let mut result: u64 = init_value; + let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result - let mul_result: u128 = { + let mul_result = { let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); a.wrapping_mul(b) From 48683cacbd057b7d2179c22222edc95c4dc6684d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 5 Jul 2024 16:34:57 -0400 Subject: [PATCH 048/166] chunks --- src/xxhash3_64.rs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 52be0f04c..642140911 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -366,7 +366,7 @@ impl SliceBackport for [T] { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { assert_ne!(N, 0); let len = self.len() / N; - let (head, tail) = unsafe { self.split_at_unchecked(len) }; + let (head, tail) = unsafe { self.split_at_unchecked(len * N) }; let head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; (head, tail) } @@ -530,4 +530,33 @@ mod test { assert_eq!(hash, expected, "input was {input:?}"); } } + + #[test] + fn 
backported_as_chunks() { + let x = [1, 2, 3, 4, 5]; + + let (a, b) = x.bp_as_chunks::<1>(); + assert_eq!(a, &[[1], [2], [3], [4], [5]]); + assert_eq!(b, &[]); + + let (a, b) = x.bp_as_chunks::<2>(); + assert_eq!(a, &[[1, 2], [3, 4]]); + assert_eq!(b, &[5]); + + let (a, b) = x.bp_as_chunks::<3>(); + assert_eq!(a, &[[1, 2, 3]]); + assert_eq!(b, &[4, 5]); + + let (a, b) = x.bp_as_chunks::<4>(); + assert_eq!(a, &[[1, 2, 3, 4]]); + assert_eq!(b, &[5]); + + let (a, b) = x.bp_as_chunks::<5>(); + assert_eq!(a, &[[1, 2, 3, 4, 5]]); + assert_eq!(b, &[]); + + let (a, b) = x.bp_as_chunks::<6>(); + assert_eq!(a, &[] as &[[i32; 6]]); + assert_eq!(b, &[1, 2, 3, 4, 5]); + } } From 2934e73d6ca9c9ddec0c9876df249804e3962a9f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 13:32:58 -0400 Subject: [PATCH 049/166] moar --- src/xxhash3_64.rs | 107 ++++++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 36 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 642140911..9564bd4c6 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -32,8 +32,10 @@ const DEFAULT_SECRET: [u8; 192] = [ pub struct XxHash3_64; +type Stripe = [u64; 8]; + impl XxHash3_64 { - #[inline] + #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { let seed = 0; let secret = DEFAULT_SECRET; @@ -117,19 +119,15 @@ impl XxHash3_64 { let num_rounds = ((input.len() - 1) >> 5) + 1; - // TODO: use some chunks - let mut ff = input; - let mut rr = input; - - for i in 0..num_rounds { - let (ffc, ffn) = ff.split_first_chunk().unwrap(); - let (rrn, rrc) = rr.split_last_chunk().unwrap(); + let (fwd, _) = input.bp_as_chunks(); + let (_, bwd) = input.bp_as_rchunks(); - acc = acc.wrapping_add(mix_step(ffc, &secret, i * 32, seed)); - acc = acc.wrapping_add(mix_step(rrc, &secret, i * 32 + 16, seed)); + let fwd = fwd.iter(); + let bwd = bwd.iter().rev(); - ff = ffn; - rr = rrn; + for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { + acc = 
acc.wrapping_add(mix_step(fwd_chunk, &secret, i * 32, seed)); + acc = acc.wrapping_add(mix_step(bwd_chunk, &secret, i * 32 + 16, seed)); } avalanche(acc) @@ -163,24 +161,23 @@ impl XxHash3_64 { PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, ]; - let secret_length = secret.len(); - let stripes_per_block = (secret_length - 64) / 8; + let stripes_per_block = (secret.len() - 64) / 8; let block_size = 64 * stripes_per_block; - let mut cc = input.chunks(block_size).fuse(); + let mut blocks = input.chunks(block_size).fuse(); + let last_block = blocks.next_back().unwrap(); - let last_block = cc.next_back().unwrap(); - - for block in cc { + for block in blocks { round(&mut acc, block, &secret); } let last_stripe = unsafe { - &*input + input .as_ptr() .add(input.len()) - .sub(mem::size_of::<[u64; 8]>()) - .cast::<[u64; 8]>() + .sub(mem::size_of::()) + .cast::() + .read_unaligned() }; last_round(&mut acc, last_block, last_stripe, &secret); @@ -212,6 +209,7 @@ fn avalanche_xxh64(mut x: u64) -> u64 { x } +#[inline] fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> u64 { // TODO: Should these casts / reads happen outside this function? let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; @@ -252,9 +250,11 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> // } // Step 2-1. Process stripes in the block -fn accumulate(acc: &mut [u64; 8], stripe: &[u64; 8], secret: &[u8], secret_offset: usize) { +#[inline] +fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: usize) { // TODO: Should these casts / reads happen outside this function? 
- let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; + let secret = &secret[secret_offset..]; + let secret_words = unsafe { secret.as_ptr().cast::<[u64; 8]>().read_unaligned() }; for i in 0..8 { let value = stripe[i] ^ secret_words[i]; @@ -268,14 +268,16 @@ fn accumulate(acc: &mut [u64; 8], stripe: &[u64; 8], secret: &[u8], secret_offse } } +#[inline] fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { - let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::<[u64; 8]>() }>(); + let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::() }>(); for (n, stripe) in stripes.iter().enumerate() { - let stripe = unsafe { &*stripe.as_ptr().cast() }; + let stripe = unsafe { stripe.as_ptr().cast::().read_unaligned() }; accumulate(acc, stripe, secret, n * 8); } } +#[inline] fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { let secret_words = unsafe { secret @@ -293,22 +295,25 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { } } +#[inline] fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { round_accumulate(acc, block, secret); round_scramble(acc, secret); } -fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u64; 8], secret: &[u8]) { +#[inline] +fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u8]) { let n_full_stripes = (block.len() - 1) / 64; for n in 0..n_full_stripes { - let stripe = unsafe { &*block.as_ptr().add(n * 64).cast::<[u64; 8]>() }; + let stripe = unsafe { block.as_ptr().add(n * 64).cast::().read_unaligned() }; accumulate(acc, stripe, secret, n * 8); } accumulate(acc, last_stripe, secret, secret.len() - 71); } +#[inline] fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { - let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; + let secret_words = unsafe { secret.as_ptr().add(secret_offset).cast::<[u64; 8]>().read_unaligned() }; let mut result = init_value; for i in 
0..4 { // 64-bit by 64-bit multiplication to 128-bit full result @@ -359,7 +364,7 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - // fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } impl SliceBackport for [T] { @@ -371,13 +376,13 @@ impl SliceBackport for [T] { (head, tail) } - // fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { - // assert_ne!(N, 0); - // let len = self.len() / N; - // let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; - // let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; - // (head, tail) - // } + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { + assert_ne!(N, 0); + let len = self.len() / N; + let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; + (head, tail) + } } #[cfg(test)] @@ -407,6 +412,7 @@ mod test { #[test] fn hash_1_to_3_bytes() { let inputs = bytes![1, 2, 3]; + let expected = [ 0xc44b_dff4_074e_ecdb, 0xd664_5fc3_051a_9457, @@ -559,4 +565,33 @@ mod test { assert_eq!(a, &[] as &[[i32; 6]]); assert_eq!(b, &[1, 2, 3, 4, 5]); } + + #[test] + fn backported_as_rchunks() { + let x = [1, 2, 3, 4, 5]; + + let (a, b) = x.bp_as_rchunks::<1>(); + assert_eq!(a, &[]); + assert_eq!(b, &[[1], [2], [3], [4], [5]]); + + let (a, b) = x.bp_as_rchunks::<2>(); + assert_eq!(a, &[1]); + assert_eq!(b, &[[2, 3], [4, 5]]); + + let (a, b) = x.bp_as_rchunks::<3>(); + assert_eq!(a, &[1, 2]); + assert_eq!(b, &[[3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<4>(); + assert_eq!(a, &[1]); + assert_eq!(b, &[[2, 3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<5>(); + assert_eq!(a, &[]); + assert_eq!(b, &[[1, 2, 3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<6>(); + assert_eq!(a, &[1, 2, 3, 4, 5]); + assert_eq!(b, &[] as &[[i32; 6]]); + } } From 7b233ea7694b774448cb1b0d579655bfdde18d23 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: 
Tue, 9 Jul 2024 14:24:56 -0400 Subject: [PATCH 050/166] fmt --- src/xxhash3_64.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 9564bd4c6..0c77f7f11 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -259,12 +259,11 @@ fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: for i in 0..8 { let value = stripe[i] ^ secret_words[i]; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); - acc[i] = acc[i].wrapping_add( - value - .lower_half() - .into_u64() - .wrapping_mul(value.upper_half().into_u64()), - ); + acc[i] = acc[i].wrapping_add({ + let a = value.lower_half().into_u64(); + let b = value.upper_half().into_u64(); + a.wrapping_mul(b) + }); } } From 4116f6b901a207ac6b05c8f356977e5544692371 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 14:55:34 -0400 Subject: [PATCH 051/166] recover --- src/xxhash3_64.rs | 55 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 0c77f7f11..340f6bb76 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -268,11 +268,34 @@ fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: } #[inline] -fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { +fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? 
+ let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = acc[i].wrapping_add({ + let a = value.lower_half().into_u64(); + let b = value.upper_half().into_u64(); + a.wrapping_mul(b) + }); + } +} + +#[inline] +fn round_accumulate(acc: &mut [u64; 8], block: &[u8], mut secret: &[u8]) { let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::() }>(); - for (n, stripe) in stripes.iter().enumerate() { - let stripe = unsafe { stripe.as_ptr().cast::().read_unaligned() }; - accumulate(acc, stripe, secret, n * 8); + + let secrets = iter::from_fn(|| { + let (c, _) = secret.split_first_chunk()?; + secret = &secret[8..]; + Some(c) + }); + + for (stripe, secret) in stripes.iter().zip(secrets) { + accumulate_hot(acc, stripe, secret); } } @@ -312,7 +335,13 @@ fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u #[inline] fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { - let secret_words = unsafe { secret.as_ptr().add(secret_offset).cast::<[u64; 8]>().read_unaligned() }; + let secret_words = unsafe { + secret + .as_ptr() + .add(secret_offset) + .cast::<[u64; 8]>() + .read_unaligned() + }; let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result @@ -420,7 +449,7 @@ mod test { for (input, expected) in inputs.iter().zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } @@ -438,7 +467,7 @@ mod test { for (input, expected) in inputs.iter().zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } @@ -459,7 +488,7 @@ mod test { for (input, 
expected) in inputs.iter().zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } @@ -491,7 +520,7 @@ mod test { for (input, expected) in inputs.zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } @@ -515,24 +544,26 @@ mod test { for (input, expected) in inputs.zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] fn hash_240_plus_bytes() { - let inputs = bytes![241, 242, 243, 244]; + let inputs = bytes![241, 242, 243, 244, 1024, 10240]; let expected = [ 0x02e8_cd95_421c_6d02, 0xddcb_33c4_9405_1832, 0x8835_f952_9193_e3dc, 0xbc17_c91e_c3cf_8d7f, + 0xe5d7_8baf_a45b_2aa5, + 0xbcd6_3266_df6e_2244, ]; for (input, expected) in inputs.iter().zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } From 03683aaafcf44f7eb80864601fef6fba3b95ac9a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 15:07:53 -0400 Subject: [PATCH 052/166] little helper --- Cargo.toml | 1 + asmasm/Cargo.toml | 7 +++++++ asmasm/src/main.rs | 12 ++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 asmasm/Cargo.toml create mode 100644 asmasm/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 99b104214..899e3b220 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [workspace] members = [ + "asmasm", "compare", "renu-sum", "xx_hash-sys", diff --git a/asmasm/Cargo.toml b/asmasm/Cargo.toml new file mode 100644 index 000000000..7134d8f83 --- /dev/null +++ b/asmasm/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "asmasm" +version = "0.1.0" +edition = "2021" + 
+[dependencies] +xx-renu = { path = ".." } diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs new file mode 100644 index 000000000..7e130d879 --- /dev/null +++ b/asmasm/src/main.rs @@ -0,0 +1,12 @@ +use std::{hint::black_box, time::Instant}; +use xx_renu::xxhash3_64::XxHash3_64; + +fn main() { + let filename = std::env::args().nth(1).expect("filename"); + let file = std::fs::read(filename).expect("read"); + let start = Instant::now(); + let hash = XxHash3_64::oneshot(&file); + let elapsed = start.elapsed(); + black_box(hash); + eprintln!("{elapsed:?}"); +} From a2455467bec1c89d28b0e8cec0c2139948044fc7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 15:08:11 -0400 Subject: [PATCH 053/166] bencha --- compare/benches/benchmark.rs | 38 +++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 2bce74e6c..bf0c0b821 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -166,5 +166,41 @@ fn half_sizes(max: usize) -> impl Iterator { iter::successors(Some(max), |&v| if v == 1 { None } else { Some(v / 2) }) } +mod xxhash3_64 { + use super::*; + + fn oneshot(c: &mut Criterion) { + let (seed, data) = gen_data(BIG_DATA_SIZE); + let mut g = c.benchmark_group("xxhash3_64/oneshot"); + + for size in [data.len()] { + //half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE)} { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("impl-c/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = c::XxHash3_64::oneshot(data); + black_box(hash); + }) + }); + + let id = format!("impl-rust/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = rust::XxHash3_64::oneshot(data); + black_box(hash); + }) + }); + } + + g.finish(); + } + + criterion_group!(benches, oneshot); +} + criterion_group!(benches, tiny_data, oneshot, streaming); 
-criterion_main!(benches); + +criterion_main!(benches, xxhash3_64::benches); From 460904a3992a14412dc86f6d6bdfa8ba9a612884 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 15:08:54 -0400 Subject: [PATCH 054/166] moar --- src/xxhash3_64.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 340f6bb76..c616ff174 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ -#![allow(missing_docs, dead_code, non_snake_case)] +#![allow(missing_docs)] -use core::{mem, slice}; +use core::{mem, slice, iter}; use crate::{IntoU128, IntoU32, IntoU64}; From 59d71b4e97119bcd325145f543a31b5b9fb1147e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 16:07:48 -0400 Subject: [PATCH 055/166] moresafe --- src/xxhash3_64.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c616ff174..ffc7a6aee 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -301,19 +301,14 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], mut secret: &[u8]) { #[inline] fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let secret_words = unsafe { - secret - .as_ptr() - .add(secret.len()) - .sub(mem::size_of::<[u64; 8]>()) - .cast::<[u64; 8]>() - .read_unaligned() - }; - - for i in 0..8 { - acc[i] ^= acc[i] >> 47; - acc[i] ^= secret_words[i]; - acc[i] = acc[i].wrapping_mul(PRIME32_1); + let last = secret.last_chunk::<{mem::size_of::<[u8; 64]>()}>().unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); } } From 73ad587210f719ab6647851540f19f32368f5422 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 11:10:39 -0400 Subject: [PATCH 056/166] reorg --- src/xxhash3_64.rs | 430 
++++++++++++++++++++++++---------------------- 1 file changed, 225 insertions(+), 205 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ffc7a6aee..b0a13d9bd 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -use core::{mem, slice, iter}; +use core::{mem, slice}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -39,174 +39,141 @@ impl XxHash3_64 { pub fn oneshot(input: &[u8]) -> u64 { let seed = 0; let secret = DEFAULT_SECRET; + let secret = &secret[..]; match input.len() { - 0 => { - let secret_words = - unsafe { secret.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() }; - avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) - } - - 1..=3 => { - let input_length = input.len() as u8; // OK as we checked that the length fits - - let combined = input[input.len() - 1].into_u32() - | input_length.into_u32() << 8 - | input[0].into_u32() << 16 - | input[input.len() >> 1].into_u32() << 24; - - let secret_words = unsafe { secret.as_ptr().cast::<[u32; 2]>().read_unaligned() }; - let value = - ((secret_words[0] ^ secret_words[1]).into_u64() + seed) ^ combined.into_u64(); - - // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result." 
- avalanche_xxh64(value) - } - - 4..=8 => { - let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; - let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); - - let secret_words = - unsafe { secret.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() }; - let combined = input_last.into_u64() | (input_first.into_u64() << 32); - - let mut value = ((secret_words[0] ^ secret_words[1]) - modified_seed) ^ combined; - value ^= value.rotate_left(49) ^ value.rotate_left(24); - value = value.wrapping_mul(PRIME_MX2); - value ^= (value >> 35).wrapping_add(input.len().into_u64()); - value = value.wrapping_mul(PRIME_MX2); - value ^= value >> 28; - value - } - - 9..=16 => { - let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; - - let secret_words = - unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; - let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; - let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; - let mul_result = low.into_u128().wrapping_mul(high.into_u128()); - let value = input - .len() - .into_u64() - .wrapping_add(low.swap_bytes()) - .wrapping_add(high) - .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); - - avalanche(value) - } - - 17..=128 => { - let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - - let num_rounds = ((input.len() - 1) >> 5) + 1; - - let (fwd, _) = input.bp_as_chunks(); - let (_, bwd) = input.bp_as_rchunks(); - - let fwd = fwd.iter(); - let bwd = bwd.iter().rev(); - - for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { - acc = acc.wrapping_add(mix_step(fwd_chunk, &secret, i * 32, seed)); - acc = 
acc.wrapping_add(mix_step(bwd_chunk, &secret, i * 32 + 16, seed)); - } - - avalanche(acc) - } - - 129..=240 => { - let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - - let (head, _tail) = input.bp_as_chunks(); - let mut head = head.iter(); - - for (i, chunk) in head.by_ref().take(8).enumerate() { - acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); - } - - acc = avalanche(acc); - - for (i, chunk) in head.enumerate() { - acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16 + 3, seed)); - } - - acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), &secret, 119, seed)); - - avalanche(acc) - } - - _ => { - #[rustfmt::skip] - let mut acc = [ - PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, - PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, - ]; - - let stripes_per_block = (secret.len() - 64) / 8; - let block_size = 64 * stripes_per_block; - - let mut blocks = input.chunks(block_size).fuse(); - let last_block = blocks.next_back().unwrap(); - - for block in blocks { - round(&mut acc, block, &secret); - } - - let last_stripe = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; - - last_round(&mut acc, last_block, last_stripe, &secret); - - final_merge( - &mut acc, - input.len().into_u64().wrapping_mul(PRIME64_1), - &secret, - 11, - ) - } + 0 => impl_0_bytes(secret, seed), + + 1..=3 => impl_1_to_3_bytes(secret, seed, input), + + 4..=8 => impl_4_to_8_bytes(secret, seed, input), + + 9..=16 => impl_9_to_16_bytes(secret, seed, input), + + 17..=128 => impl_17_to_128_bytes(secret, seed, input), + + 129..=240 => impl_129_to_240_bytes(secret, seed, input), + + _ => impl_241_plus_bytes(secret, input), } } } -fn avalanche(mut x: u64) -> u64 { - x ^= x >> 37; - x = x.wrapping_mul(PRIME_MX1); - x ^= x >> 32; - x +#[inline] +fn impl_0_bytes(secret: &[u8], seed: u64) -> u64 { + let secret_words = unsafe { secret.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() }; + avalanche_xxh64(seed ^ 
secret_words[0] ^ secret_words[1]) } -fn avalanche_xxh64(mut x: u64) -> u64 { - x ^= x >> 33; - x = x.wrapping_mul(PRIME64_2); - x ^= x >> 29; - x = x.wrapping_mul(PRIME64_3); - x ^= x >> 32; - x +#[inline] +fn impl_1_to_3_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let input_length = input.len() as u8; // OK as we checked that the length fits + + let combined = input[input.len() - 1].into_u32() + | input_length.into_u32() << 8 + | input[0].into_u32() << 16 + | input[input.len() >> 1].into_u32() << 24; + + let secret_words = unsafe { secret.as_ptr().cast::<[u32; 2]>().read_unaligned() }; + + let value = ((secret_words[0] ^ secret_words[1]).into_u64() + seed) ^ combined.into_u64(); + + // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result." + avalanche_xxh64(value) +} + +#[inline] +fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + + let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); + let secret_words = unsafe { secret.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() }; + + let combined = input_last.into_u64() | (input_first.into_u64() << 32); + + let mut value = ((secret_words[0] ^ secret_words[1]) - modified_seed) ^ combined; + value ^= value.rotate_left(49) ^ value.rotate_left(24); + value = value.wrapping_mul(PRIME_MX2); + value ^= (value >> 35).wrapping_add(input.len().into_u64()); + value = value.wrapping_mul(PRIME_MX2); + value ^= value >> 28; + value +} + +#[inline] +fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + + let secret_words = unsafe 
{ secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; + let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; + let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; + let mul_result = low.into_u128().wrapping_mul(high.into_u128()); + let value = input + .len() + .into_u64() + .wrapping_add(low.swap_bytes()) + .wrapping_add(high) + .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); + + avalanche(value) +} + +#[inline] +fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); + + let num_rounds = ((input.len() - 1) >> 5) + 1; + + let (fwd, _) = input.bp_as_chunks(); + let (_, bwd) = input.bp_as_rchunks(); + + let fwd = fwd.iter(); + let bwd = bwd.iter().rev(); + + for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { + acc = acc.wrapping_add(mix_step(fwd_chunk, &secret, i * 32, seed)); + acc = acc.wrapping_add(mix_step(bwd_chunk, &secret, i * 32 + 16, seed)); + } + + avalanche(acc) +} + +#[inline] +fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); + + let (head, _tail) = input.bp_as_chunks(); + let mut head = head.iter(); + + for (i, chunk) in head.by_ref().take(8).enumerate() { + acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); + } + + acc = avalanche(acc); + + for (i, chunk) in head.enumerate() { + acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16 + 3, seed)); + } + + acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), &secret, 119, seed)); + + avalanche(acc) } #[inline] @@ -249,50 +216,55 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> // acc[1] = acc[1] ^ data_words1[0].wrapping_add(data_words1[1]); // } -// Step 2-1. 
Process stripes in the block -#[inline] -fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: usize) { - // TODO: Should these casts / reads happen outside this function? - let secret = &secret[secret_offset..]; - let secret_words = unsafe { secret.as_ptr().cast::<[u64; 8]>().read_unaligned() }; +#[rustfmt::skip] +const INITIAL_ACCUMULATORS: [u64; 8] = [ + PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, +]; - for i in 0..8 { - let value = stripe[i] ^ secret_words[i]; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); - acc[i] = acc[i].wrapping_add({ - let a = value.lower_half().into_u64(); - let b = value.upper_half().into_u64(); - a.wrapping_mul(b) - }); +#[inline(never)] +fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { + let mut acc = INITIAL_ACCUMULATORS; + + let stripes_per_block = (secret.len() - 64) / 8; + let block_size = 64 * stripes_per_block; + + let mut blocks = input.chunks(block_size).fuse(); + let last_block = blocks.next_back().unwrap(); + let last_stripe = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + + for block in blocks { + round(&mut acc, block, secret); } + + last_round(&mut acc, last_block, last_stripe, secret); + + final_merge( + &mut acc, + input.len().into_u64().wrapping_mul(PRIME64_1), + secret, + 11, + ) } #[inline] -fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? 
- let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = acc[i].wrapping_add({ - let a = value.lower_half().into_u64(); - let b = value.upper_half().into_u64(); - a.wrapping_mul(b) - }); - } +fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { + round_accumulate(acc, block, secret); + round_scramble(acc, secret); } #[inline] -fn round_accumulate(acc: &mut [u64; 8], block: &[u8], mut secret: &[u8]) { +fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::() }>(); - - let secrets = iter::from_fn(|| { - let (c, _) = secret.split_first_chunk()?; - secret = &secret[8..]; - Some(c) - }); + let secrets = + (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); for (stripe, secret) in stripes.iter().zip(secrets) { accumulate_hot(acc, stripe, secret); @@ -301,7 +273,9 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], mut secret: &[u8]) { #[inline] fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let last = secret.last_chunk::<{mem::size_of::<[u8; 64]>()}>().unwrap(); + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); let (last, _) = last.bp_as_chunks(); let last = last.iter().copied().map(u64::from_ne_bytes); @@ -312,13 +286,7 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { } } -#[inline] -fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { - round_accumulate(acc, block, secret); - round_scramble(acc, secret); -} - -#[inline] +#[inline(never)] fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u8]) { let n_full_stripes = (block.len() - 1) / 64; for n in 0..n_full_stripes { @@ -328,7 +296,7 @@ fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u accumulate(acc, 
last_stripe, secret, secret.len() - 71); } -#[inline] +#[inline(never)] fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { let secret_words = unsafe { secret @@ -350,6 +318,58 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset avalanche(result) } +#[inline(never)] +fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: usize) { + // TODO: Should these casts / reads happen outside this function? + let secret = &secret[secret_offset..]; + let secret_words = unsafe { secret.as_ptr().cast::<[u64; 8]>().read_unaligned() }; + + for i in 0..8 { + let value = stripe[i] ^ secret_words[i]; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); + acc[i] = acc[i].wrapping_add({ + let a = value.lower_half().into_u64(); + let b = value.upper_half().into_u64(); + a.wrapping_mul(b) + }); + } +} + +#[inline] +fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? 
+ let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = acc[i].wrapping_add({ + let a = value.lower_half().into_u64(); + let b = value.upper_half().into_u64(); + a.wrapping_mul(b) + }); + } +} + +#[inline] +fn avalanche(mut x: u64) -> u64 { + x ^= x >> 37; + x = x.wrapping_mul(PRIME_MX1); + x ^= x >> 32; + x +} + +#[inline] +fn avalanche_xxh64(mut x: u64) -> u64 { + x ^= x >> 33; + x = x.wrapping_mul(PRIME64_2); + x ^= x >> 29; + x = x.wrapping_mul(PRIME64_3); + x ^= x >> 32; + x +} + trait Halves { type Output; From b26aebc353f20cc5402e7d885ebd8fb7dc7f81fc Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 11:43:21 -0400 Subject: [PATCH 057/166] reorg --- src/xxhash3_64.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index b0a13d9bd..19c38fbb9 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -38,8 +38,7 @@ impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { let seed = 0; - let secret = DEFAULT_SECRET; - let secret = &secret[..]; + let secret = &DEFAULT_SECRET; match input.len() { 0 => impl_0_bytes(secret, seed), From 485428382e85b27777a66637f845c3f977e883f3 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 13:42:56 -0400 Subject: [PATCH 058/166] faster --- src/xxhash3_64.rs | 96 ++++++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 19c38fbb9..1a1b9f592 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -221,7 +221,7 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, ]; -#[inline(never)] +#[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; @@ -230,13 +230,12 @@ 
fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { let mut blocks = input.chunks(block_size).fuse(); let last_block = blocks.next_back().unwrap(); - let last_stripe = unsafe { - input + let last_stripe: &[u8; 64] = unsafe { + &*input .as_ptr() .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() + .sub(mem::size_of::<[u8; 64]>()) + .cast() }; for block in blocks { @@ -255,6 +254,8 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { #[inline] fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { +// unsafe { core::arch::aarch64::_prefetch(block.as_ptr().cast(), _PREFETCH_READ, _PREFETCH_LOCALITY3) }; + round_accumulate(acc, block, secret); round_scramble(acc, secret); } @@ -266,7 +267,9 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); for (stripe, secret) in stripes.iter().zip(secrets) { - accumulate_hot(acc, stripe, secret); + // todo cast to bigger to specify how much to fetch? 
+ unsafe { core::arch::aarch64::_prefetch(stripe.as_ptr().cast(), _PREFETCH_READ, _PREFETCH_LOCALITY3) }; + accumulate(acc, stripe, secret); } } @@ -285,17 +288,27 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { } } -#[inline(never)] -fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u8]) { - let n_full_stripes = (block.len() - 1) / 64; - for n in 0..n_full_stripes { - let stripe = unsafe { block.as_ptr().add(n * 64).cast::().read_unaligned() }; - accumulate(acc, stripe, secret, n * 8); +#[inline] +fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { + // Accumulation steps are run for the stripes in the last block, + // except for the last stripe (whether it is full or not) + let stripes = match block.bp_as_chunks() { + ([stripes @ .., _last], []) => stripes, + (stripes, _last) => stripes, + }; + let secrets = + (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); + + for (stripe, secret) in stripes.iter().zip(secrets) { + accumulate(acc, stripe, secret); } - accumulate(acc, last_stripe, secret, secret.len() - 71); + + let q = &secret[secret.len() - 71..]; + let q: &[u8; 64] = unsafe { &*q.as_ptr().cast() }; + accumulate(acc, last_stripe, q); } -#[inline(never)] +#[inline] fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { let secret_words = unsafe { secret @@ -317,25 +330,8 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset avalanche(result) } -#[inline(never)] -fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: usize) { - // TODO: Should these casts / reads happen outside this function? 
- let secret = &secret[secret_offset..]; - let secret_words = unsafe { secret.as_ptr().cast::<[u64; 8]>().read_unaligned() }; - - for i in 0..8 { - let value = stripe[i] ^ secret_words[i]; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); - acc[i] = acc[i].wrapping_add({ - let a = value.lower_half().into_u64(); - let b = value.upper_half().into_u64(); - a.wrapping_mul(b) - }); - } -} - #[inline] -fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { +fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { for i in 0..8 { // TODO: Should these casts / reads happen outside this function? let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; @@ -343,14 +339,38 @@ fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = acc[i].wrapping_add({ - let a = value.lower_half().into_u64(); - let b = value.upper_half().into_u64(); - a.wrapping_mul(b) - }); + + acc[i] = multiply_and_add(value, value >> 32, acc[i]); } } +#[inline] +#[cfg(not(target_arch = "aarch64"))] +fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + acc.wrapping_add({ + let a = (lhs as u32).into_u64(); + let b = (rhs as u32).into_u64(); + a.wrapping_mul(b) + }) +} + +#[inline] +// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 +#[cfg(target_arch = "aarch64")] +fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + let res; + + unsafe { asm!( + "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", + lhs = in(reg) lhs, + rhs = in(reg) rhs, + acc = in(reg) acc, + res = out(reg) res, + ) } + + res +} + #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; From 273e81fa3e7ad60f9c2dc0f8ce1ec8075f7a40da Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 13:47:39 -0400 Subject: [PATCH 059/166] faster --- src/xxhash3_64.rs | 29 +++++++++++++---------------- 1 file 
changed, 13 insertions(+), 16 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 1a1b9f592..024981860 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -use core::{mem, slice}; +use core::{mem, slice, arch::asm}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -254,8 +254,6 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { #[inline] fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { -// unsafe { core::arch::aarch64::_prefetch(block.as_ptr().cast(), _PREFETCH_READ, _PREFETCH_LOCALITY3) }; - round_accumulate(acc, block, secret); round_scramble(acc, secret); } @@ -267,8 +265,6 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); for (stripe, secret) in stripes.iter().zip(secrets) { - // todo cast to bigger to specify how much to fetch? - unsafe { core::arch::aarch64::_prefetch(stripe.as_ptr().cast(), _PREFETCH_READ, _PREFETCH_LOCALITY3) }; accumulate(acc, stripe, secret); } } @@ -339,14 +335,13 @@ fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - - acc[i] = multiply_and_add(value, value >> 32, acc[i]); + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); } } #[inline] #[cfg(not(target_arch = "aarch64"))] -fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { +fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { acc.wrapping_add({ let a = (lhs as u32).into_u64(); let b = (rhs as u32).into_u64(); @@ -357,16 +352,18 @@ fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 #[cfg(target_arch = "aarch64")] -fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { +fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> 
u64 { let res; - unsafe { asm!( - "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", - lhs = in(reg) lhs, - rhs = in(reg) rhs, - acc = in(reg) acc, - res = out(reg) res, - ) } + unsafe { + asm!( + "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", + lhs = in(reg) lhs, + rhs = in(reg) rhs, + acc = in(reg) acc, + res = out(reg) res, + ) + } res } From 010cc9667d1d50a08a823f25d6923699f026f3d5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 14:30:14 -0400 Subject: [PATCH 060/166] asm compare for C --- asmasm/Cargo.toml | 1 + asmasm/src/main.rs | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/asmasm/Cargo.toml b/asmasm/Cargo.toml index 7134d8f83..511b782de 100644 --- a/asmasm/Cargo.toml +++ b/asmasm/Cargo.toml @@ -5,3 +5,4 @@ edition = "2021" [dependencies] xx-renu = { path = ".." } +xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index 7e130d879..e515bcdfc 100644 --- a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,12 +1,25 @@ use std::{hint::black_box, time::Instant}; +use xx_hash_sys::XxHash3_64 as C; use xx_renu::xxhash3_64::XxHash3_64; fn main() { let filename = std::env::args().nth(1).expect("filename"); + let use_c = std::env::args() + .nth(2) + .map_or(false, |a| a.eq_ignore_ascii_case("C")); let file = std::fs::read(filename).expect("read"); - let start = Instant::now(); - let hash = XxHash3_64::oneshot(&file); - let elapsed = start.elapsed(); - black_box(hash); - eprintln!("{elapsed:?}"); + + if use_c { + let start = Instant::now(); + let hash = C::oneshot(&file); + let elapsed = start.elapsed(); + black_box(hash); + eprintln!("C {elapsed:?}"); + } else { + let start = Instant::now(); + let hash = XxHash3_64::oneshot(&file); + let elapsed = start.elapsed(); + black_box(hash); + eprintln!("Rust {elapsed:?}"); + } } From baa97991d527f1c3a369faf9dc7788f55d122a4e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 14:43:02 -0400 Subject: [PATCH 061/166] 
clean --- src/xxhash3_64.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 024981860..a39043574 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -use core::{mem, slice, arch::asm}; +use core::{mem, slice}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -146,8 +146,8 @@ fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let bwd = bwd.iter().rev(); for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { - acc = acc.wrapping_add(mix_step(fwd_chunk, &secret, i * 32, seed)); - acc = acc.wrapping_add(mix_step(bwd_chunk, &secret, i * 32 + 16, seed)); + acc = acc.wrapping_add(mix_step(fwd_chunk, secret, i * 32, seed)); + acc = acc.wrapping_add(mix_step(bwd_chunk, secret, i * 32 + 16, seed)); } avalanche(acc) @@ -161,16 +161,16 @@ fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let mut head = head.iter(); for (i, chunk) in head.by_ref().take(8).enumerate() { - acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); + acc = acc.wrapping_add(mix_step(chunk, secret, i * 16, seed)); } acc = avalanche(acc); for (i, chunk) in head.enumerate() { - acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16 + 3, seed)); + acc = acc.wrapping_add(mix_step(chunk, secret, i * 16 + 3, seed)); } - acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), &secret, 119, seed)); + acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), secret, 119, seed)); avalanche(acc) } @@ -342,17 +342,19 @@ fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { #[inline] #[cfg(not(target_arch = "aarch64"))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - acc.wrapping_add({ - let a = (lhs as u32).into_u64(); - let b = (rhs as u32).into_u64(); - a.wrapping_mul(b) - }) + let lhs = (lhs as u32).into_u64(); + let rhs = (rhs as u32).into_u64(); + + let 
product = lhs.wrapping_mul(rhs); + acc.wrapping_add(product) } #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + use core::arch::asm; + let res; unsafe { From d680cd95440cd5d405c8c380033d93b37fa11923 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 12:19:55 -0400 Subject: [PATCH 062/166] doc cfgs --- Cargo.toml | 3 +++ src/lib.rs | 9 +++++++++ src/xxhash32.rs | 3 +++ src/xxhash64.rs | 3 +++ 4 files changed, 18 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 899e3b220..57d98e056 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,3 +27,6 @@ serde = { version = "1.0.0", optional = true, default-features = false, features [dev-dependencies] serde_json = "1.0.117" + +[package.metadata.docs.rs] +all-features = true diff --git a/src/lib.rs b/src/lib.rs index f060aeb8d..597fb5d48 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,24 +67,33 @@ #![no_std] #![deny(rust_2018_idioms)] #![deny(missing_docs)] +#![cfg_attr(docsrs, feature(doc_cfg))] #[cfg(any(doc, test))] extern crate std; #[cfg(feature = "xxhash32")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] pub mod xxhash32; #[cfg(feature = "xxhash32")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] pub use xxhash32::Hasher as XxHash32; #[cfg(feature = "xxhash64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] pub mod xxhash64; #[cfg(feature = "xxhash64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] pub use xxhash64::Hasher as XxHash64; +#[cfg(feature = "xxhash3_64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub mod xxhash3_64; +#[cfg(feature = "xxhash3_64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub use xxhash3_64::XxHash3_64; trait IntoU32 { diff --git a/src/xxhash32.rs b/src/xxhash32.rs index d5f8a272e..097a4c129 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ 
-497,6 +497,7 @@ mod test { } #[cfg(feature = "random")] +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] mod random_impl { use super::*; @@ -540,9 +541,11 @@ mod random_impl { } #[cfg(feature = "random")] +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] pub use random_impl::*; #[cfg(feature = "serialize")] +#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))] mod serialize_impl { use serde::{Deserialize, Serialize}; diff --git a/src/xxhash64.rs b/src/xxhash64.rs index fcfa55642..f488e429f 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -486,6 +486,7 @@ mod test { } #[cfg(feature = "random")] +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] mod random_impl { use super::*; @@ -529,9 +530,11 @@ mod random_impl { } #[cfg(feature = "random")] +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] pub use random_impl::*; #[cfg(feature = "serialize")] +#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))] mod serialize_impl { use serde::{Deserialize, Serialize}; From fbe55dd268b4f5656d33800f1db17bea695de288 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 12:20:14 -0400 Subject: [PATCH 063/166] flag it --- Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 57d98e056..1a515e981 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64"] random = ["dep:rand"] @@ -20,6 +20,7 @@ serialize = ["dep:serde"] xxhash32 = [] xxhash64 = [] +xxhash3_64 = [] [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } From 8833e84ca278a3d0f8f4e6876a73bb2e52f5dc56 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 13:36:33 -0400 Subject: [PATCH 064/166] seed interface --- compare/src/lib.rs | 4 +- src/xxhash3_64.rs | 83 ++++++++++++++++++++++++++++++++++++------ xx_hash-sys/src/lib.rs | 9 +++++ 3 files 
changed, 82 insertions(+), 14 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 597a7254e..9b0e77d6d 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -277,8 +277,8 @@ mod xxhash3_64 { // } fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = c::XxHash3_64::oneshot(data); - let rust = rust::XxHash3_64::oneshot(data); + let native = c::XxHash3_64::oneshot_with_seed(seed, data); + let rust = rust::XxHash3_64::oneshot_with_seed(seed, data); prop_assert_eq!(native, rust); Ok(()) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index a39043574..79bb012a7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -37,24 +37,58 @@ type Stripe = [u64; 8]; impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { - let seed = 0; - let secret = &DEFAULT_SECRET; + impl_oneshot(&DEFAULT_SECRET, 0, input) + } - match input.len() { - 0 => impl_0_bytes(secret, seed), + #[inline(never)] + pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { + let secret = if seed != 0 && input.len() > 240 { + &derive_secret(seed) + } else { + &DEFAULT_SECRET + }; - 1..=3 => impl_1_to_3_bytes(secret, seed, input), + impl_oneshot(secret, seed, input) + } +} - 4..=8 => impl_4_to_8_bytes(secret, seed, input), +fn derive_secret(seed: u64) -> [u8; 192] { + let mut derived_secret = DEFAULT_SECRET; + let base = derived_secret.as_mut_ptr().cast::(); - 9..=16 => impl_9_to_16_bytes(secret, seed, input), + for i in 0..12 { + let a_p = unsafe { base.add(i * 2) }; + let b_p = unsafe { base.add(i * 2 + 1) }; - 17..=128 => impl_17_to_128_bytes(secret, seed, input), + let mut a = unsafe { a_p.read_unaligned() }; + let mut b = unsafe { b_p.read_unaligned() }; - 129..=240 => impl_129_to_240_bytes(secret, seed, input), + a = a.wrapping_add(seed); + b = b.wrapping_sub(seed); - _ => impl_241_plus_bytes(secret, input), - } + unsafe { a_p.write_unaligned(a) }; + unsafe { b_p.write_unaligned(b) }; + } + + derived_secret +} + +#[inline] +fn 
impl_oneshot(secret: &[u8; 192], seed: u64, input: &[u8]) -> u64 { + match input.len() { + 0 => impl_0_bytes(secret, seed), + + 1..=3 => impl_1_to_3_bytes(secret, seed, input), + + 4..=8 => impl_4_to_8_bytes(secret, seed, input), + + 9..=16 => impl_9_to_16_bytes(secret, seed, input), + + 17..=128 => impl_17_to_128_bytes(secret, seed, input), + + 129..=240 => impl_129_to_240_bytes(secret, seed, input), + + _ => impl_241_plus_bytes(secret, input), } } @@ -98,7 +132,11 @@ fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let combined = input_last.into_u64() | (input_first.into_u64() << 32); - let mut value = ((secret_words[0] ^ secret_words[1]) - modified_seed) ^ combined; + let mut value = { + let a = secret_words[0] ^ secret_words[1]; + let b = a.wrapping_sub(modified_seed); + b ^ combined + }; value ^= value.rotate_left(49) ^ value.rotate_left(24); value = value.wrapping_mul(PRIME_MX2); value ^= (value >> 35).wrapping_add(input.len().into_u64()); @@ -600,6 +638,27 @@ mod test { } } + #[test] + fn hash_with_seed() { + let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024]; + + let expected = [ + 0x4aed_e683_89c0_e311, + 0x78fc_079a_75aa_f3c0, + 0x1b73_06b8_9f25_4507, + 0x7df7_627f_d1f9_39b6, + 0x49ca_0fff_0950_1622, + 0x2bfd_caec_30ff_3000, + 0xf984_56bc_25be_0901, + 0x2483_9f0f_cdf4_d078, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot_with_seed(0xdead_cafe, input); + assert_eq!(hash, expected, "input was {} bytes", input.len()); + } + } + #[test] fn backported_as_chunks() { let x = [1, 2, 3, 4, 5]; diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 28b2298b8..25ffc7ee1 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -131,6 +131,11 @@ pub struct XXH3_state_t { extern "C" { fn XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> 
XXH64_hash_t; fn XXH3_createState() -> *mut XXH3_state_t; fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; @@ -150,6 +155,10 @@ impl XxHash3_64 { unsafe { XXH3_64bits(data.as_ptr().cast(), data.len()) } } + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + pub fn with_seed() -> Self { let state = unsafe { let state = XXH3_createState(); From 5ebb61e326f1d8454860bdefcd52315174bc1288 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 13:53:38 -0400 Subject: [PATCH 065/166] secret interface --- compare/src/lib.rs | 14 ++++++++++++++ src/xxhash3_64.rs | 15 +++++++++++++-- xx_hash-sys/src/lib.rs | 10 ++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 9b0e77d6d..c369a9181 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -205,6 +205,7 @@ mod xxhash64 { mod xxhash3_64 { use proptest::{prelude::*, test_runner::TestCaseResult}; + use xx_renu::xxhash3_64::SECRET_MINIMUM_LENGTH; use std::hash::Hasher as _; use super::*; @@ -235,6 +236,11 @@ mod xxhash3_64 { oneshot_impl(seed, &data[offset..])?; } + #[test] + fn oneshot_with_a_secret(secret in prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024), data: Vec) { + oneshot_with_secret_impl(&secret, &data)?; + } + // #[test] // fn streaming_one_chunk(seed: u64, data: Vec) { // streaming_one_chunk_impl(seed, &data)?; @@ -284,6 +290,14 @@ mod xxhash3_64 { Ok(()) } + fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot_with_secret(secret, data); + let rust = rust::XxHash3_64::oneshot_with_secret(secret, data); + + prop_assert_eq!(native, rust); + Ok(()) + } + // fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { // let native = { // let mut hasher = c::XxHash64::with_seed(seed); diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 
79bb012a7..be34bf042 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -15,6 +15,8 @@ const PRIME64_5: u64 = 0x27D4EB2F165667C5; const PRIME_MX1: u64 = 0x165667919E3779F9; const PRIME_MX2: u64 = 0x9FB21C651E98DF25; +const DEFAULT_SEED: u64 = 0; + const DEFAULT_SECRET: [u8; 192] = [ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, @@ -30,6 +32,8 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; +pub const SECRET_MINIMUM_LENGTH: usize = 136; + pub struct XxHash3_64; type Stripe = [u64; 8]; @@ -37,7 +41,7 @@ type Stripe = [u64; 8]; impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { - impl_oneshot(&DEFAULT_SECRET, 0, input) + impl_oneshot(&DEFAULT_SECRET, DEFAULT_SEED, input) } #[inline(never)] @@ -50,8 +54,15 @@ impl XxHash3_64 { impl_oneshot(secret, seed, input) } + + #[inline(never)] + pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> u64 { + assert!(secret.len() >= SECRET_MINIMUM_LENGTH); // TODO: ERROR + impl_oneshot(secret, DEFAULT_SEED, input) + } } +#[inline] fn derive_secret(seed: u64) -> [u8; 192] { let mut derived_secret = DEFAULT_SECRET; let base = derived_secret.as_mut_ptr().cast::(); @@ -74,7 +85,7 @@ fn derive_secret(seed: u64) -> [u8; 192] { } #[inline] -fn impl_oneshot(secret: &[u8; 192], seed: u64, input: &[u8]) -> u64 { +fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { match input.len() { 0 => impl_0_bytes(secret, seed), diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 25ffc7ee1..40fcc6256 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -136,6 +136,12 @@ extern "C" { length: libc::size_t, seed: XXH64_hash_t, ) -> XXH64_hash_t; + fn XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const 
libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; fn XXH3_createState() -> *mut XXH3_state_t; fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; @@ -159,6 +165,10 @@ impl XxHash3_64 { unsafe { XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } } + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { XXH3_64bits_withSecret(data.as_ptr().cast(), data.len(), secret.as_ptr().cast(), secret.len()) } + } + pub fn with_seed() -> Self { let state = unsafe { let state = XXH3_createState(); From 215d2c381dcae28e2c271ed9ca5d1e700710abd6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 14:53:21 -0400 Subject: [PATCH 066/166] build scalar and optimized and compare head-to-head --- compare/benches/benchmark.rs | 18 ++++---- compare/src/lib.rs | 2 +- xx_hash-sys/build.rs | 55 +++++++++++++++++++++-- xx_hash-sys/src/lib.rs | 87 ++++++++++++++++++++++++++++++++++-- 4 files changed, 145 insertions(+), 17 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index bf0c0b821..18a9415c6 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -173,25 +173,23 @@ mod xxhash3_64 { let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("xxhash3_64/oneshot"); - for size in [data.len()] { - //half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE)} { + for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); let id = format!("impl-c/size-{size:07}"); g.bench_function(id, |b| { - b.iter(|| { - let hash = c::XxHash3_64::oneshot(data); - black_box(hash); - }) + b.iter(|| c::XxHash3_64::oneshot_with_seed(seed, data)) + }); + + let id = format!("impl-c-scalar/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::ScalarXxHash3_64::oneshot_with_seed(seed, data)) }); let id = format!("impl-rust/size-{size:07}"); g.bench_function(id, |b| 
{ - b.iter(|| { - let hash = rust::XxHash3_64::oneshot(data); - black_box(hash); - }) + b.iter(|| rust::XxHash3_64::oneshot_with_seed(seed, data)) }); } diff --git a/compare/src/lib.rs b/compare/src/lib.rs index c369a9181..b85b1c167 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -205,8 +205,8 @@ mod xxhash64 { mod xxhash3_64 { use proptest::{prelude::*, test_runner::TestCaseResult}; - use xx_renu::xxhash3_64::SECRET_MINIMUM_LENGTH; use std::hash::Hasher as _; + use xx_renu::xxhash3_64::SECRET_MINIMUM_LENGTH; use super::*; diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index cdc31eb97..fb0ef2515 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -1,10 +1,59 @@ -use std::{env, path::PathBuf}; +use std::{env, fs, path::PathBuf, process::Command}; fn main() { let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); let base: PathBuf = base.into(); let xxhash = base.join("xxHash"); - println!("cargo::rustc-link-lib=static=xxhash"); - println!("cargo::rustc-link-search={}", xxhash.display()); + let out = env::var("OUT_DIR").expect("no OUT_DIR"); + let mut out = PathBuf::from(out); + out.push("xxhash"); + fs::create_dir_all(&out).expect("make it"); + + let make_cmd = || { + let mut c = Command::new("make"); + c.current_dir(&xxhash); + c + }; + + let s = make_cmd() + .arg("clean") + .status() + .expect("Could not run clean for scalar build"); + assert!(s.success(), "Scalar clean failed"); + + let s = make_cmd() + .arg("libxxhash.a") + .env( + "CFLAGS", + "-O3 -DXXH_VECTOR=XXH_SCALAR -DXXH_NAMESPACE=scalar_", + ) + .status() + .expect("Could not run scalar build"); + assert!(s.success(), "Scalar build failed"); + + let name = xxhash.join("libxxhash.a"); + let new = out.join("libxxhash_scalar.a"); + fs::copy(name, new).expect("Copy scalar"); + + let s = make_cmd() + .arg("clean") + .status() + .expect("Could not run clean for optimized build"); + assert!(s.success(), "Optimized clean failed"); + + let s = make_cmd() + .arg("libxxhash.a") + 
.status() + .expect("Could not run optimized build"); + assert!(s.success(), "Optimized build failed"); + + let name = xxhash.join("libxxhash.a"); + let new = out.join("libxxhash_optimized.a"); + fs::copy(name, new).expect("Copy scalar"); + + + println!("cargo::rustc-link-lib=static=xxhash_scalar"); + println!("cargo::rustc-link-lib=static=xxhash_optimized"); + println!("cargo::rustc-link-search={}", out.display()); } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 40fcc6256..a8ae42811 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -121,8 +121,6 @@ impl Drop for XxHash64 { // ---------- -// type XXH_hash_t = u64; - #[repr(C)] pub struct XXH3_state_t { _data: [u8; 0], @@ -166,7 +164,14 @@ impl XxHash3_64 { } pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { XXH3_64bits_withSecret(data.as_ptr().cast(), data.len(), secret.as_ptr().cast(), secret.len()) } + unsafe { + XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } } pub fn with_seed() -> Self { @@ -195,3 +200,79 @@ impl Drop for XxHash3_64 { assert_eq!(retval, XXH_OK); } } + +// ---------- + +extern "C" { + fn scalar_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn scalar_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn scalar_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn scalar_XXH3_createState() -> *mut XXH3_state_t; + fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn scalar_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn scalar_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn scalar_XXH3_freeState(state: *mut XXH3_state_t) -> 
XXH_errorcode; +} + +pub struct ScalarXxHash3_64(*mut XXH3_state_t); + +impl ScalarXxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { scalar_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { scalar_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + scalar_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = scalar_XXH3_createState(); + scalar_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { scalar_XXH3_64bits_digest(self.0) } + } +} + +impl Drop for ScalarXxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { scalar_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } +} From c6c9a129b34f143609c8d2a2b7978ad6ea95e0e8 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 14 Jul 2024 12:10:45 -0400 Subject: [PATCH 067/166] simd some --- src/xxhash3_64.rs | 94 +++++++++++++++++++++++++++++++++++++++----- xx_hash-sys/build.rs | 5 +-- 2 files changed, 87 insertions(+), 12 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index be34bf042..6905bef46 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -377,15 +377,82 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset #[inline] fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? 
- let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + use core::arch::aarch64::*; + + // unsafe { + // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(stripe.as_ptr().cast()); + // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(secret.as_ptr().cast()); + // } + + // eprintln!("{acc:x?}"); + // for i in 0..8 { + // // TODO: Should these casts / reads happen outside this function? + // let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + // let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + // eprintln!("{:x?}, {:x?}", stripe, secret); + + // let value = stripe ^ secret; + // acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + // acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + // } + + // We process 4x u64 at a time as that allows us to completely + // fill a `uint64x2_t` with useful values when performing the + // `vmull_{high_}u32`. 
+ let (acc2, _) = acc.bp_as_chunks_mut::<4>(); + for (i, acc) in acc2.into_iter().enumerate() { + unsafe { + let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); + let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); + let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); + let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); + let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); + let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + + let value_0 = veorq_u64(stripe_0, secret_0); + let value_1 = veorq_u64(stripe_1, secret_1); + + let parts_0 = vreinterpretq_u32_u64(value_0); + let parts_1 = vreinterpretq_u32_u64(value_1); + + let hi = vuzp1q_u32(parts_0, parts_1); + let lo = vuzp2q_u32(parts_0, parts_1); + + let product_0 = vmull_u32(vget_low_u32(hi), vget_low_u32(lo)); + let product_1 = vmull_high_u32(hi, lo); + + accv_0 = vaddq_u64(accv_0, product_0); + accv_1 = vaddq_u64(accv_1, product_1); + + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + accv_0 = vaddq_u64(accv_0, stripe_rot_0); + accv_1 = vaddq_u64(accv_1, stripe_rot_1); + + vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); + vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); + }; } + + + // Pseudo-SIMD + + // for (acc, (str, sec)) in acc.iter_mut().zip(stripe_x.into_iter().zip(secret_x)) { + // let value = str ^ sec; + // *acc = multiply_64_as_32_and_add(value, value >> 32, *acc); + // } + + // let mut stripe_x = stripe_x; + + // stripe_x.swap(0, 1); + // stripe_x.swap(2, 3); + // stripe_x.swap(4, 5); + // stripe_x.swap(6, 7); + + // for (acc, str) in acc.iter_mut().zip(stripe_x) { + // *acc = acc.wrapping_add(str); + // } } #[inline] @@ -474,6 +541,7 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); + fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } @@ -486,6 +554,14 @@ impl SliceBackport 
for [T] { (head, tail) } + fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { + assert_ne!(N, 0); + let len = self.len() / N; + let (head, tail) = unsafe { self.split_at_mut_unchecked(len * N) }; + let head = unsafe { slice::from_raw_parts_mut(head.as_mut_ptr().cast(), len) }; + (head, tail) + } + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { assert_ne!(N, 0); let len = self.len() / N; @@ -631,7 +707,7 @@ mod test { } #[test] - fn hash_240_plus_bytes() { + fn hash_241_plus_bytes() { let inputs = bytes![241, 242, 243, 244, 1024, 10240]; let expected = [ diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index fb0ef2515..417dedd70 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -33,7 +33,7 @@ fn main() { assert!(s.success(), "Scalar build failed"); let name = xxhash.join("libxxhash.a"); - let new = out.join("libxxhash_scalar.a"); + let new = out.join("libxxhash_scalar.a"); fs::copy(name, new).expect("Copy scalar"); let s = make_cmd() @@ -49,10 +49,9 @@ fn main() { assert!(s.success(), "Optimized build failed"); let name = xxhash.join("libxxhash.a"); - let new = out.join("libxxhash_optimized.a"); + let new = out.join("libxxhash_optimized.a"); fs::copy(name, new).expect("Copy scalar"); - println!("cargo::rustc-link-lib=static=xxhash_scalar"); println!("cargo::rustc-link-lib=static=xxhash_optimized"); println!("cargo::rustc-link-search={}", out.display()); From 843e7621c3129bdb2a3f26e019f0f1f8443ee94a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 14 Jul 2024 18:03:01 -0400 Subject: [PATCH 068/166] checkpoint simd scramble --- src/xxhash3_64.rs | 98 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 10 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6905bef46..61e3e05ed 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -320,17 +320,95 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { #[inline] fn round_scramble(acc: &mut [u64; 8], secret: 
&[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - let (last, _) = last.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); - } + + // let last = secret + // .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + // .unwrap(); + // let (last, _) = last.bp_as_chunks(); + // let last = last.iter().copied().map(u64::from_ne_bytes); + + // for (acc, secret) in acc.iter_mut().zip(last) { + // *acc ^= *acc >> 47; + // *acc ^= secret; + // *acc = acc.wrapping_mul(PRIME32_1); + // } + + unsafe { + use core::arch::aarch64::*; + + let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); + let (acc, _) = acc.bp_as_chunks_mut::<2>(); + for (i, acc) in acc.iter_mut().enumerate() { + let mut accv = vld1q_u64(acc.as_ptr()); + let secret = vld1q_u64(secret_base.add(i * 2)); + + let shifted = vshrq_n_u64::<47>(accv); + accv = veorq_u64(accv, shifted); + accv = veorq_u64(accv, secret); + + // let acc0 = vgetq_lane_u64::<0>(accv); + // let acc1 = vgetq_lane_u64::<1>(accv); + // let r0 = acc0.wrapping_mul(PRIME32_1); + // let r1 = acc1.wrapping_mul(PRIME32_1); + // eprintln!("expected = {r0:x}, {r1:x}"); + + let prime = vdupq_n_u32(PRIME32_1 as _); // opt: always 0 in high bits cause 32bit + + // eprintln!("acc = {accv:x?}"); + + let accv = vreinterpretq_u32_u64(accv); + + // eprintln!("acc = {accv:x?}"); + + // eprintln!("lo(acc) = {:x?} lo(pp) = {:x?}", vget_low_u32(accv), vget_low_u32(pp)); + + let lo = vmull_u32(vget_low_u32(accv), vget_low_u32(prime)); + let hi = vmull_high_u32(accv, prime); + + // eprintln!("lo = {lo:x?} hi = {hi:x?}"); + + let a = vuzp1q_u64(lo, hi); + let b = vuzp2q_u64(lo, hi); + + // eprintln!("a = {a:x?} b = {b:x?}"); + + let b = vshlq_n_u64::<32>(b); + + let s = vaddq_u64(a, b); + + // eprintln!("s = {s:x?}"); + // let accv = vreinterpretq_u64_u32(accv); + 
// eprintln!("acc = {accv:x?}"); + + let accv = s; + +// panic!(); + + vst1q_u64(acc.as_mut_ptr(), accv); + } + } + + // eprintln!("{acc:x?}"); + // eprintln!("----------"); + + + // scalar + // acc + // + // [23e3a8c41a04e6b, e0ab8aff41b10f66, + // fd0385440ae4def7, ac00b2db47f23b90, + // 60a911d92c86ff3b, ad3b37a550927c9c, + // 211896d1cfc9b1b9, 66ceedfabb78caeb] + // + // simd + // acc + // + // [b66e3311c60fb961, c8a474f8ebf44757, + // 6810423fba6d7ed0, d41b8185aab06f3, + // 9f012bd957bae2ba, b3ce30e7c301b27e, + // c2090074dc5d2070, dadf4f22e5e02bdd] + } #[inline] From 39ec48a758985aca92fc097dac59868170a553ad Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 15 Jul 2024 09:06:04 -0400 Subject: [PATCH 069/166] checkpoint simd scramble --- src/xxhash3_64.rs | 115 ++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 54 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 61e3e05ed..cc70bc29b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -347,68 +347,75 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { accv = veorq_u64(accv, shifted); accv = veorq_u64(accv, secret); - // let acc0 = vgetq_lane_u64::<0>(accv); - // let acc1 = vgetq_lane_u64::<1>(accv); - // let r0 = acc0.wrapping_mul(PRIME32_1); - // let r1 = acc1.wrapping_mul(PRIME32_1); - // eprintln!("expected = {r0:x}, {r1:x}"); + accv = neon::xx_vmulq_u32_u64(accv, PRIME32_1 as u32); - let prime = vdupq_n_u32(PRIME32_1 as _); // opt: always 0 in high bits cause 32bit - - // eprintln!("acc = {accv:x?}"); - - let accv = vreinterpretq_u32_u64(accv); - - // eprintln!("acc = {accv:x?}"); - - // eprintln!("lo(acc) = {:x?} lo(pp) = {:x?}", vget_low_u32(accv), vget_low_u32(pp)); - - let lo = vmull_u32(vget_low_u32(accv), vget_low_u32(prime)); - let hi = vmull_high_u32(accv, prime); - - // eprintln!("lo = {lo:x?} hi = {hi:x?}"); - - let a = vuzp1q_u64(lo, hi); - let b = vuzp2q_u64(lo, hi); - - // eprintln!("a = {a:x?} b = {b:x?}"); - - let b = 
vshlq_n_u64::<32>(b); - - let s = vaddq_u64(a, b); - - // eprintln!("s = {s:x?}"); - // let accv = vreinterpretq_u64_u32(accv); - // eprintln!("acc = {accv:x?}"); - - let accv = s; - -// panic!(); - - vst1q_u64(acc.as_mut_ptr(), accv); + vst1q_u64(acc.as_mut_ptr(), accv); } - } - - // eprintln!("{acc:x?}"); - // eprintln!("----------"); + } +} +mod neon { + use core::arch::aarch64::*; - // scalar - // acc + // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the + // lower 64 bits of the result) operation, so we have to make our + // own out of 32-bit operations . We can simplify by realizing + // that we are always multiplying by a 32-bit number. + // + // The basic algorithm is traditional long multiplication. `[]` + // denotes groups of 32 bits. + // + // [AAAA][BBBB] + // x [CCCC] + // -------------------- + // [BCBC][BCBC] + // + [ACAC][ACAC] + // -------------------- + // [ACBC][BCBC] // 64-bit truncation occurs + // + // This can be written in NEON as a vectorwise wrapping + // multiplication of the high-order chunk of the input (`A`) + // against the constant and then a multiply-widen-and-accumulate + // of the low-order chunk of the input and the constant: + // + // 1. High-order, vectorwise // - // [23e3a8c41a04e6b, e0ab8aff41b10f66, - // fd0385440ae4def7, ac00b2db47f23b90, - // 60a911d92c86ff3b, ad3b37a550927c9c, - // 211896d1cfc9b1b9, 66ceedfabb78caeb] + // [AAAA][BBBB] + // x [CCCC][0000] + // -------------------- + // [ACAC][0000] // - // simd - // acc + // 2. Low-order, widening // - // [b66e3311c60fb961, c8a474f8ebf44757, - // 6810423fba6d7ed0, d41b8185aab06f3, - // 9f012bd957bae2ba, b3ce30e7c301b27e, - // c2090074dc5d2070, dadf4f22e5e02bdd] + // [BBBB] + // x [CCCC] // widening + // -------------------- + // [BCBC][BCBC] + // + // 3. Accumulation + // + // [ACAC][0000] + // + [BCBC][BCBC] // vectorwise + // -------------------- + // [ACBC][BCBC] + // + // Thankfully, NEON has a single multiply-widen-and-accumulate + // operation. 
+ #[inline] + pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { + unsafe { + let input_as_u32 = vreinterpretq_u32_u64(input); + let factor = vmov_n_u32(og_factor); + let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); + let factor_striped = vreinterpretq_u32_u64(factor_striped); + + let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); + let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + let input_lo = vmovn_u64(input); + vmlal_u32(high_shifted, input_lo, factor) + } + } } #[inline] From 4ca95734d7d1d22953d73dc4acff74b99fea1880 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 15 Jul 2024 09:52:40 -0400 Subject: [PATCH 070/166] organize --- src/xxhash3_64.rs | 193 ++++++++++++++++++++++------------------------ 1 file changed, 94 insertions(+), 99 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index cc70bc29b..202658e5b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -319,43 +319,99 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { } #[inline] +#[cfg(not(target_arch = "aarch64"))] fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } +} - // let last = secret - // .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - // .unwrap(); - // let (last, _) = last.bp_as_chunks(); - // let last = last.iter().copied().map(u64::from_ne_bytes); +#[cfg(target_arch = "aarch64")] +use neon::{accumulate, round_scramble}; - // for (acc, secret) in acc.iter_mut().zip(last) { - // *acc ^= *acc >> 47; - // *acc ^= secret; - // *acc = acc.wrapping_mul(PRIME32_1); - // } +mod neon { + use core::arch::aarch64::*; - unsafe { - use core::arch::aarch64::*; + use 
super::{SliceBackport as _, PRIME32_1}; + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + unsafe { + let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); + let (acc, _) = acc.bp_as_chunks_mut::<2>(); + for (i, acc) in acc.iter_mut().enumerate() { + let mut accv = vld1q_u64(acc.as_ptr()); + let secret = vld1q_u64(secret_base.add(i * 2)); - let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); - let (acc, _) = acc.bp_as_chunks_mut::<2>(); - for (i, acc) in acc.iter_mut().enumerate() { - let mut accv = vld1q_u64(acc.as_ptr()); - let secret = vld1q_u64(secret_base.add(i * 2)); + // tmp[i] = acc[i] >> 47 + let shifted = vshrq_n_u64::<47>(accv); - let shifted = vshrq_n_u64::<47>(accv); - accv = veorq_u64(accv, shifted); - accv = veorq_u64(accv, secret); + // acc[i] ^= tmp[i] + accv = veorq_u64(accv, shifted); - accv = neon::xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + // acc[i] ^= secret[i] + accv = veorq_u64(accv, secret); - vst1q_u64(acc.as_mut_ptr(), accv); + // acc[i] *= PRIME32_1 + accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + + vst1q_u64(acc.as_mut_ptr(), accv); + } } } -} -mod neon { - use core::arch::aarch64::*; + // We process 4x u64 at a time as that allows us to completely + // fill a `uint64x2_t` with useful values when performing the + // `vmull_{high_}u32`. 
+ #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (acc2, _) = acc.bp_as_chunks_mut::<4>(); + for (i, acc) in acc2.into_iter().enumerate() { + unsafe { + let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); + let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); + let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); + let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); + let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); + let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + + // value[i] = stripe[i] ^ secret[i]; + let value_0 = veorq_u64(stripe_0, secret_0); + let value_1 = veorq_u64(stripe_1, secret_1); + + // tmp[i] = value[i] * (value[i] >> 32) + let parts_0 = vreinterpretq_u32_u64(value_0); + let parts_1 = vreinterpretq_u32_u64(value_1); + + let hi = vuzp1q_u32(parts_0, parts_1); + let lo = vuzp2q_u32(parts_0, parts_1); + + let product_0 = vmull_u32(vget_low_u32(hi), vget_low_u32(lo)); + let product_1 = vmull_high_u32(hi, lo); + + // acc[i] += tmp[i] + accv_0 = vaddq_u64(accv_0, product_0); + accv_1 = vaddq_u64(accv_1, product_1); + + // acc[i ^ 1] = acc[i ^ 1] + stripe[i]; + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + accv_0 = vaddq_u64(accv_0, stripe_rot_0); + accv_1 = vaddq_u64(accv_1, stripe_rot_1); + + vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); + vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); + }; + } + } // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the // lower 64 bits of the result) operation, so we have to make our @@ -416,6 +472,11 @@ mod neon { vmlal_u32(high_shifted, input_lo, factor) } } + + // unsafe { + // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(stripe.as_ptr().cast()); + // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(secret.as_ptr().cast()); + // } } #[inline] @@ -461,83 +522,17 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, 
secret: &[u8], secret_offset } #[inline] +#[cfg(not(target_arch = "aarch64"))] fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - use core::arch::aarch64::*; + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? + let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - // unsafe { - // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(stripe.as_ptr().cast()); - // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(secret.as_ptr().cast()); - // } - - // eprintln!("{acc:x?}"); - // for i in 0..8 { - // // TODO: Should these casts / reads happen outside this function? - // let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - // let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - // eprintln!("{:x?}, {:x?}", stripe, secret); - - // let value = stripe ^ secret; - // acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - // acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); - // } - - // We process 4x u64 at a time as that allows us to completely - // fill a `uint64x2_t` with useful values when performing the - // `vmull_{high_}u32`. 
- let (acc2, _) = acc.bp_as_chunks_mut::<4>(); - for (i, acc) in acc2.into_iter().enumerate() { - unsafe { - let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); - let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); - let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); - let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); - let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); - let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); - - let value_0 = veorq_u64(stripe_0, secret_0); - let value_1 = veorq_u64(stripe_1, secret_1); - - let parts_0 = vreinterpretq_u32_u64(value_0); - let parts_1 = vreinterpretq_u32_u64(value_1); - - let hi = vuzp1q_u32(parts_0, parts_1); - let lo = vuzp2q_u32(parts_0, parts_1); - - let product_0 = vmull_u32(vget_low_u32(hi), vget_low_u32(lo)); - let product_1 = vmull_high_u32(hi, lo); - - accv_0 = vaddq_u64(accv_0, product_0); - accv_1 = vaddq_u64(accv_1, product_1); - - let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); - let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - accv_0 = vaddq_u64(accv_0, stripe_rot_0); - accv_1 = vaddq_u64(accv_1, stripe_rot_1); - - vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); - vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); - }; + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); } - - - // Pseudo-SIMD - - // for (acc, (str, sec)) in acc.iter_mut().zip(stripe_x.into_iter().zip(secret_x)) { - // let value = str ^ sec; - // *acc = multiply_64_as_32_and_add(value, value >> 32, *acc); - // } - - // let mut stripe_x = stripe_x; - - // stripe_x.swap(0, 1); - // stripe_x.swap(2, 3); - // stripe_x.swap(4, 5); - // stripe_x.swap(6, 7); - - // for (acc, str) in acc.iter_mut().zip(stripe_x) { - // *acc = acc.wrapping_add(str); - // } } #[inline] From feb485b01a1929793691240465174476f7013e92 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 21 Jul 2024 
08:51:44 -0400 Subject: [PATCH 071/166] more link --- src/xxhash3_64.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 202658e5b..67db79bf4 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -547,6 +547,7 @@ fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 +// https://github.com/llvm/llvm-project/issues/98481 #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use core::arch::asm; From 5835fdf700b8033847ae490c306806a9590957ea Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 21 Jul 2024 08:49:30 -0400 Subject: [PATCH 072/166] NEON performance parity --- src/xxhash3_64.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 67db79bf4..c6420f8d9 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -370,7 +370,7 @@ mod neon { // We process 4x u64 at a time as that allows us to completely // fill a `uint64x2_t` with useful values when performing the - // `vmull_{high_}u32`. + // multiplication. 
#[inline] pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let (acc2, _) = acc.bp_as_chunks_mut::<4>(); @@ -383,29 +383,35 @@ mod neon { let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + // stripe_rot[i ^ 1] = stripe[i]; + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + // value[i] = stripe[i] ^ secret[i]; let value_0 = veorq_u64(stripe_0, secret_0); let value_1 = veorq_u64(stripe_1, secret_1); - // tmp[i] = value[i] * (value[i] >> 32) + // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] + // + // Each vector has 64-bit values, but we treat them as + // 32-bit and then unzip them. This naturally splits + // the upper and lower 32 bits. let parts_0 = vreinterpretq_u32_u64(value_0); let parts_1 = vreinterpretq_u32_u64(value_1); let hi = vuzp1q_u32(parts_0, parts_1); let lo = vuzp2q_u32(parts_0, parts_1); - let product_0 = vmull_u32(vget_low_u32(hi), vget_low_u32(lo)); - let product_1 = vmull_high_u32(hi, lo); + let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); + let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - // acc[i] += tmp[i] - accv_0 = vaddq_u64(accv_0, product_0); - accv_1 = vaddq_u64(accv_1, product_1); + // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 + core::arch::asm!("/* {x:v} */", x = in(vreg) sum_0); + core::arch::asm!("/* {x:v} */", x = in(vreg) sum_1); - // acc[i ^ 1] = acc[i ^ 1] + stripe[i]; - let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); - let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - accv_0 = vaddq_u64(accv_0, stripe_rot_0); - accv_1 = vaddq_u64(accv_1, stripe_rot_1); + // acc[i] += sum[i] + accv_0 = vaddq_u64(accv_0, sum_0); + accv_1 = vaddq_u64(accv_1, sum_1); vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); 
From 938d94eb3b0217cb07a5496962e7451b7ae09299 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 21 Jul 2024 18:59:14 -0400 Subject: [PATCH 073/166] organize simd --- Cargo.toml | 4 +- src/xxhash3_64.rs | 137 +++++++++++++++++++++++++--------------------- 2 files changed, 79 insertions(+), 62 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1a515e981..6c660495d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "simd"] random = ["dep:rand"] @@ -22,6 +22,8 @@ xxhash32 = [] xxhash64 = [] xxhash3_64 = [] +simd = [] + [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c6420f8d9..02c6db296 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -318,25 +318,77 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { } } -#[inline] -#[cfg(not(target_arch = "aarch64"))] -fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - let (last, _) = last.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); +#[cfg(any(not(all(feature = "simd", target_arch = "aarch64"))))] +mod scalar { + use core::mem; + + use super::{SliceBackport as _, PRIME32_1}; + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + 
*acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } + } + + #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? + let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + } + } + + #[inline] + #[cfg(not(target_arch = "aarch64"))] + fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + let lhs = (lhs as u32).into_u64(); + let rhs = (rhs as u32).into_u64(); + + let product = lhs.wrapping_mul(rhs); + acc.wrapping_add(product) + } + + #[inline] + // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 + // https://github.com/llvm/llvm-project/issues/98481 + #[cfg(target_arch = "aarch64")] + fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + use core::arch::asm; + + let res; + + unsafe { + asm!( + "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", + lhs = in(reg) lhs, + rhs = in(reg) rhs, + acc = in(reg) acc, + res = out(reg) res, + ) + } + + res } } -#[cfg(target_arch = "aarch64")] -use neon::{accumulate, round_scramble}; +#[cfg(any(not(all(feature = "simd", target_arch = "aarch64"))))] +use scalar as vector_impl; +#[cfg(all(target_arch = "aarch64", feature = "simd"))] mod neon { use core::arch::aarch64::*; @@ -485,6 +537,11 @@ mod neon { // } } +#[cfg(all(target_arch = "aarch64", feature = "simd"))] +use neon as vector_impl; + +use vector_impl::{accumulate, round_scramble}; + #[inline] fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, @@ -527,52 +584,6 @@ fn final_merge(acc: &mut [u64; 8], 
init_value: u64, secret: &[u8], secret_offset avalanche(result) } -#[inline] -#[cfg(not(target_arch = "aarch64"))] -fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? - let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); - } -} - -#[inline] -#[cfg(not(target_arch = "aarch64"))] -fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - let lhs = (lhs as u32).into_u64(); - let rhs = (rhs as u32).into_u64(); - - let product = lhs.wrapping_mul(rhs); - acc.wrapping_add(product) -} - -#[inline] -// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 -// https://github.com/llvm/llvm-project/issues/98481 -#[cfg(target_arch = "aarch64")] -fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - use core::arch::asm; - - let res; - - unsafe { - asm!( - "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", - lhs = in(reg) lhs, - rhs = in(reg) rhs, - acc = in(reg) acc, - res = out(reg) res, - ) - } - - res -} - #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; @@ -628,7 +639,10 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); + + #[cfg(all(target_arch = "aarch64", feature = "simd"))] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } @@ -641,6 +655,7 @@ impl SliceBackport for [T] { (head, tail) } + #[cfg(all(target_arch = "aarch64", feature = "simd"))] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; From de5b5d7da07d46eb6f3132b762bb388169316beb Mon Sep 17 00:00:00 2001 From: Jake 
Goulding Date: Mon, 22 Jul 2024 11:12:09 -0400 Subject: [PATCH 074/166] bench simd on off --- compare/Cargo.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/compare/Cargo.toml b/compare/Cargo.toml index 44ef771ee..084a5b2b3 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -3,6 +3,11 @@ name = "compare" version = "0.1.0" edition = "2021" +[features] +default = ["simd"] + +simd = ["xx-renu/simd"] + [[bench]] name = "benchmark" harness = false @@ -12,5 +17,5 @@ criterion = "0.5.1" proptest = "1.5.0" rand = "0.8.5" twox-hash = "1.6.3" -xx-renu = { path = ".." } +xx-renu = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64"] } xx_hash-sys = { path = "../xx_hash-sys" } From 50da6239b7a56113d67c446b42688a62b707ab0d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 22 Jul 2024 11:16:17 -0400 Subject: [PATCH 075/166] Simplify the control flow --- src/xxhash3_64.rs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 02c6db296..e955f8c5f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -36,8 +36,6 @@ pub const SECRET_MINIMUM_LENGTH: usize = 136; pub struct XxHash3_64; -type Stripe = [u64; 8]; - impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { @@ -274,11 +272,20 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; + assert!(secret.len() >= SECRET_MINIMUM_LENGTH); + assert!(input.len() >= 241); + let stripes_per_block = (secret.len() - 64) / 8; let block_size = 64 * stripes_per_block; - let mut blocks = input.chunks(block_size).fuse(); - let last_block = blocks.next_back().unwrap(); + let mut blocks = input.chunks_exact(block_size); + let last_block = + if blocks.remainder().is_empty() { + unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; + let last_stripe: 
&[u8; 64] = unsafe { &*input .as_ptr() @@ -288,7 +295,9 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { }; for block in blocks { - round(&mut acc, block, secret); + let (stripes, _) = block.bp_as_chunks(); + + round(&mut acc, stripes, secret); } last_round(&mut acc, last_block, last_stripe, secret); @@ -302,14 +311,13 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { } #[inline] -fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { - round_accumulate(acc, block, secret); +fn round(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + round_accumulate(acc, stripes, secret); round_scramble(acc, secret); } #[inline] -fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { - let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::() }>(); +fn round_accumulate(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { let secrets = (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); From 3408fa7694b94c93f0ff04ffdcd75b02b2e8b953 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 22 Jul 2024 11:33:33 -0400 Subject: [PATCH 076/166] cleanup --- src/xxhash3_64.rs | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index e955f8c5f..96e59ee3e 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -244,24 +244,6 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> mul_result.lower_half() ^ mul_result.upper_half() } -// fn mix_two_chunks( -// acc: &mut [u64; 2], -// data1: &[u8; 16], -// data2: &[u8; 16], -// secret: &[u8], -// secret_offset: usize, -// seed: u64, -// ) { -// // TODO: Should these casts / reads happen outside this function? 
-// let data_words1 = unsafe { data1.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion -// let data_words2 = unsafe { data2.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion - -// acc[0] = acc[0] + mix_step(data1, secret, secret_offset, seed); -// acc[1] = acc[1] + mix_step(data2, secret, secret_offset + 16, seed); -// acc[0] = acc[0] ^ data_words2[0].wrapping_add(data_words2[1]); -// acc[1] = acc[1] ^ data_words1[0].wrapping_add(data_words1[1]); -// } - #[rustfmt::skip] const INITIAL_ACCUMULATORS: [u64; 8] = [ PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, @@ -281,6 +263,10 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { let mut blocks = input.chunks_exact(block_size); let last_block = if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. 
unsafe { blocks.next_back().unwrap_unchecked() } } else { blocks.remainder() @@ -465,9 +451,8 @@ mod neon { let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 - core::arch::asm!("/* {x:v} */", x = in(vreg) sum_0); - core::arch::asm!("/* {x:v} */", x = in(vreg) sum_1); + reordering_barrier(sum_0); + reordering_barrier(sum_1); // acc[i] += sum[i] accv_0 = vaddq_u64(accv_0, sum_0); @@ -539,10 +524,11 @@ mod neon { } } - // unsafe { - // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(stripe.as_ptr().cast()); - // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(secret.as_ptr().cast()); - // } + // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 + #[inline] + fn reordering_barrier(r: uint64x2_t) { + unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } + } } #[cfg(all(target_arch = "aarch64", feature = "simd"))] From 087edbf6f86ace2fe06709a20b858c0f5e4612d1 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 22 Jul 2024 16:43:27 -0400 Subject: [PATCH 077/166] stub out x64 simd --- src/xxhash3_64.rs | 64 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 96e59ee3e..d3c6067ab 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -312,11 +312,14 @@ fn round_accumulate(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { } } -#[cfg(any(not(all(feature = "simd", target_arch = "aarch64"))))] +#[cfg(all( + not(all(target_feature = "neon", feature = "simd")), + not(all(target_feature = "avx2", feature = "simd")), +))] mod scalar { use core::mem; - use super::{SliceBackport as _, PRIME32_1}; + use super::{SliceBackport as _, PRIME32_1, IntoU64}; #[inline] pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { 
@@ -379,10 +382,13 @@ mod scalar { } } -#[cfg(any(not(all(feature = "simd", target_arch = "aarch64"))))] +#[cfg(all( + not(all(target_feature = "neon", feature = "simd")), + not(all(target_feature = "avx2", feature = "simd")), +))] use scalar as vector_impl; -#[cfg(all(target_arch = "aarch64", feature = "simd"))] +#[cfg(all(target_feature = "neon", feature = "simd"))] mod neon { use core::arch::aarch64::*; @@ -531,9 +537,57 @@ mod neon { } } -#[cfg(all(target_arch = "aarch64", feature = "simd"))] +#[cfg(all(target_feature = "neon", feature = "simd"))] use neon as vector_impl; +#[cfg(all(target_feature = "avx2", feature = "simd"))] +mod avx2 { + use core::mem; + + use super::{SliceBackport as _, PRIME32_1, IntoU64}; + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } + } + + #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? 
+ let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + } + } + + #[inline] + fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + let lhs = (lhs as u32).into_u64(); + let rhs = (rhs as u32).into_u64(); + + let product = lhs.wrapping_mul(rhs); + acc.wrapping_add(product) + } +} + +#[cfg(all(target_feature = "avx2", feature = "simd"))] +use avx2 as vector_impl; + use vector_impl::{accumulate, round_scramble}; #[inline] From 51ded36f0b0d1edd9c7fb20a00e1bcb97e47a1b1 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:10:33 -0400 Subject: [PATCH 078/166] hack in one simd --- src/xxhash3_64.rs | 48 +++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index d3c6067ab..5e2b319d0 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -542,7 +542,7 @@ use neon as vector_impl; #[cfg(all(target_feature = "avx2", feature = "simd"))] mod avx2 { - use core::mem; + use core::{mem, arch::x86_64::*}; use super::{SliceBackport as _, PRIME32_1, IntoU64}; @@ -551,6 +551,7 @@ mod avx2 { let last = secret .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() .unwrap(); + let (last, _) = last.bp_as_chunks(); let last = last.iter().copied().map(u64::from_ne_bytes); @@ -563,25 +564,40 @@ mod avx2 { #[inline] pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? 
- let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + for i in 0..2 { + unsafe { + // todo: align the accumulator and avoid the unaligned load and store + let mut acc_0 = _mm256_loadu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast()); + let stripe_0 = _mm256_loadu_si256(stripe.as_ptr().cast::().add(4 * i).cast()); + let secret_0 = _mm256_loadu_si256(secret.as_ptr().cast::().add(4 * i).cast()); - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm256_xor_si256(stripe_0, secret_0); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); - } - } + // TODO: "rotate" is not quite correct + // stripe_rot[i] = stripe[i ^ 1] + let stripe_rot_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); - #[inline] - fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - let lhs = (lhs as u32).into_u64(); - let rhs = (rhs as u32).into_u64(); + // acc[i] += stripe_rot[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_rot_0); - let product = lhs.wrapping_mul(rhs); - acc.wrapping_add(product) + // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) + let value_swap_0 = _mm256_shuffle_epi32::<0b10_11_00_01>(value_0); + + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) + let product_0 = _mm256_mul_epu32(value_0, value_swap_0); + + // eprintln!(); + // eprintln!("{value_0:016x?}"); + // eprintln!("{value_swap_0:016x?}"); + // eprintln!("{product_0:016x?}"); + + // acc[i] += product[i] + acc_0 = _mm256_add_epi64(acc_0, product_0); + + _mm256_storeu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast(), acc_0); + } + } } } From 2c7b465af345d046312c7c62ba78ec266d1f6d75 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:10:33 -0400 Subject: [PATCH 079/166] simd cleanup --- compare/benches/benchmark.rs | 5 +++++ src/xxhash3_64.rs | 35 
++++++----------------------------- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 18a9415c6..ba2318b68 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -187,6 +187,11 @@ mod xxhash3_64 { b.iter(|| c::ScalarXxHash3_64::oneshot_with_seed(seed, data)) }); + let id = format!("impl-c-avx2/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::Avx2XxHash3_64::oneshot_with_seed(seed, data)) + }); + let id = format!("impl-rust/size-{size:07}"); g.bench_function(id, |b| { b.iter(|| rust::XxHash3_64::oneshot_with_seed(seed, data)) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 5e2b319d0..6820e4777 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -224,7 +224,6 @@ fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { #[inline] fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> u64 { - // TODO: Should these casts / reads happen outside this function? let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; let secret_words = unsafe { secret @@ -312,10 +311,8 @@ fn round_accumulate(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { } } -#[cfg(all( - not(all(target_feature = "neon", feature = "simd")), - not(all(target_feature = "avx2", feature = "simd")), -))] +// This module is not `cfg`-gated because it is used by some of the +// SIMD implementations. mod scalar { use core::mem; @@ -337,9 +334,9 @@ mod scalar { } #[inline] + #[allow(dead_code)] pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? 
let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; @@ -542,25 +539,10 @@ use neon as vector_impl; #[cfg(all(target_feature = "avx2", feature = "simd"))] mod avx2 { - use core::{mem, arch::x86_64::*}; + use core::arch::x86_64::*; - use super::{SliceBackport as _, PRIME32_1, IntoU64}; - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - - let (last, _) = last.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); - } - } + // The scalar implementation is autovectorized nicely enough + pub use super::scalar::round_scramble; #[inline] pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -587,11 +569,6 @@ mod avx2 { // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) let product_0 = _mm256_mul_epu32(value_0, value_swap_0); - // eprintln!(); - // eprintln!("{value_0:016x?}"); - // eprintln!("{value_swap_0:016x?}"); - // eprintln!("{product_0:016x?}"); - // acc[i] += product[i] acc_0 = _mm256_add_epi64(acc_0, product_0); From be7325c8d47b1a6bd6c70c7ab9b7a6d24c72aa44 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:10:33 -0400 Subject: [PATCH 080/166] use cc for builds and a forced avx2 variant --- xx_hash-sys/Cargo.toml | 3 ++ xx_hash-sys/build.rs | 78 ++++++++++++++---------------------------- xx_hash-sys/src/lib.rs | 76 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 52 deletions(-) diff --git a/xx_hash-sys/Cargo.toml b/xx_hash-sys/Cargo.toml index d385daf66..dd96308c0 100644 --- a/xx_hash-sys/Cargo.toml +++ b/xx_hash-sys/Cargo.toml @@ -6,3 +6,6 @@ publish = false [dependencies] libc = { version = "0.2.155", default-features = 
false } + +[build-dependencies] +cc = { version = "1.1.6", default-features = false } diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index 417dedd70..2edfce466 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -1,58 +1,32 @@ -use std::{env, fs, path::PathBuf, process::Command}; +use std::{env, path::PathBuf}; fn main() { - let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); - let base: PathBuf = base.into(); - let xxhash = base.join("xxHash"); - - let out = env::var("OUT_DIR").expect("no OUT_DIR"); - let mut out = PathBuf::from(out); - out.push("xxhash"); - fs::create_dir_all(&out).expect("make it"); + // TODO: CARGO_CFG_TARGET_FEATURE has `Some(adx,aes,avx,avx2,...` - let make_cmd = || { - let mut c = Command::new("make"); - c.current_dir(&xxhash); - c + let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); + let mut base: PathBuf = base.into(); + base.push("xxHash"); + base.push("xxhash.c"); + + let build = { + let mut build = cc::Build::new(); + build.file(base); + build }; - let s = make_cmd() - .arg("clean") - .status() - .expect("Could not run clean for scalar build"); - assert!(s.success(), "Scalar clean failed"); - - let s = make_cmd() - .arg("libxxhash.a") - .env( - "CFLAGS", - "-O3 -DXXH_VECTOR=XXH_SCALAR -DXXH_NAMESPACE=scalar_", - ) - .status() - .expect("Could not run scalar build"); - assert!(s.success(), "Scalar build failed"); - - let name = xxhash.join("libxxhash.a"); - let new = out.join("libxxhash_scalar.a"); - fs::copy(name, new).expect("Copy scalar"); - - let s = make_cmd() - .arg("clean") - .status() - .expect("Could not run clean for optimized build"); - assert!(s.success(), "Optimized clean failed"); - - let s = make_cmd() - .arg("libxxhash.a") - .status() - .expect("Could not run optimized build"); - assert!(s.success(), "Optimized build failed"); - - let name = xxhash.join("libxxhash.a"); - let new = out.join("libxxhash_optimized.a"); - fs::copy(name, new).expect("Copy scalar"); - - 
println!("cargo::rustc-link-lib=static=xxhash_scalar"); - println!("cargo::rustc-link-lib=static=xxhash_optimized"); - println!("cargo::rustc-link-search={}", out.display()); + let mut scalar_build = build.clone(); + scalar_build + .define("XXH_VECTOR", "XXH_SCALAR") + .define("XXH_NAMESPACE", "scalar_") + .compile("xxhash_scalar"); + + let mut avx2_build = build.clone(); + avx2_build + .flag("-march=x86-64-v3") + .define("XXH_VECTOR", "XXH_AVX2") + .define("XXH_NAMESPACE", "avx2_") + .compile("xxhash_avx2"); + + let native_build = build; + native_build.compile("xxhash_native"); } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index a8ae42811..8f10e9f6a 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -276,3 +276,79 @@ impl Drop for ScalarXxHash3_64 { assert_eq!(retval, XXH_OK); } } + +// ---------- + +extern "C" { + fn avx2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn avx2_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn avx2_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn avx2_XXH3_createState() -> *mut XXH3_state_t; + fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn avx2_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn avx2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn avx2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; +} + +pub struct Avx2XxHash3_64(*mut XXH3_state_t); + +impl Avx2XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { avx2_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { avx2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn 
oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + avx2_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = avx2_XXH3_createState(); + avx2_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { avx2_XXH3_64bits_digest(self.0) } + } +} + +impl Drop for Avx2XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { avx2_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } +} From f7ec3bc5f92c7620cabe8927417041f6d38b0dd4 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:10:33 -0400 Subject: [PATCH 081/166] better choosin --- src/xxhash3_64.rs | 631 +++++++++++++++++++++++++--------------------- 1 file changed, 343 insertions(+), 288 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6820e4777..992ca7d40 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -251,98 +251,158 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { - let mut acc = INITIAL_ACCUMULATORS; - - assert!(secret.len() >= SECRET_MINIMUM_LENGTH); - assert!(input.len() >= 241); - - let stripes_per_block = (secret.len() - 64) / 8; - let block_size = 64 * stripes_per_block; - - let mut blocks = input.chunks_exact(block_size); - let last_block = - if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. 
- unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() + unsafe { avx2::oneshot_unchecked(secret, input) } +} + +struct Algorithm(V); + +impl Algorithm { + fn do_it(&self, secret: &[u8], input: &[u8]) -> u64 { + let mut acc = INITIAL_ACCUMULATORS; + + assert!(secret.len() >= SECRET_MINIMUM_LENGTH); + assert!(input.len() >= 241); + + let stripes_per_block = (secret.len() - 64) / 8; + let block_size = 64 * stripes_per_block; + + let mut blocks = input.chunks_exact(block_size); + let last_block = + if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. + unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; + + let last_stripe: &[u8; 64] = unsafe { + &*input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::<[u8; 64]>()) + .cast() }; - let last_stripe: &[u8; 64] = unsafe { - &*input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::<[u8; 64]>()) - .cast() - }; + for block in blocks { + let (stripes, _) = block.bp_as_chunks(); - for block in blocks { - let (stripes, _) = block.bp_as_chunks(); + self.round(&mut acc, stripes, secret); + } + + self.last_round(&mut acc, last_block, last_stripe, secret); - round(&mut acc, stripes, secret); + self.final_merge( + &mut acc, + input.len().into_u64().wrapping_mul(PRIME64_1), + secret, + 11, + ) } - last_round(&mut acc, last_block, last_stripe, secret); + #[inline] + fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + self.round_accumulate(acc, stripes, secret); + self.0.round_scramble(acc, secret); + } - final_merge( - &mut acc, - input.len().into_u64().wrapping_mul(PRIME64_1), - secret, - 11, - ) -} + #[inline] + fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + let secrets = + (0..stripes.len()).map(|i| unsafe 
{ &*secret.get_unchecked(i * 8..).as_ptr().cast() }); -#[inline] -fn round(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { - round_accumulate(acc, stripes, secret); - round_scramble(acc, secret); -} + for (stripe, secret) in stripes.iter().zip(secrets) { + self.0.accumulate(acc, stripe, secret); + } + } -#[inline] -fn round_accumulate(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { - let secrets = - (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); - for (stripe, secret) in stripes.iter().zip(secrets) { - accumulate(acc, stripe, secret); + #[inline] + fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { + // Accumulation steps are run for the stripes in the last block, + // except for the last stripe (whether it is full or not) + let stripes = match block.bp_as_chunks() { + ([stripes @ .., _last], []) => stripes, + (stripes, _last) => stripes, + }; + let secrets = + (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); + + for (stripe, secret) in stripes.iter().zip(secrets) { + self.0.accumulate(acc, stripe, secret); + } + + let q = &secret[secret.len() - 71..]; + let q: &[u8; 64] = unsafe { &*q.as_ptr().cast() }; + self.0.accumulate(acc, last_stripe, q); + } + + #[inline] + fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { + let secret_words = unsafe { + secret + .as_ptr() + .add(secret_offset) + .cast::<[u64; 8]>() + .read_unaligned() + }; + let mut result = init_value; + for i in 0..4 { + // 64-bit by 64-bit multiplication to 128-bit full result + let mul_result = { + let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); + let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + a.wrapping_mul(b) + }; + result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); + } + avalanche(result) } } +trait Vector { + fn round_scramble(&self, acc: &mut 
[u64; 8], secret: &[u8]); + + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); +} + // This module is not `cfg`-gated because it is used by some of the // SIMD implementations. mod scalar { use core::mem; - use super::{SliceBackport as _, PRIME32_1, IntoU64}; + use super::{IntoU64, SliceBackport as _, Vector, PRIME32_1}; - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - let (last, _) = last.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); + pub struct Impl; + + impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } } - } - #[inline] - #[allow(dead_code)] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = 
multiply_64_as_32_and_add(value, value >> 32, acc[i]); + } } } @@ -379,251 +439,246 @@ mod scalar { } } -#[cfg(all( - not(all(target_feature = "neon", feature = "simd")), - not(all(target_feature = "avx2", feature = "simd")), -))] -use scalar as vector_impl; - -#[cfg(all(target_feature = "neon", feature = "simd"))] -mod neon { - use core::arch::aarch64::*; - - use super::{SliceBackport as _, PRIME32_1}; - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - unsafe { - let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); - let (acc, _) = acc.bp_as_chunks_mut::<2>(); - for (i, acc) in acc.iter_mut().enumerate() { - let mut accv = vld1q_u64(acc.as_ptr()); - let secret = vld1q_u64(secret_base.add(i * 2)); +// #[cfg(all( +// not(all(target_feature = "neon", feature = "simd")), +// not(all(target_feature = "avx2", feature = "simd")), +// ))] +// use scalar as vector_impl; + +// #[cfg(all(target_feature = "neon", feature = "simd"))] +// mod neon { +// use core::arch::aarch64::*; + +// use super::{SliceBackport as _, PRIME32_1}; + +// #[inline] +// pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { +// unsafe { +// let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); +// let (acc, _) = acc.bp_as_chunks_mut::<2>(); +// for (i, acc) in acc.iter_mut().enumerate() { +// let mut accv = vld1q_u64(acc.as_ptr()); +// let secret = vld1q_u64(secret_base.add(i * 2)); + +// // tmp[i] = acc[i] >> 47 +// let shifted = vshrq_n_u64::<47>(accv); + +// // acc[i] ^= tmp[i] +// accv = veorq_u64(accv, shifted); + +// // acc[i] ^= secret[i] +// accv = veorq_u64(accv, secret); + +// // acc[i] *= PRIME32_1 +// accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + +// vst1q_u64(acc.as_mut_ptr(), accv); +// } +// } +// } + +// // We process 4x u64 at a time as that allows us to completely +// // fill a `uint64x2_t` with useful values when performing the +// // multiplication. 
+// #[inline] +// pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { +// let (acc2, _) = acc.bp_as_chunks_mut::<4>(); +// for (i, acc) in acc2.into_iter().enumerate() { +// unsafe { +// let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); +// let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); +// let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); +// let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); +// let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); +// let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + +// // stripe_rot[i ^ 1] = stripe[i]; +// let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); +// let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + +// // value[i] = stripe[i] ^ secret[i]; +// let value_0 = veorq_u64(stripe_0, secret_0); +// let value_1 = veorq_u64(stripe_1, secret_1); + +// // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] +// // +// // Each vector has 64-bit values, but we treat them as +// // 32-bit and then unzip them. This naturally splits +// // the upper and lower 32 bits. +// let parts_0 = vreinterpretq_u32_u64(value_0); +// let parts_1 = vreinterpretq_u32_u64(value_1); + +// let hi = vuzp1q_u32(parts_0, parts_1); +// let lo = vuzp2q_u32(parts_0, parts_1); + +// let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); +// let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); + +// reordering_barrier(sum_0); +// reordering_barrier(sum_1); + +// // acc[i] += sum[i] +// accv_0 = vaddq_u64(accv_0, sum_0); +// accv_1 = vaddq_u64(accv_1, sum_1); + +// vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); +// vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); +// }; +// } +// } + +// // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the +// // lower 64 bits of the result) operation, so we have to make our +// // own out of 32-bit operations . 
We can simplify by realizing +// // that we are always multiplying by a 32-bit number. +// // +// // The basic algorithm is traditional long multiplication. `[]` +// // denotes groups of 32 bits. +// // +// // [AAAA][BBBB] +// // x [CCCC] +// // -------------------- +// // [BCBC][BCBC] +// // + [ACAC][ACAC] +// // -------------------- +// // [ACBC][BCBC] // 64-bit truncation occurs +// // +// // This can be written in NEON as a vectorwise wrapping +// // multiplication of the high-order chunk of the input (`A`) +// // against the constant and then a multiply-widen-and-accumulate +// // of the low-order chunk of the input and the constant: +// // +// // 1. High-order, vectorwise +// // +// // [AAAA][BBBB] +// // x [CCCC][0000] +// // -------------------- +// // [ACAC][0000] +// // +// // 2. Low-order, widening +// // +// // [BBBB] +// // x [CCCC] // widening +// // -------------------- +// // [BCBC][BCBC] +// // +// // 3. Accumulation +// // +// // [ACAC][0000] +// // + [BCBC][BCBC] // vectorwise +// // -------------------- +// // [ACBC][BCBC] +// // +// // Thankfully, NEON has a single multiply-widen-and-accumulate +// // operation. 
+// #[inline] +// pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { +// unsafe { +// let input_as_u32 = vreinterpretq_u32_u64(input); +// let factor = vmov_n_u32(og_factor); +// let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); +// let factor_striped = vreinterpretq_u32_u64(factor_striped); + +// let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); +// let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + +// let input_lo = vmovn_u64(input); +// vmlal_u32(high_shifted, input_lo, factor) +// } +// } + +// // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 +// #[inline] +// fn reordering_barrier(r: uint64x2_t) { +// unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } +// } +// } + +// #[cfg(all(target_feature = "neon", feature = "simd"))] +// use neon as vector_impl; + +#[cfg(all(target_arch = "x86_64", feature = "simd"))] +mod avx2 { + use core::arch::x86_64::*; - // tmp[i] = acc[i] >> 47 - let shifted = vshrq_n_u64::<47>(accv); + use super::Vector; - // acc[i] ^= tmp[i] - accv = veorq_u64(accv, shifted); + #[cfg(target_feature = "avx2")] + pub unsafe fn oneshot(secret: &[u8], input: &[u8]) -> u64 { + unsafe { oneshot_unchecked(secret, input) } + } - // acc[i] ^= secret[i] - accv = veorq_u64(accv, secret); + #[target_feature(enable = "avx2")] + pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { + unsafe { super::Algorithm(Impl::new_unchecked()) }.do_it(secret, input) + } - // acc[i] *= PRIME32_1 - accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + pub struct Impl(super::scalar::Impl); - vst1q_u64(acc.as_mut_ptr(), accv); - } + impl Impl { + #[cfg(target_feature = "avx2")] + pub fn new() -> Self { + unsafe { Self::new_unchecked() } } - } - // We process 4x u64 at a time as that allows us to completely - // fill a `uint64x2_t` with useful values when performing the - // multiplication. 
- #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let (acc2, _) = acc.bp_as_chunks_mut::<4>(); - for (i, acc) in acc2.into_iter().enumerate() { - unsafe { - let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); - let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); - let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); - let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); - let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); - let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); - - // stripe_rot[i ^ 1] = stripe[i]; - let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); - let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - - // value[i] = stripe[i] ^ secret[i]; - let value_0 = veorq_u64(stripe_0, secret_0); - let value_1 = veorq_u64(stripe_1, secret_1); - - // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] - // - // Each vector has 64-bit values, but we treat them as - // 32-bit and then unzip them. This naturally splits - // the upper and lower 32 bits. - let parts_0 = vreinterpretq_u32_u64(value_0); - let parts_1 = vreinterpretq_u32_u64(value_1); - - let hi = vuzp1q_u32(parts_0, parts_1); - let lo = vuzp2q_u32(parts_0, parts_1); - - let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); - let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - - reordering_barrier(sum_0); - reordering_barrier(sum_1); - - // acc[i] += sum[i] - accv_0 = vaddq_u64(accv_0, sum_0); - accv_1 = vaddq_u64(accv_1, sum_1); - - vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); - vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); - }; + /// # Safety + /// You must ensure that the CPU has the AVX2 feature + pub unsafe fn new_unchecked() -> Impl { + Impl(super::scalar::Impl) } } - // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the - // lower 64 bits of the result) operation, so we have to make our - // own out of 32-bit operations . 
We can simplify by realizing - // that we are always multiplying by a 32-bit number. - // - // The basic algorithm is traditional long multiplication. `[]` - // denotes groups of 32 bits. - // - // [AAAA][BBBB] - // x [CCCC] - // -------------------- - // [BCBC][BCBC] - // + [ACAC][ACAC] - // -------------------- - // [ACBC][BCBC] // 64-bit truncation occurs - // - // This can be written in NEON as a vectorwise wrapping - // multiplication of the high-order chunk of the input (`A`) - // against the constant and then a multiply-widen-and-accumulate - // of the low-order chunk of the input and the constant: - // - // 1. High-order, vectorwise - // - // [AAAA][BBBB] - // x [CCCC][0000] - // -------------------- - // [ACAC][0000] - // - // 2. Low-order, widening - // - // [BBBB] - // x [CCCC] // widening - // -------------------- - // [BCBC][BCBC] - // - // 3. Accumulation - // - // [ACAC][0000] - // + [BCBC][BCBC] // vectorwise - // -------------------- - // [ACBC][BCBC] - // - // Thankfully, NEON has a single multiply-widen-and-accumulate - // operation. 
- #[inline] - pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { - unsafe { - let input_as_u32 = vreinterpretq_u32_u64(input); - let factor = vmov_n_u32(og_factor); - let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); - let factor_striped = vreinterpretq_u32_u64(factor_striped); - - let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); - let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + // The scalar implementation is autovectorized nicely enough + self.0.round_scramble(acc, secret) + } - let input_lo = vmovn_u64(input); - vmlal_u32(high_shifted, input_lo, factor) + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // SAFETY: Type can only be constructed when AVX2 feature is present + unsafe { accumulate_avx2(acc, stripe, secret) } } } - // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 #[inline] - fn reordering_barrier(r: uint64x2_t) { - unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } - } -} - -#[cfg(all(target_feature = "neon", feature = "simd"))] -use neon as vector_impl; - -#[cfg(all(target_feature = "avx2", feature = "simd"))] -mod avx2 { - use core::arch::x86_64::*; - - // The scalar implementation is autovectorized nicely enough - pub use super::scalar::round_scramble; - - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + #[target_feature(enable = "avx2")] + unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { for i in 0..2 { - unsafe { - // todo: align the accumulator and avoid the unaligned load and store - let mut acc_0 = _mm256_loadu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast()); - let stripe_0 = _mm256_loadu_si256(stripe.as_ptr().cast::().add(4 * i).cast()); - let secret_0 = 
_mm256_loadu_si256(secret.as_ptr().cast::().add(4 * i).cast()); + // todo: align the accumulator and avoid the unaligned load and store + let mut acc_0 = _mm256_loadu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast()); + let stripe_0 = _mm256_loadu_si256(stripe.as_ptr().cast::().add(4 * i).cast()); + let secret_0 = _mm256_loadu_si256(secret.as_ptr().cast::().add(4 * i).cast()); - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm256_xor_si256(stripe_0, secret_0); + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm256_xor_si256(stripe_0, secret_0); - // TODO: "rotate" is not quite correct - // stripe_rot[i] = stripe[i ^ 1] - let stripe_rot_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); + // TODO: "rotate" is not quite correct + // stripe_rot[i] = stripe[i ^ 1] + let stripe_rot_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); - // acc[i] += stripe_rot[i] - acc_0 = _mm256_add_epi64(acc_0, stripe_rot_0); + // acc[i] += stripe_rot[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_rot_0); - // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) - let value_swap_0 = _mm256_shuffle_epi32::<0b10_11_00_01>(value_0); + // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) + let value_swap_0 = _mm256_shuffle_epi32::<0b10_11_00_01>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) - let product_0 = _mm256_mul_epu32(value_0, value_swap_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) + let product_0 = _mm256_mul_epu32(value_0, value_swap_0); - // acc[i] += product[i] - acc_0 = _mm256_add_epi64(acc_0, product_0); + // acc[i] += product[i] + acc_0 = _mm256_add_epi64(acc_0, product_0); - _mm256_storeu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast(), acc_0); - } + _mm256_storeu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast(), acc_0); } } } -#[cfg(all(target_feature = "avx2", feature = "simd"))] -use avx2 as vector_impl; - -use vector_impl::{accumulate, 
round_scramble}; - -#[inline] -fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { - // Accumulation steps are run for the stripes in the last block, - // except for the last stripe (whether it is full or not) - let stripes = match block.bp_as_chunks() { - ([stripes @ .., _last], []) => stripes, - (stripes, _last) => stripes, - }; - let secrets = - (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); - - for (stripe, secret) in stripes.iter().zip(secrets) { - accumulate(acc, stripe, secret); - } - - let q = &secret[secret.len() - 71..]; - let q: &[u8; 64] = unsafe { &*q.as_ptr().cast() }; - accumulate(acc, last_stripe, q); -} +// #[cfg(all(target_feature = "avx2", feature = "simd"))] +// use avx2 as vector_impl; -#[inline] -fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { - let secret_words = unsafe { - secret - .as_ptr() - .add(secret_offset) - .cast::<[u64; 8]>() - .read_unaligned() - }; - let mut result = init_value; - for i in 0..4 { - // 64-bit by 64-bit multiplication to 128-bit full result - let mul_result = { - let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); - let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); - a.wrapping_mul(b) - }; - result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); - } - avalanche(result) -} +// use vector_impl::{accumulate, round_scramble}; #[inline] fn avalanche(mut x: u64) -> u64 { From ffb2e32db8c9349018e96fe9d078cfa66d128e5a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:42:09 -0400 Subject: [PATCH 082/166] Add detect --- Cargo.toml | 4 ++- compare/Cargo.toml | 2 +- src/lib.rs | 2 +- src/xxhash3_64.rs | 67 +++++++++++++++++++++++++++------------------- 4 files changed, 44 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6c660495d..784008721 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] 
[features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "simd"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "simd", "std"] random = ["dep:rand"] @@ -24,6 +24,8 @@ xxhash3_64 = [] simd = [] +std = [] + [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } diff --git a/compare/Cargo.toml b/compare/Cargo.toml index 084a5b2b3..db575eb92 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -17,5 +17,5 @@ criterion = "0.5.1" proptest = "1.5.0" rand = "0.8.5" twox-hash = "1.6.3" -xx-renu = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64"] } +xx-renu = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/src/lib.rs b/src/lib.rs index 597fb5d48..2a6b24eb7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,9 +64,9 @@ //! assert_eq!(hash.get(&42), Some(&"the answer")); //! 
``` -#![no_std] #![deny(rust_2018_idioms)] #![deny(missing_docs)] +#![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] #[cfg(any(doc, test))] diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 992ca7d40..01180a4df 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -251,13 +251,14 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { - unsafe { avx2::oneshot_unchecked(secret, input) } +// unsafe { avx2::oneshot_unchecked(secret, input) } + x86_64_detect::oneshot(secret, input) } struct Algorithm(V); impl Algorithm { - fn do_it(&self, secret: &[u8], input: &[u8]) -> u64 { + fn oneshot(&self, secret: &[u8], input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; assert!(secret.len() >= SECRET_MINIMUM_LENGTH); @@ -267,16 +268,15 @@ impl Algorithm { let block_size = 64 * stripes_per_block; let mut blocks = input.chunks_exact(block_size); - let last_block = - if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. - unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() - }; + let last_block = if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. 
+ unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; let last_stripe: &[u8; 64] = unsafe { &*input @@ -318,7 +318,6 @@ impl Algorithm { } } - #[inline] fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, @@ -340,7 +339,13 @@ impl Algorithm { } #[inline] - fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { + fn final_merge( + &self, + acc: &mut [u64; 8], + init_value: u64, + secret: &[u8], + secret_offset: usize, + ) -> u64 { let secret_words = unsafe { secret .as_ptr() @@ -373,6 +378,11 @@ trait Vector { mod scalar { use core::mem; + #[inline] + pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { + super::Algorithm(Impl).oneshot(secret, input) + } + use super::{IntoU64, SliceBackport as _, Vector, PRIME32_1}; pub struct Impl; @@ -439,12 +449,6 @@ mod scalar { } } -// #[cfg(all( -// not(all(target_feature = "neon", feature = "simd")), -// not(all(target_feature = "avx2", feature = "simd")), -// ))] -// use scalar as vector_impl; - // #[cfg(all(target_feature = "neon", feature = "simd"))] // mod neon { // use core::arch::aarch64::*; @@ -594,23 +598,22 @@ mod scalar { // } // } -// #[cfg(all(target_feature = "neon", feature = "simd"))] -// use neon as vector_impl; - #[cfg(all(target_arch = "x86_64", feature = "simd"))] mod avx2 { use core::arch::x86_64::*; use super::Vector; + #[inline] #[cfg(target_feature = "avx2")] - pub unsafe fn oneshot(secret: &[u8], input: &[u8]) -> u64 { + pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { unsafe { oneshot_unchecked(secret, input) } } + #[inline] #[target_feature(enable = "avx2")] pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - unsafe { super::Algorithm(Impl::new_unchecked()) }.do_it(secret, input) + unsafe { super::Algorithm(Impl::new_unchecked()) }.oneshot(secret, input) } pub struct 
Impl(super::scalar::Impl); @@ -675,10 +678,18 @@ mod avx2 { } } -// #[cfg(all(target_feature = "avx2", feature = "simd"))] -// use avx2 as vector_impl; +#[cfg(all(target_arch = "x86_64", feature = "std"))] +mod x86_64_detect { + pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { -// use vector_impl::{accumulate, round_scramble}; + #[cfg(feature = "simd")] + if is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::oneshot_unchecked(secret, input) } + } + + super::scalar::oneshot(secret, input) + } +} #[inline] fn avalanche(mut x: u64) -> u64 { From c00c286b2e55d43801ff05efc58704939f3bcdbb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 18:10:29 -0400 Subject: [PATCH 083/166] Move neon to trait impl --- compare/benches/benchmark.rs | 21 +- src/xxhash3_64.rs | 380 ++++++++++++++++++++--------------- xx_hash-sys/build.rs | 48 ++++- xx_hash-sys/src/lib.rs | 324 ++++++++++++++++++----------- 4 files changed, 478 insertions(+), 295 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index ba2318b68..eddd860c4 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -184,13 +184,24 @@ mod xxhash3_64 { let id = format!("impl-c-scalar/size-{size:07}"); g.bench_function(id, |b| { - b.iter(|| c::ScalarXxHash3_64::oneshot_with_seed(seed, data)) + b.iter(|| c::scalar::XxHash3_64::oneshot_with_seed(seed, data)) }); - let id = format!("impl-c-avx2/size-{size:07}"); - g.bench_function(id, |b| { - b.iter(|| c::Avx2XxHash3_64::oneshot_with_seed(seed, data)) - }); + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::neon::XxHash3_64::oneshot_with_seed(seed, data)) + }); + } + + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::avx2::XxHash3_64::oneshot_with_seed(seed, data)) + }); + } let id = format!("impl-rust/size-{size:07}"); 
g.bench_function(id, |b| { diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 01180a4df..6b395fb6c 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -251,8 +251,7 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { -// unsafe { avx2::oneshot_unchecked(secret, input) } - x86_64_detect::oneshot(secret, input) + detect::oneshot(secret, input) } struct Algorithm(V); @@ -383,7 +382,7 @@ mod scalar { super::Algorithm(Impl).oneshot(secret, input) } - use super::{IntoU64, SliceBackport as _, Vector, PRIME32_1}; + use super::{SliceBackport as _, Vector, PRIME32_1}; pub struct Impl; @@ -419,6 +418,8 @@ mod scalar { #[inline] #[cfg(not(target_arch = "aarch64"))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + use super::IntoU64; + let lhs = (lhs as u32).into_u64(); let rhs = (rhs as u32).into_u64(); @@ -449,181 +450,216 @@ mod scalar { } } -// #[cfg(all(target_feature = "neon", feature = "simd"))] -// mod neon { -// use core::arch::aarch64::*; - -// use super::{SliceBackport as _, PRIME32_1}; - -// #[inline] -// pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { -// unsafe { -// let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); -// let (acc, _) = acc.bp_as_chunks_mut::<2>(); -// for (i, acc) in acc.iter_mut().enumerate() { -// let mut accv = vld1q_u64(acc.as_ptr()); -// let secret = vld1q_u64(secret_base.add(i * 2)); - -// // tmp[i] = acc[i] >> 47 -// let shifted = vshrq_n_u64::<47>(accv); - -// // acc[i] ^= tmp[i] -// accv = veorq_u64(accv, shifted); - -// // acc[i] ^= secret[i] -// accv = veorq_u64(accv, secret); - -// // acc[i] *= PRIME32_1 -// accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); - -// vst1q_u64(acc.as_mut_ptr(), accv); -// } -// } -// } - -// // We process 4x u64 at a time as that allows us to completely -// // fill a `uint64x2_t` with useful values when performing the -// // multiplication. 
-// #[inline] -// pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { -// let (acc2, _) = acc.bp_as_chunks_mut::<4>(); -// for (i, acc) in acc2.into_iter().enumerate() { -// unsafe { -// let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); -// let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); -// let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); -// let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); -// let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); -// let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); - -// // stripe_rot[i ^ 1] = stripe[i]; -// let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); -// let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - -// // value[i] = stripe[i] ^ secret[i]; -// let value_0 = veorq_u64(stripe_0, secret_0); -// let value_1 = veorq_u64(stripe_1, secret_1); - -// // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] -// // -// // Each vector has 64-bit values, but we treat them as -// // 32-bit and then unzip them. This naturally splits -// // the upper and lower 32 bits. -// let parts_0 = vreinterpretq_u32_u64(value_0); -// let parts_1 = vreinterpretq_u32_u64(value_1); - -// let hi = vuzp1q_u32(parts_0, parts_1); -// let lo = vuzp2q_u32(parts_0, parts_1); - -// let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); -// let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - -// reordering_barrier(sum_0); -// reordering_barrier(sum_1); - -// // acc[i] += sum[i] -// accv_0 = vaddq_u64(accv_0, sum_0); -// accv_1 = vaddq_u64(accv_1, sum_1); - -// vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); -// vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); -// }; -// } -// } - -// // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the -// // lower 64 bits of the result) operation, so we have to make our -// // own out of 32-bit operations . 
We can simplify by realizing -// // that we are always multiplying by a 32-bit number. -// // -// // The basic algorithm is traditional long multiplication. `[]` -// // denotes groups of 32 bits. -// // -// // [AAAA][BBBB] -// // x [CCCC] -// // -------------------- -// // [BCBC][BCBC] -// // + [ACAC][ACAC] -// // -------------------- -// // [ACBC][BCBC] // 64-bit truncation occurs -// // -// // This can be written in NEON as a vectorwise wrapping -// // multiplication of the high-order chunk of the input (`A`) -// // against the constant and then a multiply-widen-and-accumulate -// // of the low-order chunk of the input and the constant: -// // -// // 1. High-order, vectorwise -// // -// // [AAAA][BBBB] -// // x [CCCC][0000] -// // -------------------- -// // [ACAC][0000] -// // -// // 2. Low-order, widening -// // -// // [BBBB] -// // x [CCCC] // widening -// // -------------------- -// // [BCBC][BCBC] -// // -// // 3. Accumulation -// // -// // [ACAC][0000] -// // + [BCBC][BCBC] // vectorwise -// // -------------------- -// // [ACBC][BCBC] -// // -// // Thankfully, NEON has a single multiply-widen-and-accumulate -// // operation. 
-// #[inline] -// pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { -// unsafe { -// let input_as_u32 = vreinterpretq_u32_u64(input); -// let factor = vmov_n_u32(og_factor); -// let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); -// let factor_striped = vreinterpretq_u32_u64(factor_striped); - -// let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); -// let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); - -// let input_lo = vmovn_u64(input); -// vmlal_u32(high_shifted, input_lo, factor) -// } -// } - -// // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 -// #[inline] -// fn reordering_barrier(r: uint64x2_t) { -// unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } -// } -// } +#[cfg(all(target_arch = "aarch64", feature = "simd"))] +mod neon { + use core::arch::aarch64::*; -#[cfg(all(target_arch = "x86_64", feature = "simd"))] -mod avx2 { - use core::arch::x86_64::*; + use super::{SliceBackport as _, Vector, PRIME32_1}; - use super::Vector; + /// # Safety + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { + super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) + } + + struct Impl(()); + + impl Impl { + /// # Safety + /// You must ensure that the CPU has the NEON feature + unsafe fn new_unchecked() -> Self { + Self(()) + } + } + + impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + unsafe { round_scramble_neon(acc, secret) } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + unsafe { accumulate_neon(acc, stripe, secret) } + } + } + + #[inline] + #[target_feature(enable = "neon")] + unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret: &[u8]) { + unsafe { + let secret_base = 
secret.as_ptr().add(secret.len()).sub(64).cast::(); + let (acc, _) = acc.bp_as_chunks_mut::<2>(); + for (i, acc) in acc.iter_mut().enumerate() { + let mut accv = vld1q_u64(acc.as_ptr()); + let secret = vld1q_u64(secret_base.add(i * 2)); + // tmp[i] = acc[i] >> 47 + let shifted = vshrq_n_u64::<47>(accv); + + // acc[i] ^= tmp[i] + accv = veorq_u64(accv, shifted); + + // acc[i] ^= secret[i] + accv = veorq_u64(accv, secret); + + // acc[i] *= PRIME32_1 + accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + + vst1q_u64(acc.as_mut_ptr(), accv); + } + } + } + + // We process 4x u64 at a time as that allows us to completely + // fill a `uint64x2_t` with useful values when performing the + // multiplication. + #[target_feature(enable = "neon")] #[inline] - #[cfg(target_feature = "avx2")] + unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (acc2, _) = acc.bp_as_chunks_mut::<4>(); + for (i, acc) in acc2.into_iter().enumerate() { + unsafe { + let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); + let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); + let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); + let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); + let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); + let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + + // stripe_rot[i ^ 1] = stripe[i]; + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + + // value[i] = stripe[i] ^ secret[i]; + let value_0 = veorq_u64(stripe_0, secret_0); + let value_1 = veorq_u64(stripe_1, secret_1); + + // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] + // + // Each vector has 64-bit values, but we treat them as + // 32-bit and then unzip them. This naturally splits + // the upper and lower 32 bits. 
+ let parts_0 = vreinterpretq_u32_u64(value_0); + let parts_1 = vreinterpretq_u32_u64(value_1); + + let hi = vuzp1q_u32(parts_0, parts_1); + let lo = vuzp2q_u32(parts_0, parts_1); + + let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); + let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); + + reordering_barrier(sum_0); + reordering_barrier(sum_1); + + // acc[i] += sum[i] + accv_0 = vaddq_u64(accv_0, sum_0); + accv_1 = vaddq_u64(accv_1, sum_1); + + vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); + vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); + }; + } + } + + // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the + // lower 64 bits of the result) operation, so we have to make our + // own out of 32-bit operations . We can simplify by realizing + // that we are always multiplying by a 32-bit number. + // + // The basic algorithm is traditional long multiplication. `[]` + // denotes groups of 32 bits. + // + // [AAAA][BBBB] + // x [CCCC] + // -------------------- + // [BCBC][BCBC] + // + [ACAC][ACAC] + // -------------------- + // [ACBC][BCBC] // 64-bit truncation occurs + // + // This can be written in NEON as a vectorwise wrapping + // multiplication of the high-order chunk of the input (`A`) + // against the constant and then a multiply-widen-and-accumulate + // of the low-order chunk of the input and the constant: + // + // 1. High-order, vectorwise + // + // [AAAA][BBBB] + // x [CCCC][0000] + // -------------------- + // [ACAC][0000] + // + // 2. Low-order, widening + // + // [BBBB] + // x [CCCC] // widening + // -------------------- + // [BCBC][BCBC] + // + // 3. Accumulation + // + // [ACAC][0000] + // + [BCBC][BCBC] // vectorwise + // -------------------- + // [ACBC][BCBC] + // + // Thankfully, NEON has a single multiply-widen-and-accumulate + // operation. 
+ #[inline] + pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { + unsafe { + let input_as_u32 = vreinterpretq_u32_u64(input); + let factor = vmov_n_u32(og_factor); + let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); + let factor_striped = vreinterpretq_u32_u64(factor_striped); + + let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); + let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + + let input_lo = vmovn_u64(input); + vmlal_u32(high_shifted, input_lo, factor) + } + } + + // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 + #[inline] + fn reordering_barrier(r: uint64x2_t) { + unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } + } +} + +#[cfg(all(target_arch = "aarch64", feature = "std"))] +mod aarch64_detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - unsafe { oneshot_unchecked(secret, input) } + #[cfg(feature = "simd")] + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { super::neon::oneshot_unchecked(secret, input) }; + } + + super::scalar::oneshot(secret, input) } +} + +#[cfg(all(target_arch = "x86_64", feature = "simd"))] +mod avx2 { + use core::arch::x86_64::*; + use super::Vector; + + /// # Safety + /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - unsafe { super::Algorithm(Impl::new_unchecked()) }.oneshot(secret, input) + super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } pub struct Impl(super::scalar::Impl); impl Impl { - #[cfg(target_feature = "avx2")] - pub fn new() -> Self { - unsafe { Self::new_unchecked() } - } - /// # Safety /// You must ensure that the CPU has the AVX2 feature pub unsafe fn new_unchecked() -> Impl { @@ -681,16 +717,28 @@ mod avx2 { #[cfg(all(target_arch = "x86_64", feature = "std"))] mod x86_64_detect { pub fn oneshot(secret: &[u8], 
input: &[u8]) -> u64 { - #[cfg(feature = "simd")] - if is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::oneshot_unchecked(secret, input) } + if std::arch::is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::oneshot_unchecked(secret, input) }; } super::scalar::oneshot(secret, input) } } +mod detect { + pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { + #[cfg(all(target_arch = "aarch64", feature = "std"))] + return super::aarch64_detect::oneshot(secret, input); + + #[cfg(all(target_arch = "x86_64", feature = "std"))] + return super::x86_64_detect::oneshot(secret, input); + + #[allow(unreachable_code)] + super::scalar::oneshot(secret, input) + } +} + #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index 2edfce466..8aaf4aef5 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -1,7 +1,8 @@ -use std::{env, path::PathBuf}; +use std::{env, path::PathBuf, str::FromStr}; fn main() { - // TODO: CARGO_CFG_TARGET_FEATURE has `Some(adx,aes,avx,avx2,...` + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").expect("Need to know target architecture"); + let target_arch = target_arch.parse::().ok(); let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); let mut base: PathBuf = base.into(); @@ -20,13 +21,44 @@ fn main() { .define("XXH_NAMESPACE", "scalar_") .compile("xxhash_scalar"); - let mut avx2_build = build.clone(); - avx2_build - .flag("-march=x86-64-v3") - .define("XXH_VECTOR", "XXH_AVX2") - .define("XXH_NAMESPACE", "avx2_") - .compile("xxhash_avx2"); + match target_arch { + Some(Arch::Aarch64) => { + let mut neon_build = build.clone(); + neon_build + .define("XXH_VECTOR", "XXH_NEON") + .define("XXH_NAMESPACE", "neon_") + .compile("xxhash_neon"); + } + + Some(Arch::X86_64) => { + let mut avx2_build = build.clone(); + avx2_build + .flag("-march=x86-64-v3") + .define("XXH_VECTOR", "XXH_AVX2") + .define("XXH_NAMESPACE", "avx2_") + .compile("xxhash_avx2"); + } + + 
None => {} + } let native_build = build; native_build.compile("xxhash_native"); } + +enum Arch { + Aarch64, + X86_64, +} + +impl FromStr for Arch { + type Err = (); + + fn from_str(s: &str) -> Result { + Ok(match s { + "aarch64" => Self::Aarch64, + "x86_64" => Self::X86_64, + _ => return Err(()), + }) + } +} diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 8f10e9f6a..8ca1333a6 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -203,152 +203,244 @@ impl Drop for XxHash3_64 { // ---------- -extern "C" { - fn scalar_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn scalar_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn scalar_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn scalar_XXH3_createState() -> *mut XXH3_state_t; - fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn scalar_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn scalar_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn scalar_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; -} +pub mod scalar { + use super::*; + + extern "C" { + fn scalar_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn scalar_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn scalar_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn scalar_XXH3_createState() -> *mut XXH3_state_t; + fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn scalar_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: 
*const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn scalar_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn scalar_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; + } -pub struct ScalarXxHash3_64(*mut XXH3_state_t); + pub struct XxHash3_64(*mut XXH3_state_t); -impl ScalarXxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { scalar_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { scalar_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { scalar_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { scalar_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - scalar_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + scalar_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } } - } - pub fn with_seed() -> Self { - let state = unsafe { - let state = scalar_XXH3_createState(); - scalar_XXH3_64bits_reset(state); - state - }; + pub fn with_seed() -> Self { + let state = unsafe { + let state = scalar_XXH3_createState(); + scalar_XXH3_64bits_reset(state); + state + }; - Self(state) - } + Self(state) + } - pub fn write(&mut self, data: &[u8]) { - let retval = unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } - pub fn finish(&mut self) -> u64 { - unsafe { 
scalar_XXH3_64bits_digest(self.0) } + pub fn finish(&mut self) -> u64 { + unsafe { scalar_XXH3_64bits_digest(self.0) } + } } -} -impl Drop for ScalarXxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { scalar_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { scalar_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } } } // ---------- -extern "C" { - fn avx2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn avx2_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn avx2_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; +pub mod neon { + use super::*; + + extern "C" { + fn neon_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn neon_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn neon_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn neon_XXH3_createState() -> *mut XXH3_state_t; + fn neon_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn neon_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn neon_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn neon_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; + } - fn avx2_XXH3_createState() -> *mut XXH3_state_t; - fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn avx2_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn avx2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> 
XXH64_hash_t; - fn avx2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; -} + pub struct XxHash3_64(*mut XXH3_state_t); -pub struct Avx2XxHash3_64(*mut XXH3_state_t); + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { neon_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } -impl Avx2XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { avx2_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { neon_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { avx2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + neon_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = neon_XXH3_createState(); + neon_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { neon_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { neon_XXH3_64bits_digest(self.0) } + } } - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - avx2_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { neon_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); } } +} - pub fn with_seed() -> Self { - let state = unsafe { - let state = avx2_XXH3_createState(); - avx2_XXH3_64bits_reset(state); - state - }; +// ---------- - Self(state) +#[cfg(target_arch = "x86_64")] +pub mod avx2 { + use super::*; + + extern "C" { + fn avx2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> 
XXH64_hash_t; + fn avx2_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn avx2_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn avx2_XXH3_createState() -> *mut XXH3_state_t; + fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn avx2_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn avx2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn avx2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; } - pub fn write(&mut self, data: &[u8]) { - let retval = unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } + pub struct XxHash3_64(*mut XXH3_state_t); - pub fn finish(&mut self) -> u64 { - unsafe { avx2_XXH3_64bits_digest(self.0) } + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { avx2_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { avx2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + avx2_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = avx2_XXH3_createState(); + avx2_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { avx2_XXH3_64bits_digest(self.0) } + } } -} -impl Drop for Avx2XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { 
avx2_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { avx2_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } } } From a37289a5e5980b31210803f0ad2f653410537dbf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 08:57:22 -0400 Subject: [PATCH 084/166] avx cleanup --- src/xxhash3_64.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6b395fb6c..733122464 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -693,18 +693,17 @@ mod avx2 { // let value[i] = stripe[i] ^ secret[i]; let value_0 = _mm256_xor_si256(stripe_0, secret_0); - // TODO: "rotate" is not quite correct - // stripe_rot[i] = stripe[i ^ 1] - let stripe_rot_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); - // acc[i] += stripe_rot[i] - acc_0 = _mm256_add_epi64(acc_0, stripe_rot_0); + // acc[i] += stripe_swap[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); - // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) - let value_swap_0 = _mm256_shuffle_epi32::<0b10_11_00_01>(value_0); + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm256_srli_epi64::<32>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) - let product_0 = _mm256_mul_epu32(value_0, value_swap_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm256_mul_epu32(value_0, value_shift_0); // acc[i] += product[i] acc_0 = _mm256_add_epi64(acc_0, product_0); From 8e2a3593700db8b0645185d2982f42b14d56cf18 Mon Sep 17 00:00:00 2001 From: Dennis Duda Date: Wed, 24 Jul 2024 09:05:43 -0400 Subject: [PATCH 085/166] add sse2 implementation --- src/xxhash3_64.rs | 79 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 2 
deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 733122464..6568cfa14 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -713,12 +713,87 @@ mod avx2 { } } +mod sse2 { + use core::arch::x86_64::*; + + use super::Vector; + + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { + super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) + } + + pub struct Impl(super::scalar::Impl); + + impl Impl { + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + pub unsafe fn new_unchecked() -> Impl { + Impl(super::scalar::Impl) + } + } + + impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + // The scalar implementation is autovectorized nicely enough + self.0.round_scramble(acc, secret) + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // SAFETY: Type can only be constructed when SSE2 feature is present + unsafe { accumulate_sse2(acc, stripe, secret) } + } + } + + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..4 { + // todo: align the accumulator and avoid the unaligned load and store + let mut acc_0 = _mm_loadu_si128(acc.as_mut_ptr().cast::().add(2 * i).cast()); + let stripe_0 = _mm_loadu_si128(stripe.as_ptr().cast::().add(2 * i).cast()); + let secret_0 = _mm_loadu_si128(secret.as_ptr().cast::().add(2 * i).cast()); + + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm_xor_si128(stripe_0, secret_0); + + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); + + // acc[i] += stripe_swap[i] + acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); + + // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) + let value_swap_0 = 
_mm_shuffle_epi32::<0b10_11_00_01>(value_0); + + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) + let product_0 = _mm_mul_epu32(value_0, value_swap_0); + + // acc[i] += product[i] + acc_0 = _mm_add_epi64(acc_0, product_0); + + _mm_storeu_si128(acc.as_mut_ptr().cast::().add(2 * i).cast(), acc_0); + } + } +} + #[cfg(all(target_arch = "x86_64", feature = "std"))] mod x86_64_detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { #[cfg(feature = "simd")] - if std::arch::is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::oneshot_unchecked(secret, input) }; + { + if std::arch::is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::oneshot_unchecked(secret, input) }; + } + + if std::arch::is_x86_feature_detected!("sse2") { + return unsafe { super::sse2::oneshot_unchecked(secret, input) }; + } } super::scalar::oneshot(secret, input) From 816e8ce853d7622f3a529f1e2225f469cd2e6abf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 10:52:42 -0400 Subject: [PATCH 086/166] Add SSE2 C code variant --- compare/benches/benchmark.rs | 5 +++ xx_hash-sys/build.rs | 6 +++ xx_hash-sys/src/lib.rs | 80 ++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index eddd860c4..f9b765d9b 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -201,6 +201,11 @@ mod xxhash3_64 { g.bench_function(id, |b| { b.iter(|| c::avx2::XxHash3_64::oneshot_with_seed(seed, data)) }); + + let id = format!("impl-c-sse2/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::sse2::XxHash3_64::oneshot_with_seed(seed, data)) + }); } let id = format!("impl-rust/size-{size:07}"); diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index 8aaf4aef5..73056b693 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -37,6 +37,12 @@ fn main() { .define("XXH_VECTOR", "XXH_AVX2") .define("XXH_NAMESPACE", "avx2_") 
.compile("xxhash_avx2"); + + let mut sse2_build = build.clone(); + sse2_build + .define("XXH_VECTOR", "XXH_SSE2") + .define("XXH_NAMESPACE", "sse2_") + .compile("xxhash_sse2"); } None => {} diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 8ca1333a6..45bfb08e5 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -444,3 +444,83 @@ pub mod avx2 { } } } + +#[cfg(target_arch = "x86_64")] +pub mod sse2 { + use super::*; + + extern "C" { + fn sse2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn sse2_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn sse2_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn sse2_XXH3_createState() -> *mut XXH3_state_t; + fn sse2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn sse2_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn sse2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn sse2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; + } + + pub struct XxHash3_64(*mut XXH3_state_t); + + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { sse2_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { sse2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + sse2_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = sse2_XXH3_createState(); + sse2_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let 
retval = + unsafe { sse2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { sse2_XXH3_64bits_digest(self.0) } + } + } + + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { sse2_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } + } +} From f0b3ad4cb65c6ce4d662d891d1fbedcae5dedd05 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 16:37:51 -0400 Subject: [PATCH 087/166] A few more inlines for good measure --- src/xxhash3_64.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6568cfa14..1aff25fa2 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -257,6 +257,7 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { struct Algorithm(V); impl Algorithm { + #[inline] fn oneshot(&self, secret: &[u8], input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; @@ -469,6 +470,7 @@ mod neon { impl Impl { /// # Safety /// You must ensure that the CPU has the NEON feature + #[inline] unsafe fn new_unchecked() -> Self { Self(()) } @@ -633,6 +635,7 @@ mod neon { #[cfg(all(target_arch = "aarch64", feature = "std"))] mod aarch64_detect { + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { #[cfg(feature = "simd")] if std::arch::is_aarch64_feature_detected!("neon") { @@ -662,6 +665,7 @@ mod avx2 { impl Impl { /// # Safety /// You must ensure that the CPU has the AVX2 feature + #[inline] pub unsafe fn new_unchecked() -> Impl { Impl(super::scalar::Impl) } @@ -731,6 +735,7 @@ mod sse2 { impl Impl { /// # Safety /// You must ensure that the CPU has the SSE2 feature + #[inline] pub unsafe fn new_unchecked() -> Impl { Impl(super::scalar::Impl) } @@ -784,6 +789,7 @@ mod sse2 { #[cfg(all(target_arch = "x86_64", feature = "std"))] mod x86_64_detect { + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { #[cfg(feature = "simd")] { @@ -801,6 +807,7 @@ mod 
x86_64_detect { } mod detect { + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { #[cfg(all(target_arch = "aarch64", feature = "std"))] return super::aarch64_detect::oneshot(secret, input); From 8b5b56d498e0b601769119a20abfdae98beb2b01 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 16:37:55 -0400 Subject: [PATCH 088/166] Simplify and cross-pollinate the AVX2 and SSE2 implementations --- src/xxhash3_64.rs | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 1aff25fa2..f3642a6a7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -688,17 +688,21 @@ mod avx2 { #[inline] #[target_feature(enable = "avx2")] unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let acc = acc.as_mut_ptr().cast::<__m256i>(); + let stripe = stripe.as_ptr().cast::<__m256i>(); + let secret = secret.as_ptr().cast::<__m256i>(); + for i in 0..2 { // todo: align the accumulator and avoid the unaligned load and store - let mut acc_0 = _mm256_loadu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast()); - let stripe_0 = _mm256_loadu_si256(stripe.as_ptr().cast::().add(4 * i).cast()); - let secret_0 = _mm256_loadu_si256(secret.as_ptr().cast::().add(4 * i).cast()); + let mut acc_0 = _mm256_loadu_si256(acc.add(i)); + let stripe_0 = _mm256_loadu_si256(stripe.add(i)); + let secret_0 = _mm256_loadu_si256(secret.add(i)); // let value[i] = stripe[i] ^ secret[i]; let value_0 = _mm256_xor_si256(stripe_0, secret_0); // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); + let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); // acc[i] += stripe_swap[i] acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); @@ -712,7 +716,7 @@ mod avx2 { // acc[i] += product[i] acc_0 = _mm256_add_epi64(acc_0, product_0); - _mm256_storeu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast(), acc_0); + 
_mm256_storeu_si256(acc.add(i), acc_0); } } } @@ -758,11 +762,15 @@ mod sse2 { #[inline] #[target_feature(enable = "sse2")] unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let acc = acc.as_mut_ptr().cast::<__m128i>(); + let stripe = stripe.as_ptr().cast::<__m128i>(); + let secret = secret.as_ptr().cast::<__m128i>(); + for i in 0..4 { // todo: align the accumulator and avoid the unaligned load and store - let mut acc_0 = _mm_loadu_si128(acc.as_mut_ptr().cast::().add(2 * i).cast()); - let stripe_0 = _mm_loadu_si128(stripe.as_ptr().cast::().add(2 * i).cast()); - let secret_0 = _mm_loadu_si128(secret.as_ptr().cast::().add(2 * i).cast()); + let mut acc_0 = _mm_loadu_si128(acc.add(i)); + let stripe_0 = _mm_loadu_si128(stripe.add(i)); + let secret_0 = _mm_loadu_si128(secret.add(i)); // let value[i] = stripe[i] ^ secret[i]; let value_0 = _mm_xor_si128(stripe_0, secret_0); @@ -773,16 +781,16 @@ mod sse2 { // acc[i] += stripe_swap[i] acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); - // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) - let value_swap_0 = _mm_shuffle_epi32::<0b10_11_00_01>(value_0); + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm_srli_epi64::<32>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) - let product_0 = _mm_mul_epu32(value_0, value_swap_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm_mul_epu32(value_0, value_shift_0); // acc[i] += product[i] acc_0 = _mm_add_epi64(acc_0, product_0); - _mm_storeu_si128(acc.as_mut_ptr().cast::().add(2 * i).cast(), acc_0); + _mm_storeu_si128(acc.add(i), acc_0); } } } From 3ff0716fe6f77acbac403419f8cda6b656d465fa Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 16:49:44 -0400 Subject: [PATCH 089/166] Oops this is aarch64 only --- xx_hash-sys/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 
45bfb08e5..14d132aa5 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -284,6 +284,7 @@ pub mod scalar { // ---------- +#[cfg(target_arch = "aarch64")] pub mod neon { use super::*; From e5f17792f65736b41afc296142999442483fdcf5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 17:15:09 -0400 Subject: [PATCH 090/166] flag no go on msvc --- xx_hash-sys/build.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index 73056b693..d00bdc132 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -32,8 +32,16 @@ fn main() { Some(Arch::X86_64) => { let mut avx2_build = build.clone(); + + // TODO: check for msvc, not "windows" + if cfg!(target_os = "windows") { + // This seems to make the code slower + // avx2_build.flag("/arch:AVX2"); + } else { + avx2_build.flag("-march=x86-64-v3"); + } + avx2_build - .flag("-march=x86-64-v3") .define("XXH_VECTOR", "XXH_AVX2") .define("XXH_NAMESPACE", "avx2_") .compile("xxhash_avx2"); From 3b812a5b47df5814bb815096df20dfe761f0a38c Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 20:20:16 -0400 Subject: [PATCH 091/166] Add cfg flags to select the implementation --- Cargo.toml | 13 ++++++++++--- README.md | 2 ++ compare/Cargo.toml | 5 ----- src/xxhash3_64.rs | 36 +++++++++++++++++++++++------------- 4 files changed, 35 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 784008721..53b8ee514 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "simd", "std"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] random = ["dep:rand"] @@ -22,10 +22,17 @@ xxhash32 = [] xxhash64 = [] xxhash3_64 = [] -simd = [] - std = [] +[lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = [ + 'cfg(_internal_xxhash3_force_scalar)', + 'cfg(_internal_xxhash3_force_neon)', + 
'cfg(_internal_xxhash3_force_sse2)', + 'cfg(_internal_xxhash3_force_avx2)', +] + [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } diff --git a/README.md b/README.md index e3e37f9c9..ab987c651 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ cargo test -p comparison # proptests cargo miri test # unsafe cargo miri test --target s390x-unknown-linux-gnu # big-endian +cargo -Z profile-rustflags --config 'profile.test.package.xx-renu.rustflags=["--cfg=_internal_xxhash3_force_scalar"]' test + minimal versions no-features all-features diff --git a/compare/Cargo.toml b/compare/Cargo.toml index db575eb92..bbb8d0a59 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -3,11 +3,6 @@ name = "compare" version = "0.1.0" edition = "2021" -[features] -default = ["simd"] - -simd = ["xx-renu/simd"] - [[bench]] name = "benchmark" harness = false diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index f3642a6a7..f52c5d082 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -251,6 +251,19 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { + #[cfg(_internal_xxhash3_force_scalar)] + return scalar::oneshot(secret, input); + + #[cfg(_internal_xxhash3_force_neon)] + unsafe { return neon::oneshot_unchecked(secret, input) }; + + #[cfg(_internal_xxhash3_force_sse2)] + unsafe { return sse2::oneshot_unchecked(secret, input) }; + + #[cfg(_internal_xxhash3_force_avx2)] + unsafe { return avx2::oneshot_unchecked(secret, input) }; + + #[allow(unreachable_code)] detect::oneshot(secret, input) } @@ -451,7 +464,7 @@ mod scalar { } } -#[cfg(all(target_arch = "aarch64", feature = "simd"))] +#[cfg(target_arch = "aarch64")] mod neon { use core::arch::aarch64::*; @@ -637,7 +650,6 @@ mod neon { mod aarch64_detect { #[inline] pub fn oneshot(secret: &[u8], input: 
&[u8]) -> u64 { - #[cfg(feature = "simd")] if std::arch::is_aarch64_feature_detected!("neon") { return unsafe { super::neon::oneshot_unchecked(secret, input) }; } @@ -646,7 +658,7 @@ mod aarch64_detect { } } -#[cfg(all(target_arch = "x86_64", feature = "simd"))] +#[cfg(target_arch = "x86_64")] mod avx2 { use core::arch::x86_64::*; @@ -721,6 +733,7 @@ mod avx2 { } } +#[cfg(target_arch = "x86_64")] mod sse2 { use core::arch::x86_64::*; @@ -799,15 +812,12 @@ mod sse2 { mod x86_64_detect { #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - #[cfg(feature = "simd")] - { - if std::arch::is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::oneshot_unchecked(secret, input) }; - } + if std::arch::is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::oneshot_unchecked(secret, input) }; + } - if std::arch::is_x86_feature_detected!("sse2") { - return unsafe { super::sse2::oneshot_unchecked(secret, input) }; - } + if std::arch::is_x86_feature_detected!("sse2") { + return unsafe { super::sse2::oneshot_unchecked(secret, input) }; } super::scalar::oneshot(secret, input) @@ -884,7 +894,7 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - #[cfg(all(target_arch = "aarch64", feature = "simd"))] + #[cfg(target_arch = "aarch64")] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); @@ -899,7 +909,7 @@ impl SliceBackport for [T] { (head, tail) } - #[cfg(all(target_arch = "aarch64", feature = "simd"))] + #[cfg(target_arch = "aarch64")] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; From 113e848dcb5d5d76f474a1c0d24f51f83140a713 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 25 Jul 2024 08:34:01 -0400 Subject: [PATCH 092/166] Add benchmark for small data --- compare/benches/benchmark.rs | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git 
a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index f9b765d9b..b85f35547 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -169,6 +169,34 @@ fn half_sizes(max: usize) -> impl Iterator { mod xxhash3_64 { use super::*; + fn tiny_data(c: &mut Criterion) { + let (seed, data) = gen_data(TINY_DATA_SIZE); + let mut g = c.benchmark_group("xxhash3_64/tiny_data"); + + for size in 0..=data.len() { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("impl-c/fn-oneshot/size-{size:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = c::XxHash3_64::oneshot_with_seed(seed, data); + black_box(hash); + }) + }); + + let id = format!("impl-rust/fn-oneshot/size-{size:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = rust::XxHash3_64::oneshot_with_seed(seed, data); + black_box(hash); + }) + }); + } + + g.finish(); + } + fn oneshot(c: &mut Criterion) { let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("xxhash3_64/oneshot"); @@ -217,7 +245,7 @@ mod xxhash3_64 { g.finish(); } - criterion_group!(benches, oneshot); + criterion_group!(benches, tiny_data, oneshot); } criterion_group!(benches, tiny_data, oneshot, streaming); From cc1fc5a5466cd8ed440c5adcb6e9d3ab085a3485 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 25 Jul 2024 09:51:18 -0400 Subject: [PATCH 093/166] Force inling of the xxhash3_64 implementation This allows the code to be specialized for hard-coded secrets --- src/xxhash3_64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index f52c5d082..936c746c9 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -82,7 +82,7 @@ fn derive_secret(seed: u64) -> [u8; 192] { derived_secret } -#[inline] +#[inline(always)] fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { match input.len() { 0 => impl_0_bytes(secret, seed), From 3ed151602cb6628f4368a1b371a18b020322bb77 
Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 25 Jul 2024 11:20:59 -0400 Subject: [PATCH 094/166] format --- src/xxhash3_64.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 936c746c9..8e42030b9 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -255,13 +255,19 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { return scalar::oneshot(secret, input); #[cfg(_internal_xxhash3_force_neon)] - unsafe { return neon::oneshot_unchecked(secret, input) }; + unsafe { + return neon::oneshot_unchecked(secret, input); + }; #[cfg(_internal_xxhash3_force_sse2)] - unsafe { return sse2::oneshot_unchecked(secret, input) }; + unsafe { + return sse2::oneshot_unchecked(secret, input); + }; #[cfg(_internal_xxhash3_force_avx2)] - unsafe { return avx2::oneshot_unchecked(secret, input) }; + unsafe { + return avx2::oneshot_unchecked(secret, input); + }; #[allow(unreachable_code)] detect::oneshot(secret, input) From f0b2cc27b1f65faad4eff4805e1722c8696155de Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 25 Jul 2024 14:04:05 -0400 Subject: [PATCH 095/166] manual unroll --- src/xxhash3_64.rs | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 8e42030b9..8509043cd 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -184,22 +184,49 @@ fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let num_rounds = ((input.len() - 1) >> 5) + 1; - + let (secret, _) = secret.bp_as_chunks(); + let (secret, _) = secret.bp_as_chunks::<2>(); let (fwd, _) = input.bp_as_chunks(); let (_, bwd) = input.bp_as_rchunks(); - let fwd = fwd.iter(); - let bwd = bwd.iter().rev(); + let q = bwd.len(); + + if input.len() > 32 { + if input.len() > 64 { + if 
input.len() > 96 { + acc = acc.wrapping_add(mix_step_ff(&fwd[3], &secret[3][0], seed)); + acc = acc.wrapping_add(mix_step_ff(&bwd[q - 4], &secret[3][1], seed)); + } + + acc = acc.wrapping_add(mix_step_ff(&fwd[2], &secret[2][0], seed)); + acc = acc.wrapping_add(mix_step_ff(&bwd[q - 3], &secret[2][1], seed)); + } - for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { - acc = acc.wrapping_add(mix_step(fwd_chunk, secret, i * 32, seed)); - acc = acc.wrapping_add(mix_step(bwd_chunk, secret, i * 32 + 16, seed)); + acc = acc.wrapping_add(mix_step_ff(&fwd[1], &secret[1][0], seed)); + acc = acc.wrapping_add(mix_step_ff(&bwd[q - 2], &secret[1][1], seed)); } + acc = acc.wrapping_add(mix_step_ff(&fwd[0], &secret[0][0], seed)); + acc = acc.wrapping_add(mix_step_ff(&bwd[q - 1], &secret[0][1], seed)); + avalanche(acc) } +#[inline] +fn mix_step_ff(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { + let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; + let secret_words = unsafe { secret.as_ptr().cast::<[u64; 2]>().read_unaligned() }; + + let mul_result = { + let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); + let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); + + a.wrapping_mul(b) + }; + + mul_result.lower_half() ^ mul_result.upper_half() +} + #[inline] fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); From 1a73b5943634faafe685e5816fac8992ce8738a2 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 26 Jul 2024 07:58:14 -0400 Subject: [PATCH 096/166] retarget benches --- compare/benches/benchmark.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index b85f35547..26587bb3e 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -170,14 +170,20 @@ mod xxhash3_64 { use 
super::*; fn tiny_data(c: &mut Criterion) { - let (seed, data) = gen_data(TINY_DATA_SIZE); + let (seed, data) = gen_data(240); let mut g = c.benchmark_group("xxhash3_64/tiny_data"); - for size in 0..=data.len() { + // let categories = 0..=data.len(); + + // Visual inspection of all the data points showed these as + // examples of thier nearby neighbors. + let categories = [0, 2, 9, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230]; + + for size in categories { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("impl-c/fn-oneshot/size-{size:02}"); + let id = format!("impl-c/fn-oneshot/size-{size:03}"); g.bench_function(id, |b| { b.iter(|| { let hash = c::XxHash3_64::oneshot_with_seed(seed, data); @@ -185,7 +191,7 @@ mod xxhash3_64 { }) }); - let id = format!("impl-rust/fn-oneshot/size-{size:02}"); + let id = format!("impl-rust/fn-oneshot/size-{size:03}"); g.bench_function(id, |b| { b.iter(|| { let hash = rust::XxHash3_64::oneshot_with_seed(seed, data); From 6c8e6de199637fc666c0b15cdc28ef4323241d8e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 26 Jul 2024 07:58:36 -0400 Subject: [PATCH 097/166] extra --- src/xxhash3_64.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 8509043cd..8ec726d34 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -477,6 +477,7 @@ mod scalar { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 // https://github.com/llvm/llvm-project/issues/98481 + // TODO: this is probably if NEON, yeah? #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use core::arch::asm; From 5f4a1d88a9dab8dea6a81291e5553fb9d232fed5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 26 Jul 2024 11:53:32 -0400 Subject: [PATCH 098/166] reorder match Since more data takes longer to hash, prioritize it to even out the speeds. 
--- src/xxhash3_64.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 8ec726d34..129c2cf50 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -85,19 +85,19 @@ fn derive_secret(seed: u64) -> [u8; 192] { #[inline(always)] fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { match input.len() { - 0 => impl_0_bytes(secret, seed), + 241.. => impl_241_plus_bytes(secret, input), - 1..=3 => impl_1_to_3_bytes(secret, seed, input), + 129..=240 => impl_129_to_240_bytes(secret, seed, input), - 4..=8 => impl_4_to_8_bytes(secret, seed, input), + 17..=128 => impl_17_to_128_bytes(secret, seed, input), 9..=16 => impl_9_to_16_bytes(secret, seed, input), - 17..=128 => impl_17_to_128_bytes(secret, seed, input), + 4..=8 => impl_4_to_8_bytes(secret, seed, input), - 129..=240 => impl_129_to_240_bytes(secret, seed, input), + 1..=3 => impl_1_to_3_bytes(secret, seed, input), - _ => impl_241_plus_bytes(secret, input), + 0 => impl_0_bytes(secret, seed), } } From 90060cc264663a17c425592ca9eac9436c256cb6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 26 Jul 2024 11:54:26 -0400 Subject: [PATCH 099/166] use array mix_step everywhere --- src/xxhash3_64.rs | 60 ++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 129c2cf50..3095b4627 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -194,71 +194,57 @@ fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { if input.len() > 32 { if input.len() > 64 { if input.len() > 96 { - acc = acc.wrapping_add(mix_step_ff(&fwd[3], &secret[3][0], seed)); - acc = acc.wrapping_add(mix_step_ff(&bwd[q - 4], &secret[3][1], seed)); + acc = acc.wrapping_add(mix_step(&fwd[3], &secret[3][0], seed)); + acc = acc.wrapping_add(mix_step(&bwd[q - 4], &secret[3][1], seed)); } - acc = acc.wrapping_add(mix_step_ff(&fwd[2], &secret[2][0], seed)); - acc = 
acc.wrapping_add(mix_step_ff(&bwd[q - 3], &secret[2][1], seed)); + acc = acc.wrapping_add(mix_step(&fwd[2], &secret[2][0], seed)); + acc = acc.wrapping_add(mix_step(&bwd[q - 3], &secret[2][1], seed)); } - acc = acc.wrapping_add(mix_step_ff(&fwd[1], &secret[1][0], seed)); - acc = acc.wrapping_add(mix_step_ff(&bwd[q - 2], &secret[1][1], seed)); + acc = acc.wrapping_add(mix_step(&fwd[1], &secret[1][0], seed)); + acc = acc.wrapping_add(mix_step(&bwd[q - 2], &secret[1][1], seed)); } - acc = acc.wrapping_add(mix_step_ff(&fwd[0], &secret[0][0], seed)); - acc = acc.wrapping_add(mix_step_ff(&bwd[q - 1], &secret[0][1], seed)); + acc = acc.wrapping_add(mix_step(&fwd[0], &secret[0][0], seed)); + acc = acc.wrapping_add(mix_step(&bwd[q - 1], &secret[0][1], seed)); avalanche(acc) } -#[inline] -fn mix_step_ff(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { - let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; - let secret_words = unsafe { secret.as_ptr().cast::<[u64; 2]>().read_unaligned() }; - - let mul_result = { - let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); - let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); - - a.wrapping_mul(b) - }; - - mul_result.lower_half() ^ mul_result.upper_half() -} - #[inline] fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let (head, _tail) = input.bp_as_chunks(); + let (head, _) = input.bp_as_chunks(); + let last_chunk = input.last_chunk().unwrap(); let mut head = head.iter(); - for (i, chunk) in head.by_ref().take(8).enumerate() { - acc = acc.wrapping_add(mix_step(chunk, secret, i * 16, seed)); + let (ss, _) = secret.bp_as_chunks(); + let (ss2, _) = secret[3..].bp_as_chunks(); + + let qq = head.by_ref().zip(ss); + + for (chunk, s) in qq.take(8) { + acc = acc.wrapping_add(mix_step(chunk, s, seed)); } acc = avalanche(acc); - for (i, chunk) in head.enumerate() { - acc = 
acc.wrapping_add(mix_step(chunk, secret, i * 16 + 3, seed)); + for (chunk, s) in head.zip(ss2) { + acc = acc.wrapping_add(mix_step(chunk, s, seed)); } - acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), secret, 119, seed)); + let ss3 = &secret[119..].first_chunk().unwrap(); + acc = acc.wrapping_add(mix_step(last_chunk, ss3, seed)); avalanche(acc) } #[inline] -fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> u64 { +fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; - let secret_words = unsafe { - secret - .as_ptr() - .add(secret_offset) - .cast::<[u64; 2]>() - .read_unaligned() - }; + let secret_words = unsafe { secret.as_ptr().cast::<[u64; 2]>().read_unaligned() }; let mul_result = { let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); From 88a738e86e4d2b7df17c64c48b35a6fe83444248 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 12 Aug 2024 13:18:47 -0400 Subject: [PATCH 100/166] Add a streaming implementation for XxHash3_64 --- Cargo.toml | 5 +- compare/benches/benchmark.rs | 66 +++- compare/src/lib.rs | 130 +++---- src/lib.rs | 5 +- src/xxhash3_64.rs | 727 ++++++++++++++++++++++++++++++++--- xx_hash-sys/src/lib.rs | 49 ++- 6 files changed, 846 insertions(+), 136 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 53b8ee514..5183cadfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "alloc", "std"] random = ["dep:rand"] @@ -22,7 +22,8 @@ xxhash32 = [] xxhash64 = [] xxhash3_64 = [] -std = [] +alloc = [] +std = ["alloc"] [lints.rust.unexpected_cfgs] level = "warn" diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 26587bb3e..b009f0e72 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ 
-251,7 +251,71 @@ mod xxhash3_64 { g.finish(); } - criterion_group!(benches, tiny_data, oneshot); + fn streaming(c: &mut Criterion) { + let mut g = c.benchmark_group("xxhash3_64/streaming_many_chunks"); + + for size in half_sizes(BIG_DATA_SIZE).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { + for n_chunks in half_sizes(MAX_CHUNKS) { + let (seed, chunks) = gen_chunked_data(size, n_chunks); + g.throughput(Throughput::Bytes(size as _)); + + let id = format!("impl-c/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + + let id = format!("impl-c-scalar/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::scalar::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::neon::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + } + + let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = rust::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + } + } + + g.finish(); + } + + criterion_group!(benches, tiny_data, oneshot, streaming); } criterion_group!(benches, tiny_data, oneshot, streaming); diff --git a/compare/src/lib.rs b/compare/src/lib.rs index b85b1c167..ddda2fea2 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -211,20 +211,20 @@ mod xxhash3_64 { use super::*; proptest! 
{ - // #[test] - // fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { - // oneshot_same_as_one_chunk_impl(seed, &data)?; - // } + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } - // #[test] - // fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - // oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - // } + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } - // #[test] - // fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { - // oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; - // } + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } #[test] fn oneshot(seed: u64, data: Vec) { @@ -241,46 +241,46 @@ mod xxhash3_64 { oneshot_with_secret_impl(&secret, &data)?; } - // #[test] - // fn streaming_one_chunk(seed: u64, data: Vec) { - // streaming_one_chunk_impl(seed, &data)?; - // } + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } - // #[test] - // fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - // streaming_one_chunk_impl(seed, &data[offset..])?; - // } + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } } - // fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - // let oneshot = rust::XxHash64::oneshot(seed, data); - // let one_chunk = { - // let mut hasher = rust::XxHash64::with_seed(seed); - // hasher.write(data); - // hasher.finish() - // }; - - // prop_assert_eq!(oneshot, one_chunk); - // Ok(()) - // } - - // fn oneshot_same_as_many_chunks_impl( - // seed: u64, - // data: &[u8], 
- // chunks: &[Vec], - // ) -> TestCaseResult { - // let oneshot = rust::XxHash64::oneshot(seed, data); - // let many_chunks = { - // let mut hasher = rust::XxHash64::with_seed(seed); - // for chunk in chunks { - // hasher.write(chunk); - // } - // hasher.finish() - // }; - - // prop_assert_eq!(oneshot, many_chunks); - // Ok(()) - // } + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) + } + + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) + } fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = c::XxHash3_64::oneshot_with_seed(seed, data); @@ -298,22 +298,22 @@ mod xxhash3_64 { Ok(()) } - // fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - // let native = { - // let mut hasher = c::XxHash64::with_seed(seed); - // hasher.write(data); - // hasher.finish() - // }; - - // let rust = { - // let mut hasher = rust::XxHash64::with_seed(seed); - // hasher.write(data); - // hasher.finish() - // }; - - // prop_assert_eq!(native, rust); - // Ok(()) - // } + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } } fn vec_and_index() -> impl Strategy, usize)> { diff 
--git a/src/lib.rs b/src/lib.rs index 2a6b24eb7..ad243166d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,7 +69,10 @@ #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] -#[cfg(any(doc, test))] +#[cfg(feature = "alloc")] +extern crate alloc; + +#[cfg(any(feature = "std", doc, test))] extern crate std; #[cfg(feature = "xxhash32")] diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 3095b4627..c0e45499a 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -use core::{mem, slice}; +use core::{hash, mem, slice}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -32,9 +32,15 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; +const DEFAULT_BUFFER_LEN: usize = 1024; + pub const SECRET_MINIMUM_LENGTH: usize = 136; -pub struct XxHash3_64; +pub struct XxHash3_64 { + #[cfg(feature = "alloc")] + inner: with_alloc::AllocRawHasher, + _private: (), +} impl XxHash3_64 { #[inline(never)] @@ -44,13 +50,15 @@ impl XxHash3_64 { #[inline(never)] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { - let secret = if seed != 0 && input.len() > 240 { - &derive_secret(seed) - } else { - &DEFAULT_SECRET - }; + let mut secret = DEFAULT_SECRET; - impl_oneshot(secret, seed, input) + // We know that the secret will only be used if we have more + // than 240 bytes, so don't waste time computing it otherwise. + if input.len() > 240 { + derive_secret(seed, &mut secret); + } + + impl_oneshot(&secret, seed, input) } #[inline(never)] @@ -60,10 +68,354 @@ impl XxHash3_64 { } } +/// Holds secret and temporary buffers that are ensured to be +/// appropriately sized. 
+pub struct SecretBuffer { + seed: u64, + secret: S, + buffer: B, +} + +impl SecretBuffer +where + S: AsRef<[u8]>, + B: AsRef<[u8]> + AsMut<[u8]>, +{ + /// Takes the seed, secret, and buffer and performs no + /// modifications to them, only validating that the sizes are + /// appropriate. + pub fn new(seed: u64, secret: S, buffer: B) -> Result { + let this = Self { + seed, + secret, + buffer, + }; + + if this.is_valid() { + Ok(this) + } else { + Err(this.decompose()) + } + } + + fn is_valid(&self) -> bool { + let secret = self.secret.as_ref(); + + assert!(secret.len() >= SECRET_MINIMUM_LENGTH); // TODO: return result + + let required_buffer_len = block_size(secret); + let buffer_len = self.buffer.as_ref().len(); + + required_buffer_len == buffer_len + } + + /// Returns the secret and buffer values. + pub fn decompose(self) -> (S, B) { + (self.secret, self.buffer) + } +} + +impl SecretBuffer<&'static [u8; 192], [u8; 1024]> { + /// Use the default seed and secret values while allocating nothing. + /// + /// Note that this type may take up a surprising amount of stack space. 
+ #[inline] + pub const fn default() -> Self { + SecretBuffer { + seed: DEFAULT_SEED, + secret: &DEFAULT_SECRET, + buffer: [0; DEFAULT_BUFFER_LEN], + } + } +} + +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +mod with_alloc { + use ::alloc::{boxed::Box, vec}; + + use super::*; + + impl XxHash3_64 { + pub fn new() -> Self { + Self { + inner: RawHasher::allocate_default(), + _private: (), + } + } + + pub fn with_seed(seed: u64) -> Self { + Self { + inner: RawHasher::allocate_with_seed(seed), + _private: (), + } + } + + pub fn with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { + Self { + inner: RawHasher::allocate_with_seed_and_secret(seed, secret), + _private: (), + } + } + } + + impl Default for XxHash3_64 { + fn default() -> Self { + Self::new() + } + } + + impl hash::Hasher for XxHash3_64 { + #[inline] + fn write(&mut self, input: &[u8]) { + self.inner.write(input) + } + + #[inline] + fn finish(&self) -> u64 { + self.inner.finish() + } + } + + type AllocSecretBuffer = SecretBuffer, Box<[u8]>>; + + impl AllocSecretBuffer { + /// Allocates the secret and temporary buffers and fills them + /// with the default seed and secret values. + pub fn allocate_default() -> Self { + Self { + seed: DEFAULT_SEED, + secret: DEFAULT_SECRET.to_vec().into(), + buffer: vec![0; DEFAULT_BUFFER_LEN].into(), + } + } + + /// Allocates the secret and temporary buffers and uses the + /// provided seed to construct the secret value. + pub fn allocate_with_seed(seed: u64) -> Self { + let mut secret = DEFAULT_SECRET; + derive_secret(seed, &mut secret); + + Self { + seed, + secret: secret.to_vec().into(), + buffer: vec![0; DEFAULT_BUFFER_LEN].into(), + } + } + + /// Allocates the temporary buffer and uses the provided seed + /// and secret buffer. 
+ pub fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { + let secret = secret.into(); + assert!(secret.len() > SECRET_MINIMUM_LENGTH); // todo result + let block_size = block_size(&secret); + + Self { + seed, + secret, + buffer: vec![0; block_size].into(), + } + } + } + + pub type AllocRawHasher = RawHasher, Box<[u8]>>; + + impl AllocRawHasher { + fn allocate_default() -> Self { + Self::new(SecretBuffer::allocate_default()) + } + + fn allocate_with_seed(seed: u64) -> Self { + Self::new(SecretBuffer::allocate_with_seed(seed)) + } + + fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { + Self::new(SecretBuffer::allocate_with_seed_and_secret(seed, secret)) + } + } +} + +impl SecretBuffer +where + S: AsRef<[u8]> + AsMut<[u8]>, + B: AsRef<[u8]> + AsMut<[u8]>, +{ + /// Fills the secret buffer with a secret derived from the seed + /// and the default secret. + pub fn with_seed(seed: u64, mut secret: S, buffer: B) -> Result { + let secret_slice: &mut [u8; 192] = match secret.as_mut().try_into() { + Ok(s) => s, + Err(_) => return Err((secret, buffer)), + }; + + *secret_slice = DEFAULT_SECRET; + derive_secret(seed, secret_slice); + + Self::new(seed, secret, buffer) + } +} + +/// A lower-level interface for computing a hash from streaming data. +/// +/// The algorithm requires two reasonably large pieces of data: the +/// secret and a temporary buffer. [`XxHash3_64`][] makes one concrete +/// implementation decision that uses dynamic memory allocation, but +/// specialized usages may desire more flexibility. This type, +/// combined with [`SecretBuffer`][], offer that flexibility at the +/// cost of a generic type. 
+pub struct RawHasher { + secret_buffer: SecretBuffer, + buffer_len: usize, + accumulator: [u64; 8], + total_bytes: usize, +} + +impl RawHasher { + pub fn new(secret_buffer: SecretBuffer) -> Self { + Self { + secret_buffer, + buffer_len: 0, + accumulator: INITIAL_ACCUMULATORS, + total_bytes: 0, + } + } +} + +impl hash::Hasher for RawHasher +where + S: AsRef<[u8]>, + B: AsRef<[u8]> + AsMut<[u8]>, +{ + #[inline] + fn write(&mut self, mut input: &[u8]) { + if input.is_empty() { + return; + } + + let Self { + secret_buffer, + buffer_len, + accumulator, + total_bytes, + } = self; + let SecretBuffer { + seed: _, + secret, + buffer, + } = secret_buffer; + let secret = secret.as_ref(); + let buffer = buffer.as_mut(); + let input_len = input.len(); + + // Short-circuit if the buffer is empty and we have one or + // more full buffers-worth on the input. + if buffer.is_empty() { + let (blocks, remainder) = unsafe { chunks_and_last(input, buffer.len()) }; + detect::rounds(accumulator, blocks, secret); + input = remainder; + } + + while !input.is_empty() { + let remaining = &mut buffer[*buffer_len..]; + let n_to_copy = usize::min(remaining.len(), input.len()); + + let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); + let (input_head, input_tail) = input.split_at(n_to_copy); + + remaining_head.copy_from_slice(input_head); + *buffer_len += n_to_copy; + + // We have not filled the whole buffer, no need to + // process it now + if !remaining_tail.is_empty() { + break; + } + + // We filled the buffer, but we don't know we have + // more data so we have to leave it in case it is the + // last full block. + if input_tail.is_empty() { + break; + } + + // We have a full buffer *and* we know there's more + // data after the buffer, so we can process this as a + // full block. 
+ detect::rounds(accumulator, [&*buffer], secret); + *buffer_len = 0; + + input = input_tail; + } + + *total_bytes += input_len; + } + + #[inline] + fn finish(&self) -> u64 { + let Self { + ref secret_buffer, + buffer_len, + accumulator, + total_bytes, + } = *self; + let SecretBuffer { + seed, + ref secret, + ref buffer, + } = *secret_buffer; + + let secret = secret.as_ref(); + let buffer = buffer.as_ref(); + + let input = &buffer[..buffer_len]; + + match total_bytes { + 241.. => { + let mut temp = [0; 64]; + + let last_stripe = match input.last_chunk() { + Some(chunk) => chunk, + None => { + let n_to_reuse = 64 - input.len(); + let to_reuse = buffer.len() - n_to_reuse; + + let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); + temp_head.copy_from_slice(&buffer[to_reuse..]); + temp_tail.copy_from_slice(input); + + &temp + } + }; + + detect::finalize(accumulator, input, last_stripe, secret, total_bytes) + } + + 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), + + 17..=128 => impl_17_to_128_bytes(&DEFAULT_SECRET, seed, input), + + 9..=16 => impl_9_to_16_bytes(&DEFAULT_SECRET, seed, input), + + 4..=8 => impl_4_to_8_bytes(&DEFAULT_SECRET, seed, input), + + 1..=3 => impl_1_to_3_bytes(&DEFAULT_SECRET, seed, input), + + 0 => impl_0_bytes(&DEFAULT_SECRET, seed), + } + } +} + +/// # Correctness +/// +/// This function assumes that the incoming buffer has been populated +/// with the default secret. 
#[inline] -fn derive_secret(seed: u64) -> [u8; 192] { - let mut derived_secret = DEFAULT_SECRET; - let base = derived_secret.as_mut_ptr().cast::(); +fn derive_secret(seed: u64, secret: &mut [u8; 192]) { + if seed == DEFAULT_SEED { + return; + } + + let base = secret.as_mut_ptr().cast::(); for i in 0..12 { let a_p = unsafe { base.add(i * 2) }; @@ -78,8 +430,6 @@ fn derive_secret(seed: u64) -> [u8; 192] { unsafe { a_p.write_unaligned(a) }; unsafe { b_p.write_unaligned(b) }; } - - derived_secret } #[inline(always)] @@ -286,6 +636,11 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { detect::oneshot(secret, input) } +fn block_size(secret: &[u8]) -> usize { + let stripes_per_block = (secret.len() - 64) / 8; + 64 * stripes_per_block +} + struct Algorithm(V); impl Algorithm { @@ -296,42 +651,37 @@ impl Algorithm { assert!(secret.len() >= SECRET_MINIMUM_LENGTH); assert!(input.len() >= 241); - let stripes_per_block = (secret.len() - 64) / 8; - let block_size = 64 * stripes_per_block; + let block_size = block_size(secret); - let mut blocks = input.chunks_exact(block_size); - let last_block = if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. 
- unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() - }; + let (blocks, last_block) = unsafe { chunks_and_last(input, block_size) }; + + self.rounds(&mut acc, blocks, secret); + + let len = input.len(); let last_stripe: &[u8; 64] = unsafe { &*input .as_ptr() - .add(input.len()) + .add(len) .sub(mem::size_of::<[u8; 64]>()) .cast() }; + self.finalize(acc, last_block, last_stripe, secret, len) + } + + #[inline] + fn rounds<'a>( + &self, + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { for block in blocks { let (stripes, _) = block.bp_as_chunks(); - self.round(&mut acc, stripes, secret); + self.round(acc, stripes, secret); } - - self.last_round(&mut acc, last_block, last_stripe, secret); - - self.final_merge( - &mut acc, - input.len().into_u64().wrapping_mul(PRIME64_1), - secret, - 11, - ) } #[inline] @@ -350,6 +700,20 @@ impl Algorithm { } } + #[inline] + fn finalize( + &self, + mut acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + self.last_round(&mut acc, last_block, last_stripe, secret); + + self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret, 11) + } + #[inline] fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, @@ -399,6 +763,26 @@ impl Algorithm { } } +/// # Safety +/// `input` must be non-empty. +unsafe fn chunks_and_last(input: &[u8], block_size: usize) -> (slice::ChunksExact<'_, u8>, &[u8]) { + debug_assert!(!input.is_empty()); + + let mut blocks = input.chunks_exact(block_size); + + let last_block = if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. 
+ unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; + + (blocks, last_block) +} + trait Vector { fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]); @@ -415,6 +799,26 @@ mod scalar { super::Algorithm(Impl).oneshot(secret, input) } + #[inline] + pub fn rounds<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + super::Algorithm(Impl).rounds(acc, blocks, secret) + } + + #[inline] + pub fn finalize( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + super::Algorithm(Impl).finalize(acc, last_block, last_stripe, secret, len) + } + use super::{SliceBackport as _, Vector, PRIME32_1}; pub struct Impl; @@ -498,6 +902,32 @@ mod neon { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } + /// # Safety + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + pub unsafe fn rounds_unchecked<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + } + + /// # Safety + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + pub unsafe fn finalize_unchecked( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) + } + struct Impl(()); impl Impl { @@ -668,13 +1098,40 @@ mod neon { #[cfg(all(target_arch = "aarch64", feature = "std"))] mod aarch64_detect { + macro_rules! 
pick { + ($f:ident, $s:ident, $($t:tt)+) => { + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { super::neon::$f $($t)+ }; + } + + super::scalar::$s $($t)+ + + }; + } + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - if std::arch::is_aarch64_feature_detected!("neon") { - return unsafe { super::neon::oneshot_unchecked(secret, input) }; - } + pick! { oneshot_unchecked, oneshot, (secret, input) } + } - super::scalar::oneshot(secret, input) + #[inline] + pub fn rounds<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + pick! { rounds_unchecked, rounds, (acc, blocks, secret) } + } + + #[inline] + pub fn finalize( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + pick! { finalize_unchecked, finalize, (acc, last_block, last_stripe, secret, len) } } } @@ -845,16 +1302,49 @@ mod x86_64_detect { } mod detect { + macro_rules! pick { + ($e:expr) => { + #[cfg(all(target_arch = "aarch64", feature = "std"))] + { + use super::aarch64_detect::*; + return $e; + } + + #[cfg(all(target_arch = "x86_64", feature = "std"))] + { + use super::x86_64_detect::*; + return $e; + } + + use super::scalar::*; + #[allow(unreachable_code)] + $e + }; + } + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - #[cfg(all(target_arch = "aarch64", feature = "std"))] - return super::aarch64_detect::oneshot(secret, input); + pick! { oneshot(secret, input) } + } - #[cfg(all(target_arch = "x86_64", feature = "std"))] - return super::x86_64_detect::oneshot(secret, input); + #[inline] + pub fn rounds<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + pick! { rounds(acc, blocks, secret) } + } - #[allow(unreachable_code)] - super::scalar::oneshot(secret, input) + #[inline] + pub fn finalize( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + pick! 
{ finalize(acc, last_block, last_stripe, secret, len) } } } @@ -949,10 +1439,31 @@ impl SliceBackport for [T] { #[cfg(test)] mod test { - use std::array; + use std::{array, hash::Hasher}; use super::*; + #[test] + fn secret_buffer_default_is_valid() { + assert!(SecretBuffer::default().is_valid()); + } + + #[test] + fn secret_buffer_allocate_default_is_valid() { + assert!(SecretBuffer::allocate_default().is_valid()) + } + + #[test] + fn secret_buffer_allocate_with_seed_is_valid() { + assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid()) + } + + #[test] + fn secret_buffer_allocate_with_seed_and_secret_is_valid() { + let secret = [42; 1024]; + assert!(SecretBuffer::allocate_with_seed_and_secret(0xdead_beef, secret).is_valid()) + } + macro_rules! bytes { ($($n: literal),* $(,)?) => { &[$(&gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] @@ -965,14 +1476,46 @@ mod test { array::from_fn(|i| (i % 251) as u8) } + fn hash_byte_by_byte(input: &[u8]) -> u64 { + let mut hasher = XxHash3_64::new(); + for byte in input.chunks(1) { + hasher.write(byte) + } + hasher.finish() + } + + fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 { + let mut hasher = XxHash3_64::with_seed(seed); + for byte in input.chunks(1) { + hasher.write(byte) + } + hasher.finish() + } + #[test] - fn hash_empty() { + fn oneshot_empty() { let hash = XxHash3_64::oneshot(&[]); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } #[test] - fn hash_1_to_3_bytes() { + fn streaming_empty() { + let hash = hash_byte_by_byte(&[]); + assert_eq!(hash, 0x2d06_8005_38d3_94c2); + } + + #[test] + fn oneshot_1_to_3_bytes() { + test_1_to_3_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_1_to_3_bytes() { + test_1_to_3_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![1, 2, 3]; let expected = [ @@ -982,13 +1525,23 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot(input); + let 
hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_4_to_8_bytes() { + fn oneshot_4_to_8_bytes() { + test_4_to_8_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_4_to_8_bytes() { + test_4_to_8_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![4, 5, 6, 7, 8]; let expected = [ @@ -1000,13 +1553,23 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_9_to_16_bytes() { + fn oneshot_9_to_16_bytes() { + test_9_to_16_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_9_to_16_bytes() { + test_9_to_16_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16]; let expected = [ @@ -1021,13 +1584,23 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_17_to_128_bytes() { + fn oneshot_17_to_128_bytes() { + test_17_to_128_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_17_to_128_bytes() { + test_17_to_128_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64) { let lower_boundary = bytes![17, 18, 19]; let chunk_boundary = bytes![31, 32, 33]; let upper_boundary = bytes![126, 127, 128]; @@ -1053,13 +1626,23 @@ mod test { ]; for (input, expected) in inputs.zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_129_to_240_bytes() { + fn oneshot_129_to_240_bytes() { + test_129_to_240_bytes(XxHash3_64::oneshot) + } + + #[test] + fn 
streaming_129_to_240_bytes() { + test_129_to_240_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64) { let lower_boundary = bytes![129, 130, 131]; let upper_boundary = bytes![238, 239, 240]; @@ -1077,13 +1660,23 @@ mod test { ]; for (input, expected) in inputs.zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_241_plus_bytes() { + fn oneshot_241_plus_bytes() { + test_241_plus_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_241_plus_bytes() { + test_241_plus_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![241, 242, 243, 244, 1024, 10240]; let expected = [ @@ -1096,13 +1689,23 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_with_seed() { + fn oneshot_with_seed() { + test_with_seed(XxHash3_64::oneshot_with_seed) + } + + #[test] + fn streaming_with_seed() { + test_with_seed(hash_byte_by_byte_with_seed) + } + + #[track_caller] + fn test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64) { let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024]; let expected = [ @@ -1117,7 +1720,7 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot_with_seed(0xdead_cafe, input); + let hash = f(0xdead_cafe, input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 14d132aa5..18b4365fb 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -143,6 +143,7 @@ extern "C" { fn XXH3_createState() -> *mut XXH3_state_t; fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn XXH3_64bits_reset_withSeed(state: *mut 
XXH3_state_t, seed: XXH64_hash_t) -> XXH_errorcode; fn XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -174,7 +175,7 @@ impl XxHash3_64 { } } - pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = XXH3_createState(); XXH3_64bits_reset(state); @@ -184,6 +185,16 @@ impl XxHash3_64 { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = XXH3_createState(); + XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; assert_eq!(retval, XXH_OK); @@ -222,6 +233,10 @@ pub mod scalar { fn scalar_XXH3_createState() -> *mut XXH3_state_t; fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn scalar_XXH3_64bits_reset_withSeed( + state: *mut XXH3_state_t, + seed: XXH64_hash_t, + ) -> XXH_errorcode; fn scalar_XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -253,7 +268,7 @@ pub mod scalar { } } - pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = scalar_XXH3_createState(); scalar_XXH3_64bits_reset(state); @@ -263,6 +278,16 @@ pub mod scalar { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = scalar_XXH3_createState(); + scalar_XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; @@ -304,6 +329,10 @@ pub mod neon { fn neon_XXH3_createState() -> *mut XXH3_state_t; fn neon_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn neon_XXH3_64bits_reset_withSeed( + state: *mut XXH3_state_t, + seed: XXH64_hash_t, + ) -> XXH_errorcode; fn neon_XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -335,7 +364,7 @@ pub mod neon { } } - 
pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = neon_XXH3_createState(); neon_XXH3_64bits_reset(state); @@ -345,6 +374,16 @@ pub mod neon { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = neon_XXH3_createState(); + neon_XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { neon_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; @@ -417,7 +456,7 @@ pub mod avx2 { } } - pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = avx2_XXH3_createState(); avx2_XXH3_64bits_reset(state); @@ -497,7 +536,7 @@ pub mod sse2 { } } - pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = sse2_XXH3_createState(); sse2_XXH3_64bits_reset(state); From 2485882a08de8ca8b97585dbbc0ab43d67cc2927 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 13 Aug 2024 15:34:26 -0400 Subject: [PATCH 101/166] Testing on x86 --- asmasm/src/main.rs | 21 ++++++-- compare/benches/benchmark.rs | 27 ++++++++++ src/xxhash3_64.rs | 97 ++++++++++++++++++++++++++++++++---- xx_hash-sys/src/lib.rs | 28 +++++++++++ 4 files changed, 161 insertions(+), 12 deletions(-) diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index e515bcdfc..706702566 100644 --- a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,4 +1,4 @@ -use std::{hint::black_box, time::Instant}; +use std::{hash::Hasher, hint::black_box, time::Instant}; use xx_hash_sys::XxHash3_64 as C; use xx_renu::xxhash3_64::XxHash3_64; @@ -8,18 +8,33 @@ fn main() { .nth(2) .map_or(false, |a| a.eq_ignore_ascii_case("C")); let file = std::fs::read(filename).expect("read"); + let seed = 0xdead_beef; if use_c { let start = Instant::now(); - let hash = C::oneshot(&file); + let hash = do_c(seed, &file); let elapsed = start.elapsed(); black_box(hash); eprintln!("C {elapsed:?}"); } else { let start = Instant::now(); - let hash = 
XxHash3_64::oneshot(&file); + let hash = do_rust(seed, &file); let elapsed = start.elapsed(); black_box(hash); eprintln!("Rust {elapsed:?}"); } } + +#[inline(never)] +fn do_c(seed: u64, file: &[u8]) -> u64 { + let mut hasher = C::with_seed(seed); + hasher.write(file); + hasher.finish() +} + +#[inline(never)] +fn do_rust(seed: u64, file: &[u8]) -> u64 { + let mut hasher = XxHash3_64::with_seed(seed); + hasher.write(&file); + hasher.finish() +} diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index b009f0e72..52000e427 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -298,6 +298,33 @@ mod xxhash3_64 { }); } + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::avx2::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + + let id = format!("impl-c-sse2/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::sse2::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + } + let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); g.bench_function(id, |b| { b.iter(|| { diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c0e45499a..d47c62ca5 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -286,7 +286,7 @@ where S: AsRef<[u8]>, B: AsRef<[u8]> + AsMut<[u8]>, { - #[inline] + #[inline(never)] fn write(&mut self, mut input: &[u8]) { if input.is_empty() { return; @@ -350,7 +350,7 @@ where *total_bytes += input_len; } - #[inline] + #[inline(never)] fn finish(&self) -> u64 { let Self { ref secret_buffer, @@ -765,6 +765,7 @@ impl Algorithm { /// # Safety /// `input` must be non-empty. 
+#[inline] unsafe fn chunks_and_last(input: &[u8], block_size: usize) -> (slice::ChunksExact<'_, u8>, &[u8]) { debug_assert!(!input.is_empty()); @@ -1149,6 +1150,32 @@ mod avx2 { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } + /// # Safety + /// You must ensure that the CPU has the AVX2 feature + #[inline] + #[target_feature(enable = "avx2")] + pub unsafe fn rounds_unchecked<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + } + + /// # Safety + /// You must ensure that the CPU has the AVX2 feature + #[inline] + #[target_feature(enable = "avx2")] + pub unsafe fn finalize_unchecked( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) + } + pub struct Impl(super::scalar::Impl); impl Impl { @@ -1224,6 +1251,32 @@ mod sse2 { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + pub unsafe fn rounds_unchecked<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + } + + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + pub unsafe fn finalize_unchecked( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) + } + pub struct Impl(super::scalar::Impl); impl Impl { @@ -1287,17 +1340,43 @@ mod sse2 { #[cfg(all(target_arch = "x86_64", feature = "std"))] mod x86_64_detect { + macro_rules! 
pick { + ($f:ident, $s:ident, $($t:tt)+) => { + if std::arch::is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::$f $($t)+ }; + } + + if std::arch::is_x86_feature_detected!("sse2") { + return unsafe { super::sse2::$f $($t)+ }; + } + + super::scalar::$s $($t)+ + }; + } + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - if std::arch::is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::oneshot_unchecked(secret, input) }; - } + pick! { oneshot_unchecked, oneshot, (secret, input) } + } - if std::arch::is_x86_feature_detected!("sse2") { - return unsafe { super::sse2::oneshot_unchecked(secret, input) }; - } + #[inline] + pub fn rounds<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + pick! { rounds_unchecked, rounds, (acc, blocks, secret) } + } - super::scalar::oneshot(secret, input) + #[inline] + pub fn finalize( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + pick! 
{ finalize_unchecked, finalize, (acc, last_block, last_stripe, secret, len) } } } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 18b4365fb..d2aae50a8 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -425,6 +425,10 @@ pub mod avx2 { fn avx2_XXH3_createState() -> *mut XXH3_state_t; fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn avx2_XXH3_64bits_reset_withSeed( + state: *mut XXH3_state_t, + seed: XXH64_hash_t, + ) -> XXH_errorcode; fn avx2_XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -466,6 +470,16 @@ pub mod avx2 { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = avx2_XXH3_createState(); + avx2_XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; @@ -505,6 +519,10 @@ pub mod sse2 { fn sse2_XXH3_createState() -> *mut XXH3_state_t; fn sse2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn sse2_XXH3_64bits_reset_withSeed( + state: *mut XXH3_state_t, + seed: XXH64_hash_t, + ) -> XXH_errorcode; fn sse2_XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -546,6 +564,16 @@ pub mod sse2 { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = sse2_XXH3_createState(); + sse2_XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { sse2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; From 79d3d996d062f052bda805ab97826178e2ff2aa5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 13 Aug 2024 19:02:08 -0400 Subject: [PATCH 102/166] Use paste to reduce duplication of C wrappers --- xx_hash-sys/Cargo.toml | 1 + xx_hash-sys/src/lib.rs | 548 ++++++++--------------------------------- 2 files changed, 104 
insertions(+), 445 deletions(-) diff --git a/xx_hash-sys/Cargo.toml b/xx_hash-sys/Cargo.toml index dd96308c0..4218caa0b 100644 --- a/xx_hash-sys/Cargo.toml +++ b/xx_hash-sys/Cargo.toml @@ -6,6 +6,7 @@ publish = false [dependencies] libc = { version = "0.2.155", default-features = false } +paste = { version = "1.0.15", default-features = false } [build-dependencies] cc = { version = "1.1.6", default-features = false } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index d2aae50a8..0de80c556 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -127,468 +127,126 @@ pub struct XXH3_state_t { _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } -extern "C" { - fn XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn XXH3_createState() -> *mut XXH3_state_t; - fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn XXH3_64bits_reset_withSeed(state: *mut XXH3_state_t, seed: XXH64_hash_t) -> XXH_errorcode; - fn XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; -} - -pub struct XxHash3_64(*mut XXH3_state_t); - -impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { XXH3_64bits(data.as_ptr().cast(), data.len()) } - } +/// Constructs a wrapper around the XXH3_64bit familiy of functions as +/// we compile the library in multiple modes to performance test +/// against. +macro_rules! 
xxh3_64b_template { + () => { crate::xxh3_64b_template!(@ XXH3); }; + + ($prefix: ident) => { ::paste::paste! { crate::xxh3_64b_template!(@ [< $prefix _XXH3 >]); } }; + + (@ $prefix: ident) => { + ::paste::paste! { + extern "C" { + fn [<$prefix _64bits>](input: *const libc::c_void, length: libc::size_t) -> crate::XXH64_hash_t; + fn [<$prefix _64bits_withSeed>]( + input: *const libc::c_void, + length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH64_hash_t; + fn [<$prefix _64bits_withSecret>]( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> crate::XXH64_hash_t; + + fn [<$prefix _createState>]() -> *mut crate::XXH3_state_t; + fn [<$prefix _64bits_reset>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; + fn [<$prefix _64bits_reset_withSeed>]( + state: *mut crate::XXH3_state_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH_errorcode; + fn [<$prefix _64bits_update>]( + state: *mut crate::XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> crate::XXH_errorcode; + fn [<$prefix _64bits_digest>](state: *mut crate::XXH3_state_t) -> crate::XXH64_hash_t; + fn [<$prefix _freeState>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; + } - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } + pub struct XxHash3_64(*mut crate::XXH3_state_t); + + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { [<$prefix _64bits>](data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { [<$prefix _64bits_withSeed>](data.as_ptr().cast(), data.len(), seed) } + } + + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + [<$prefix _64bits_withSecret>]( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn new() -> Self { + let state = unsafe { + 
let state = [<$prefix _createState>](); + [<$prefix _64bits_reset>](state); + state + }; + + Self(state) + } + + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = [<$prefix _createState>](); + [<$prefix _64bits_reset_withSeed>](state, seed); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { [<$prefix _64bits_update>](self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, crate::XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { [<$prefix _64bits_digest>](self.0) } + } + } - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { [<$prefix _freeState>](self.0) }; + assert_eq!(retval, crate::XXH_OK); + } + } } - } - - pub fn new() -> Self { - let state = unsafe { - let state = XXH3_createState(); - XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = XXH3_createState(); - XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = unsafe { XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { XXH3_64bits_digest(self.0) } - } + }; } +pub(crate) use xxh3_64b_template; -impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } -} - -// ---------- +xxh3_64b_template!(); pub mod scalar { - use super::*; - - extern "C" { - fn scalar_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn scalar_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn 
scalar_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn scalar_XXH3_createState() -> *mut XXH3_state_t; - fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn scalar_XXH3_64bits_reset_withSeed( - state: *mut XXH3_state_t, - seed: XXH64_hash_t, - ) -> XXH_errorcode; - fn scalar_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn scalar_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn scalar_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; - } - - pub struct XxHash3_64(*mut XXH3_state_t); - - impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { scalar_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } - - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { scalar_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } - - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - scalar_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) - } - } - - pub fn new() -> Self { - let state = unsafe { - let state = scalar_XXH3_createState(); - scalar_XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = scalar_XXH3_createState(); - scalar_XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = - unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { scalar_XXH3_64bits_digest(self.0) } - } - } - - impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { scalar_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } - } + 
crate::xxh3_64b_template!(scalar); } -// ---------- - #[cfg(target_arch = "aarch64")] pub mod neon { - use super::*; - - extern "C" { - fn neon_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn neon_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn neon_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn neon_XXH3_createState() -> *mut XXH3_state_t; - fn neon_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn neon_XXH3_64bits_reset_withSeed( - state: *mut XXH3_state_t, - seed: XXH64_hash_t, - ) -> XXH_errorcode; - fn neon_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn neon_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn neon_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; - } - - pub struct XxHash3_64(*mut XXH3_state_t); - - impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { neon_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } - - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { neon_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } - - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - neon_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) - } - } - - pub fn new() -> Self { - let state = unsafe { - let state = neon_XXH3_createState(); - neon_XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = neon_XXH3_createState(); - neon_XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = - unsafe { 
neon_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { neon_XXH3_64bits_digest(self.0) } - } - } - - impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { neon_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } - } + crate::xxh3_64b_template!(neon); } -// ---------- - #[cfg(target_arch = "x86_64")] pub mod avx2 { - use super::*; - - extern "C" { - fn avx2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn avx2_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn avx2_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn avx2_XXH3_createState() -> *mut XXH3_state_t; - fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn avx2_XXH3_64bits_reset_withSeed( - state: *mut XXH3_state_t, - seed: XXH64_hash_t, - ) -> XXH_errorcode; - fn avx2_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn avx2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn avx2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; - } - - pub struct XxHash3_64(*mut XXH3_state_t); - - impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { avx2_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } - - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { avx2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } - - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - avx2_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) - } - } - - pub fn new() -> Self { - let state = unsafe { - let state = avx2_XXH3_createState(); - 
avx2_XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = avx2_XXH3_createState(); - avx2_XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = - unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { avx2_XXH3_64bits_digest(self.0) } - } - } - - impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { avx2_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } - } + crate::xxh3_64b_template!(avx2); } #[cfg(target_arch = "x86_64")] pub mod sse2 { - use super::*; - - extern "C" { - fn sse2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn sse2_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn sse2_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn sse2_XXH3_createState() -> *mut XXH3_state_t; - fn sse2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn sse2_XXH3_64bits_reset_withSeed( - state: *mut XXH3_state_t, - seed: XXH64_hash_t, - ) -> XXH_errorcode; - fn sse2_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn sse2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn sse2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; - } - - pub struct XxHash3_64(*mut XXH3_state_t); - - impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { sse2_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } - - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { sse2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } 
- - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - sse2_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) - } - } - - pub fn new() -> Self { - let state = unsafe { - let state = sse2_XXH3_createState(); - sse2_XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = sse2_XXH3_createState(); - sse2_XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = - unsafe { sse2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { sse2_XXH3_64bits_digest(self.0) } - } - } - - impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { sse2_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } - } + crate::xxh3_64b_template!(sse2); } From 694b9460c6d222a7dc09f629a353c8442344ffcf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 13 Aug 2024 20:15:04 -0400 Subject: [PATCH 103/166] Use forcing cfgs for streaming functions too --- src/xxhash3_64.rs | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index d47c62ca5..0718a2b7f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -614,25 +614,6 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { - #[cfg(_internal_xxhash3_force_scalar)] - return scalar::oneshot(secret, input); - - #[cfg(_internal_xxhash3_force_neon)] - unsafe { - return neon::oneshot_unchecked(secret, input); - }; - - #[cfg(_internal_xxhash3_force_sse2)] - unsafe { - return sse2::oneshot_unchecked(secret, input); - }; - - #[cfg(_internal_xxhash3_force_avx2)] - unsafe { - return avx2::oneshot_unchecked(secret, input); - }; - - 
#[allow(unreachable_code)] detect::oneshot(secret, input) } @@ -1101,12 +1082,14 @@ mod neon { mod aarch64_detect { macro_rules! pick { ($f:ident, $s:ident, $($t:tt)+) => { + #[cfg(_internal_xxhash3_force_neon)] + return unsafe { super::neon::$f $($t)+ }; + if std::arch::is_aarch64_feature_detected!("neon") { return unsafe { super::neon::$f $($t)+ }; } super::scalar::$s $($t)+ - }; } @@ -1342,6 +1325,12 @@ mod sse2 { mod x86_64_detect { macro_rules! pick { ($f:ident, $s:ident, $($t:tt)+) => { + #[cfg(_internal_xxhash3_force_avx2)] + return unsafe { super::avx2::$f $($t)+ }; + + #[cfg(_internal_xxhash3_force_sse2)] + return unsafe { super::sse2::$f $($t)+ }; + if std::arch::is_x86_feature_detected!("avx2") { return unsafe { super::avx2::$f $($t)+ }; } @@ -1383,6 +1372,12 @@ mod x86_64_detect { mod detect { macro_rules! pick { ($e:expr) => { + #[cfg(_internal_xxhash3_force_scalar)] + { + use super::scalar::*; + return $e; + } + #[cfg(all(target_arch = "aarch64", feature = "std"))] { use super::aarch64_detect::*; @@ -1395,9 +1390,11 @@ mod detect { return $e; } - use super::scalar::*; #[allow(unreachable_code)] - $e + { + use super::scalar::*; + $e + } }; } From 9ef64a717634c8922905dd7371580575ce837d4e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 13 Aug 2024 21:10:42 -0400 Subject: [PATCH 104/166] Lift computing the secret end up a function call --- src/xxhash3_64.rs | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 0718a2b7f..e8335f45b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -667,8 +667,10 @@ impl Algorithm { #[inline] fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + let secret_end = secret.last_chunk().unwrap(); + self.round_accumulate(acc, stripes, secret); - self.0.round_scramble(acc, secret); + self.0.round_scramble(acc, secret_end); } #[inline] @@ -766,7 +768,7 @@ unsafe fn chunks_and_last(input: &[u8], 
block_size: usize) -> (slice::ChunksExac } trait Vector { - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]); + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); } @@ -774,8 +776,6 @@ trait Vector { // This module is not `cfg`-gated because it is used by some of the // SIMD implementations. mod scalar { - use core::mem; - #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { super::Algorithm(Impl).oneshot(secret, input) @@ -807,11 +807,8 @@ mod scalar { impl Vector for Impl { #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - let (last, _) = last.bp_as_chunks(); + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + let (last, _) = secret_end.bp_as_chunks(); let last = last.iter().copied().map(u64::from_ne_bytes); for (acc, secret) in acc.iter_mut().zip(last) { @@ -923,8 +920,8 @@ mod neon { impl Vector for Impl { #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { - unsafe { round_scramble_neon(acc, secret) } + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + unsafe { round_scramble_neon(acc, secret_end) } } #[inline] @@ -935,9 +932,9 @@ mod neon { #[inline] #[target_feature(enable = "neon")] - unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret: &[u8]) { + unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret_end: &[u8; 64]) { unsafe { - let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); + let secret_base = secret_end.as_ptr().cast::(); let (acc, _) = acc.bp_as_chunks_mut::<2>(); for (i, acc) in acc.iter_mut().enumerate() { let mut accv = vld1q_u64(acc.as_ptr()); @@ -1172,9 +1169,9 @@ mod avx2 { impl Vector for Impl { #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: 
&[u8; 64]) { // The scalar implementation is autovectorized nicely enough - self.0.round_scramble(acc, secret) + self.0.round_scramble(acc, secret_end) } #[inline] @@ -1273,9 +1270,9 @@ mod sse2 { impl Vector for Impl { #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { // The scalar implementation is autovectorized nicely enough - self.0.round_scramble(acc, secret) + self.0.round_scramble(acc, secret_end) } #[inline] From 6e2d4408e70e872fcccdf712e7c8b9964e73ed38 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 14 Aug 2024 11:56:11 -0400 Subject: [PATCH 105/166] checkpoint rewrite smaller buffer --- src/xxhash3_64.rs | 337 +++++++++++++++++++++++++++++----------------- 1 file changed, 215 insertions(+), 122 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index e8335f45b..6756b3078 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -32,8 +32,6 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; -const DEFAULT_BUFFER_LEN: usize = 1024; - pub const SECRET_MINIMUM_LENGTH: usize = 136; pub struct XxHash3_64 { @@ -68,27 +66,33 @@ impl XxHash3_64 { } } +const STRIPE_BYTES: usize = 64; +const BUFFERED_STRIPES: usize = 4; +const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; + +// Ensure that a full buffer always implies we are in the 241+ byte case. +const _: () = assert!(BUFFERED_BYTES > 240); + /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. -pub struct SecretBuffer { +pub struct SecretBuffer { seed: u64, secret: S, - buffer: B, + buffer: [u8; BUFFERED_BYTES], } -impl SecretBuffer +impl SecretBuffer where S: AsRef<[u8]>, - B: AsRef<[u8]> + AsMut<[u8]>, { /// Takes the seed, secret, and buffer and performs no /// modifications to them, only validating that the sizes are /// appropriate. 
- pub fn new(seed: u64, secret: S, buffer: B) -> Result { + pub fn new(seed: u64, secret: S) -> Result { let this = Self { seed, secret, - buffer, + buffer: [0; BUFFERED_BYTES], }; if this.is_valid() { @@ -101,21 +105,24 @@ where fn is_valid(&self) -> bool { let secret = self.secret.as_ref(); - assert!(secret.len() >= SECRET_MINIMUM_LENGTH); // TODO: return result + secret.len() >= SECRET_MINIMUM_LENGTH + } - let required_buffer_len = block_size(secret); - let buffer_len = self.buffer.as_ref().len(); + #[inline] + fn n_stripes(&self) -> usize { + let secret = self.secret.as_ref(); - required_buffer_len == buffer_len + // stripes_per_block + (secret.len() - 64) / 8 } /// Returns the secret and buffer values. - pub fn decompose(self) -> (S, B) { - (self.secret, self.buffer) + pub fn decompose(self) -> S { + self.secret } } -impl SecretBuffer<&'static [u8; 192], [u8; 1024]> { +impl SecretBuffer<&'static [u8; 192]> { /// Use the default seed and secret values while allocating nothing. /// /// Note that this type may take up a surprising amount of stack space. 
@@ -124,7 +131,7 @@ impl SecretBuffer<&'static [u8; 192], [u8; 1024]> { SecretBuffer { seed: DEFAULT_SEED, secret: &DEFAULT_SECRET, - buffer: [0; DEFAULT_BUFFER_LEN], + buffer: [0; BUFFERED_BYTES], } } } @@ -132,7 +139,7 @@ impl SecretBuffer<&'static [u8; 192], [u8; 1024]> { #[cfg(feature = "alloc")] #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] mod with_alloc { - use ::alloc::{boxed::Box, vec}; + use ::alloc::boxed::Box; use super::*; @@ -177,7 +184,7 @@ mod with_alloc { } } - type AllocSecretBuffer = SecretBuffer, Box<[u8]>>; + type AllocSecretBuffer = SecretBuffer>; impl AllocSecretBuffer { /// Allocates the secret and temporary buffers and fills them @@ -186,7 +193,7 @@ mod with_alloc { Self { seed: DEFAULT_SEED, secret: DEFAULT_SECRET.to_vec().into(), - buffer: vec![0; DEFAULT_BUFFER_LEN].into(), + buffer: [0; BUFFERED_BYTES], } } @@ -199,7 +206,7 @@ mod with_alloc { Self { seed, secret: secret.to_vec().into(), - buffer: vec![0; DEFAULT_BUFFER_LEN].into(), + buffer: [0; BUFFERED_BYTES], } } @@ -208,17 +215,16 @@ mod with_alloc { pub fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { let secret = secret.into(); assert!(secret.len() > SECRET_MINIMUM_LENGTH); // todo result - let block_size = block_size(&secret); Self { seed, secret, - buffer: vec![0; block_size].into(), + buffer: [0; BUFFERED_BYTES], } } } - pub type AllocRawHasher = RawHasher, Box<[u8]>>; + pub type AllocRawHasher = RawHasher>; impl AllocRawHasher { fn allocate_default() -> Self { @@ -235,56 +241,92 @@ mod with_alloc { } } -impl SecretBuffer +impl SecretBuffer where S: AsRef<[u8]> + AsMut<[u8]>, - B: AsRef<[u8]> + AsMut<[u8]>, { /// Fills the secret buffer with a secret derived from the seed /// and the default secret. 
- pub fn with_seed(seed: u64, mut secret: S, buffer: B) -> Result { + pub fn with_seed(seed: u64, mut secret: S) -> Result { let secret_slice: &mut [u8; 192] = match secret.as_mut().try_into() { Ok(s) => s, - Err(_) => return Err((secret, buffer)), + Err(_) => return Err(secret), }; *secret_slice = DEFAULT_SECRET; derive_secret(seed, secret_slice); - Self::new(seed, secret, buffer) + Self::new(seed, secret) + } +} + +#[derive(Copy, Clone)] +struct Grug { + // TODO FIXME + accumulator: [u64; 8], + current_stripe: usize, +} + +impl Grug { + fn new() -> Self { + Self { + accumulator: INITIAL_ACCUMULATORS, + current_stripe: 0, + } + } + + fn process_stripe(&mut self, stripe: &[u8; 64], n_stripes: usize, secret: &[u8]) { + let Self { + accumulator, + current_stripe, + .. + } = self; + + let secret_end = secret.last_chunk().unwrap(); + + // each stripe + let secret = unsafe { &*secret.get_unchecked(*current_stripe * 8..).as_ptr().cast() }; + detect::accumulate(accumulator, stripe, secret); + + *current_stripe += 1; + + if *current_stripe == n_stripes { + // after block's worth + detect::round_scramble(accumulator, secret_end); + *current_stripe = 0; + } } } /// A lower-level interface for computing a hash from streaming data. /// -/// The algorithm requires two reasonably large pieces of data: the -/// secret and a temporary buffer. [`XxHash3_64`][] makes one concrete -/// implementation decision that uses dynamic memory allocation, but -/// specialized usages may desire more flexibility. This type, -/// combined with [`SecretBuffer`][], offer that flexibility at the -/// cost of a generic type. -pub struct RawHasher { - secret_buffer: SecretBuffer, +/// The algorithm requires a secret which can be a reasonably large +/// piece of data. [`XxHash3_64`][] makes one concrete implementation +/// decision that uses dynamic memory allocation, but specialized +/// usages may desire more flexibility. 
This type, combined with +/// [`SecretBuffer`][], offer that flexibility at the cost of a +/// generic type. +pub struct RawHasher { + secret_buffer: SecretBuffer, buffer_len: usize, - accumulator: [u64; 8], + grug: Grug, total_bytes: usize, } -impl RawHasher { - pub fn new(secret_buffer: SecretBuffer) -> Self { +impl RawHasher { + pub fn new(secret_buffer: SecretBuffer) -> Self { Self { secret_buffer, buffer_len: 0, - accumulator: INITIAL_ACCUMULATORS, + grug: Grug::new(), total_bytes: 0, } } } -impl hash::Hasher for RawHasher +impl hash::Hasher for RawHasher where S: AsRef<[u8]>, - B: AsRef<[u8]> + AsMut<[u8]>, { #[inline(never)] fn write(&mut self, mut input: &[u8]) { @@ -295,27 +337,20 @@ where let Self { secret_buffer, buffer_len, - accumulator, + grug, total_bytes, + .. } = self; - let SecretBuffer { - seed: _, - secret, - buffer, - } = secret_buffer; + + let n_stripes = secret_buffer.n_stripes(); + + let SecretBuffer { secret, buffer, .. } = secret_buffer; let secret = secret.as_ref(); - let buffer = buffer.as_mut(); - let input_len = input.len(); - - // Short-circuit if the buffer is empty and we have one or - // more full buffers-worth on the input. 
- if buffer.is_empty() { - let (blocks, remainder) = unsafe { chunks_and_last(input, buffer.len()) }; - detect::rounds(accumulator, blocks, secret); - input = remainder; - } - while !input.is_empty() { + *total_bytes += input.len(); + + // We have some previous data saved; try to fill it up and process it first + if !buffer.is_empty() { let remaining = &mut buffer[*buffer_len..]; let n_to_copy = usize::min(remaining.len(), input.len()); @@ -325,29 +360,50 @@ where remaining_head.copy_from_slice(input_head); *buffer_len += n_to_copy; - // We have not filled the whole buffer, no need to - // process it now + input = input_tail; + + // We did not fill up the buffer if !remaining_tail.is_empty() { - break; + return; } - // We filled the buffer, but we don't know we have - // more data so we have to leave it in case it is the - // last full block. - if input_tail.is_empty() { - break; + // We don't know this isn't the last of the data + if input.is_empty() { + return; } - // We have a full buffer *and* we know there's more - // data after the buffer, so we can process this as a - // full block. - detect::rounds(accumulator, [&*buffer], secret); + let (stripes, _) = buffer.bp_as_chunks(); + for stripe in stripes { + grug.process_stripe(stripe, n_stripes, secret); + } *buffer_len = 0; + } - input = input_tail; + debug_assert!(*buffer_len == 0); + + // Process as much of the input data in-place as possible, + // while leaving at least one full stripe for the + // finalization. + if let Some(dd) = input.len().checked_sub(STRIPE_BYTES) { + let nn = dd / STRIPE_BYTES; + let nn = nn * STRIPE_BYTES; + let (aa, remainder) = input.split_at(nn); + let (stripes, _) = aa.bp_as_chunks(); + + for stripe in stripes { + grug.process_stripe(stripe, n_stripes, secret) + } + input = remainder; } - *total_bytes += input_len; + // Any remaining data has to be less than the buffer, and the + // buffer is empty so just fill up the buffer. 
+ debug_assert!(*buffer_len == 0); + debug_assert!(!input.is_empty()); + debug_assert!(input.len() < buffer.len()); + + buffer[..input.len()].copy_from_slice(input); + *buffer_len = input.len(); } #[inline(never)] @@ -355,15 +411,15 @@ where let Self { ref secret_buffer, buffer_len, - accumulator, + mut grug, total_bytes, } = *self; + let n_stripes = secret_buffer.n_stripes(); let SecretBuffer { seed, ref secret, ref buffer, } = *secret_buffer; - let secret = secret.as_ref(); let buffer = buffer.as_ref(); @@ -371,6 +427,12 @@ where match total_bytes { 241.. => { + // Ingest final stripes + let (stripes, remainder) = fun_name(input); + for stripe in stripes { + grug.process_stripe(stripe, n_stripes, secret); + } + let mut temp = [0; 64]; let last_stripe = match input.last_chunk() { @@ -387,7 +449,13 @@ where } }; - detect::finalize(accumulator, input, last_stripe, secret, total_bytes) + detect::finalize( + grug.accumulator, + remainder, + last_stripe, + secret, + total_bytes, + ) } 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), @@ -675,6 +743,7 @@ impl Algorithm { #[inline] fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + // TODO: [unify] let secrets = (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); @@ -692,6 +761,7 @@ impl Algorithm { secret: &[u8], len: usize, ) -> u64 { + debug_assert!(!last_block.is_empty()); self.last_round(&mut acc, last_block, last_stripe, secret); self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret, 11) @@ -701,10 +771,9 @@ impl Algorithm { fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, // except for the last stripe (whether it is full or not) - let stripes = match block.bp_as_chunks() { - ([stripes @ .., _last], []) => stripes, - (stripes, _last) => stripes, - }; + let (stripes, _) = fun_name(block); + + // 
TODO: [unify] let secrets = (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); @@ -746,6 +815,14 @@ impl Algorithm { } } +#[inline] +fn fun_name(block: &[u8]) -> (&[[u8; 64]], &[u8]) { + match block.bp_as_chunks() { + ([stripes @ .., last], []) => (stripes, last), + (stripes, last) => (stripes, last), + } +} + /// # Safety /// `input` must be non-empty. #[inline] @@ -782,12 +859,13 @@ mod scalar { } #[inline] - pub fn rounds<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - super::Algorithm(Impl).rounds(acc, blocks, secret) + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + Impl.accumulate(acc, stripe, secret) + } + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + Impl.round_scramble(acc, secret_end); } #[inline] @@ -885,12 +963,16 @@ mod neon { /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] - pub unsafe fn rounds_unchecked<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + Impl::new_unchecked().round_scramble(acc, secret_end) + } + + /// # Safety + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + Impl::new_unchecked().accumulate(acc, stripe, secret) } /// # Safety @@ -1096,12 +1178,13 @@ mod aarch64_detect { } #[inline] - pub fn rounds<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - pick! { rounds_unchecked, rounds, (acc, blocks, secret) } + pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + pick! 
{ round_scramble_unchecked, round_scramble, (acc, secret_end) } + } + + #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + pick! { accumulate_unchecked, accumulate, (acc, stripe, secret) } } #[inline] @@ -1134,12 +1217,16 @@ mod avx2 { /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] - pub unsafe fn rounds_unchecked<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + Impl::new_unchecked().round_scramble(acc, secret_end) + } + + /// # Safety + /// You must ensure that the CPU has the AVX2 feature + #[inline] + #[target_feature(enable = "avx2")] + pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + Impl::new_unchecked().accumulate(acc, stripe, secret) } /// # Safety @@ -1235,12 +1322,16 @@ mod sse2 { /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] - pub unsafe fn rounds_unchecked<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + Impl::new_unchecked().round_scramble(acc, secret_end) + } + + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + Impl::new_unchecked().accumulate(acc, stripe, secret) } /// # Safety @@ -1346,12 +1437,13 @@ mod x86_64_detect { } #[inline] - pub fn rounds<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - pick! 
{ rounds_unchecked, rounds, (acc, blocks, secret) } + pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + pick! { round_scramble_unchecked, round_scramble, (acc, secret_end) } + } + + #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + pick! { accumulate_unchecked, accumulate, (acc, stripe, secret) } } #[inline] @@ -1401,12 +1493,13 @@ mod detect { } #[inline] - pub fn rounds<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - pick! { rounds(acc, blocks, secret) } + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + pick! { accumulate(acc, stripe, secret) } + } + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + pick! { round_scramble(acc, secret_end) } } #[inline] From 0f1980ff3495ca9d4ecbfbf662ce1566f71683ba Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 14 Aug 2024 15:57:30 -0400 Subject: [PATCH 106/166] speeeeds --- src/xxhash3_64.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6756b3078..1d190b36b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -275,6 +275,8 @@ impl Grug { } } + // TODO: NEXT: inline this? pass in secret_end? + #[inline] fn process_stripe(&mut self, stripe: &[u8; 64], n_stripes: usize, secret: &[u8]) { let Self { accumulator, @@ -282,7 +284,7 @@ impl Grug { .. 
} = self; - let secret_end = secret.last_chunk().unwrap(); + let secret_end = unsafe { secret.last_chunk().unwrap_unchecked() }; // each stripe let secret = unsafe { &*secret.get_unchecked(*current_stripe * 8..).as_ptr().cast() }; From 7c3a8ed6ccc49b059b518f6dd79329212a0389a3 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 15 Aug 2024 20:09:20 -0400 Subject: [PATCH 107/166] this is actually neon oops --- src/xxhash3_64.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 1d190b36b..84176a375 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1154,7 +1154,8 @@ mod neon { // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 #[inline] - fn reordering_barrier(r: uint64x2_t) { + #[target_feature(enable = "neon")] + unsafe fn reordering_barrier(r: uint64x2_t) { unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } } } From 3a4132681f767b6830cda53d3a18b5b816dec2f4 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 15 Aug 2024 20:21:04 -0400 Subject: [PATCH 108/166] push it --- src/xxhash3_64.rs | 541 ++++++++++++++++++++-------------------------- 1 file changed, 239 insertions(+), 302 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 84176a375..7bb2bd4ff 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -277,7 +277,13 @@ impl Grug { // TODO: NEXT: inline this? pass in secret_end? 
#[inline] - fn process_stripe(&mut self, stripe: &[u8; 64], n_stripes: usize, secret: &[u8]) { + fn process_stripe( + &mut self, + vector: V, + stripe: &[u8; 64], + n_stripes: usize, + secret: &[u8], + ) { let Self { accumulator, current_stripe, @@ -288,13 +294,13 @@ impl Grug { // each stripe let secret = unsafe { &*secret.get_unchecked(*current_stripe * 8..).as_ptr().cast() }; - detect::accumulate(accumulator, stripe, secret); + vector.accumulate(accumulator, stripe, secret); *current_stripe += 1; if *current_stripe == n_stripes { // after block's worth - detect::round_scramble(accumulator, secret_end); + vector.round_scramble(accumulator, secret_end); *current_stripe = 0; } } @@ -326,152 +332,239 @@ impl RawHasher { } } +macro_rules! dispatch { + ( + fn $fn_name:ident<$($gen:ident),*>($($arg_name:ident : $arg_ty:ty),*) $(-> $ret_ty:ty)? + [$($wheres:tt)*] + ) => { + #[inline] + fn do_scalar<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + $fn_name(scalar::Impl, $($arg_name),*) + } + + #[inline] + #[target_feature(enable = "neon")] + #[cfg(target_arch = "aarch64")] + unsafe fn do_neon<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) + } + + #[inline] + #[target_feature(enable = "avx2")] + #[cfg(target_arch = "x86_64")] + unsafe fn do_avx2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) + } + + #[inline] + #[target_feature(enable = "sse2")] + #[cfg(target_arch = "x86_64")] + unsafe fn do_sse2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? 
+ where + $($wheres)* + { + $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) + } + + #[cfg(target_arch = "aarch64")] + { + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { do_neon($($arg_name),*) }; + } + } + + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx2") { + return unsafe { do_avx2($($arg_name),*) }; + } else if is_x86_feature_detected!("sse2") { + return unsafe { do_sse2($($arg_name),*) }; + } + } + + do_scalar($($arg_name),*) + }; +} + impl hash::Hasher for RawHasher where S: AsRef<[u8]>, { #[inline(never)] - fn write(&mut self, mut input: &[u8]) { - if input.is_empty() { - return; + fn write(&mut self, input: &[u8]) { + let this = self; + dispatch! { + fn write_impl(this: &mut RawHasher, input: &[u8]) + [S: AsRef<[u8]>] + } + } + + #[inline(never)] + fn finish(&self) -> u64 { + let this = self; + dispatch! { + fn finish_impl(this: &RawHasher) -> u64 + [S: AsRef<[u8]>] } + } +} - let Self { - secret_buffer, - buffer_len, - grug, - total_bytes, - .. - } = self; +#[inline(always)] +fn write_impl(vector: impl Vector, this: &mut RawHasher, mut input: &[u8]) +where + S: AsRef<[u8]>, +{ + if input.is_empty() { + return; + } - let n_stripes = secret_buffer.n_stripes(); + let RawHasher { + secret_buffer, + buffer_len, + grug, + total_bytes, + .. + } = this; - let SecretBuffer { secret, buffer, .. } = secret_buffer; - let secret = secret.as_ref(); + let n_stripes = secret_buffer.n_stripes(); - *total_bytes += input.len(); + let SecretBuffer { secret, buffer, .. 
} = secret_buffer; + let secret = secret.as_ref(); - // We have some previous data saved; try to fill it up and process it first - if !buffer.is_empty() { - let remaining = &mut buffer[*buffer_len..]; - let n_to_copy = usize::min(remaining.len(), input.len()); + *total_bytes += input.len(); - let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); - let (input_head, input_tail) = input.split_at(n_to_copy); + // We have some previous data saved; try to fill it up and process it first + if !buffer.is_empty() { + let remaining = &mut buffer[*buffer_len..]; + let n_to_copy = usize::min(remaining.len(), input.len()); - remaining_head.copy_from_slice(input_head); - *buffer_len += n_to_copy; + let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); + let (input_head, input_tail) = input.split_at(n_to_copy); - input = input_tail; + remaining_head.copy_from_slice(input_head); + *buffer_len += n_to_copy; - // We did not fill up the buffer - if !remaining_tail.is_empty() { - return; - } + input = input_tail; - // We don't know this isn't the last of the data - if input.is_empty() { - return; - } + // We did not fill up the buffer + if !remaining_tail.is_empty() { + return; + } - let (stripes, _) = buffer.bp_as_chunks(); - for stripe in stripes { - grug.process_stripe(stripe, n_stripes, secret); - } - *buffer_len = 0; + // We don't know this isn't the last of the data + if input.is_empty() { + return; } - debug_assert!(*buffer_len == 0); + let (stripes, _) = buffer.bp_as_chunks(); + for stripe in stripes { + grug.process_stripe(vector, stripe, n_stripes, secret); + } + *buffer_len = 0; + } + + debug_assert!(*buffer_len == 0); - // Process as much of the input data in-place as possible, - // while leaving at least one full stripe for the - // finalization. 
- if let Some(dd) = input.len().checked_sub(STRIPE_BYTES) { - let nn = dd / STRIPE_BYTES; - let nn = nn * STRIPE_BYTES; - let (aa, remainder) = input.split_at(nn); - let (stripes, _) = aa.bp_as_chunks(); + // Process as much of the input data in-place as possible, + // while leaving at least one full stripe for the + // finalization. + if let Some(dd) = input.len().checked_sub(STRIPE_BYTES) { + let nn = dd / STRIPE_BYTES; + let nn = nn * STRIPE_BYTES; + let (aa, remainder) = input.split_at(nn); + let (stripes, _) = aa.bp_as_chunks(); + for stripe in stripes { + grug.process_stripe(vector, stripe, n_stripes, secret) + } + input = remainder; + } + + // Any remaining data has to be less than the buffer, and the + // buffer is empty so just fill up the buffer. + debug_assert!(*buffer_len == 0); + debug_assert!(!input.is_empty()); + debug_assert!(input.len() < buffer.len()); + + buffer[..input.len()].copy_from_slice(input); + *buffer_len = input.len(); +} + +#[inline(always)] +fn finish_impl(vector: impl Vector, this: &RawHasher) -> u64 +where + S: AsRef<[u8]>, +{ + let RawHasher { + ref secret_buffer, + buffer_len, + mut grug, + total_bytes, + } = *this; + let n_stripes = secret_buffer.n_stripes(); + let SecretBuffer { + seed, + ref secret, + ref buffer, + } = *secret_buffer; + let secret = secret.as_ref(); + let buffer = buffer.as_ref(); + + let input = &buffer[..buffer_len]; + + match total_bytes { + 241.. => { + // Ingest final stripes + let (stripes, remainder) = fun_name(input); for stripe in stripes { - grug.process_stripe(stripe, n_stripes, secret) + grug.process_stripe(vector, stripe, n_stripes, secret); } - input = remainder; - } - // Any remaining data has to be less than the buffer, and the - // buffer is empty so just fill up the buffer. 
- debug_assert!(*buffer_len == 0); - debug_assert!(!input.is_empty()); - debug_assert!(input.len() < buffer.len()); + let mut temp = [0; 64]; - buffer[..input.len()].copy_from_slice(input); - *buffer_len = input.len(); - } + let last_stripe = match input.last_chunk() { + Some(chunk) => chunk, + None => { + let n_to_reuse = 64 - input.len(); + let to_reuse = buffer.len() - n_to_reuse; - #[inline(never)] - fn finish(&self) -> u64 { - let Self { - ref secret_buffer, - buffer_len, - mut grug, - total_bytes, - } = *self; - let n_stripes = secret_buffer.n_stripes(); - let SecretBuffer { - seed, - ref secret, - ref buffer, - } = *secret_buffer; - let secret = secret.as_ref(); - let buffer = buffer.as_ref(); - - let input = &buffer[..buffer_len]; - - match total_bytes { - 241.. => { - // Ingest final stripes - let (stripes, remainder) = fun_name(input); - for stripe in stripes { - grug.process_stripe(stripe, n_stripes, secret); + let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); + temp_head.copy_from_slice(&buffer[to_reuse..]); + temp_tail.copy_from_slice(input); + + &temp } + }; - let mut temp = [0; 64]; - - let last_stripe = match input.last_chunk() { - Some(chunk) => chunk, - None => { - let n_to_reuse = 64 - input.len(); - let to_reuse = buffer.len() - n_to_reuse; - - let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); - temp_head.copy_from_slice(&buffer[to_reuse..]); - temp_tail.copy_from_slice(input); - - &temp - } - }; - - detect::finalize( - grug.accumulator, - remainder, - last_stripe, - secret, - total_bytes, - ) - } + Algorithm(vector).finalize( + grug.accumulator, + remainder, + last_stripe, + secret, + total_bytes, + ) + } - 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), + 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), - 17..=128 => impl_17_to_128_bytes(&DEFAULT_SECRET, seed, input), + 17..=128 => impl_17_to_128_bytes(&DEFAULT_SECRET, seed, input), - 9..=16 => impl_9_to_16_bytes(&DEFAULT_SECRET, seed, 
input), + 9..=16 => impl_9_to_16_bytes(&DEFAULT_SECRET, seed, input), - 4..=8 => impl_4_to_8_bytes(&DEFAULT_SECRET, seed, input), + 4..=8 => impl_4_to_8_bytes(&DEFAULT_SECRET, seed, input), - 1..=3 => impl_1_to_3_bytes(&DEFAULT_SECRET, seed, input), + 1..=3 => impl_1_to_3_bytes(&DEFAULT_SECRET, seed, input), - 0 => impl_0_bytes(&DEFAULT_SECRET, seed), - } + 0 => impl_0_bytes(&DEFAULT_SECRET, seed), } } @@ -846,7 +939,7 @@ unsafe fn chunks_and_last(input: &[u8], block_size: usize) -> (slice::ChunksExac (blocks, last_block) } -trait Vector { +trait Vector: Copy { fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); @@ -860,29 +953,9 @@ mod scalar { super::Algorithm(Impl).oneshot(secret, input) } - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - Impl.accumulate(acc, stripe, secret) - } - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - Impl.round_scramble(acc, secret_end); - } - - #[inline] - pub fn finalize( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - super::Algorithm(Impl).finalize(acc, last_block, last_stripe, secret, len) - } - use super::{SliceBackport as _, Vector, PRIME32_1}; + #[derive(Copy, Clone)] pub struct Impl; impl Vector for Impl { @@ -961,43 +1034,14 @@ mod neon { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } - /// # Safety - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - Impl::new_unchecked().round_scramble(acc, secret_end) - } - - /// # Safety - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - 
Impl::new_unchecked().accumulate(acc, stripe, secret) - } - - /// # Safety - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - pub unsafe fn finalize_unchecked( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) - } - - struct Impl(()); + #[derive(Copy, Clone)] + pub struct Impl(()); impl Impl { /// # Safety /// You must ensure that the CPU has the NEON feature #[inline] - unsafe fn new_unchecked() -> Self { + pub unsafe fn new_unchecked() -> Self { Self(()) } } @@ -1179,34 +1223,13 @@ mod aarch64_detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { pick! { oneshot_unchecked, oneshot, (secret, input) } } - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - pick! { round_scramble_unchecked, round_scramble, (acc, secret_end) } - } - - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - pick! { accumulate_unchecked, accumulate, (acc, stripe, secret) } - } - - #[inline] - pub fn finalize( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - pick! 
{ finalize_unchecked, finalize, (acc, last_block, last_stripe, secret, len) } - } } #[cfg(target_arch = "x86_64")] mod avx2 { use core::arch::x86_64::*; - use super::Vector; + use super::{scalar, Vector}; /// # Safety /// You must ensure that the CPU has the AVX2 feature @@ -1216,52 +1239,23 @@ mod avx2 { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } - /// # Safety - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - Impl::new_unchecked().round_scramble(acc, secret_end) - } - - /// # Safety - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - Impl::new_unchecked().accumulate(acc, stripe, secret) - } - - /// # Safety - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - pub unsafe fn finalize_unchecked( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) - } - - pub struct Impl(super::scalar::Impl); + #[derive(Copy, Clone)] + pub struct Impl(()); impl Impl { /// # Safety /// You must ensure that the CPU has the AVX2 feature #[inline] pub unsafe fn new_unchecked() -> Impl { - Impl(super::scalar::Impl) + Impl(()) } } impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // The scalar implementation is autovectorized nicely enough - self.0.round_scramble(acc, secret_end) + // SAFETY: Type can only be constructed when AVX2 feature is present + unsafe { round_scramble_avx2(acc, secret_end) } } #[inline] @@ -1271,6 +1265,13 @@ mod avx2 { } } + #[inline] + #[target_feature(enable = "avx2")] + unsafe fn 
round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // The scalar implementation is autovectorized nicely enough + scalar::Impl.round_scramble(acc, secret_end) + } + #[inline] #[target_feature(enable = "avx2")] unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -1311,7 +1312,7 @@ mod avx2 { mod sse2 { use core::arch::x86_64::*; - use super::Vector; + use super::{scalar, Vector}; /// # Safety /// You must ensure that the CPU has the SSE2 feature @@ -1321,52 +1322,23 @@ mod sse2 { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } - /// # Safety - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - Impl::new_unchecked().round_scramble(acc, secret_end) - } - - /// # Safety - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - Impl::new_unchecked().accumulate(acc, stripe, secret) - } - - /// # Safety - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - pub unsafe fn finalize_unchecked( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) - } - - pub struct Impl(super::scalar::Impl); + #[derive(Copy, Clone)] + pub struct Impl(()); impl Impl { /// # Safety /// You must ensure that the CPU has the SSE2 feature #[inline] pub unsafe fn new_unchecked() -> Impl { - Impl(super::scalar::Impl) + Impl(()) } } impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // The scalar implementation is autovectorized nicely enough - self.0.round_scramble(acc, secret_end) + // SAFETY: Type 
can only be constructed when SSE2 feature is present + unsafe { round_scramble_sse2(acc, secret_end) } } #[inline] @@ -1376,6 +1348,13 @@ mod sse2 { } } + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // The scalar implementation is autovectorized nicely enough + scalar::Impl.round_scramble(acc, secret_end) + } + #[inline] #[target_feature(enable = "sse2")] unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -1438,27 +1417,6 @@ mod x86_64_detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { pick! { oneshot_unchecked, oneshot, (secret, input) } } - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - pick! { round_scramble_unchecked, round_scramble, (acc, secret_end) } - } - - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - pick! { accumulate_unchecked, accumulate, (acc, stripe, secret) } - } - - #[inline] - pub fn finalize( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - pick! { finalize_unchecked, finalize, (acc, last_block, last_stripe, secret, len) } - } } mod detect { @@ -1494,27 +1452,6 @@ mod detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { pick! { oneshot(secret, input) } } - - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - pick! { accumulate(acc, stripe, secret) } - } - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - pick! { round_scramble(acc, secret_end) } - } - - #[inline] - pub fn finalize( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - pick! 
{ finalize(acc, last_block, last_stripe, secret, len) } - } } #[inline] From 9657b00fc82b3888290492c644a796b092d1df47 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 16 Aug 2024 20:55:18 -0400 Subject: [PATCH 109/166] Address some todos --- src/xxhash3_64.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 7bb2bd4ff..662e6e36f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -260,14 +260,15 @@ where } } +/// Tracks which stripe we are currently on to know which part of the +/// secret we should be using. #[derive(Copy, Clone)] -struct Grug { - // TODO FIXME +struct StripeAccumulator { accumulator: [u64; 8], current_stripe: usize, } -impl Grug { +impl StripeAccumulator { fn new() -> Self { Self { accumulator: INITIAL_ACCUMULATORS, @@ -275,7 +276,6 @@ impl Grug { } } - // TODO: NEXT: inline this? pass in secret_end? #[inline] fn process_stripe( &mut self, @@ -317,7 +317,7 @@ impl Grug { pub struct RawHasher { secret_buffer: SecretBuffer, buffer_len: usize, - grug: Grug, + stripe_accumulator: StripeAccumulator, total_bytes: usize, } @@ -326,7 +326,7 @@ impl RawHasher { Self { secret_buffer, buffer_len: 0, - grug: Grug::new(), + stripe_accumulator: StripeAccumulator::new(), total_bytes: 0, } } @@ -430,7 +430,7 @@ where let RawHasher { secret_buffer, buffer_len, - grug, + stripe_accumulator, total_bytes, .. 
} = this; @@ -467,7 +467,7 @@ where let (stripes, _) = buffer.bp_as_chunks(); for stripe in stripes { - grug.process_stripe(vector, stripe, n_stripes, secret); + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } *buffer_len = 0; } @@ -484,7 +484,7 @@ where let (stripes, _) = aa.bp_as_chunks(); for stripe in stripes { - grug.process_stripe(vector, stripe, n_stripes, secret) + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret) } input = remainder; } @@ -507,7 +507,7 @@ where let RawHasher { ref secret_buffer, buffer_len, - mut grug, + mut stripe_accumulator, total_bytes, } = *this; let n_stripes = secret_buffer.n_stripes(); @@ -526,7 +526,7 @@ where // Ingest final stripes let (stripes, remainder) = fun_name(input); for stripe in stripes { - grug.process_stripe(vector, stripe, n_stripes, secret); + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } let mut temp = [0; 64]; @@ -546,7 +546,7 @@ where }; Algorithm(vector).finalize( - grug.accumulator, + stripe_accumulator.accumulator, remainder, last_stripe, secret, @@ -999,7 +999,6 @@ mod scalar { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 // https://github.com/llvm/llvm-project/issues/98481 - // TODO: this is probably if NEON, yeah? #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use core::arch::asm; @@ -1280,7 +1279,9 @@ mod avx2 { let secret = secret.as_ptr().cast::<__m256i>(); for i in 0..2 { - // todo: align the accumulator and avoid the unaligned load and store + // [align-acc]: The C code aligns the accumulator to avoid + // the unaligned load and store here, but that doesn't + // seem to be a big performance loss. 
let mut acc_0 = _mm256_loadu_si256(acc.add(i)); let stripe_0 = _mm256_loadu_si256(stripe.add(i)); let secret_0 = _mm256_loadu_si256(secret.add(i)); @@ -1363,7 +1364,7 @@ mod sse2 { let secret = secret.as_ptr().cast::<__m128i>(); for i in 0..4 { - // todo: align the accumulator and avoid the unaligned load and store + // See [align-acc]. let mut acc_0 = _mm_loadu_si128(acc.add(i)); let stripe_0 = _mm_loadu_si128(stripe.add(i)); let secret_0 = _mm_loadu_si128(secret.add(i)); From 9566f322768a320c45c5a188a7627e750c739cc8 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 09:27:56 -0400 Subject: [PATCH 110/166] Use unified dispatch mechanism --- src/xxhash3_64.rs | 141 ++++++++-------------------------------------- 1 file changed, 25 insertions(+), 116 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 662e6e36f..6be59778b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -375,14 +375,28 @@ macro_rules! dispatch { $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) } - #[cfg(target_arch = "aarch64")] + // Now we invoke the right function + + #[cfg(_internal_xxhash3_force_neon)] + return unsafe { do_neon($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_avx2)] + return unsafe { do_avx2($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_sse2)] + return unsafe { do_sse2($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_scalar)] + return do_scalar($($arg_name),*); + + #[cfg(all(target_arch = "aarch64", feature = "std"))] { if std::arch::is_aarch64_feature_detected!("neon") { return unsafe { do_neon($($arg_name),*) }; } } - #[cfg(target_arch = "x86_64")] + #[cfg(all(target_arch = "x86_64", feature = "std"))] { if is_x86_feature_detected!("avx2") { return unsafe { do_avx2($($arg_name),*) }; @@ -777,7 +791,15 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { - detect::oneshot(secret, input) + dispatch! 
{ + fn oneshot_x<>(secret: &[u8], input: &[u8]) -> u64 + [] + } +} + +#[inline(always)] +fn oneshot_x(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { + Algorithm(vector).oneshot(secret, input) } fn block_size(secret: &[u8]) -> usize { @@ -948,11 +970,6 @@ trait Vector: Copy { // This module is not `cfg`-gated because it is used by some of the // SIMD implementations. mod scalar { - #[inline] - pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - super::Algorithm(Impl).oneshot(secret, input) - } - use super::{SliceBackport as _, Vector, PRIME32_1}; #[derive(Copy, Clone)] @@ -1025,14 +1042,6 @@ mod neon { use super::{SliceBackport as _, Vector, PRIME32_1}; - /// # Safety - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) - } - #[derive(Copy, Clone)] pub struct Impl(()); @@ -1203,41 +1212,12 @@ mod neon { } } -#[cfg(all(target_arch = "aarch64", feature = "std"))] -mod aarch64_detect { - macro_rules! pick { - ($f:ident, $s:ident, $($t:tt)+) => { - #[cfg(_internal_xxhash3_force_neon)] - return unsafe { super::neon::$f $($t)+ }; - - if std::arch::is_aarch64_feature_detected!("neon") { - return unsafe { super::neon::$f $($t)+ }; - } - - super::scalar::$s $($t)+ - }; - } - - #[inline] - pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - pick! 
{ oneshot_unchecked, oneshot, (secret, input) } - } -} - #[cfg(target_arch = "x86_64")] mod avx2 { use core::arch::x86_64::*; use super::{scalar, Vector}; - /// # Safety - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) - } - #[derive(Copy, Clone)] pub struct Impl(()); @@ -1315,14 +1295,6 @@ mod sse2 { use super::{scalar, Vector}; - /// # Safety - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) - } - #[derive(Copy, Clone)] pub struct Impl(()); @@ -1392,69 +1364,6 @@ mod sse2 { } } -#[cfg(all(target_arch = "x86_64", feature = "std"))] -mod x86_64_detect { - macro_rules! pick { - ($f:ident, $s:ident, $($t:tt)+) => { - #[cfg(_internal_xxhash3_force_avx2)] - return unsafe { super::avx2::$f $($t)+ }; - - #[cfg(_internal_xxhash3_force_sse2)] - return unsafe { super::sse2::$f $($t)+ }; - - if std::arch::is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::$f $($t)+ }; - } - - if std::arch::is_x86_feature_detected!("sse2") { - return unsafe { super::sse2::$f $($t)+ }; - } - - super::scalar::$s $($t)+ - }; - } - - #[inline] - pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - pick! { oneshot_unchecked, oneshot, (secret, input) } - } -} - -mod detect { - macro_rules! 
pick { - ($e:expr) => { - #[cfg(_internal_xxhash3_force_scalar)] - { - use super::scalar::*; - return $e; - } - - #[cfg(all(target_arch = "aarch64", feature = "std"))] - { - use super::aarch64_detect::*; - return $e; - } - - #[cfg(all(target_arch = "x86_64", feature = "std"))] - { - use super::x86_64_detect::*; - return $e; - } - - #[allow(unreachable_code)] - { - use super::scalar::*; - $e - } - }; - } - - #[inline] - pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - pick! { oneshot(secret, input) } - } -} - #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; From e9d17b923f17ff0e1d7287d36fbadbcaa6d3fcad Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 09:29:21 -0400 Subject: [PATCH 111/166] re-inline helpers --- src/xxhash3_64.rs | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6be59778b..2e2dc768f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -802,11 +802,6 @@ fn oneshot_x(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } -fn block_size(secret: &[u8]) -> usize { - let stripes_per_block = (secret.len() - 64) / 8; - 64 * stripes_per_block -} - struct Algorithm(V); impl Algorithm { @@ -817,9 +812,20 @@ impl Algorithm { assert!(secret.len() >= SECRET_MINIMUM_LENGTH); assert!(input.len() >= 241); - let block_size = block_size(secret); + let stripes_per_block = (secret.len() - 64) / 8; + let block_size = 64 * stripes_per_block; - let (blocks, last_block) = unsafe { chunks_and_last(input, block_size) }; + let mut blocks = input.chunks_exact(block_size); + + let last_block = if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. 
+ unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; self.rounds(&mut acc, blocks, secret); @@ -940,27 +946,6 @@ fn fun_name(block: &[u8]) -> (&[[u8; 64]], &[u8]) { } } -/// # Safety -/// `input` must be non-empty. -#[inline] -unsafe fn chunks_and_last(input: &[u8], block_size: usize) -> (slice::ChunksExact<'_, u8>, &[u8]) { - debug_assert!(!input.is_empty()); - - let mut blocks = input.chunks_exact(block_size); - - let last_block = if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. - unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() - }; - - (blocks, last_block) -} - trait Vector: Copy { fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); From 935e70132dc10a3117d2183b0791e34bd75e96eb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 12:51:22 -0400 Subject: [PATCH 112/166] improve names --- src/xxhash3_64.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 2e2dc768f..d3916ec75 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -538,7 +538,7 @@ where match total_bytes { 241.. => { // Ingest final stripes - let (stripes, remainder) = fun_name(input); + let (stripes, remainder) = stripes_with_tail(input); for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } @@ -792,13 +792,13 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { dispatch! 
{ - fn oneshot_x<>(secret: &[u8], input: &[u8]) -> u64 + fn oneshot_impl<>(secret: &[u8], input: &[u8]) -> u64 [] } } #[inline(always)] -fn oneshot_x(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { +fn oneshot_impl(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } @@ -894,7 +894,7 @@ impl Algorithm { fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, // except for the last stripe (whether it is full or not) - let (stripes, _) = fun_name(block); + let (stripes, _) = stripes_with_tail(block); // TODO: [unify] let secrets = @@ -939,7 +939,7 @@ impl Algorithm { } #[inline] -fn fun_name(block: &[u8]) -> (&[[u8; 64]], &[u8]) { +fn stripes_with_tail(block: &[u8]) -> (&[[u8; 64]], &[u8]) { match block.bp_as_chunks() { ([stripes @ .., last], []) => (stripes, last), (stripes, last) => (stripes, last), From e7662a4db9a1d24a3e3a3d881bd3ba62e752b6b0 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 12:54:54 -0400 Subject: [PATCH 113/166] reduce unsafe --- src/xxhash3_64.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index d3916ec75..7e76d7ae7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -831,14 +831,7 @@ impl Algorithm { let len = input.len(); - let last_stripe: &[u8; 64] = unsafe { - &*input - .as_ptr() - .add(len) - .sub(mem::size_of::<[u8; 64]>()) - .cast() - }; - + let last_stripe = input.last_chunk().unwrap(); self.finalize(acc, last_block, last_stripe, secret, len) } From ab749c25764ff653c1ba478aabf79add33790062 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 13:11:50 -0400 Subject: [PATCH 114/166] reduce unsafe --- src/xxhash3_64.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 
7e76d7ae7..ec2cd57b4 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -897,8 +897,7 @@ impl Algorithm { self.0.accumulate(acc, stripe, secret); } - let q = &secret[secret.len() - 71..]; - let q: &[u8; 64] = unsafe { &*q.as_ptr().cast() }; + let q = secret[secret.len() - 71..].first_chunk().unwrap(); self.0.accumulate(acc, last_stripe, q); } @@ -910,19 +909,17 @@ impl Algorithm { secret: &[u8], secret_offset: usize, ) -> u64 { - let secret_words = unsafe { - secret - .as_ptr() - .add(secret_offset) - .cast::<[u64; 8]>() - .read_unaligned() - }; + let secrets = secret[secret_offset..].first_chunk::<64>().unwrap(); + let (secrets, _) = secrets.bp_as_chunks(); let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result let mul_result = { - let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); - let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + let sa = u64::from_ne_bytes(secrets[i * 2]); + let sb = u64::from_ne_bytes(secrets[i * 2 + 1]); + + let a = (acc[i * 2] ^ sa).into_u128(); + let b = (acc[i * 2 + 1] ^ sb).into_u128(); a.wrapping_mul(b) }; result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); @@ -968,9 +965,12 @@ mod scalar { #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (stripe, _) = stripe.bp_as_chunks(); + let (secret, _) = secret.bp_as_chunks(); + for i in 0..8 { - let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + let stripe = u64::from_ne_bytes(stripe[i]); + let secret = u64::from_ne_bytes(secret[i]); let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); From 7c0cc3f9eb8bee3353485b207f1442b20a0c545c Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 13:14:25 -0400 Subject: [PATCH 115/166] unsafe-op-in-fn --- src/xxhash3_64.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 
deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ec2cd57b4..36a58a040 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,4 +1,5 @@ #![allow(missing_docs)] +#![deny(unsafe_op_in_unsafe_fn)] use core::{hash, mem, slice}; @@ -345,6 +346,8 @@ macro_rules! dispatch { $fn_name(scalar::Impl, $($arg_name),*) } + /// # Safety + /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "aarch64")] @@ -352,7 +355,10 @@ macro_rules! dispatch { where $($wheres)* { - $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) + // SAFETY: the caller has ensured we have the NEON feature + unsafe { + $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) + } } #[inline] From dde22b4b5b993cf3c2a5179e533a955916707d77 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 13:38:58 -0400 Subject: [PATCH 116/166] extract secret start --- src/xxhash3_64.rs | 166 +++++++++++++++++++++++++++++++++------------- 1 file changed, 121 insertions(+), 45 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 36a58a040..c1f43ce42 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -33,8 +33,76 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; +const DEFAULT_SECRET2: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET) }; + pub const SECRET_MINIMUM_LENGTH: usize = 136; +#[repr(transparent)] +struct Secret([u8]); + +impl Secret { + #[inline] + fn new(bytes: &[u8]) -> Result<&Self, ()> { + if bytes.len() >= SECRET_MINIMUM_LENGTH { + unsafe { Ok(Self::new_unchecked(bytes)) } + } else { + Err(()) // TODO error + } + } + + #[inline] + const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { + unsafe { mem::transmute(bytes) } + } + + #[inline] + fn words_for_0(&self) -> [u64; 2] { + // unsafe { self.0.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() } + + let (q, _) = 
self.0[56..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_ne_bytes) + } + + #[inline] + fn words_for_1_to_3(&self) -> [u32; 2] { + // unsafe { self.0.as_ptr().cast::<[u32; 2]>().read_unaligned() } + + let (q, _) = self.0.bp_as_chunks(); + [q[0], q[1]].map(u32::from_ne_bytes) + } + + #[inline] + fn words_for_4_to_8(&self) -> [u64; 2] { + //unsafe { self.0.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() } + + let (q, _) = self.0[8..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_ne_bytes) + } + + #[inline] + fn words_for_9_to_16(&self) -> [u64; 4] { + // unsafe { self.0.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() } + + let (q, _) = self.0[24..].bp_as_chunks(); + [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) + } + + #[inline] + fn stripe(&self, i: usize) -> &[u8; 64] { + unsafe { &*self.0.get_unchecked(i * 8..).as_ptr().cast() } + } + + #[inline] + fn end(&self) -> &[u8; 64] { + unsafe { self.0.last_chunk().unwrap_unchecked() } + } + + #[inline] + fn len(&self) -> usize { + self.0.len() + } +} + pub struct XxHash3_64 { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, @@ -44,7 +112,7 @@ pub struct XxHash3_64 { impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { - impl_oneshot(&DEFAULT_SECRET, DEFAULT_SEED, input) + impl_oneshot(DEFAULT_SECRET2, DEFAULT_SEED, input) } #[inline(never)] @@ -57,12 +125,14 @@ impl XxHash3_64 { derive_secret(seed, &mut secret); } - impl_oneshot(&secret, seed, input) + let s = unsafe { Secret::new_unchecked(&secret) }; + + impl_oneshot(s, seed, input) } #[inline(never)] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> u64 { - assert!(secret.len() >= SECRET_MINIMUM_LENGTH); // TODO: ERROR + let secret = Secret::new(secret).unwrap(); // TODO: ERROR impl_oneshot(secret, DEFAULT_SEED, input) } } @@ -283,7 +353,7 @@ impl StripeAccumulator { vector: V, stripe: &[u8; 64], n_stripes: usize, - secret: &[u8], + secret: &Secret, ) { let Self { accumulator, @@ -291,10 +361,10 @@ impl 
StripeAccumulator { .. } = self; - let secret_end = unsafe { secret.last_chunk().unwrap_unchecked() }; + let secret_end = secret.end(); // each stripe - let secret = unsafe { &*secret.get_unchecked(*current_stripe * 8..).as_ptr().cast() }; + let secret = secret.stripe(*current_stripe); vector.accumulate(accumulator, stripe, secret); *current_stripe += 1; @@ -459,6 +529,7 @@ where let SecretBuffer { secret, buffer, .. } = secret_buffer; let secret = secret.as_ref(); + let secret = unsafe { Secret::new_unchecked(secret) }; *total_bytes += input.len(); @@ -537,6 +608,7 @@ where ref buffer, } = *secret_buffer; let secret = secret.as_ref(); + let secret = unsafe { Secret::new_unchecked(secret) }; let buffer = buffer.as_ref(); let input = &buffer[..buffer_len]; @@ -574,17 +646,17 @@ where ) } - 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), + 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), - 17..=128 => impl_17_to_128_bytes(&DEFAULT_SECRET, seed, input), + 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), - 9..=16 => impl_9_to_16_bytes(&DEFAULT_SECRET, seed, input), + 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), - 4..=8 => impl_4_to_8_bytes(&DEFAULT_SECRET, seed, input), + 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), - 1..=3 => impl_1_to_3_bytes(&DEFAULT_SECRET, seed, input), + 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), - 0 => impl_0_bytes(&DEFAULT_SECRET, seed), + 0 => impl_0_bytes(DEFAULT_SECRET2, seed), } } @@ -616,7 +688,7 @@ fn derive_secret(seed: u64, secret: &mut [u8; 192]) { } #[inline(always)] -fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { match input.len() { 241.. 
=> impl_241_plus_bytes(secret, input), @@ -635,13 +707,13 @@ fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_0_bytes(secret: &[u8], seed: u64) -> u64 { - let secret_words = unsafe { secret.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() }; +fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { + let secret_words = secret.words_for_0(); avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) } #[inline] -fn impl_1_to_3_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_length = input.len() as u8; // OK as we checked that the length fits let combined = input[input.len() - 1].into_u32() @@ -649,7 +721,7 @@ fn impl_1_to_3_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { | input[0].into_u32() << 16 | input[input.len() >> 1].into_u32() << 24; - let secret_words = unsafe { secret.as_ptr().cast::<[u32; 2]>().read_unaligned() }; + let secret_words = secret.words_for_1_to_3(); let value = ((secret_words[0] ^ secret_words[1]).into_u64() + seed) ^ combined.into_u64(); @@ -658,7 +730,7 @@ fn impl_1_to_3_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { input @@ -670,7 +742,7 @@ fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { }; let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); - let secret_words = unsafe { secret.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() }; + let secret_words = secret.words_for_4_to_8(); let combined = input_last.into_u64() | (input_first.into_u64() << 32); @@ -688,7 +760,7 @@ fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: 
&[u8]) -> u64 { +fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { input @@ -699,7 +771,7 @@ fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { .read_unaligned() }; - let secret_words = unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; + let secret_words = secret.words_for_9_to_16(); let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; let mul_result = low.into_u128().wrapping_mul(high.into_u128()); @@ -714,10 +786,10 @@ fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let (secret, _) = secret.bp_as_chunks(); + let (secret, _) = secret.0.bp_as_chunks(); let (secret, _) = secret.bp_as_chunks::<2>(); let (fwd, _) = input.bp_as_chunks(); let (_, bwd) = input.bp_as_rchunks(); @@ -746,15 +818,15 @@ fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _) = input.bp_as_chunks(); let last_chunk = input.last_chunk().unwrap(); let mut head = head.iter(); - let (ss, _) = secret.bp_as_chunks(); - let (ss2, _) = secret[3..].bp_as_chunks(); + let (ss, _) = secret.0.bp_as_chunks(); + let (ss2, _) = secret.0[3..].bp_as_chunks(); let qq = head.by_ref().zip(ss); @@ -768,7 +840,7 @@ fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { acc = acc.wrapping_add(mix_step(chunk, s, seed)); } - let ss3 = 
&secret[119..].first_chunk().unwrap(); + let ss3 = &secret.0[119..].first_chunk().unwrap(); acc = acc.wrapping_add(mix_step(last_chunk, ss3, seed)); avalanche(acc) @@ -796,15 +868,15 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ ]; #[inline] -fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { +fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { dispatch! { - fn oneshot_impl<>(secret: &[u8], input: &[u8]) -> u64 + fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64 [] } } #[inline(always)] -fn oneshot_impl(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { +fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } @@ -812,10 +884,10 @@ struct Algorithm(V); impl Algorithm { #[inline] - fn oneshot(&self, secret: &[u8], input: &[u8]) -> u64 { + fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; - assert!(secret.len() >= SECRET_MINIMUM_LENGTH); + //assert!(secret.len() >= SECRET_MINIMUM_LENGTH); assert!(input.len() >= 241); let stripes_per_block = (secret.len() - 64) / 8; @@ -846,7 +918,7 @@ impl Algorithm { &self, acc: &mut [u64; 8], blocks: impl IntoIterator, - secret: &[u8], + secret: &Secret, ) { for block in blocks { let (stripes, _) = block.bp_as_chunks(); @@ -856,18 +928,17 @@ impl Algorithm { } #[inline] - fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { - let secret_end = secret.last_chunk().unwrap(); + fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { + let secret_end = secret.0.last_chunk().unwrap(); self.round_accumulate(acc, stripes, secret); self.0.round_scramble(acc, secret_end); } #[inline] - fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { // TODO: [unify] - let secrets = - (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 
8..).as_ptr().cast() }); + let secrets = (0..stripes.len()).map(|i| secret.stripe(i)); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); @@ -880,7 +951,7 @@ impl Algorithm { mut acc: [u64; 8], last_block: &[u8], last_stripe: &[u8; 64], - secret: &[u8], + secret: &Secret, len: usize, ) -> u64 { debug_assert!(!last_block.is_empty()); @@ -890,20 +961,25 @@ impl Algorithm { } #[inline] - fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { + fn last_round( + &self, + acc: &mut [u64; 8], + block: &[u8], + last_stripe: &[u8; 64], + secret: &Secret, + ) { // Accumulation steps are run for the stripes in the last block, // except for the last stripe (whether it is full or not) let (stripes, _) = stripes_with_tail(block); // TODO: [unify] - let secrets = - (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); + let secrets = (0..stripes.len()).map(|i| secret.stripe(i)); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); } - let q = secret[secret.len() - 71..].first_chunk().unwrap(); + let q = secret.0[secret.len() - 71..].first_chunk().unwrap(); self.0.accumulate(acc, last_stripe, q); } @@ -912,10 +988,10 @@ impl Algorithm { &self, acc: &mut [u64; 8], init_value: u64, - secret: &[u8], + secret: &Secret, secret_offset: usize, ) -> u64 { - let secrets = secret[secret_offset..].first_chunk::<64>().unwrap(); + let secrets = secret.0[secret_offset..].first_chunk::<64>().unwrap(); let (secrets, _) = secrets.bp_as_chunks(); let mut result = init_value; for i in 0..4 { From 849434ae097b5f553eb176d738b4dbd4826ebb20 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 08:47:02 -0400 Subject: [PATCH 117/166] oneshot asmasm --- asmasm/src/main.rs | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index 706702566..e515bcdfc 100644 --- 
a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,4 +1,4 @@ -use std::{hash::Hasher, hint::black_box, time::Instant}; +use std::{hint::black_box, time::Instant}; use xx_hash_sys::XxHash3_64 as C; use xx_renu::xxhash3_64::XxHash3_64; @@ -8,33 +8,18 @@ fn main() { .nth(2) .map_or(false, |a| a.eq_ignore_ascii_case("C")); let file = std::fs::read(filename).expect("read"); - let seed = 0xdead_beef; if use_c { let start = Instant::now(); - let hash = do_c(seed, &file); + let hash = C::oneshot(&file); let elapsed = start.elapsed(); black_box(hash); eprintln!("C {elapsed:?}"); } else { let start = Instant::now(); - let hash = do_rust(seed, &file); + let hash = XxHash3_64::oneshot(&file); let elapsed = start.elapsed(); black_box(hash); eprintln!("Rust {elapsed:?}"); } } - -#[inline(never)] -fn do_c(seed: u64, file: &[u8]) -> u64 { - let mut hasher = C::with_seed(seed); - hasher.write(file); - hasher.finish() -} - -#[inline(never)] -fn do_rust(seed: u64, file: &[u8]) -> u64 { - let mut hasher = XxHash3_64::with_seed(seed); - hasher.write(&file); - hasher.finish() -} From 4b181d994d907099a5947fe0dd2474fafa45bbce Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 09:49:57 -0400 Subject: [PATCH 118/166] keep order consistent for now --- src/xxhash3_64.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c1f43ce42..9e6270895 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -614,6 +614,18 @@ where let input = &buffer[..buffer_len]; match total_bytes { + 0 => impl_0_bytes(DEFAULT_SECRET2, seed), + + 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), + + 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), + + 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), + + 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), + + 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), + 241.. 
=> { // Ingest final stripes let (stripes, remainder) = stripes_with_tail(input); @@ -645,18 +657,6 @@ where total_bytes, ) } - - 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), - - 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), - - 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), - - 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), - - 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), - - 0 => impl_0_bytes(DEFAULT_SECRET2, seed), } } @@ -690,19 +690,19 @@ fn derive_secret(seed: u64, secret: &mut [u8; 192]) { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { match input.len() { - 241.. => impl_241_plus_bytes(secret, input), + 0 => impl_0_bytes(secret, seed), - 129..=240 => impl_129_to_240_bytes(secret, seed, input), + 1..=3 => impl_1_to_3_bytes(secret, seed, input), - 17..=128 => impl_17_to_128_bytes(secret, seed, input), + 4..=8 => impl_4_to_8_bytes(secret, seed, input), 9..=16 => impl_9_to_16_bytes(secret, seed, input), - 4..=8 => impl_4_to_8_bytes(secret, seed, input), + 17..=128 => impl_17_to_128_bytes(secret, seed, input), - 1..=3 => impl_1_to_3_bytes(secret, seed, input), + 129..=240 => impl_129_to_240_bytes(secret, seed, input), - 0 => impl_0_bytes(secret, seed), + 241.. 
=> impl_241_plus_bytes(secret, input), } } From eee337e70326f8642271625da11466cd266dac1b Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 13:32:47 -0400 Subject: [PATCH 119/166] tweak inlines --- src/xxhash3_64.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 9e6270895..291e9482f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -706,13 +706,13 @@ fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { } } -#[inline] +#[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { let secret_words = secret.words_for_0(); avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) } -#[inline] +#[inline(always)] fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_length = input.len() as u8; // OK as we checked that the length fits @@ -729,7 +729,7 @@ fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { avalanche_xxh64(value) } -#[inline] +#[inline(always)] fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { @@ -759,7 +759,7 @@ fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { value } -#[inline] +#[inline(always)] fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { @@ -875,7 +875,7 @@ fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { } } -#[inline(always)] +#[inline] fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } From 019ef1136896640b1a12c839a2002ad38ce7c32c Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 15:18:31 -0400 Subject: [PATCH 120/166] tweak bench --- compare/benches/benchmark.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 52000e427..fbb2fe10d 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -140,7 +140,7 @@ fn gen_data(length: usize) -> (u64, Vec) { } fn gen_chunked_data(length: usize, n_chunks: usize) -> (u64, Vec>) { - assert!(length > n_chunks); + assert!(length >= n_chunks); let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); @@ -254,8 +254,8 @@ mod xxhash3_64 { fn streaming(c: &mut Criterion) { let mut g = c.benchmark_group("xxhash3_64/streaming_many_chunks"); - for size in half_sizes(BIG_DATA_SIZE).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { - for n_chunks in half_sizes(MAX_CHUNKS) { + for size in [1024 * 1024] { + for n_chunks in half_sizes(size) { let (seed, chunks) = gen_chunked_data(size, n_chunks); g.throughput(Throughput::Bytes(size as _)); From a2c946a9785ce62445a6ceb3a1b70b552d87bb24 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 15:30:47 -0400 Subject: [PATCH 121/166] disable lib bench --- compare/Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compare/Cargo.toml b/compare/Cargo.toml index bbb8d0a59..c405aefa4 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -3,6 +3,9 @@ name = "compare" version = "0.1.0" edition = "2021" +[lib] +bench = false + [[bench]] name = "benchmark" harness = false From 0c0597d6f721e348bc93f4f95815aaeea3e38362 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 15:34:09 -0400 Subject: [PATCH 122/166] Revert "keep order consistent for now" This reverts commit 794d72a41c8752edc98da81cd3aa715324fcc0f7. 
--- src/xxhash3_64.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 291e9482f..80a7e5b93 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -614,18 +614,6 @@ where let input = &buffer[..buffer_len]; match total_bytes { - 0 => impl_0_bytes(DEFAULT_SECRET2, seed), - - 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), - - 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), - - 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), - - 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), - - 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), - 241.. => { // Ingest final stripes let (stripes, remainder) = stripes_with_tail(input); @@ -657,6 +645,18 @@ where total_bytes, ) } + + 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), + + 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), + + 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), + + 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), + + 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), + + 0 => impl_0_bytes(DEFAULT_SECRET2, seed), } } @@ -690,19 +690,19 @@ fn derive_secret(seed: u64, secret: &mut [u8; 192]) { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { match input.len() { - 0 => impl_0_bytes(secret, seed), + 241.. => impl_241_plus_bytes(secret, input), - 1..=3 => impl_1_to_3_bytes(secret, seed, input), + 129..=240 => impl_129_to_240_bytes(secret, seed, input), - 4..=8 => impl_4_to_8_bytes(secret, seed, input), + 17..=128 => impl_17_to_128_bytes(secret, seed, input), 9..=16 => impl_9_to_16_bytes(secret, seed, input), - 17..=128 => impl_17_to_128_bytes(secret, seed, input), + 4..=8 => impl_4_to_8_bytes(secret, seed, input), - 129..=240 => impl_129_to_240_bytes(secret, seed, input), + 1..=3 => impl_1_to_3_bytes(secret, seed, input), - 241.. 
=> impl_241_plus_bytes(secret, input), + 0 => impl_0_bytes(secret, seed), } } From 3591d7bbc705dafb8aa4608ffd744f97136d73cf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 16:32:10 -0400 Subject: [PATCH 123/166] One category for each range --- compare/benches/benchmark.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index fbb2fe10d..1c40f1d72 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -177,7 +177,7 @@ mod xxhash3_64 { // Visual inspection of all the data points showed these as // examples of thier nearby neighbors. - let categories = [0, 2, 9, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230]; + let categories = [0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230]; for size in categories { let data = &data[..size]; From 5a6f1a40aff6b2dde70317131e680f9bb3828b92 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 16:41:49 -0400 Subject: [PATCH 124/166] asmasm and inline never --- asmasm/src/main.rs | 59 +++++++++++++++++++++++++++++++++------------- src/xxhash3_64.rs | 10 ++++---- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index e515bcdfc..b1244716b 100644 --- a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,25 +1,52 @@ -use std::{hint::black_box, time::Instant}; +use std::{hash::Hasher, time::Instant}; use xx_hash_sys::XxHash3_64 as C; use xx_renu::xxhash3_64::XxHash3_64; fn main() { let filename = std::env::args().nth(1).expect("filename"); - let use_c = std::env::args() - .nth(2) - .map_or(false, |a| a.eq_ignore_ascii_case("C")); + let mode = std::env::args().nth(2); + let mode = mode.as_deref().unwrap_or("rust-oneshot"); let file = std::fs::read(filename).expect("read"); + let chunk_size = file.len() / 100; + let chunk_size = usize::max(chunk_size, 1); - if use_c { - let start = Instant::now(); - let hash = C::oneshot(&file); - let 
elapsed = start.elapsed(); - black_box(hash); - eprintln!("C {elapsed:?}"); - } else { - let start = Instant::now(); - let hash = XxHash3_64::oneshot(&file); - let elapsed = start.elapsed(); - black_box(hash); - eprintln!("Rust {elapsed:?}"); + let start = Instant::now(); + let hash = match mode { + "rust-oneshot" => rust_oneshot(&file), + "c-oneshot" => c_oneshot(&file), + "rust-chunked" => rust_chunked(&file, chunk_size), + "c-chunked" => c_chunked(&file, chunk_size), + other => panic!("Unknown mode {other}"), + }; + let elapsed = start.elapsed(); + + eprintln!("{mode}\t{elapsed:?}\t{hash:016X}"); +} + +#[inline(never)] +fn rust_oneshot(file: &[u8]) -> u64 { + XxHash3_64::oneshot(file) +} + +#[inline(never)] +fn c_oneshot(file: &[u8]) -> u64 { + C::oneshot(file) +} + +#[inline(never)] +fn rust_chunked(file: &[u8], chunk_size: usize) -> u64 { + let mut hasher = XxHash3_64::new(); + for chunk in file.chunks(chunk_size) { + hasher.write(chunk); + } + hasher.finish() +} + +#[inline(never)] +fn c_chunked(file: &[u8], chunk_size: usize) -> u64 { + let mut hasher = C::new(); + for chunk in file.chunks(chunk_size) { + hasher.write(chunk); } + hasher.finish() } diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 80a7e5b93..ec467a959 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -110,12 +110,12 @@ pub struct XxHash3_64 { } impl XxHash3_64 { - #[inline(never)] + #[inline] pub fn oneshot(input: &[u8]) -> u64 { impl_oneshot(DEFAULT_SECRET2, DEFAULT_SEED, input) } - #[inline(never)] + #[inline] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { let mut secret = DEFAULT_SECRET; @@ -130,7 +130,7 @@ impl XxHash3_64 { impl_oneshot(s, seed, input) } - #[inline(never)] + #[inline] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> u64 { let secret = Secret::new(secret).unwrap(); // TODO: ERROR impl_oneshot(secret, DEFAULT_SEED, input) @@ -489,7 +489,7 @@ impl hash::Hasher for RawHasher where S: AsRef<[u8]>, { - #[inline(never)] + #[inline] fn 
write(&mut self, input: &[u8]) { let this = self; dispatch! { @@ -498,7 +498,7 @@ where } } - #[inline(never)] + #[inline] fn finish(&self) -> u64 { let this = self; dispatch! { From e0f44663af7a48c2f673d36dd9c52471d6a09913 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 23:43:12 -0400 Subject: [PATCH 125/166] asserts and unsafe --- src/xxhash3_64.rs | 135 +++++++++++++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 49 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ec467a959..84fcd96e2 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,7 +1,7 @@ #![allow(missing_docs)] #![deny(unsafe_op_in_unsafe_fn)] -use core::{hash, mem, slice}; +use core::{hash, hint::assert_unchecked, mem, slice}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -59,6 +59,7 @@ impl Secret { fn words_for_0(&self) -> [u64; 2] { // unsafe { self.0.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() } + self.reassert_preconditions(); let (q, _) = self.0[56..].bp_as_chunks(); [q[0], q[1]].map(u64::from_ne_bytes) } @@ -67,6 +68,7 @@ impl Secret { fn words_for_1_to_3(&self) -> [u32; 2] { // unsafe { self.0.as_ptr().cast::<[u32; 2]>().read_unaligned() } + self.reassert_preconditions(); let (q, _) = self.0.bp_as_chunks(); [q[0], q[1]].map(u32::from_ne_bytes) } @@ -75,6 +77,7 @@ impl Secret { fn words_for_4_to_8(&self) -> [u64; 2] { //unsafe { self.0.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() } + self.reassert_preconditions(); let (q, _) = self.0[8..].bp_as_chunks(); [q[0], q[1]].map(u64::from_ne_bytes) } @@ -83,24 +86,38 @@ impl Secret { fn words_for_9_to_16(&self) -> [u64; 4] { // unsafe { self.0.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() } + self.reassert_preconditions(); let (q, _) = self.0[24..].bp_as_chunks(); [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) } + #[inline] + fn words_for_17_to_128(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + let (words, _) = self.0.bp_as_chunks(); + words + } + 
#[inline] fn stripe(&self, i: usize) -> &[u8; 64] { unsafe { &*self.0.get_unchecked(i * 8..).as_ptr().cast() } } #[inline] - fn end(&self) -> &[u8; 64] { - unsafe { self.0.last_chunk().unwrap_unchecked() } + fn last_stripe(&self) -> &[u8; 64] { + self.reassert_preconditions(); + self.0.last_chunk().unwrap() } #[inline] fn len(&self) -> usize { self.0.len() } + + #[inline(always)] + fn reassert_preconditions(&self) { + unsafe { assert_unchecked(self.0.len() >= SECRET_MINIMUM_LENGTH) } + } } pub struct XxHash3_64 { @@ -361,7 +378,7 @@ impl StripeAccumulator { .. } = self; - let secret_end = secret.end(); + let secret_end = secret.last_stripe(); // each stripe let secret = secret.stripe(*current_stripe); @@ -533,6 +550,9 @@ where *total_bytes += input.len(); + debug_assert!(*buffer_len <= buffer.len()); + unsafe { assert_unchecked(*buffer_len <= buffer.len()) }; + // We have some previous data saved; try to fill it up and process it first if !buffer.is_empty() { let remaining = &mut buffer[*buffer_len..]; @@ -568,11 +588,15 @@ where // Process as much of the input data in-place as possible, // while leaving at least one full stripe for the // finalization. - if let Some(dd) = input.len().checked_sub(STRIPE_BYTES) { - let nn = dd / STRIPE_BYTES; - let nn = nn * STRIPE_BYTES; - let (aa, remainder) = input.split_at(nn); - let (stripes, _) = aa.bp_as_chunks(); + if let Some(len) = input.len().checked_sub(STRIPE_BYTES) { + let full_block_point = (len / STRIPE_BYTES) * STRIPE_BYTES; + // Safety: We know that `full_block_point` must be less than + // `input.len()` as we subtracted and then integer-divided + // (which rounds down) and then multiplied back. That's not + // evident to the compiler and `split_at` results in a + // potential panic. 
+ let (stripes, remainder) = unsafe { input.split_at_unchecked(full_block_point) }; + let (stripes, _) = stripes.bp_as_chunks(); for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret) @@ -584,9 +608,13 @@ where // buffer is empty so just fill up the buffer. debug_assert!(*buffer_len == 0); debug_assert!(!input.is_empty()); - debug_assert!(input.len() < buffer.len()); + debug_assert!(input.len() < 2 * STRIPE_BYTES); + debug_assert!(2 * STRIPE_BYTES < buffer.len()); - buffer[..input.len()].copy_from_slice(input); + // SAFETY: We have parsed all the full blocks of input except one + // and potentially a full block minus one byte. That amount of + // data must be less than the buffer. + unsafe { buffer.get_unchecked_mut(..input.len()) }.copy_from_slice(input); *buffer_len = input.len(); } @@ -611,52 +639,43 @@ where let secret = unsafe { Secret::new_unchecked(secret) }; let buffer = buffer.as_ref(); - let input = &buffer[..buffer_len]; + unsafe { assert_unchecked(buffer_len <= buffer.len()) }; - match total_bytes { - 241.. 
=> { - // Ingest final stripes - let (stripes, remainder) = stripes_with_tail(input); - for stripe in stripes { - stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); - } - - let mut temp = [0; 64]; - - let last_stripe = match input.last_chunk() { - Some(chunk) => chunk, - None => { - let n_to_reuse = 64 - input.len(); - let to_reuse = buffer.len() - n_to_reuse; - let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); - temp_head.copy_from_slice(&buffer[to_reuse..]); - temp_tail.copy_from_slice(input); + if total_bytes >= 241 { + let input = &buffer[..buffer_len]; - &temp - } - }; - - Algorithm(vector).finalize( - stripe_accumulator.accumulator, - remainder, - last_stripe, - secret, - total_bytes, - ) + // Ingest final stripes + let (stripes, remainder) = stripes_with_tail(input); + for stripe in stripes { + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } - 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), + let mut temp = [0; 64]; - 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), + let last_stripe = match input.last_chunk() { + Some(chunk) => chunk, + None => { + let n_to_reuse = 64 - input.len(); + let to_reuse = buffer.len() - n_to_reuse; - 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), + let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); + temp_head.copy_from_slice(&buffer[to_reuse..]); + temp_tail.copy_from_slice(input); - 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), - - 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), + &temp + } + }; - 0 => impl_0_bytes(DEFAULT_SECRET2, seed), + Algorithm(vector).finalize( + stripe_accumulator.accumulator, + remainder, + last_stripe, + secret, + total_bytes, + ) + } else { + impl_oneshot(&DEFAULT_SECRET2, seed, &buffer[..total_bytes]) } } @@ -706,6 +725,16 @@ fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { } } +macro_rules! 
assert_input_range { + ($min:literal.., $len:expr) => { + assert!($min <= $len); + }; + ($min:literal..=$max:literal, $len:expr) => { + assert!($min <= $len); + assert!($len <= $max); + }; +} + #[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { let secret_words = secret.words_for_0(); @@ -714,6 +743,7 @@ fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { #[inline(always)] fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + assert_input_range!(1..=3, input.len()); let input_length = input.len() as u8; // OK as we checked that the length fits let combined = input[input.len() - 1].into_u32() @@ -731,6 +761,7 @@ fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + assert_input_range!(4..=8, input.len()); let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { input @@ -761,6 +792,7 @@ fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + assert_input_range!(9..=16, input.len()); let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { input @@ -787,9 +819,10 @@ fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline] fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + assert_input_range!(17..=128, input.len()); let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let (secret, _) = secret.0.bp_as_chunks(); + let secret = secret.words_for_17_to_128(); let (secret, _) = secret.bp_as_chunks::<2>(); let (fwd, _) = input.bp_as_chunks(); let (_, bwd) = input.bp_as_rchunks(); @@ -819,6 +852,7 @@ fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline] fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + 
assert_input_range!(129..=240, input.len()); let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _) = input.bp_as_chunks(); @@ -869,6 +903,7 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { + assert_input_range!(241.., input.len()); dispatch! { fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64 [] @@ -979,6 +1014,8 @@ impl Algorithm { self.0.accumulate(acc, stripe, secret); } + unsafe { assert_unchecked(secret.len() >= SECRET_MINIMUM_LENGTH) }; + let q = secret.0[secret.len() - 71..].first_chunk().unwrap(); self.0.accumulate(acc, last_stripe, q); } From 1e6f66d179e53bb38c75fd1f1b5d718490711fe6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 10:49:18 -0400 Subject: [PATCH 126/166] sum with new --- renu-sum/src/main.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 7a1055778..b86a5e26c 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -7,7 +7,7 @@ use std::{ sync::mpsc::{self, SendError}, thread, }; -use xx_renu::XxHash64; +use xx_renu::XxHash3_64; type Error = Box; type Result = std::result::Result; @@ -53,9 +53,10 @@ fn main() -> Result<()> { Ok(()) } +#[inline(never)] fn hash_one_file(config: &Config, path: &Path, buffer: &mut [u8]) -> Result { let mut file = File::open(path)?; - let mut hasher = XxHash64::with_seed(0); + let mut hasher = XxHash3_64::with_seed(0); let (tx_empty, rx_empty) = mpsc::channel(); let (tx_filled, rx_filled) = mpsc::channel(); From b9fccc5700dda1e7201809d614f9d40ec5b13362 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 10:50:59 -0400 Subject: [PATCH 127/166] categ --- compare/benches/benchmark.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 1c40f1d72..fe2c7f693 100644 --- a/compare/benches/benchmark.rs +++ 
b/compare/benches/benchmark.rs @@ -177,7 +177,9 @@ mod xxhash3_64 { // Visual inspection of all the data points showed these as // examples of thier nearby neighbors. - let categories = [0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230]; + let categories = [ + 0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230, + ]; for size in categories { let data = &data[..size]; From 88ecd94130bc619313abde63e243f607f964d949 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 10:53:22 -0400 Subject: [PATCH 128/166] document LLVM missed optimization --- src/xxhash3_64.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 84fcd96e2..d450313b0 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -595,6 +595,8 @@ where // (which rounds down) and then multiplied back. That's not // evident to the compiler and `split_at` results in a // potential panic. + // + // https://github.com/llvm/llvm-project/issues/104827 let (stripes, remainder) = unsafe { input.split_at_unchecked(full_block_point) }; let (stripes, _) = stripes.bp_as_chunks(); From 80db4d946a4002e139d6472fd55d44a9cbc3b33b Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 13:54:35 -0400 Subject: [PATCH 129/166] safety --- src/xxhash3_64.rs | 416 +++++++++++++++++++++++++++++++--------------- 1 file changed, 281 insertions(+), 135 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index d450313b0..696f8e626 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -18,7 +18,7 @@ const PRIME_MX2: u64 = 0x9FB21C651E98DF25; const DEFAULT_SEED: u64 = 0; -const DEFAULT_SECRET: [u8; 192] = [ +const DEFAULT_SECRET_RAW: [u8; 192] = [ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 
0x72, 0x21, @@ -33,7 +33,8 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; -const DEFAULT_SECRET2: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET) }; +// Safety: The default secret is long enough +const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; pub const SECRET_MINIMUM_LENGTH: usize = 136; @@ -43,50 +44,56 @@ struct Secret([u8]); impl Secret { #[inline] fn new(bytes: &[u8]) -> Result<&Self, ()> { - if bytes.len() >= SECRET_MINIMUM_LENGTH { - unsafe { Ok(Self::new_unchecked(bytes)) } - } else { - Err(()) // TODO error + // Safety: We check for validity before returning. + unsafe { + let this = Self::new_unchecked(bytes); + if this.is_valid() { + Ok(this) + } else { + Err(()) // TODO error + } } } + /// # Safety + /// + /// You must ensure that the secret byte length is >= + /// SECRET_MINIMUM_LENGTH. #[inline] const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { + // Safety: We are `#[repr(transparent)]`. 
It's up to the + // caller to ensure the length unsafe { mem::transmute(bytes) } } #[inline] fn words_for_0(&self) -> [u64; 2] { - // unsafe { self.0.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() } - self.reassert_preconditions(); + let (q, _) = self.0[56..].bp_as_chunks(); [q[0], q[1]].map(u64::from_ne_bytes) } #[inline] fn words_for_1_to_3(&self) -> [u32; 2] { - // unsafe { self.0.as_ptr().cast::<[u32; 2]>().read_unaligned() } - self.reassert_preconditions(); + let (q, _) = self.0.bp_as_chunks(); [q[0], q[1]].map(u32::from_ne_bytes) } #[inline] fn words_for_4_to_8(&self) -> [u64; 2] { - //unsafe { self.0.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() } - self.reassert_preconditions(); + let (q, _) = self.0[8..].bp_as_chunks(); [q[0], q[1]].map(u64::from_ne_bytes) } #[inline] fn words_for_9_to_16(&self) -> [u64; 4] { - // unsafe { self.0.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() } - self.reassert_preconditions(); + let (q, _) = self.0[24..].bp_as_chunks(); [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) } @@ -94,21 +101,62 @@ impl Secret { #[inline] fn words_for_17_to_128(&self) -> &[[u8; 16]] { self.reassert_preconditions(); + let (words, _) = self.0.bp_as_chunks(); words } + #[inline] + fn words_for_127_to_240_part1(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (ss, _) = self.0.bp_as_chunks(); + ss + } + + #[inline] + fn words_for_127_to_240_part2(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (ss, _) = self.0[3..].bp_as_chunks(); + ss + } + + #[inline] + fn words_for_127_to_240_part3(&self) -> &[u8; 16] { + self.reassert_preconditions(); + + self.0[119..].first_chunk().unwrap() + } + #[inline] fn stripe(&self, i: usize) -> &[u8; 64] { + self.reassert_preconditions(); + unsafe { &*self.0.get_unchecked(i * 8..).as_ptr().cast() } } #[inline] fn last_stripe(&self) -> &[u8; 64] { self.reassert_preconditions(); + self.0.last_chunk().unwrap() } + #[inline] + fn last_stripe_secret_better_name(&self) -> &[u8; 
64] { + self.reassert_preconditions(); + + self.0[self.0.len() - 71..].first_chunk().unwrap() + } + + #[inline] + fn final_secret(&self) -> &[u8; 64] { + self.reassert_preconditions(); + + self.0[11..].first_chunk().unwrap() + } + #[inline] fn len(&self) -> usize { self.0.len() @@ -116,7 +164,17 @@ impl Secret { #[inline(always)] fn reassert_preconditions(&self) { - unsafe { assert_unchecked(self.0.len() >= SECRET_MINIMUM_LENGTH) } + // Safety: The length of the bytes was checked at value + // construction time. + unsafe { + debug_assert!(self.is_valid()); + assert_unchecked(self.is_valid()); + } + } + + #[inline(always)] + fn is_valid(&self) -> bool { + self.0.len() >= SECRET_MINIMUM_LENGTH } } @@ -129,12 +187,12 @@ pub struct XxHash3_64 { impl XxHash3_64 { #[inline] pub fn oneshot(input: &[u8]) -> u64 { - impl_oneshot(DEFAULT_SECRET2, DEFAULT_SEED, input) + impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input) } #[inline] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { - let mut secret = DEFAULT_SECRET; + let mut secret = DEFAULT_SECRET_RAW; // We know that the secret will only be used if we have more // than 240 bytes, so don't waste time computing it otherwise. @@ -142,9 +200,9 @@ impl XxHash3_64 { derive_secret(seed, &mut secret); } - let s = unsafe { Secret::new_unchecked(&secret) }; + let secret = Secret::new(&secret).expect("The default secret length is invalid"); - impl_oneshot(s, seed, input) + impl_oneshot(secret, seed, input) } #[inline] @@ -157,21 +215,36 @@ impl XxHash3_64 { const STRIPE_BYTES: usize = 64; const BUFFERED_STRIPES: usize = 4; const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; +type Buffer = [u8; BUFFERED_BYTES]; // Ensure that a full buffer always implies we are in the 241+ byte case. const _: () = assert!(BUFFERED_BYTES > 240); +/// # Safety +/// +/// Must always return a slice with the same number of elements. +pub unsafe trait FixedBuffer: AsRef<[u8]> {} + +// Safety: An array will never change size. 
+unsafe impl FixedBuffer for [u8; N] {} + +// Safety: An array will never change size. +unsafe impl FixedBuffer for &[u8; N] {} + +// Safety: A plain slice will never change size. +unsafe impl FixedBuffer for Box<[u8]> {} + /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. pub struct SecretBuffer { seed: u64, secret: S, - buffer: [u8; BUFFERED_BYTES], + buffer: Buffer, } impl SecretBuffer where - S: AsRef<[u8]>, + S: FixedBuffer, { /// Takes the seed, secret, and buffer and performs no /// modifications to them, only validating that the sizes are @@ -190,6 +263,7 @@ where } } + #[inline(always)] fn is_valid(&self) -> bool { let secret = self.secret.as_ref(); @@ -208,6 +282,26 @@ where pub fn decompose(self) -> S { self.secret } + + #[inline] + fn parts(&self) -> (u64, &Secret, &Buffer) { + let secret = self.secret.as_ref(); + // Safety: We established the length at construction and the + // length is not allowed to change. + let secret = unsafe { Secret::new_unchecked(secret) }; + + (self.seed, secret, &self.buffer) + } + + #[inline] + fn parts_mut(&mut self) -> (u64, &Secret, &mut Buffer) { + let secret = self.secret.as_ref(); + // Safety: We established the length at construction and the + // length is not allowed to change. 
+ let secret = unsafe { Secret::new_unchecked(secret) }; + + (self.seed, secret, &mut self.buffer) + } } impl SecretBuffer<&'static [u8; 192]> { @@ -218,7 +312,7 @@ impl SecretBuffer<&'static [u8; 192]> { pub const fn default() -> Self { SecretBuffer { seed: DEFAULT_SEED, - secret: &DEFAULT_SECRET, + secret: &DEFAULT_SECRET_RAW, buffer: [0; BUFFERED_BYTES], } } @@ -280,7 +374,7 @@ mod with_alloc { pub fn allocate_default() -> Self { Self { seed: DEFAULT_SEED, - secret: DEFAULT_SECRET.to_vec().into(), + secret: DEFAULT_SECRET_RAW.to_vec().into(), buffer: [0; BUFFERED_BYTES], } } @@ -288,7 +382,7 @@ mod with_alloc { /// Allocates the secret and temporary buffers and uses the /// provided seed to construct the secret value. pub fn allocate_with_seed(seed: u64) -> Self { - let mut secret = DEFAULT_SECRET; + let mut secret = DEFAULT_SECRET_RAW; derive_secret(seed, &mut secret); Self { @@ -331,7 +425,7 @@ mod with_alloc { impl SecretBuffer where - S: AsRef<[u8]> + AsMut<[u8]>, + S: FixedBuffer + AsMut<[u8]>, { /// Fills the secret buffer with a secret derived from the seed /// and the default secret. @@ -341,7 +435,7 @@ where Err(_) => return Err(secret), }; - *secret_slice = DEFAULT_SECRET; + *secret_slice = DEFAULT_SECRET_RAW; derive_secret(seed, secret_slice); Self::new(seed, secret) @@ -404,7 +498,7 @@ impl StripeAccumulator { /// generic type. pub struct RawHasher { secret_buffer: SecretBuffer, - buffer_len: usize, + buffer_usage: usize, stripe_accumulator: StripeAccumulator, total_bytes: usize, } @@ -413,7 +507,7 @@ impl RawHasher { pub fn new(secret_buffer: SecretBuffer) -> Self { Self { secret_buffer, - buffer_len: 0, + buffer_usage: 0, stripe_accumulator: StripeAccumulator::new(), total_bytes: 0, } @@ -442,7 +536,7 @@ macro_rules! 
dispatch { where $($wheres)* { - // SAFETY: the caller has ensured we have the NEON feature + // Safety: The caller has ensured we have the NEON feature unsafe { $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) } @@ -485,6 +579,7 @@ macro_rules! dispatch { #[cfg(all(target_arch = "aarch64", feature = "std"))] { if std::arch::is_aarch64_feature_detected!("neon") { + // Safety: We just ensured we have the NEON feature return unsafe { do_neon($($arg_name),*) }; } } @@ -492,8 +587,10 @@ macro_rules! dispatch { #[cfg(all(target_arch = "x86_64", feature = "std"))] { if is_x86_feature_detected!("avx2") { + // Safety: We just ensured we have the AVX2 feature return unsafe { do_avx2($($arg_name),*) }; } else if is_x86_feature_detected!("sse2") { + // Safety: We just ensured we have the SSE2 feature return unsafe { do_sse2($($arg_name),*) }; } } @@ -504,14 +601,14 @@ macro_rules! dispatch { impl hash::Hasher for RawHasher where - S: AsRef<[u8]>, + S: FixedBuffer, { #[inline] fn write(&mut self, input: &[u8]) { let this = self; dispatch! { fn write_impl(this: &mut RawHasher, input: &[u8]) - [S: AsRef<[u8]>] + [S: FixedBuffer] } } @@ -520,7 +617,7 @@ where let this = self; dispatch! { fn finish_impl(this: &RawHasher) -> u64 - [S: AsRef<[u8]>] + [S: FixedBuffer] } } } @@ -528,7 +625,7 @@ where #[inline(always)] fn write_impl(vector: impl Vector, this: &mut RawHasher, mut input: &[u8]) where - S: AsRef<[u8]>, + S: FixedBuffer, { if input.is_empty() { return; @@ -536,33 +633,33 @@ where let RawHasher { secret_buffer, - buffer_len, + buffer_usage, stripe_accumulator, total_bytes, .. } = this; let n_stripes = secret_buffer.n_stripes(); - - let SecretBuffer { secret, buffer, .. 
} = secret_buffer; - let secret = secret.as_ref(); - let secret = unsafe { Secret::new_unchecked(secret) }; + let (_, secret, buffer) = secret_buffer.parts_mut(); *total_bytes += input.len(); - debug_assert!(*buffer_len <= buffer.len()); - unsafe { assert_unchecked(*buffer_len <= buffer.len()) }; + // Safety: This is an invariant of the buffer. + unsafe { + debug_assert!(*buffer_usage <= buffer.len()); + assert_unchecked(*buffer_usage <= buffer.len()) + }; // We have some previous data saved; try to fill it up and process it first if !buffer.is_empty() { - let remaining = &mut buffer[*buffer_len..]; + let remaining = &mut buffer[*buffer_usage..]; let n_to_copy = usize::min(remaining.len(), input.len()); let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); let (input_head, input_tail) = input.split_at(n_to_copy); remaining_head.copy_from_slice(input_head); - *buffer_len += n_to_copy; + *buffer_usage += n_to_copy; input = input_tail; @@ -580,10 +677,10 @@ where for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } - *buffer_len = 0; + *buffer_usage = 0; } - debug_assert!(*buffer_len == 0); + debug_assert!(*buffer_usage == 0); // Process as much of the input data in-place as possible, // while leaving at least one full stripe for the @@ -608,44 +705,45 @@ where // Any remaining data has to be less than the buffer, and the // buffer is empty so just fill up the buffer. - debug_assert!(*buffer_len == 0); + debug_assert!(*buffer_usage == 0); debug_assert!(!input.is_empty()); - debug_assert!(input.len() < 2 * STRIPE_BYTES); - debug_assert!(2 * STRIPE_BYTES < buffer.len()); - // SAFETY: We have parsed all the full blocks of input except one + // Safety: We have parsed all the full blocks of input except one // and potentially a full block minus one byte. That amount of // data must be less than the buffer. 
- unsafe { buffer.get_unchecked_mut(..input.len()) }.copy_from_slice(input); - *buffer_len = input.len(); + let buffer_head = unsafe { + debug_assert!(input.len() < 2 * STRIPE_BYTES); + debug_assert!(2 * STRIPE_BYTES < buffer.len()); + buffer.get_unchecked_mut(..input.len()) + }; + + buffer_head.copy_from_slice(input); + *buffer_usage = input.len(); } #[inline(always)] fn finish_impl(vector: impl Vector, this: &RawHasher) -> u64 where - S: AsRef<[u8]>, + S: FixedBuffer, { let RawHasher { ref secret_buffer, - buffer_len, + buffer_usage, mut stripe_accumulator, total_bytes, } = *this; - let n_stripes = secret_buffer.n_stripes(); - let SecretBuffer { - seed, - ref secret, - ref buffer, - } = *secret_buffer; - let secret = secret.as_ref(); - let secret = unsafe { Secret::new_unchecked(secret) }; - let buffer = buffer.as_ref(); - unsafe { assert_unchecked(buffer_len <= buffer.len()) }; + let n_stripes = secret_buffer.n_stripes(); + let (seed, secret, buffer) = secret_buffer.parts(); + // Safety: This is an invariant of the buffer. 
+ unsafe { + debug_assert!(buffer_usage <= buffer.len()); + assert_unchecked(buffer_usage <= buffer.len()) + }; if total_bytes >= 241 { - let input = &buffer[..buffer_len]; + let input = &buffer[..buffer_usage]; // Ingest final stripes let (stripes, remainder) = stripes_with_tail(input); @@ -677,7 +775,7 @@ where total_bytes, ) } else { - impl_oneshot(&DEFAULT_SECRET2, seed, &buffer[..total_bytes]) + impl_oneshot(DEFAULT_SECRET, seed, &buffer[..total_bytes]) } } @@ -691,20 +789,18 @@ fn derive_secret(seed: u64, secret: &mut [u8; 192]) { return; } - let base = secret.as_mut_ptr().cast::(); - - for i in 0..12 { - let a_p = unsafe { base.add(i * 2) }; - let b_p = unsafe { base.add(i * 2 + 1) }; + let (words, _) = secret.bp_as_chunks_mut(); + let (pairs, _) = words.bp_as_chunks_mut(); - let mut a = unsafe { a_p.read_unaligned() }; - let mut b = unsafe { b_p.read_unaligned() }; + for [a_p, b_p] in pairs { + let a = u64::from_ne_bytes(*a_p); + let b = u64::from_ne_bytes(*b_p); - a = a.wrapping_add(seed); - b = b.wrapping_sub(seed); + let a = a.wrapping_add(seed); + let b = b.wrapping_sub(seed); - unsafe { a_p.write_unaligned(a) }; - unsafe { b_p.write_unaligned(b) }; + *a_p = a.to_ne_bytes(); + *b_p = b.to_ne_bytes(); } } @@ -764,15 +860,8 @@ fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(4..=8, input.len()); - let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; + let input_first = input.first_u32().unwrap(); + let input_last = input.last_u32().unwrap(); let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); let secret_words = secret.words_for_4_to_8(); @@ -795,15 +884,8 @@ fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn 
impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(9..=16, input.len()); - let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; + let input_first = input.first_u64().unwrap(); + let input_last = input.last_u64().unwrap(); let secret_words = secret.words_for_9_to_16(); let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; @@ -858,34 +940,37 @@ fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _) = input.bp_as_chunks(); - let last_chunk = input.last_chunk().unwrap(); let mut head = head.iter(); - let (ss, _) = secret.0.bp_as_chunks(); - let (ss2, _) = secret.0[3..].bp_as_chunks(); - - let qq = head.by_ref().zip(ss); - - for (chunk, s) in qq.take(8) { - acc = acc.wrapping_add(mix_step(chunk, s, seed)); + let ss = secret.words_for_127_to_240_part1(); + for (chunk, secret) in head.by_ref().zip(ss).take(8) { + acc = acc.wrapping_add(mix_step(chunk, secret, seed)); } acc = avalanche(acc); - for (chunk, s) in head.zip(ss2) { - acc = acc.wrapping_add(mix_step(chunk, s, seed)); + let ss = secret.words_for_127_to_240_part2(); + for (chunk, secret) in head.zip(ss) { + acc = acc.wrapping_add(mix_step(chunk, secret, seed)); } - let ss3 = &secret.0[119..].first_chunk().unwrap(); - acc = acc.wrapping_add(mix_step(last_chunk, ss3, seed)); + let last_chunk = input.last_chunk().unwrap(); + let ss = secret.words_for_127_to_240_part3(); + acc = acc.wrapping_add(mix_step(last_chunk, ss, seed)); avalanche(acc) } #[inline] fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { - let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; - let secret_words = unsafe { secret.as_ptr().cast::<[u64; 2]>().read_unaligned() }; + #[inline] + fn 
to_u64s(bytes: &[u8; 16]) -> [u64; 2] { + let (pair, _) = bytes.bp_as_chunks::<8>(); + [pair[0], pair[1]].map(u64::from_ne_bytes) + } + + let data_words = to_u64s(data); + let secret_words = to_u64s(secret); let mul_result = { let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); @@ -922,18 +1007,16 @@ struct Algorithm(V); impl Algorithm { #[inline] fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { + assert_input_range!(241.., input.len()); let mut acc = INITIAL_ACCUMULATORS; - //assert!(secret.len() >= SECRET_MINIMUM_LENGTH); - assert!(input.len() >= 241); - let stripes_per_block = (secret.len() - 64) / 8; let block_size = 64 * stripes_per_block; let mut blocks = input.chunks_exact(block_size); let last_block = if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means + // Safety: We know that `input` is non-empty, which means // that either there will be a remainder or one or more // full blocks. That info isn't flowing to the optimizer, // so we use `unwrap_unchecked`. 
@@ -966,7 +1049,7 @@ impl Algorithm { #[inline] fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { - let secret_end = secret.0.last_chunk().unwrap(); + let secret_end = secret.last_stripe(); self.round_accumulate(acc, stripes, secret); self.0.round_scramble(acc, secret_end); @@ -994,7 +1077,7 @@ impl Algorithm { debug_assert!(!last_block.is_empty()); self.last_round(&mut acc, last_block, last_stripe, secret); - self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret, 11) + self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret) } #[inline] @@ -1016,22 +1099,14 @@ impl Algorithm { self.0.accumulate(acc, stripe, secret); } - unsafe { assert_unchecked(secret.len() >= SECRET_MINIMUM_LENGTH) }; - - let q = secret.0[secret.len() - 71..].first_chunk().unwrap(); - self.0.accumulate(acc, last_stripe, q); + let last_stripe_secret = secret.last_stripe_secret_better_name(); + self.0.accumulate(acc, last_stripe, last_stripe_secret); } #[inline] - fn final_merge( - &self, - acc: &mut [u64; 8], - init_value: u64, - secret: &Secret, - secret_offset: usize, - ) -> u64 { - let secrets = secret.0[secret_offset..].first_chunk::<64>().unwrap(); - let (secrets, _) = secrets.bp_as_chunks(); + fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &Secret) -> u64 { + let secret = secret.final_secret(); + let (secrets, _) = secret.bp_as_chunks(); let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result @@ -1117,17 +1192,18 @@ mod scalar { // https://github.com/llvm/llvm-project/issues/98481 #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - use core::arch::asm; - let res; + // Safety: We only compute using our argument values and do + // not change memory. 
unsafe { - asm!( + core::arch::asm!( "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", lhs = in(reg) lhs, rhs = in(reg) rhs, acc = in(reg) acc, res = out(reg) res, + options(pure, nomem, nostack), ) } @@ -1146,6 +1222,7 @@ mod neon { impl Impl { /// # Safety + /// /// You must ensure that the CPU has the NEON feature #[inline] pub unsafe fn new_unchecked() -> Self { @@ -1156,11 +1233,13 @@ mod neon { impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // Safety: Type can only be constructed when NEON feature is present unsafe { round_scramble_neon(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // Safety: Type can only be constructed when NEON feature is present unsafe { accumulate_neon(acc, stripe, secret) } } } @@ -1199,7 +1278,7 @@ mod neon { #[inline] unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let (acc2, _) = acc.bp_as_chunks_mut::<4>(); - for (i, acc) in acc2.into_iter().enumerate() { + for (i, acc) in acc2.iter_mut().enumerate() { unsafe { let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); @@ -1289,6 +1368,8 @@ mod neon { // operation. #[inline] pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { + // Safety: We only compute using our argument values and do + // not change memory. unsafe { let input_as_u32 = vreinterpretq_u32_u64(input); let factor = vmov_n_u32(og_factor); @@ -1303,11 +1384,24 @@ mod neon { } } + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature + // // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 #[inline] #[target_feature(enable = "neon")] unsafe fn reordering_barrier(r: uint64x2_t) { - unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } + // Safety: The caller has ensured we have the NEON feature. 
We + // aren't doing anything with the argument, so we shouldn't be + // able to cause unsafety! + unsafe { + core::arch::asm!( + "/* {r:v} */", + r = in(vreg) r, + options(nomem, nostack), + ) + } } } @@ -1332,13 +1426,13 @@ mod avx2 { impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // SAFETY: Type can only be constructed when AVX2 feature is present + // Safety: Type can only be constructed when AVX2 feature is present unsafe { round_scramble_avx2(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // SAFETY: Type can only be constructed when AVX2 feature is present + // Safety: Type can only be constructed when AVX2 feature is present unsafe { accumulate_avx2(acc, stripe, secret) } } } @@ -1409,13 +1503,13 @@ mod sse2 { impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // SAFETY: Type can only be constructed when SSE2 feature is present + // Safety: Type can only be constructed when SSE2 feature is present unsafe { round_scramble_sse2(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // SAFETY: Type can only be constructed when SSE2 feature is present + // Safety: Type can only be constructed when SSE2 feature is present unsafe { accumulate_sse2(acc, stripe, secret) } } } @@ -1516,6 +1610,38 @@ impl Halves for u128 { } } +trait U8SliceExt { + fn first_u32(&self) -> Option; + + fn last_u32(&self) -> Option; + + fn first_u64(&self) -> Option; + + fn last_u64(&self) -> Option; +} + +impl U8SliceExt for [u8] { + #[inline] + fn first_u32(&self) -> Option { + self.first_chunk().copied().map(u32::from_ne_bytes) + } + + #[inline] + fn last_u32(&self) -> Option { + self.last_chunk().copied().map(u32::from_ne_bytes) + } + + #[inline] + fn first_u64(&self) -> Option { + self.first_chunk().copied().map(u64::from_ne_bytes) + } + + #[inline] 
+ fn last_u64(&self) -> Option { + self.last_chunk().copied().map(u64::from_ne_bytes) + } +} + trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); @@ -1529,7 +1655,12 @@ impl SliceBackport for [T] { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { assert_ne!(N, 0); let len = self.len() / N; + // Safety: `(len / N) * N` has to be less-than-or-equal to `len` let (head, tail) = unsafe { self.split_at_unchecked(len * N) }; + // Safety: (1) `head` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid elements are less-than-or-equal to the original + // slice. let head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; (head, tail) } @@ -1538,7 +1669,12 @@ impl SliceBackport for [T] { fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; + // Safety: `(len / N) * N` has to be less than or equal to `len` let (head, tail) = unsafe { self.split_at_mut_unchecked(len * N) }; + // Safety: (1) `head` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid elements are less-than-or-equal to the original + // slice. let head = unsafe { slice::from_raw_parts_mut(head.as_mut_ptr().cast(), len) }; (head, tail) } @@ -1546,7 +1682,12 @@ impl SliceBackport for [T] { fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { assert_ne!(N, 0); let len = self.len() / N; + // Safety: `(len / N) * N` has to be less than or equal to `len` let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + // Safety: (1) `tail` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid elements are less-than-or-equal to the original + // slice. 
let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; (head, tail) } @@ -1558,6 +1699,11 @@ mod test { use super::*; + #[test] + fn default_secret_is_valid() { + assert!(DEFAULT_SECRET.is_valid()) + } + #[test] fn secret_buffer_default_is_valid() { assert!(SecretBuffer::default().is_valid()); From bb684842b471dbdaa452cfee28bc17e330fefd85 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 09:07:38 -0400 Subject: [PATCH 130/166] simpler blackbox --- compare/Cargo.toml | 2 +- compare/benches/benchmark.rs | 82 +++++++++--------------------------- 2 files changed, 22 insertions(+), 62 deletions(-) diff --git a/compare/Cargo.toml b/compare/Cargo.toml index c405aefa4..f71c802db 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -11,7 +11,7 @@ name = "benchmark" harness = false [dependencies] -criterion = "0.5.1" +criterion = { version = "0.5.1", features = ["real_blackbox"] } proptest = "1.5.0" rand = "0.8.5" twox-hash = "1.6.3" diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index fe2c7f693..0affcbb89 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,6 +1,6 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use rand::{Rng, RngCore, SeedableRng}; -use std::{hash::Hasher, hint::black_box, iter}; +use std::{hash::Hasher, iter}; use xx_hash_sys as c; use xx_renu as rust; @@ -20,42 +20,26 @@ fn tiny_data(c: &mut Criterion) { g.throughput(Throughput::Bytes(data.len() as _)); let id = format!("impl-c/fn-oneshot/size-{size:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let hash = c::XxHash64::oneshot(seed, data); - black_box(hash); - }) - }); + g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); let id = format!("impl-c/fn-streaming/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { - let hash = { - let mut hasher = c::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - black_box(hash); + let mut hasher 
= c::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() }) }); let id = format!("impl-rust/fn-oneshot/size-{size:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let hash = rust::XxHash64::oneshot(seed, data); - black_box(hash); - }) - }); + g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); let id = format!("impl-rust/fn-streaming/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { - let hash = { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - black_box(hash); + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() }) }); } @@ -72,20 +56,10 @@ fn oneshot(c: &mut Criterion) { g.throughput(Throughput::Bytes(data.len() as _)); let id = format!("impl-c/size-{size:07}"); - g.bench_function(id, |b| { - b.iter(|| { - let hash = c::XxHash64::oneshot(seed, data); - black_box(hash); - }) - }); + g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); let id = format!("impl-rust/size-{size:07}"); - g.bench_function(id, |b| { - b.iter(|| { - let hash = rust::XxHash64::oneshot(seed, data); - black_box(hash); - }) - }); + g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); } g.finish(); @@ -106,8 +80,7 @@ fn streaming(c: &mut Criterion) { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); @@ -118,8 +91,7 @@ fn streaming(c: &mut Criterion) { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); } @@ -187,18 +159,12 @@ mod xxhash3_64 { let id = format!("impl-c/fn-oneshot/size-{size:03}"); g.bench_function(id, |b| { - b.iter(|| { - let hash = c::XxHash3_64::oneshot_with_seed(seed, data); - black_box(hash); - }) + b.iter(|| c::XxHash3_64::oneshot_with_seed(seed, data)) }); let id = format!("impl-rust/fn-oneshot/size-{size:03}"); g.bench_function(id, |b| { - b.iter(|| { - let hash = 
rust::XxHash3_64::oneshot_with_seed(seed, data); - black_box(hash); - }) + b.iter(|| rust::XxHash3_64::oneshot_with_seed(seed, data)) }); } @@ -268,8 +234,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); @@ -280,8 +245,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); @@ -294,8 +258,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); } @@ -309,8 +272,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); @@ -321,8 +283,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); } @@ -334,8 +295,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); } From dc2ba205f635863c18d7ee6fbe17ee30bb52f9cc Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 09:21:37 -0400 Subject: [PATCH 131/166] Inline the sys crate functions --- xx_hash-sys/src/lib.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 0de80c556..6cf194ea1 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -30,10 +30,12 @@ extern "C" { pub struct XxHash32(*mut XXH32_state_t); impl XxHash32 { + #[inline] pub fn oneshot(seed: u32, data: &[u8]) -> u32 { unsafe { XXH32(data.as_ptr().cast(), data.len(), seed) } } + #[inline] pub fn with_seed(seed: u32) -> Self { let state = unsafe { let state = XXH32_createState(); @@ -44,11 +46,13 @@ impl XxHash32 { Self(state) } + #[inline] pub fn write(&mut self, data: &[u8]) { let retval = unsafe { XXH32_update(self.0, data.as_ptr().cast(), data.len()) 
}; assert_eq!(retval, XXH_OK); } + #[inline] pub fn finish(&mut self) -> u32 { unsafe { XXH32_digest(self.0) } } @@ -88,10 +92,12 @@ extern "C" { pub struct XxHash64(*mut XXH64_state_t); impl XxHash64 { + #[inline] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } } + #[inline] pub fn with_seed(seed: u64) -> Self { let state = unsafe { let state = XXH64_createState(); @@ -102,11 +108,13 @@ impl XxHash64 { Self(state) } + #[inline] pub fn write(&mut self, data: &[u8]) { let retval = unsafe { XXH64_update(self.0, data.as_ptr().cast(), data.len()) }; assert_eq!(retval, XXH_OK); } + #[inline] pub fn finish(&mut self) -> u64 { unsafe { XXH64_digest(self.0) } } @@ -169,14 +177,17 @@ macro_rules! xxh3_64b_template { pub struct XxHash3_64(*mut crate::XXH3_state_t); impl XxHash3_64 { + #[inline] pub fn oneshot(data: &[u8]) -> u64 { unsafe { [<$prefix _64bits>](data.as_ptr().cast(), data.len()) } } + #[inline] pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { unsafe { [<$prefix _64bits_withSeed>](data.as_ptr().cast(), data.len(), seed) } } + #[inline] pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { unsafe { [<$prefix _64bits_withSecret>]( @@ -188,6 +199,7 @@ macro_rules! xxh3_64b_template { } } + #[inline] pub fn new() -> Self { let state = unsafe { let state = [<$prefix _createState>](); @@ -198,6 +210,7 @@ macro_rules! xxh3_64b_template { Self(state) } + #[inline] pub fn with_seed(seed: u64) -> Self { let state = unsafe { let state = [<$prefix _createState>](); @@ -208,12 +221,14 @@ macro_rules! 
xxh3_64b_template { Self(state) } + #[inline] pub fn write(&mut self, data: &[u8]) { let retval = unsafe { [<$prefix _64bits_update>](self.0, data.as_ptr().cast(), data.len()) }; assert_eq!(retval, crate::XXH_OK); } + #[inline] pub fn finish(&mut self) -> u64 { unsafe { [<$prefix _64bits_digest>](self.0) } } From 902a0a72e638dcfaebfa541960df54e5551a0735 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 20:38:34 -0400 Subject: [PATCH 132/166] x86fixin --- src/xxhash3_64.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 696f8e626..838790af0 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -549,7 +549,9 @@ macro_rules! dispatch { where $($wheres)* { - $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) + unsafe { + $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) + } } #[inline] @@ -559,7 +561,9 @@ macro_rules! dispatch { where $($wheres)* { - $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) + unsafe { + $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) + } } // Now we invoke the right function @@ -1451,6 +1455,7 @@ mod avx2 { let stripe = stripe.as_ptr().cast::<__m256i>(); let secret = secret.as_ptr().cast::<__m256i>(); + unsafe { for i in 0..2 { // [align-acc]: The C code aligns the accumulator to avoid // the unaligned load and store here, but that doesn't @@ -1480,6 +1485,7 @@ mod avx2 { _mm256_storeu_si256(acc.add(i), acc_0); } } + } } #[cfg(target_arch = "x86_64")] @@ -1528,6 +1534,7 @@ mod sse2 { let stripe = stripe.as_ptr().cast::<__m128i>(); let secret = secret.as_ptr().cast::<__m128i>(); + unsafe { for i in 0..4 { // See [align-acc]. 
let mut acc_0 = _mm_loadu_si128(acc.add(i)); @@ -1555,6 +1562,7 @@ mod sse2 { _mm_storeu_si128(acc.add(i), acc_0); } } + } } #[inline] @@ -1645,7 +1653,6 @@ impl U8SliceExt for [u8] { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - #[cfg(target_arch = "aarch64")] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); @@ -1665,7 +1672,6 @@ impl SliceBackport for [T] { (head, tail) } - #[cfg(target_arch = "aarch64")] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; From 31a751c9cdcbc49d45a2282f95c8309ee276f743 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 11:00:18 -0400 Subject: [PATCH 133/166] x86 --- src/xxhash3_64.rs | 80 +++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 838790af0..304788a19 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1456,36 +1456,36 @@ mod avx2 { let secret = secret.as_ptr().cast::<__m256i>(); unsafe { - for i in 0..2 { - // [align-acc]: The C code aligns the accumulator to avoid - // the unaligned load and store here, but that doesn't - // seem to be a big performance loss. - let mut acc_0 = _mm256_loadu_si256(acc.add(i)); - let stripe_0 = _mm256_loadu_si256(stripe.add(i)); - let secret_0 = _mm256_loadu_si256(secret.add(i)); + for i in 0..2 { + // [align-acc]: The C code aligns the accumulator to avoid + // the unaligned load and store here, but that doesn't + // seem to be a big performance loss. 
+ let mut acc_0 = _mm256_loadu_si256(acc.add(i)); + let stripe_0 = _mm256_loadu_si256(stripe.add(i)); + let secret_0 = _mm256_loadu_si256(secret.add(i)); - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm256_xor_si256(stripe_0, secret_0); + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm256_xor_si256(stripe_0, secret_0); - // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); - // acc[i] += stripe_swap[i] - acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); + // acc[i] += stripe_swap[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); - // value_shift[i] = value[i] >> 32 - let value_shift_0 = _mm256_srli_epi64::<32>(value_0); + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm256_srli_epi64::<32>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) - let product_0 = _mm256_mul_epu32(value_0, value_shift_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm256_mul_epu32(value_0, value_shift_0); - // acc[i] += product[i] - acc_0 = _mm256_add_epi64(acc_0, product_0); + // acc[i] += product[i] + acc_0 = _mm256_add_epi64(acc_0, product_0); - _mm256_storeu_si256(acc.add(i), acc_0); + _mm256_storeu_si256(acc.add(i), acc_0); + } } } - } } #[cfg(target_arch = "x86_64")] @@ -1535,34 +1535,34 @@ mod sse2 { let secret = secret.as_ptr().cast::<__m128i>(); unsafe { - for i in 0..4 { - // See [align-acc]. - let mut acc_0 = _mm_loadu_si128(acc.add(i)); - let stripe_0 = _mm_loadu_si128(stripe.add(i)); - let secret_0 = _mm_loadu_si128(secret.add(i)); + for i in 0..4 { + // See [align-acc]. 
+ let mut acc_0 = _mm_loadu_si128(acc.add(i)); + let stripe_0 = _mm_loadu_si128(stripe.add(i)); + let secret_0 = _mm_loadu_si128(secret.add(i)); - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm_xor_si128(stripe_0, secret_0); + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm_xor_si128(stripe_0, secret_0); - // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); - // acc[i] += stripe_swap[i] - acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); + // acc[i] += stripe_swap[i] + acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); - // value_shift[i] = value[i] >> 32 - let value_shift_0 = _mm_srli_epi64::<32>(value_0); + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm_srli_epi64::<32>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) - let product_0 = _mm_mul_epu32(value_0, value_shift_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm_mul_epu32(value_0, value_shift_0); - // acc[i] += product[i] - acc_0 = _mm_add_epi64(acc_0, product_0); + // acc[i] += product[i] + acc_0 = _mm_add_epi64(acc_0, product_0); - _mm_storeu_si128(acc.add(i), acc_0); + _mm_storeu_si128(acc.add(i), acc_0); + } } } - } } #[inline] From 2391766afa1d3f2ad09bafb2395a6a818a604256 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 11:15:11 -0400 Subject: [PATCH 134/166] safety --- src/xxhash3_64.rs | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 304788a19..574fb8c8d 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1248,13 +1248,22 @@ mod neon { } } - #[inline] + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature #[target_feature(enable = "neon")] + #[inline] unsafe fn round_scramble_neon(acc: &mut [u64; 
8], secret_end: &[u8; 64]) { - unsafe { - let secret_base = secret_end.as_ptr().cast::(); - let (acc, _) = acc.bp_as_chunks_mut::<2>(); - for (i, acc) in acc.iter_mut().enumerate() { + let secret_base = secret_end.as_ptr().cast::(); + let (acc, _) = acc.bp_as_chunks_mut::<2>(); + + for (i, acc) in acc.iter_mut().enumerate() { + // Safety: The caller has ensured we have the NEON + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. + unsafe { let mut accv = vld1q_u64(acc.as_ptr()); let secret = vld1q_u64(secret_base.add(i * 2)); @@ -1275,14 +1284,23 @@ mod neon { } } - // We process 4x u64 at a time as that allows us to completely - // fill a `uint64x2_t` with useful values when performing the - // multiplication. + /// We process 4x u64 at a time as that allows us to completely + /// fill a `uint64x2_t` with useful values when performing the + /// multiplication. + /// + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature #[target_feature(enable = "neon")] #[inline] unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let (acc2, _) = acc.bp_as_chunks_mut::<4>(); for (i, acc) in acc2.iter_mut().enumerate() { + // Safety: The caller has ensured we have the NEON + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. 
unsafe { let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); From b6825e696458619ee6ca55634bd3b39111c7ec9e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 11:39:50 -0400 Subject: [PATCH 135/166] safety --- clippy.toml | 1 + src/xxhash3_64.rs | 116 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 88 insertions(+), 29 deletions(-) create mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 000000000..8483b87c6 --- /dev/null +++ b/clippy.toml @@ -0,0 +1 @@ +check-private-items = true diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 574fb8c8d..20532e4a6 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,5 +1,9 @@ #![allow(missing_docs)] -#![deny(unsafe_op_in_unsafe_fn)] +#![deny( + clippy::missing_safety_doc, + clippy::undocumented_unsafe_blocks, + unsafe_op_in_unsafe_fn +)] use core::{hash, hint::assert_unchecked, mem, slice}; @@ -129,11 +133,20 @@ impl Secret { self.0[119..].first_chunk().unwrap() } + /// # Safety + /// + /// `i` must be less than the number of stripes in the secret + /// ([`Self::n_stripes`][]). #[inline] - fn stripe(&self, i: usize) -> &[u8; 64] { + unsafe fn stripe(&self, i: usize) -> &[u8; 64] { self.reassert_preconditions(); - unsafe { &*self.0.get_unchecked(i * 8..).as_ptr().cast() } + // Safety: The caller has ensured that `i` is + // in-bounds. `&[u8]` and `&[u8; 64]` have the same alignment. 
+ unsafe { + debug_assert!(i < self.n_stripes()); + &*self.0.get_unchecked(i * 8..).as_ptr().cast() + } } #[inline] @@ -162,6 +175,12 @@ impl Secret { self.0.len() } + #[inline] + fn n_stripes(&self) -> usize { + // stripes_per_block + (self.len() - 64) / 8 + } + #[inline(always)] fn reassert_preconditions(&self) { // Safety: The length of the bytes was checked at value @@ -272,10 +291,7 @@ where #[inline] fn n_stripes(&self) -> usize { - let secret = self.secret.as_ref(); - - // stripes_per_block - (secret.len() - 64) / 8 + Self::secret(&self.secret).n_stripes() } /// Returns the secret and buffer values. @@ -285,22 +301,19 @@ where #[inline] fn parts(&self) -> (u64, &Secret, &Buffer) { - let secret = self.secret.as_ref(); - // Safety: We established the length at construction and the - // length is not allowed to change. - let secret = unsafe { Secret::new_unchecked(secret) }; - - (self.seed, secret, &self.buffer) + (self.seed, Self::secret(&self.secret), &self.buffer) } #[inline] fn parts_mut(&mut self) -> (u64, &Secret, &mut Buffer) { - let secret = self.secret.as_ref(); + (self.seed, Self::secret(&self.secret), &mut self.buffer) + } + + fn secret(secret: &S) -> &Secret { + let secret = secret.as_ref(); // Safety: We established the length at construction and the // length is not allowed to change. - let secret = unsafe { Secret::new_unchecked(secret) }; - - (self.seed, secret, &mut self.buffer) + unsafe { Secret::new_unchecked(secret) } } } @@ -459,9 +472,9 @@ impl StripeAccumulator { } #[inline] - fn process_stripe( + fn process_stripe( &mut self, - vector: V, + vector: impl Vector, stripe: &[u8; 64], n_stripes: usize, secret: &Secret, @@ -472,17 +485,20 @@ impl StripeAccumulator { .. 
} = self; - let secret_end = secret.last_stripe(); + // For each stripe - // each stripe - let secret = secret.stripe(*current_stripe); - vector.accumulate(accumulator, stripe, secret); + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. + let secret_stripe = unsafe { secret.stripe(*current_stripe) }; + vector.accumulate(accumulator, stripe, secret_stripe); *current_stripe += 1; + // After a full block's worth if *current_stripe == n_stripes { - // after block's worth + let secret_end = secret.last_stripe(); vector.round_scramble(accumulator, secret_end); + *current_stripe = 0; } } @@ -528,6 +544,7 @@ macro_rules! dispatch { } /// # Safety + /// /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] @@ -542,6 +559,9 @@ macro_rules! dispatch { } } + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] #[cfg(target_arch = "x86_64")] @@ -549,11 +569,15 @@ macro_rules! dispatch { where $($wheres)* { + // Safety: The caller has ensured we have the AVX2 feature unsafe { $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) } } + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] #[cfg(target_arch = "x86_64")] @@ -561,6 +585,7 @@ macro_rules! 
dispatch { where $($wheres)* { + // Safety: The caller has ensured we have the SSE2 feature unsafe { $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) } @@ -1008,7 +1033,10 @@ fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { struct Algorithm(V); -impl Algorithm { +impl Algorithm +where + V: Vector, +{ #[inline] fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { assert_input_range!(241.., input.len()); @@ -1061,8 +1089,11 @@ impl Algorithm { #[inline] fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { - // TODO: [unify] - let secrets = (0..stripes.len()).map(|i| secret.stripe(i)); + let secrets = (0..stripes.len()).map(|i| { + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. + unsafe { secret.stripe(i) } + }); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); @@ -1096,8 +1127,11 @@ impl Algorithm { // except for the last stripe (whether it is full or not) let (stripes, _) = stripes_with_tail(block); - // TODO: [unify] - let secrets = (0..stripes.len()).map(|i| secret.stripe(i)); + let secrets = (0..stripes.len()).map(|i| { + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. 
+ unsafe { secret.stripe(i) } + }); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); @@ -1438,6 +1472,7 @@ mod avx2 { impl Impl { /// # Safety + /// /// You must ensure that the CPU has the AVX2 feature #[inline] pub unsafe fn new_unchecked() -> Impl { @@ -1459,6 +1494,9 @@ mod avx2 { } } + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] unsafe fn round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { @@ -1466,6 +1504,9 @@ mod avx2 { scalar::Impl.round_scramble(acc, secret_end) } + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -1473,6 +1514,11 @@ mod avx2 { let stripe = stripe.as_ptr().cast::<__m256i>(); let secret = secret.as_ptr().cast::<__m256i>(); + // Safety: The caller has ensured we have the AVX2 + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. 
unsafe { for i in 0..2 { // [align-acc]: The C code aligns the accumulator to avoid @@ -1517,6 +1563,7 @@ mod sse2 { impl Impl { /// # Safety + /// /// You must ensure that the CPU has the SSE2 feature #[inline] pub unsafe fn new_unchecked() -> Impl { @@ -1538,6 +1585,9 @@ mod sse2 { } } + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { @@ -1545,6 +1595,9 @@ mod sse2 { scalar::Impl.round_scramble(acc, secret_end) } + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -1552,6 +1605,11 @@ mod sse2 { let stripe = stripe.as_ptr().cast::<__m128i>(); let secret = secret.as_ptr().cast::<__m128i>(); + // Safety: The caller has ensured we have the SSE2 + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. unsafe { for i in 0..4 { // See [align-acc]. 
From c955390e8024fbeb7abb96d9fe0303eccfdb6023 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 12:04:36 -0400 Subject: [PATCH 136/166] Move details to separate modules --- src/xxhash3_64.rs | 643 ++------------------------------------- src/xxhash3_64/avx2.rs | 87 ++++++ src/xxhash3_64/neon.rs | 208 +++++++++++++ src/xxhash3_64/scalar.rs | 68 +++++ src/xxhash3_64/secret.rs | 160 ++++++++++ src/xxhash3_64/sse2.rs | 85 ++++++ 6 files changed, 628 insertions(+), 623 deletions(-) create mode 100644 src/xxhash3_64/avx2.rs create mode 100644 src/xxhash3_64/neon.rs create mode 100644 src/xxhash3_64/scalar.rs create mode 100644 src/xxhash3_64/secret.rs create mode 100644 src/xxhash3_64/sse2.rs diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 20532e4a6..0e4b3a5fe 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -5,10 +5,29 @@ unsafe_op_in_unsafe_fn )] -use core::{hash, hint::assert_unchecked, mem, slice}; +use core::{hash, hint::assert_unchecked, slice}; use crate::{IntoU128, IntoU32, IntoU64}; +mod secret; + +use secret::Secret; + +pub use secret::SECRET_MINIMUM_LENGTH; + +// This module is not `cfg`-gated because it is used by some of the +// SIMD implementations. +mod scalar; + +#[cfg(target_arch = "aarch64")] +mod neon; + +#[cfg(target_arch = "x86_64")] +mod avx2; + +#[cfg(target_arch = "x86_64")] +mod sse2; + const PRIME32_1: u64 = 0x9E3779B1; const PRIME32_2: u64 = 0x85EBCA77; const PRIME32_3: u64 = 0xC2B2AE3D; @@ -40,163 +59,6 @@ const DEFAULT_SECRET_RAW: [u8; 192] = [ // Safety: The default secret is long enough const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; -pub const SECRET_MINIMUM_LENGTH: usize = 136; - -#[repr(transparent)] -struct Secret([u8]); - -impl Secret { - #[inline] - fn new(bytes: &[u8]) -> Result<&Self, ()> { - // Safety: We check for validity before returning. 
- unsafe { - let this = Self::new_unchecked(bytes); - if this.is_valid() { - Ok(this) - } else { - Err(()) // TODO error - } - } - } - - /// # Safety - /// - /// You must ensure that the secret byte length is >= - /// SECRET_MINIMUM_LENGTH. - #[inline] - const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { - // Safety: We are `#[repr(transparent)]`. It's up to the - // caller to ensure the length - unsafe { mem::transmute(bytes) } - } - - #[inline] - fn words_for_0(&self) -> [u64; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0[56..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_ne_bytes) - } - - #[inline] - fn words_for_1_to_3(&self) -> [u32; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0.bp_as_chunks(); - [q[0], q[1]].map(u32::from_ne_bytes) - } - - #[inline] - fn words_for_4_to_8(&self) -> [u64; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0[8..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_ne_bytes) - } - - #[inline] - fn words_for_9_to_16(&self) -> [u64; 4] { - self.reassert_preconditions(); - - let (q, _) = self.0[24..].bp_as_chunks(); - [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) - } - - #[inline] - fn words_for_17_to_128(&self) -> &[[u8; 16]] { - self.reassert_preconditions(); - - let (words, _) = self.0.bp_as_chunks(); - words - } - - #[inline] - fn words_for_127_to_240_part1(&self) -> &[[u8; 16]] { - self.reassert_preconditions(); - - let (ss, _) = self.0.bp_as_chunks(); - ss - } - - #[inline] - fn words_for_127_to_240_part2(&self) -> &[[u8; 16]] { - self.reassert_preconditions(); - - let (ss, _) = self.0[3..].bp_as_chunks(); - ss - } - - #[inline] - fn words_for_127_to_240_part3(&self) -> &[u8; 16] { - self.reassert_preconditions(); - - self.0[119..].first_chunk().unwrap() - } - - /// # Safety - /// - /// `i` must be less than the number of stripes in the secret - /// ([`Self::n_stripes`][]). 
- #[inline] - unsafe fn stripe(&self, i: usize) -> &[u8; 64] { - self.reassert_preconditions(); - - // Safety: The caller has ensured that `i` is - // in-bounds. `&[u8]` and `&[u8; 64]` have the same alignment. - unsafe { - debug_assert!(i < self.n_stripes()); - &*self.0.get_unchecked(i * 8..).as_ptr().cast() - } - } - - #[inline] - fn last_stripe(&self) -> &[u8; 64] { - self.reassert_preconditions(); - - self.0.last_chunk().unwrap() - } - - #[inline] - fn last_stripe_secret_better_name(&self) -> &[u8; 64] { - self.reassert_preconditions(); - - self.0[self.0.len() - 71..].first_chunk().unwrap() - } - - #[inline] - fn final_secret(&self) -> &[u8; 64] { - self.reassert_preconditions(); - - self.0[11..].first_chunk().unwrap() - } - - #[inline] - fn len(&self) -> usize { - self.0.len() - } - - #[inline] - fn n_stripes(&self) -> usize { - // stripes_per_block - (self.len() - 64) / 8 - } - - #[inline(always)] - fn reassert_preconditions(&self) { - // Safety: The length of the bytes was checked at value - // construction time. - unsafe { - debug_assert!(self.is_valid()); - assert_unchecked(self.is_valid()); - } - } - - #[inline(always)] - fn is_valid(&self) -> bool { - self.0.len() >= SECRET_MINIMUM_LENGTH - } -} - pub struct XxHash3_64 { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, @@ -1176,471 +1038,6 @@ trait Vector: Copy { fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); } -// This module is not `cfg`-gated because it is used by some of the -// SIMD implementations. 
-mod scalar { - use super::{SliceBackport as _, Vector, PRIME32_1}; - - #[derive(Copy, Clone)] - pub struct Impl; - - impl Vector for Impl { - #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - let (last, _) = secret_end.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); - } - } - - #[inline] - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let (stripe, _) = stripe.bp_as_chunks(); - let (secret, _) = secret.bp_as_chunks(); - - for i in 0..8 { - let stripe = u64::from_ne_bytes(stripe[i]); - let secret = u64::from_ne_bytes(secret[i]); - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); - } - } - } - - #[inline] - #[cfg(not(target_arch = "aarch64"))] - fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - use super::IntoU64; - - let lhs = (lhs as u32).into_u64(); - let rhs = (rhs as u32).into_u64(); - - let product = lhs.wrapping_mul(rhs); - acc.wrapping_add(product) - } - - #[inline] - // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 - // https://github.com/llvm/llvm-project/issues/98481 - #[cfg(target_arch = "aarch64")] - fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - let res; - - // Safety: We only compute using our argument values and do - // not change memory. 
- unsafe { - core::arch::asm!( - "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", - lhs = in(reg) lhs, - rhs = in(reg) rhs, - acc = in(reg) acc, - res = out(reg) res, - options(pure, nomem, nostack), - ) - } - - res - } -} - -#[cfg(target_arch = "aarch64")] -mod neon { - use core::arch::aarch64::*; - - use super::{SliceBackport as _, Vector, PRIME32_1}; - - #[derive(Copy, Clone)] - pub struct Impl(()); - - impl Impl { - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - #[inline] - pub unsafe fn new_unchecked() -> Self { - Self(()) - } - } - - impl Vector for Impl { - #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // Safety: Type can only be constructed when NEON feature is present - unsafe { round_scramble_neon(acc, secret_end) } - } - - #[inline] - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // Safety: Type can only be constructed when NEON feature is present - unsafe { accumulate_neon(acc, stripe, secret) } - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - #[target_feature(enable = "neon")] - #[inline] - unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - let secret_base = secret_end.as_ptr().cast::(); - let (acc, _) = acc.bp_as_chunks_mut::<2>(); - - for (i, acc) in acc.iter_mut().enumerate() { - // Safety: The caller has ensured we have the NEON - // feature. We load from and store to references so we - // know that data is valid. We use unaligned loads / - // stores. Data manipulation is otherwise done on - // intermediate values. 
- unsafe { - let mut accv = vld1q_u64(acc.as_ptr()); - let secret = vld1q_u64(secret_base.add(i * 2)); - - // tmp[i] = acc[i] >> 47 - let shifted = vshrq_n_u64::<47>(accv); - - // acc[i] ^= tmp[i] - accv = veorq_u64(accv, shifted); - - // acc[i] ^= secret[i] - accv = veorq_u64(accv, secret); - - // acc[i] *= PRIME32_1 - accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); - - vst1q_u64(acc.as_mut_ptr(), accv); - } - } - } - - /// We process 4x u64 at a time as that allows us to completely - /// fill a `uint64x2_t` with useful values when performing the - /// multiplication. - /// - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - #[target_feature(enable = "neon")] - #[inline] - unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let (acc2, _) = acc.bp_as_chunks_mut::<4>(); - for (i, acc) in acc2.iter_mut().enumerate() { - // Safety: The caller has ensured we have the NEON - // feature. We load from and store to references so we - // know that data is valid. We use unaligned loads / - // stores. Data manipulation is otherwise done on - // intermediate values. - unsafe { - let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); - let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); - let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); - let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); - let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); - let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); - - // stripe_rot[i ^ 1] = stripe[i]; - let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); - let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - - // value[i] = stripe[i] ^ secret[i]; - let value_0 = veorq_u64(stripe_0, secret_0); - let value_1 = veorq_u64(stripe_1, secret_1); - - // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] - // - // Each vector has 64-bit values, but we treat them as - // 32-bit and then unzip them. 
This naturally splits - // the upper and lower 32 bits. - let parts_0 = vreinterpretq_u32_u64(value_0); - let parts_1 = vreinterpretq_u32_u64(value_1); - - let hi = vuzp1q_u32(parts_0, parts_1); - let lo = vuzp2q_u32(parts_0, parts_1); - - let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); - let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - - reordering_barrier(sum_0); - reordering_barrier(sum_1); - - // acc[i] += sum[i] - accv_0 = vaddq_u64(accv_0, sum_0); - accv_1 = vaddq_u64(accv_1, sum_1); - - vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); - vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); - }; - } - } - - // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the - // lower 64 bits of the result) operation, so we have to make our - // own out of 32-bit operations . We can simplify by realizing - // that we are always multiplying by a 32-bit number. - // - // The basic algorithm is traditional long multiplication. `[]` - // denotes groups of 32 bits. - // - // [AAAA][BBBB] - // x [CCCC] - // -------------------- - // [BCBC][BCBC] - // + [ACAC][ACAC] - // -------------------- - // [ACBC][BCBC] // 64-bit truncation occurs - // - // This can be written in NEON as a vectorwise wrapping - // multiplication of the high-order chunk of the input (`A`) - // against the constant and then a multiply-widen-and-accumulate - // of the low-order chunk of the input and the constant: - // - // 1. High-order, vectorwise - // - // [AAAA][BBBB] - // x [CCCC][0000] - // -------------------- - // [ACAC][0000] - // - // 2. Low-order, widening - // - // [BBBB] - // x [CCCC] // widening - // -------------------- - // [BCBC][BCBC] - // - // 3. Accumulation - // - // [ACAC][0000] - // + [BCBC][BCBC] // vectorwise - // -------------------- - // [ACBC][BCBC] - // - // Thankfully, NEON has a single multiply-widen-and-accumulate - // operation. 
- #[inline] - pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { - // Safety: We only compute using our argument values and do - // not change memory. - unsafe { - let input_as_u32 = vreinterpretq_u32_u64(input); - let factor = vmov_n_u32(og_factor); - let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); - let factor_striped = vreinterpretq_u32_u64(factor_striped); - - let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); - let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); - - let input_lo = vmovn_u64(input); - vmlal_u32(high_shifted, input_lo, factor) - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - // - // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 - #[inline] - #[target_feature(enable = "neon")] - unsafe fn reordering_barrier(r: uint64x2_t) { - // Safety: The caller has ensured we have the NEON feature. We - // aren't doing anything with the argument, so we shouldn't be - // able to cause unsafety! 
- unsafe { - core::arch::asm!( - "/* {r:v} */", - r = in(vreg) r, - options(nomem, nostack), - ) - } - } -} - -#[cfg(target_arch = "x86_64")] -mod avx2 { - use core::arch::x86_64::*; - - use super::{scalar, Vector}; - - #[derive(Copy, Clone)] - pub struct Impl(()); - - impl Impl { - /// # Safety - /// - /// You must ensure that the CPU has the AVX2 feature - #[inline] - pub unsafe fn new_unchecked() -> Impl { - Impl(()) - } - } - - impl Vector for Impl { - #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // Safety: Type can only be constructed when AVX2 feature is present - unsafe { round_scramble_avx2(acc, secret_end) } - } - - #[inline] - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // Safety: Type can only be constructed when AVX2 feature is present - unsafe { accumulate_avx2(acc, stripe, secret) } - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - unsafe fn round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // The scalar implementation is autovectorized nicely enough - scalar::Impl.round_scramble(acc, secret_end) - } - - /// # Safety - /// - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let acc = acc.as_mut_ptr().cast::<__m256i>(); - let stripe = stripe.as_ptr().cast::<__m256i>(); - let secret = secret.as_ptr().cast::<__m256i>(); - - // Safety: The caller has ensured we have the AVX2 - // feature. We load from and store to references so we - // know that data is valid. We use unaligned loads / - // stores. Data manipulation is otherwise done on - // intermediate values. 
- unsafe { - for i in 0..2 { - // [align-acc]: The C code aligns the accumulator to avoid - // the unaligned load and store here, but that doesn't - // seem to be a big performance loss. - let mut acc_0 = _mm256_loadu_si256(acc.add(i)); - let stripe_0 = _mm256_loadu_si256(stripe.add(i)); - let secret_0 = _mm256_loadu_si256(secret.add(i)); - - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm256_xor_si256(stripe_0, secret_0); - - // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); - - // acc[i] += stripe_swap[i] - acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); - - // value_shift[i] = value[i] >> 32 - let value_shift_0 = _mm256_srli_epi64::<32>(value_0); - - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) - let product_0 = _mm256_mul_epu32(value_0, value_shift_0); - - // acc[i] += product[i] - acc_0 = _mm256_add_epi64(acc_0, product_0); - - _mm256_storeu_si256(acc.add(i), acc_0); - } - } - } -} - -#[cfg(target_arch = "x86_64")] -mod sse2 { - use core::arch::x86_64::*; - - use super::{scalar, Vector}; - - #[derive(Copy, Clone)] - pub struct Impl(()); - - impl Impl { - /// # Safety - /// - /// You must ensure that the CPU has the SSE2 feature - #[inline] - pub unsafe fn new_unchecked() -> Impl { - Impl(()) - } - } - - impl Vector for Impl { - #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // Safety: Type can only be constructed when SSE2 feature is present - unsafe { round_scramble_sse2(acc, secret_end) } - } - - #[inline] - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // Safety: Type can only be constructed when SSE2 feature is present - unsafe { accumulate_sse2(acc, stripe, secret) } - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // The scalar 
implementation is autovectorized nicely enough - scalar::Impl.round_scramble(acc, secret_end) - } - - /// # Safety - /// - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let acc = acc.as_mut_ptr().cast::<__m128i>(); - let stripe = stripe.as_ptr().cast::<__m128i>(); - let secret = secret.as_ptr().cast::<__m128i>(); - - // Safety: The caller has ensured we have the SSE2 - // feature. We load from and store to references so we - // know that data is valid. We use unaligned loads / - // stores. Data manipulation is otherwise done on - // intermediate values. - unsafe { - for i in 0..4 { - // See [align-acc]. - let mut acc_0 = _mm_loadu_si128(acc.add(i)); - let stripe_0 = _mm_loadu_si128(stripe.add(i)); - let secret_0 = _mm_loadu_si128(secret.add(i)); - - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm_xor_si128(stripe_0, secret_0); - - // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); - - // acc[i] += stripe_swap[i] - acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); - - // value_shift[i] = value[i] >> 32 - let value_shift_0 = _mm_srli_epi64::<32>(value_0); - - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) - let product_0 = _mm_mul_epu32(value_0, value_shift_0); - - // acc[i] += product[i] - acc_0 = _mm_add_epi64(acc_0, product_0); - - _mm_storeu_si128(acc.add(i), acc_0); - } - } - } -} - #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; diff --git a/src/xxhash3_64/avx2.rs b/src/xxhash3_64/avx2.rs new file mode 100644 index 000000000..8cfb54f15 --- /dev/null +++ b/src/xxhash3_64/avx2.rs @@ -0,0 +1,87 @@ +use core::arch::x86_64::*; + +use super::{scalar, Vector}; + +#[derive(Copy, Clone)] +pub struct Impl(()); + +impl Impl { + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature + #[inline] + pub unsafe fn 
new_unchecked() -> Impl { + Impl(()) + } +} + +impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // Safety: Type can only be constructed when AVX2 feature is present + unsafe { round_scramble_avx2(acc, secret_end) } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // Safety: Type can only be constructed when AVX2 feature is present + unsafe { accumulate_avx2(acc, stripe, secret) } + } +} + +/// # Safety +/// +/// You must ensure that the CPU has the AVX2 feature +#[inline] +#[target_feature(enable = "avx2")] +unsafe fn round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // The scalar implementation is autovectorized nicely enough + scalar::Impl.round_scramble(acc, secret_end) +} + +/// # Safety +/// +/// You must ensure that the CPU has the AVX2 feature +#[inline] +#[target_feature(enable = "avx2")] +unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let acc = acc.as_mut_ptr().cast::<__m256i>(); + let stripe = stripe.as_ptr().cast::<__m256i>(); + let secret = secret.as_ptr().cast::<__m256i>(); + + // Safety: The caller has ensured we have the AVX2 + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. + unsafe { + for i in 0..2 { + // [align-acc]: The C code aligns the accumulator to avoid + // the unaligned load and store here, but that doesn't + // seem to be a big performance loss. 
+ let mut acc_0 = _mm256_loadu_si256(acc.add(i)); + let stripe_0 = _mm256_loadu_si256(stripe.add(i)); + let secret_0 = _mm256_loadu_si256(secret.add(i)); + + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm256_xor_si256(stripe_0, secret_0); + + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); + + // acc[i] += stripe_swap[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); + + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm256_srli_epi64::<32>(value_0); + + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm256_mul_epu32(value_0, value_shift_0); + + // acc[i] += product[i] + acc_0 = _mm256_add_epi64(acc_0, product_0); + + _mm256_storeu_si256(acc.add(i), acc_0); + } + } +} diff --git a/src/xxhash3_64/neon.rs b/src/xxhash3_64/neon.rs new file mode 100644 index 000000000..372bca749 --- /dev/null +++ b/src/xxhash3_64/neon.rs @@ -0,0 +1,208 @@ +use core::arch::aarch64::*; + +use super::{SliceBackport as _, Vector, PRIME32_1}; + +#[derive(Copy, Clone)] +pub struct Impl(()); + +impl Impl { + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature + #[inline] + pub unsafe fn new_unchecked() -> Self { + Self(()) + } +} + +impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // Safety: Type can only be constructed when NEON feature is present + unsafe { round_scramble_neon(acc, secret_end) } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // Safety: Type can only be constructed when NEON feature is present + unsafe { accumulate_neon(acc, stripe, secret) } + } +} + +/// # Safety +/// +/// You must ensure that the CPU has the NEON feature +#[target_feature(enable = "neon")] +#[inline] +unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + let secret_base = secret_end.as_ptr().cast::(); + let (acc, _) = 
acc.bp_as_chunks_mut::<2>(); + + for (i, acc) in acc.iter_mut().enumerate() { + // Safety: The caller has ensured we have the NEON + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. + unsafe { + let mut accv = vld1q_u64(acc.as_ptr()); + let secret = vld1q_u64(secret_base.add(i * 2)); + + // tmp[i] = acc[i] >> 47 + let shifted = vshrq_n_u64::<47>(accv); + + // acc[i] ^= tmp[i] + accv = veorq_u64(accv, shifted); + + // acc[i] ^= secret[i] + accv = veorq_u64(accv, secret); + + // acc[i] *= PRIME32_1 + accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + + vst1q_u64(acc.as_mut_ptr(), accv); + } + } +} + +/// We process 4x u64 at a time as that allows us to completely +/// fill a `uint64x2_t` with useful values when performing the +/// multiplication. +/// +/// # Safety +/// +/// You must ensure that the CPU has the NEON feature +#[target_feature(enable = "neon")] +#[inline] +unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (acc2, _) = acc.bp_as_chunks_mut::<4>(); + for (i, acc) in acc2.iter_mut().enumerate() { + // Safety: The caller has ensured we have the NEON + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. 
+ unsafe { + let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); + let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); + let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); + let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); + let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); + let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + + // stripe_rot[i ^ 1] = stripe[i]; + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + + // value[i] = stripe[i] ^ secret[i]; + let value_0 = veorq_u64(stripe_0, secret_0); + let value_1 = veorq_u64(stripe_1, secret_1); + + // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] + // + // Each vector has 64-bit values, but we treat them as + // 32-bit and then unzip them. This naturally splits + // the upper and lower 32 bits. + let parts_0 = vreinterpretq_u32_u64(value_0); + let parts_1 = vreinterpretq_u32_u64(value_1); + + let hi = vuzp1q_u32(parts_0, parts_1); + let lo = vuzp2q_u32(parts_0, parts_1); + + let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); + let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); + + reordering_barrier(sum_0); + reordering_barrier(sum_1); + + // acc[i] += sum[i] + accv_0 = vaddq_u64(accv_0, sum_0); + accv_1 = vaddq_u64(accv_1, sum_1); + + vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); + vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); + }; + } +} + +// There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the +// lower 64 bits of the result) operation, so we have to make our +// own out of 32-bit operations . We can simplify by realizing +// that we are always multiplying by a 32-bit number. +// +// The basic algorithm is traditional long multiplication. `[]` +// denotes groups of 32 bits. 
+// +// [AAAA][BBBB] +// x [CCCC] +// -------------------- +// [BCBC][BCBC] +// + [ACAC][ACAC] +// -------------------- +// [ACBC][BCBC] // 64-bit truncation occurs +// +// This can be written in NEON as a vectorwise wrapping +// multiplication of the high-order chunk of the input (`A`) +// against the constant and then a multiply-widen-and-accumulate +// of the low-order chunk of the input and the constant: +// +// 1. High-order, vectorwise +// +// [AAAA][BBBB] +// x [CCCC][0000] +// -------------------- +// [ACAC][0000] +// +// 2. Low-order, widening +// +// [BBBB] +// x [CCCC] // widening +// -------------------- +// [BCBC][BCBC] +// +// 3. Accumulation +// +// [ACAC][0000] +// + [BCBC][BCBC] // vectorwise +// -------------------- +// [ACBC][BCBC] +// +// Thankfully, NEON has a single multiply-widen-and-accumulate +// operation. +#[inline] +pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { + // Safety: We only compute using our argument values and do + // not change memory. + unsafe { + let input_as_u32 = vreinterpretq_u32_u64(input); + let factor = vmov_n_u32(og_factor); + let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); + let factor_striped = vreinterpretq_u32_u64(factor_striped); + + let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); + let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + + let input_lo = vmovn_u64(input); + vmlal_u32(high_shifted, input_lo, factor) + } +} + +/// # Safety +/// +/// You must ensure that the CPU has the NEON feature +// +// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 +#[inline] +#[target_feature(enable = "neon")] +unsafe fn reordering_barrier(r: uint64x2_t) { + // Safety: The caller has ensured we have the NEON feature. We + // aren't doing anything with the argument, so we shouldn't be + // able to cause unsafety! 
+ unsafe { + core::arch::asm!( + "/* {r:v} */", + r = in(vreg) r, + options(nomem, nostack), + ) + } +} diff --git a/src/xxhash3_64/scalar.rs b/src/xxhash3_64/scalar.rs new file mode 100644 index 000000000..3a91464b1 --- /dev/null +++ b/src/xxhash3_64/scalar.rs @@ -0,0 +1,68 @@ +use super::{SliceBackport as _, Vector, PRIME32_1}; + +#[derive(Copy, Clone)] +pub struct Impl; + +impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + let (last, _) = secret_end.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (stripe, _) = stripe.bp_as_chunks(); + let (secret, _) = secret.bp_as_chunks(); + + for i in 0..8 { + let stripe = u64::from_ne_bytes(stripe[i]); + let secret = u64::from_ne_bytes(secret[i]); + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + } + } +} + +#[inline] +#[cfg(not(target_arch = "aarch64"))] +fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + use super::IntoU64; + + let lhs = (lhs as u32).into_u64(); + let rhs = (rhs as u32).into_u64(); + + let product = lhs.wrapping_mul(rhs); + acc.wrapping_add(product) +} + +#[inline] +// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 +// https://github.com/llvm/llvm-project/issues/98481 +#[cfg(target_arch = "aarch64")] +fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + let res; + + // Safety: We only compute using our argument values and do + // not change memory. 
+ unsafe { + core::arch::asm!( + "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", + lhs = in(reg) lhs, + rhs = in(reg) rhs, + acc = in(reg) acc, + res = out(reg) res, + options(pure, nomem, nostack), + ) + } + + res +} diff --git a/src/xxhash3_64/secret.rs b/src/xxhash3_64/secret.rs new file mode 100644 index 000000000..14070d8e7 --- /dev/null +++ b/src/xxhash3_64/secret.rs @@ -0,0 +1,160 @@ +use core::{hint::assert_unchecked, mem}; + +use super::SliceBackport as _; + +pub const SECRET_MINIMUM_LENGTH: usize = 136; + +#[repr(transparent)] +pub struct Secret([u8]); + +impl Secret { + #[inline] + pub fn new(bytes: &[u8]) -> Result<&Self, ()> { + // Safety: We check for validity before returning. + unsafe { + let this = Self::new_unchecked(bytes); + if this.is_valid() { + Ok(this) + } else { + Err(()) // TODO error + } + } + } + + /// # Safety + /// + /// You must ensure that the secret byte length is >= + /// SECRET_MINIMUM_LENGTH. + #[inline] + pub const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { + // Safety: We are `#[repr(transparent)]`. 
It's up to the + // caller to ensure the length + unsafe { mem::transmute(bytes) } + } + + #[inline] + pub fn words_for_0(&self) -> [u64; 2] { + self.reassert_preconditions(); + + let (q, _) = self.0[56..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_ne_bytes) + } + + #[inline] + pub fn words_for_1_to_3(&self) -> [u32; 2] { + self.reassert_preconditions(); + + let (q, _) = self.0.bp_as_chunks(); + [q[0], q[1]].map(u32::from_ne_bytes) + } + + #[inline] + pub fn words_for_4_to_8(&self) -> [u64; 2] { + self.reassert_preconditions(); + + let (q, _) = self.0[8..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_ne_bytes) + } + + #[inline] + pub fn words_for_9_to_16(&self) -> [u64; 4] { + self.reassert_preconditions(); + + let (q, _) = self.0[24..].bp_as_chunks(); + [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) + } + + #[inline] + pub fn words_for_17_to_128(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (words, _) = self.0.bp_as_chunks(); + words + } + + #[inline] + pub fn words_for_127_to_240_part1(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (ss, _) = self.0.bp_as_chunks(); + ss + } + + #[inline] + pub fn words_for_127_to_240_part2(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (ss, _) = self.0[3..].bp_as_chunks(); + ss + } + + #[inline] + pub fn words_for_127_to_240_part3(&self) -> &[u8; 16] { + self.reassert_preconditions(); + + self.0[119..].first_chunk().unwrap() + } + + /// # Safety + /// + /// `i` must be less than the number of stripes in the secret + /// ([`Self::n_stripes`][]). + #[inline] + pub unsafe fn stripe(&self, i: usize) -> &[u8; 64] { + self.reassert_preconditions(); + + // Safety: The caller has ensured that `i` is + // in-bounds. `&[u8]` and `&[u8; 64]` have the same alignment. 
+ unsafe { + debug_assert!(i < self.n_stripes()); + &*self.0.get_unchecked(i * 8..).as_ptr().cast() + } + } + + #[inline] + pub fn last_stripe(&self) -> &[u8; 64] { + self.reassert_preconditions(); + + self.0.last_chunk().unwrap() + } + + #[inline] + pub fn last_stripe_secret_better_name(&self) -> &[u8; 64] { + self.reassert_preconditions(); + + self.0[self.0.len() - 71..].first_chunk().unwrap() + } + + #[inline] + pub fn final_secret(&self) -> &[u8; 64] { + self.reassert_preconditions(); + + self.0[11..].first_chunk().unwrap() + } + + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + + #[inline] + pub fn n_stripes(&self) -> usize { + // stripes_per_block + (self.len() - 64) / 8 + } + + #[inline(always)] + fn reassert_preconditions(&self) { + // Safety: The length of the bytes was checked at value + // construction time. + unsafe { + debug_assert!(self.is_valid()); + assert_unchecked(self.is_valid()); + } + } + + #[inline(always)] + pub fn is_valid(&self) -> bool { + self.0.len() >= SECRET_MINIMUM_LENGTH + } +} diff --git a/src/xxhash3_64/sse2.rs b/src/xxhash3_64/sse2.rs new file mode 100644 index 000000000..0290038e4 --- /dev/null +++ b/src/xxhash3_64/sse2.rs @@ -0,0 +1,85 @@ +use core::arch::x86_64::*; + +use super::{scalar, Vector}; + +#[derive(Copy, Clone)] +pub struct Impl(()); + +impl Impl { + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature + #[inline] + pub unsafe fn new_unchecked() -> Impl { + Impl(()) + } +} + +impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // Safety: Type can only be constructed when SSE2 feature is present + unsafe { round_scramble_sse2(acc, secret_end) } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // Safety: Type can only be constructed when SSE2 feature is present + unsafe { accumulate_sse2(acc, stripe, secret) } + } +} + +/// # Safety +/// +/// You must ensure that the CPU has 
the SSE2 feature +#[inline] +#[target_feature(enable = "sse2")] +unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // The scalar implementation is autovectorized nicely enough + scalar::Impl.round_scramble(acc, secret_end) +} + +/// # Safety +/// +/// You must ensure that the CPU has the SSE2 feature +#[inline] +#[target_feature(enable = "sse2")] +unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let acc = acc.as_mut_ptr().cast::<__m128i>(); + let stripe = stripe.as_ptr().cast::<__m128i>(); + let secret = secret.as_ptr().cast::<__m128i>(); + + // Safety: The caller has ensured we have the SSE2 + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. + unsafe { + for i in 0..4 { + // See [align-acc]. + let mut acc_0 = _mm_loadu_si128(acc.add(i)); + let stripe_0 = _mm_loadu_si128(stripe.add(i)); + let secret_0 = _mm_loadu_si128(secret.add(i)); + + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm_xor_si128(stripe_0, secret_0); + + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); + + // acc[i] += stripe_swap[i] + acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); + + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm_srli_epi64::<32>(value_0); + + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm_mul_epu32(value_0, value_shift_0); + + // acc[i] += product[i] + acc_0 = _mm_add_epi64(acc_0, product_0); + + _mm_storeu_si128(acc.add(i), acc_0); + } + } +} From f7c221ec73c1735b63f089f3c2af42bb99b93c43 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 13:56:19 -0400 Subject: [PATCH 137/166] Return errors --- src/xxhash3_64.rs | 209 +++++++++++++++++++++++++++++---------- src/xxhash3_64/secret.rs | 18 +++- 2 files changed, 172 insertions(+), 55 
deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 0e4b3a5fe..dc5888318 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -41,7 +41,10 @@ const PRIME_MX2: u64 = 0x9FB21C651E98DF25; const DEFAULT_SEED: u64 = 0; -const DEFAULT_SECRET_RAW: [u8; 192] = [ +pub const DEFAULT_SECRET_LENGTH: usize = 192; +type DefaultSecret = [u8; DEFAULT_SECRET_LENGTH]; + +const DEFAULT_SECRET_RAW: DefaultSecret = [ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, @@ -87,9 +90,22 @@ impl XxHash3_64 { } #[inline] - pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> u64 { - let secret = Secret::new(secret).unwrap(); // TODO: ERROR - impl_oneshot(secret, DEFAULT_SEED, input) + pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result { + let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; + Ok(impl_oneshot(secret, DEFAULT_SEED, input)) + } +} + +/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] +/// bytes. +#[derive(Debug)] +pub struct OneshotWithSecretError(secret::Error); + +impl core::error::Error for OneshotWithSecretError {} + +impl core::fmt::Display for OneshotWithSecretError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.0.fmt(f) } } @@ -101,20 +117,43 @@ type Buffer = [u8; BUFFERED_BYTES]; // Ensure that a full buffer always implies we are in the 241+ byte case. const _: () = assert!(BUFFERED_BYTES > 240); +/// A buffer containing the secret bytes. +/// /// # Safety /// /// Must always return a slice with the same number of elements. pub unsafe trait FixedBuffer: AsRef<[u8]> {} +/// A mutable buffer to contain the secret bytes. +/// +/// # Safety +/// +/// Must always return a slice with the same number of elements. 
The +/// slice must always be the same as that returned from +/// [`AsRef::as_ref`][]. +pub unsafe trait FixedMutBuffer: FixedBuffer + AsMut<[u8]> {} + // Safety: An array will never change size. unsafe impl FixedBuffer for [u8; N] {} +// Safety: An array will never change size. +unsafe impl FixedMutBuffer for [u8; N] {} + // Safety: An array will never change size. unsafe impl FixedBuffer for &[u8; N] {} +// Safety: An array will never change size. +unsafe impl FixedBuffer for &mut [u8; N] {} + +// Safety: An array will never change size. +unsafe impl FixedMutBuffer for &mut [u8; N] {} + // Safety: A plain slice will never change size. unsafe impl FixedBuffer for Box<[u8]> {} +// Safety: A plain slice will never change size. +unsafe impl FixedMutBuffer for Box<[u8]> {} + /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. pub struct SecretBuffer { @@ -130,21 +169,19 @@ where /// Takes the seed, secret, and buffer and performs no /// modifications to them, only validating that the sizes are /// appropriate. - pub fn new(seed: u64, secret: S) -> Result { - let this = Self { - seed, - secret, - buffer: [0; BUFFERED_BYTES], - }; - - if this.is_valid() { - Ok(this) - } else { - Err(this.decompose()) + pub fn new(seed: u64, secret: S) -> Result> { + match Secret::new(secret.as_ref()) { + Ok(_) => Ok(Self { + seed, + secret, + buffer: [0; BUFFERED_BYTES], + }), + Err(e) => Err(SecretTooShortError(e, secret)), } } #[inline(always)] + #[cfg(test)] fn is_valid(&self) -> bool { let secret = self.secret.as_ref(); @@ -156,11 +193,6 @@ where Self::secret(&self.secret).n_stripes() } - /// Returns the secret and buffer values. - pub fn decompose(self) -> S { - self.secret - } - #[inline] fn parts(&self) -> (u64, &Secret, &Buffer) { (self.seed, Self::secret(&self.secret), &self.buffer) @@ -179,7 +211,14 @@ where } } -impl SecretBuffer<&'static [u8; 192]> { +impl SecretBuffer { + /// Returns the secret. 
+ pub fn into_secret(self) -> S { + self.secret + } +} + +impl SecretBuffer<&'static [u8; DEFAULT_SECRET_LENGTH]> { /// Use the default seed and secret values while allocating nothing. /// /// Note that this type may take up a surprising amount of stack space. @@ -215,11 +254,19 @@ mod with_alloc { } } - pub fn with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { - Self { - inner: RawHasher::allocate_with_seed_and_secret(seed, secret), + pub fn with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + Ok(Self { + inner: RawHasher::allocate_with_seed_and_secret(seed, secret)?, _private: (), - } + }) + } + + /// Returns the secret. + pub fn into_secret(self) -> Box<[u8]> { + self.inner.into_secret() } } @@ -269,15 +316,11 @@ mod with_alloc { /// Allocates the temporary buffer and uses the provided seed /// and secret buffer. - pub fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { - let secret = secret.into(); - assert!(secret.len() > SECRET_MINIMUM_LENGTH); // todo result - - Self { - seed, - secret, - buffer: [0; BUFFERED_BYTES], - } + pub fn allocate_with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + Self::new(seed, secret.into()) } } @@ -292,28 +335,89 @@ mod with_alloc { Self::new(SecretBuffer::allocate_with_seed(seed)) } - fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { - Self::new(SecretBuffer::allocate_with_seed_and_secret(seed, secret)) + fn allocate_with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + SecretBuffer::allocate_with_seed_and_secret(seed, secret).map(Self::new) } } } impl SecretBuffer where - S: FixedBuffer + AsMut<[u8]>, + S: FixedMutBuffer, { /// Fills the secret buffer with a secret derived from the seed - /// and the default secret. 
- pub fn with_seed(seed: u64, mut secret: S) -> Result { - let secret_slice: &mut [u8; 192] = match secret.as_mut().try_into() { - Ok(s) => s, - Err(_) => return Err(secret), - }; + /// and the default secret. The secret must be exactly + /// [`DEFAULT_SECRET_LENGTH`][] bytes long. + pub fn with_seed(seed: u64, mut secret: S) -> Result> { + match <&mut DefaultSecret>::try_from(secret.as_mut()) { + Ok(secret_slice) => { + *secret_slice = DEFAULT_SECRET_RAW; + derive_secret(seed, secret_slice); + + Ok(Self { + seed, + secret, + buffer: [0; BUFFERED_BYTES], + }) + } + Err(_) => Err(SecretWithSeedError(secret)), + } + } +} + +/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] +/// bytes. +pub struct SecretTooShortError(secret::Error, S); + +impl SecretTooShortError { + /// Returns the secret. + pub fn into_secret(self) -> S { + self.1 + } +} + +impl core::error::Error for SecretTooShortError {} + +impl core::fmt::Debug for SecretTooShortError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("SecretTooShortError").finish() + } +} + +impl core::fmt::Display for SecretTooShortError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.0.fmt(f) + } +} + +/// The provided secret was not exactly [`DEFAULT_SECRET_LENGTH`][] +/// bytes. +pub struct SecretWithSeedError(S); + +impl SecretWithSeedError { + /// Returns the secret. 
+ pub fn into_secret(self) -> S { + self.0 + } +} - *secret_slice = DEFAULT_SECRET_RAW; - derive_secret(seed, secret_slice); +impl core::error::Error for SecretWithSeedError {} - Self::new(seed, secret) +impl core::fmt::Debug for SecretWithSeedError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("SecretWithSeedError").finish() + } +} + +impl core::fmt::Display for SecretWithSeedError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "The secret must be exactly {DEFAULT_SECRET_LENGTH} bytes" + ) } } @@ -390,6 +494,11 @@ impl RawHasher { total_bytes: 0, } } + + /// Returns the secret. + pub fn into_secret(self) -> S { + self.secret_buffer.into_secret() + } } macro_rules! dispatch { @@ -675,7 +784,7 @@ where /// This function assumes that the incoming buffer has been populated /// with the default secret. #[inline] -fn derive_secret(seed: u64, secret: &mut [u8; 192]) { +fn derive_secret(seed: u64, secret: &mut DefaultSecret) { if seed == DEFAULT_SEED { return; } @@ -1198,12 +1307,6 @@ mod test { assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid()) } - #[test] - fn secret_buffer_allocate_with_seed_and_secret_is_valid() { - let secret = [42; 1024]; - assert!(SecretBuffer::allocate_with_seed_and_secret(0xdead_beef, secret).is_valid()) - } - macro_rules! bytes { ($($n: literal),* $(,)?) => { &[$(&gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] diff --git a/src/xxhash3_64/secret.rs b/src/xxhash3_64/secret.rs index 14070d8e7..db55597ed 100644 --- a/src/xxhash3_64/secret.rs +++ b/src/xxhash3_64/secret.rs @@ -9,14 +9,14 @@ pub struct Secret([u8]); impl Secret { #[inline] - pub fn new(bytes: &[u8]) -> Result<&Self, ()> { + pub fn new(bytes: &[u8]) -> Result<&Self, Error> { // Safety: We check for validity before returning. 
unsafe { let this = Self::new_unchecked(bytes); if this.is_valid() { Ok(this) } else { - Err(()) // TODO error + Err(Error(())) } } } @@ -158,3 +158,17 @@ impl Secret { self.0.len() >= SECRET_MINIMUM_LENGTH } } + +#[derive(Debug)] +pub struct Error(()); + +impl core::error::Error for Error {} + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "The secret must have at least {SECRET_MINIMUM_LENGTH} bytes" + ) + } +} From 9329ff80c34704278f9e0e0cddbe97192814fb68 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 13:56:37 -0400 Subject: [PATCH 138/166] Add basic docs --- src/xxhash3_64.rs | 22 +++++++++++++++++++++- src/xxhash3_64/secret.rs | 1 + 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index dc5888318..ab46d0e88 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,4 +1,5 @@ -#![allow(missing_docs)] +//! The implementation of XXH3_64. + #![deny( clippy::missing_safety_doc, clippy::undocumented_unsafe_blocks, @@ -41,7 +42,9 @@ const PRIME_MX2: u64 = 0x9FB21C651E98DF25; const DEFAULT_SEED: u64 = 0; +/// The length of the default secret. pub const DEFAULT_SECRET_LENGTH: usize = 192; + type DefaultSecret = [u8; DEFAULT_SECRET_LENGTH]; const DEFAULT_SECRET_RAW: DefaultSecret = [ @@ -62,6 +65,7 @@ const DEFAULT_SECRET_RAW: DefaultSecret = [ // Safety: The default secret is long enough const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; +/// Calculates the 64-bit hash. pub struct XxHash3_64 { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, @@ -69,11 +73,18 @@ pub struct XxHash3_64 { } impl XxHash3_64 { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. 
+ #[must_use] #[inline] pub fn oneshot(input: &[u8]) -> u64 { impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input) } + /// Hash all data at once using the provided seed and a secret + /// derived from the seed. If you can use this function, you may + /// see noticable speed gains for certain types of input. + #[must_use] #[inline] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { let mut secret = DEFAULT_SECRET_RAW; @@ -89,6 +100,9 @@ impl XxHash3_64 { impl_oneshot(secret, seed, input) } + /// Hash all data at once using the provided secret. If you can + /// use this function, you may see noticable speed gains for + /// certain types of input. #[inline] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result { let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; @@ -240,6 +254,7 @@ mod with_alloc { use super::*; impl XxHash3_64 { + /// Constructs the hasher using the default seed and secret values. pub fn new() -> Self { Self { inner: RawHasher::allocate_default(), @@ -247,6 +262,8 @@ mod with_alloc { } } + /// Constructs the hasher using the provided seed and a secret + /// derived from the seed. pub fn with_seed(seed: u64) -> Self { Self { inner: RawHasher::allocate_with_seed(seed), @@ -254,6 +271,7 @@ mod with_alloc { } } + /// Constructs the hasher using the provided seed and secret. pub fn with_seed_and_secret( seed: u64, secret: impl Into>, @@ -486,6 +504,8 @@ pub struct RawHasher { } impl RawHasher { + /// Construct the hasher with the provided seed, secret, and + /// temporary buffer. pub fn new(secret_buffer: SecretBuffer) -> Self { Self { secret_buffer, diff --git a/src/xxhash3_64/secret.rs b/src/xxhash3_64/secret.rs index db55597ed..9741fabb0 100644 --- a/src/xxhash3_64/secret.rs +++ b/src/xxhash3_64/secret.rs @@ -2,6 +2,7 @@ use core::{hint::assert_unchecked, mem}; use super::SliceBackport as _; +/// The minimum length of a secret. 
pub const SECRET_MINIMUM_LENGTH: usize = 136; #[repr(transparent)] From 4bebba7ececf7db832a656f318e6dfc03e6b794e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 14:07:26 -0400 Subject: [PATCH 139/166] error --- compare/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index ddda2fea2..44e171c45 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -292,7 +292,7 @@ mod xxhash3_64 { fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { let native = c::XxHash3_64::oneshot_with_secret(secret, data); - let rust = rust::XxHash3_64::oneshot_with_secret(secret, data); + let rust = rust::XxHash3_64::oneshot_with_secret(secret, data).unwrap(); prop_assert_eq!(native, rust); Ok(()) From dec40462cacaec313a7c1a69459a15d52cf782bb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 14:07:45 -0400 Subject: [PATCH 140/166] Rename --- asmasm/src/main.rs | 4 ++-- compare/benches/benchmark.rs | 2 +- src/lib.rs | 2 +- src/xxhash3_64.rs | 34 +++++++++++++++++----------------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index b1244716b..0561ba274 100644 --- a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,6 +1,6 @@ -use std::{hash::Hasher, time::Instant}; +use std::{hash::Hasher as _, time::Instant}; use xx_hash_sys::XxHash3_64 as C; -use xx_renu::xxhash3_64::XxHash3_64; +use xx_renu::XxHash3_64; fn main() { let filename = std::env::args().nth(1).expect("filename"); diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 0affcbb89..0e48f85a4 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,6 +1,6 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use rand::{Rng, RngCore, SeedableRng}; -use std::{hash::Hasher, iter}; +use std::{hash::Hasher as _, iter}; use xx_hash_sys as c; use xx_renu as rust; diff --git 
a/src/lib.rs b/src/lib.rs index ad243166d..524a2cc5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -97,7 +97,7 @@ pub mod xxhash3_64; #[cfg(feature = "xxhash3_64")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] -pub use xxhash3_64::XxHash3_64; +pub use xxhash3_64::Hasher as XxHash3_64; trait IntoU32 { fn into_u32(self) -> u32; diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ab46d0e88..504f9b0fe 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -66,13 +66,13 @@ const DEFAULT_SECRET_RAW: DefaultSecret = [ const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; /// Calculates the 64-bit hash. -pub struct XxHash3_64 { +pub struct Hasher { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, _private: (), } -impl XxHash3_64 { +impl Hasher { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] @@ -253,7 +253,7 @@ mod with_alloc { use super::*; - impl XxHash3_64 { + impl Hasher { /// Constructs the hasher using the default seed and secret values. pub fn new() -> Self { Self { @@ -288,13 +288,13 @@ mod with_alloc { } } - impl Default for XxHash3_64 { + impl Default for Hasher { fn default() -> Self { Self::new() } } - impl hash::Hasher for XxHash3_64 { + impl hash::Hasher for Hasher { #[inline] fn write(&mut self, input: &[u8]) { self.inner.write(input) @@ -491,7 +491,7 @@ impl StripeAccumulator { /// A lower-level interface for computing a hash from streaming data. /// /// The algorithm requires a secret which can be a reasonably large -/// piece of data. [`XxHash3_64`][] makes one concrete implementation +/// piece of data. [`Hasher`][] makes one concrete implementation /// decision that uses dynamic memory allocation, but specialized /// usages may desire more flexibility. 
This type, combined with /// [`SecretBuffer`][], offer that flexibility at the cost of a @@ -1303,7 +1303,7 @@ impl SliceBackport for [T] { #[cfg(test)] mod test { - use std::{array, hash::Hasher}; + use std::{array, hash::Hasher as _}; use super::*; @@ -1340,7 +1340,7 @@ mod test { } fn hash_byte_by_byte(input: &[u8]) -> u64 { - let mut hasher = XxHash3_64::new(); + let mut hasher = Hasher::new(); for byte in input.chunks(1) { hasher.write(byte) } @@ -1348,7 +1348,7 @@ mod test { } fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 { - let mut hasher = XxHash3_64::with_seed(seed); + let mut hasher = Hasher::with_seed(seed); for byte in input.chunks(1) { hasher.write(byte) } @@ -1357,7 +1357,7 @@ mod test { #[test] fn oneshot_empty() { - let hash = XxHash3_64::oneshot(&[]); + let hash = Hasher::oneshot(&[]); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } @@ -1369,7 +1369,7 @@ mod test { #[test] fn oneshot_1_to_3_bytes() { - test_1_to_3_bytes(XxHash3_64::oneshot) + test_1_to_3_bytes(Hasher::oneshot) } #[test] @@ -1395,7 +1395,7 @@ mod test { #[test] fn oneshot_4_to_8_bytes() { - test_4_to_8_bytes(XxHash3_64::oneshot) + test_4_to_8_bytes(Hasher::oneshot) } #[test] @@ -1423,7 +1423,7 @@ mod test { #[test] fn oneshot_9_to_16_bytes() { - test_9_to_16_bytes(XxHash3_64::oneshot) + test_9_to_16_bytes(Hasher::oneshot) } #[test] @@ -1454,7 +1454,7 @@ mod test { #[test] fn oneshot_17_to_128_bytes() { - test_17_to_128_bytes(XxHash3_64::oneshot) + test_17_to_128_bytes(Hasher::oneshot) } #[test] @@ -1496,7 +1496,7 @@ mod test { #[test] fn oneshot_129_to_240_bytes() { - test_129_to_240_bytes(XxHash3_64::oneshot) + test_129_to_240_bytes(Hasher::oneshot) } #[test] @@ -1530,7 +1530,7 @@ mod test { #[test] fn oneshot_241_plus_bytes() { - test_241_plus_bytes(XxHash3_64::oneshot) + test_241_plus_bytes(Hasher::oneshot) } #[test] @@ -1559,7 +1559,7 @@ mod test { #[test] fn oneshot_with_seed() { - test_with_seed(XxHash3_64::oneshot_with_seed) + 
test_with_seed(Hasher::oneshot_with_seed) } #[test] From 00c5464632b7357e3fa0a61bc7124005b660decb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 22 Aug 2024 12:26:33 -0400 Subject: [PATCH 141/166] Generate comparison graphs from benchmarks --- compare/Cargo.toml | 2 +- compare/README.md | 202 ++++++++++++ compare/benches/benchmark.rs | 305 +++++++++--------- compare/benchmark.sh | 56 ++++ compare/generate-graph.R | 150 +++++++++ compare/prepare-data.jq | 21 ++ .../results/xxhash3_64-streaming-aarch64.svg | 174 ++++++++++ .../results/xxhash3_64-streaming-x86_64.svg | 200 ++++++++++++ .../results/xxhash3_64-tiny_data-aarch64.svg | 126 ++++++++ .../results/xxhash3_64-tiny_data-x86_64.svg | 146 +++++++++ .../results/xxhash64-streaming-aarch64.svg | 116 +++++++ compare/results/xxhash64-streaming-x86_64.svg | 116 +++++++ .../results/xxhash64-tiny_data-aarch64.svg | 120 +++++++ compare/results/xxhash64-tiny_data-x86_64.svg | 120 +++++++ compare/svgo.config.js | 12 + 15 files changed, 1715 insertions(+), 151 deletions(-) create mode 100644 compare/README.md create mode 100755 compare/benchmark.sh create mode 100755 compare/generate-graph.R create mode 100755 compare/prepare-data.jq create mode 100644 compare/results/xxhash3_64-streaming-aarch64.svg create mode 100644 compare/results/xxhash3_64-streaming-x86_64.svg create mode 100644 compare/results/xxhash3_64-tiny_data-aarch64.svg create mode 100644 compare/results/xxhash3_64-tiny_data-x86_64.svg create mode 100644 compare/results/xxhash64-streaming-aarch64.svg create mode 100644 compare/results/xxhash64-streaming-x86_64.svg create mode 100644 compare/results/xxhash64-tiny_data-aarch64.svg create mode 100644 compare/results/xxhash64-tiny_data-x86_64.svg create mode 100644 compare/svgo.config.js diff --git a/compare/Cargo.toml b/compare/Cargo.toml index f71c802db..b6495a7b3 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -11,7 +11,7 @@ name = "benchmark" harness = false [dependencies] -criterion = { 
version = "0.5.1", features = ["real_blackbox"] } +criterion = { version = "0.5.1", features = [] } proptest = "1.5.0" rand = "0.8.5" twox-hash = "1.6.3" diff --git a/compare/README.md b/compare/README.md new file mode 100644 index 000000000..efb868e2b --- /dev/null +++ b/compare/README.md @@ -0,0 +1,202 @@ +# Overview + +Tests compare calling [the reference implementation in +C](https://xxhash.com) against equivalent functions in this crate. No +link-time optimization (LTO) is used, so the C performance numbers +have additional overhead for each function call. + +Click any graph to see it full-size. + +# XXHash64 + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. + +### aarch64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 13.4 | +| C | 13.4 | + +## x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 15.7 | +| C | 15.8 | + + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + XXHash64, streaming data, on an aarch64 processor + + +### x86_64 + + + XXHash64, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 32 bytes of data. + +### aarch64 + + + XXHash64, small data, on an aarch64 processor + + +### x86_64 + + + XXHash64, small data, on an x86_64 processor + + + +# XXHash3 (64-bit) + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. 
+ +### aarch64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 34.8 | +| C | 34.8 | +| C (scalar) | 21.0 | +| C (NEON) | 34.7 | + +### x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 58.3 | +| C | 25.0 | +| C (scalar) | 7.5 | +| C (SSE2) | 25.1 | +| C (AVX2) | 58.1 | + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + XXHash3, 64-bit, streaming data, on an aarch64 processor + + +### x86_64 + + + XXHash3, 64-bit, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 230 bytes of +data. Representative samples are taken from similar times to avoid +cluttering the graph and wasting benchmarking time. + +### aarch64 + + + XXHash3, 64-bit, small data, on an aarch64 processor + + +### x86_64 + + + XXHash3, 64-bit, small data, on an x86_64 processor + + +# Benchmark machines + +## Overview + +| CPU | Memory | C compiler | +|-------------------|--------|--------------------| +| Apple M1 Max | 64 GiB | clang 15.0.0 | +| AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | + +Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. + +## Details + +### aarch64 + + + + + + + + + + + + + + + + +
CPUApple M1 Max
Memory64 GiB
C compilerApple clang version 15.0.0 (clang-1500.3.9.4)
+ +### x86_64 + + + + + + + + + + + + + + + + +
CPUAMD Ryzen 9 3950X 16-Core Processor, 3501 Mhz, 16 Core(s), 32 Logical Processor(s)
Memory32 GiB (3600 MT/s)
C compilerMicrosoft (R) C/C++ Optimizing Compiler Version 19.41.34120 for x86
diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 0e48f85a4..acf5ab8ec 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,79 +1,94 @@ -use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +use criterion::{ + criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion, Throughput, +}; use rand::{Rng, RngCore, SeedableRng}; -use std::{hash::Hasher as _, iter}; +use std::{env::consts::ARCH, hash::Hasher as _, iter}; use xx_hash_sys as c; use xx_renu as rust; -const TINY_DATA_SIZE: usize = 32; const BIG_DATA_SIZE: usize = 4 * 1024 * 1024; const MIN_BIG_DATA_SIZE: usize = 256 * 1024; -const MAX_CHUNKS: usize = 64; const SEED: u64 = 0xc651_4843_1995_363f; -fn tiny_data(c: &mut Criterion) { - let (seed, data) = gen_data(TINY_DATA_SIZE); - let mut g = c.benchmark_group("tiny_data"); - - for size in 0..=data.len() { - let data = &data[..size]; - g.throughput(Throughput::Bytes(data.len() as _)); - - let id = format!("impl-c/fn-oneshot/size-{size:02}"); - g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); - - let id = format!("impl-c/fn-streaming/size-{size:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }) - }); - - let id = format!("impl-rust/fn-oneshot/size-{size:02}"); - g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); - - let id = format!("impl-rust/fn-streaming/size-{size:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }) - }); +trait CriterionExt { + fn my_benchmark_group(&mut self, algo: &str, bench: &str) -> BenchmarkGroup<'_, WallTime>; +} + +impl CriterionExt for Criterion { + fn my_benchmark_group(&mut self, algo: &str, bench: &str) -> BenchmarkGroup<'_, WallTime> { + 
self.benchmark_group(format!("arch-{ARCH}/algo-{algo}/bench-{bench}")) } +} - g.finish(); +fn gen_data(length: usize) -> (u64, Vec) { + let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); + + let seed = rng.gen(); + + let mut data = vec![0; length]; + rng.fill_bytes(&mut data); + + (seed, data) } -fn oneshot(c: &mut Criterion) { - let (seed, data) = gen_data(BIG_DATA_SIZE); - let mut g = c.benchmark_group("oneshot"); +fn half_sizes(max: usize) -> impl Iterator { + iter::successors(Some(max), |&v| if v == 1 { None } else { Some(v / 2) }) +} - for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { - let data = &data[..size]; - g.throughput(Throughput::Bytes(data.len() as _)); +mod xxhash64 { + use super::*; - let id = format!("impl-c/size-{size:07}"); - g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); + const TINY_DATA_SIZE: usize = 32; - let id = format!("impl-rust/size-{size:07}"); - g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); + fn tiny_data(c: &mut Criterion) { + let (seed, data) = gen_data(TINY_DATA_SIZE); + let mut g = c.my_benchmark_group("xxhash64", "tiny_data"); + + for size in 0..=data.len() { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("impl-c/size-{size:02}"); + g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); + + let id = format!("impl-rust/size-{size:02}"); + g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); + } + + g.finish(); } - g.finish(); -} + fn oneshot(c: &mut Criterion) { + let (seed, data) = gen_data(BIG_DATA_SIZE); + let mut g = c.my_benchmark_group("xxhash64", "oneshot"); + + for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); -fn streaming(c: &mut Criterion) { - let mut g = c.benchmark_group("streaming_many_chunks"); + let id = format!("impl-c/size-{size:07}"); + 
g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); + + let id = format!("impl-rust/size-{size:07}"); + g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); + } + + g.finish(); + } + + fn streaming(c: &mut Criterion) { + let mut g = c.my_benchmark_group("xxhash64", "streaming"); + + let size = 1024 * 1024; + let (seed, data) = gen_data(size); + + for chunk_size in half_sizes(size) { + let chunks = data.chunks(chunk_size).collect::>(); - for size in half_sizes(BIG_DATA_SIZE).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { - for n_chunks in half_sizes(MAX_CHUNKS) { - let (seed, chunks) = gen_chunked_data(size, n_chunks); g.throughput(Throughput::Bytes(size as _)); - let id = format!("impl-c/size-{size:07}/chunks-{n_chunks:02}"); + let id = format!("impl-c/size-{size:07}/chunk_size-{chunk_size:02}"); g.bench_function(id, |b| { b.iter(|| { let mut hasher = c::XxHash64::with_seed(seed); @@ -84,7 +99,7 @@ fn streaming(c: &mut Criterion) { }) }); - let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); + let id = format!("impl-rust/size-{size:07}/chunk_size-{chunk_size:02}"); g.bench_function(id, |b| { b.iter(|| { let mut hasher = rust::XxHash64::with_seed(seed); @@ -95,47 +110,11 @@ fn streaming(c: &mut Criterion) { }) }); } - } - - g.finish(); -} - -fn gen_data(length: usize) -> (u64, Vec) { - let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); - - let seed = rng.gen(); - - let mut data = vec![0; length]; - rng.fill_bytes(&mut data); - - (seed, data) -} - -fn gen_chunked_data(length: usize, n_chunks: usize) -> (u64, Vec>) { - assert!(length >= n_chunks); - - let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); - - let seed = rng.gen(); - - let chunk_size = length / n_chunks; - - let mut total = 0; - let mut chunks = Vec::with_capacity(2 * n_chunks); - - while total < length { - let mut data = vec![0; chunk_size]; - rng.fill_bytes(&mut data); - total += data.len(); - chunks.push(data) + g.finish(); } - (seed, chunks) -} 
- -fn half_sizes(max: usize) -> impl Iterator { - iter::successors(Some(max), |&v| if v == 1 { None } else { Some(v / 2) }) + criterion_group!(benches, tiny_data, oneshot, streaming); } mod xxhash3_64 { @@ -143,7 +122,7 @@ mod xxhash3_64 { fn tiny_data(c: &mut Criterion) { let (seed, data) = gen_data(240); - let mut g = c.benchmark_group("xxhash3_64/tiny_data"); + let mut g = c.my_benchmark_group("xxhash3_64", "tiny_data"); // let categories = 0..=data.len(); @@ -157,12 +136,38 @@ mod xxhash3_64 { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("impl-c/fn-oneshot/size-{size:03}"); + let id = format!("impl-c/size-{size:03}"); g.bench_function(id, |b| { b.iter(|| c::XxHash3_64::oneshot_with_seed(seed, data)) }); - let id = format!("impl-rust/fn-oneshot/size-{size:03}"); + let id = format!("impl-c-scalar/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::scalar::XxHash3_64::oneshot_with_seed(seed, data)) + }); + + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::neon::XxHash3_64::oneshot_with_seed(seed, data)) + }); + } + + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::avx2::XxHash3_64::oneshot_with_seed(seed, data)) + }); + + let id = format!("impl-c-sse2/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::sse2::XxHash3_64::oneshot_with_seed(seed, data)) + }); + } + + let id = format!("impl-rust/size-{size:03}"); g.bench_function(id, |b| { b.iter(|| rust::XxHash3_64::oneshot_with_seed(seed, data)) }); @@ -173,7 +178,7 @@ mod xxhash3_64 { fn oneshot(c: &mut Criterion) { let (seed, data) = gen_data(BIG_DATA_SIZE); - let mut g = c.benchmark_group("xxhash3_64/oneshot"); + let mut g = c.my_benchmark_group("xxhash3_64", "oneshot"); for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { let data = &data[..size]; @@ -220,28 
+225,58 @@ mod xxhash3_64 { } fn streaming(c: &mut Criterion) { - let mut g = c.benchmark_group("xxhash3_64/streaming_many_chunks"); + let mut g = c.my_benchmark_group("xxhash3_64", "streaming"); + + let size = 1024 * 1024; + let (seed, data) = gen_data(size); + + for chunk_size in half_sizes(size) { + let chunks = data.chunks(chunk_size).collect::>(); + + g.throughput(Throughput::Bytes(size as _)); - for size in [1024 * 1024] { - for n_chunks in half_sizes(size) { - let (seed, chunks) = gen_chunked_data(size, n_chunks); - g.throughput(Throughput::Bytes(size as _)); + let id = format!("impl-c/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); + + let id = format!("impl-c-scalar/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::scalar::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); - let id = format!("impl-c/size-{size:07}/chunks-{n_chunks:02}"); + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:07}/chunk_size-{chunk_size:07}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = c::XxHash3_64::with_seed(seed); + let mut hasher = c::neon::XxHash3_64::with_seed(seed); for chunk in &chunks { hasher.write(chunk); } hasher.finish() }) }); + } - let id = format!("impl-c-scalar/size-{size:07}/chunks-{n_chunks:02}"); + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:07}/chunk_size-{chunk_size:07}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = c::scalar::XxHash3_64::with_seed(seed); + let mut hasher = c::avx2::XxHash3_64::with_seed(seed); for chunk in &chunks { hasher.write(chunk); } @@ -249,49 +284,10 @@ mod xxhash3_64 { }) }); - #[cfg(target_arch = "aarch64")] - { - let id = 
format!("impl-c-neon/size-{size:07}/chunks-{n_chunks:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::neon::XxHash3_64::with_seed(seed); - for chunk in &chunks { - hasher.write(chunk); - } - hasher.finish() - }) - }); - } - - #[cfg(target_arch = "x86_64")] - { - let id = format!("impl-c-avx2/size-{size:07}/chunks-{n_chunks:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::avx2::XxHash3_64::with_seed(seed); - for chunk in &chunks { - hasher.write(chunk); - } - hasher.finish() - }) - }); - - let id = format!("impl-c-sse2/size-{size:07}/chunks-{n_chunks:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::sse2::XxHash3_64::with_seed(seed); - for chunk in &chunks { - hasher.write(chunk); - } - hasher.finish() - }) - }); - } - - let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); + let id = format!("impl-c-sse2/size-{size:07}/chunk_size-{chunk_size:07}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = rust::XxHash3_64::with_seed(seed); + let mut hasher = c::sse2::XxHash3_64::with_seed(seed); for chunk in &chunks { hasher.write(chunk); } @@ -299,6 +295,17 @@ mod xxhash3_64 { }) }); } + + let id = format!("impl-rust/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = rust::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); } g.finish(); @@ -307,6 +314,4 @@ mod xxhash3_64 { criterion_group!(benches, tiny_data, oneshot, streaming); } -criterion_group!(benches, tiny_data, oneshot, streaming); - -criterion_main!(benches, xxhash3_64::benches); +criterion_main!(xxhash64::benches, xxhash3_64::benches); diff --git a/compare/benchmark.sh b/compare/benchmark.sh new file mode 100755 index 000000000..145cabbcd --- /dev/null +++ b/compare/benchmark.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -eu + +SCRIPT_INVOKED_AS="${0}" +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> 
/dev/null && pwd) + +temp_dir=$(mktemp -d) + +function capture() { + subset="${1:-}" + + raw_data="${temp_dir}/raw-data.streaming-json" + + cargo criterion -p compare --message-format=json -- "${subset}" > "${raw_data}" + + echo "Raw benchmark data captured to ${raw_data}" + echo "Next, run \`${SCRIPT_INVOKED_AS} analyze ${raw_data}\`" +} + +function analyze() { + cleaned_data="${temp_dir}/cleaned-data.streaming-json" + + # Capture our input to keep things consistent + cp "${@}" "${temp_dir}" + + "${SCRIPT_DIR}/prepare-data.jq" "${@}" > "${cleaned_data}" + "${SCRIPT_DIR}/generate-graph.R" "${cleaned_data}" "${temp_dir}" + + svgo \ + --quiet \ + --config "${SCRIPT_DIR}/svgo.config.js" \ + --multipass \ + --pretty \ + --indent 2 \ + --final-newline \ + --recursive \ + "${temp_dir}" + + echo "Graphs saved in ${temp_dir}" +} + +mode="${1:-}" +case "${mode}" in + capture) + capture "${@:2}" + ;; + + analyze) + analyze "${@:2}" + ;; + + *) + echo "Unknown command '${mode}'" + exit 1 + ;; +esac diff --git a/compare/generate-graph.R b/compare/generate-graph.R new file mode 100755 index 000000000..aacf9b09b --- /dev/null +++ b/compare/generate-graph.R @@ -0,0 +1,150 @@ +#!/usr/bin/env Rscript + +library(forcats) +library(ggplot2) +library(nlme) +library(rlang) +library(scales) + +args = commandArgs(trailingOnly = TRUE) + +filename = args[1] +output_dir = args[2] + +make_filename = function(algo, bench, arch) { + paste0(output_dir, "/", algo, "-", bench, "-", arch, ".svg") +} + +log2min = function(x) { 2 ^ floor(log2(min(x))) } +log2max = function(x) { 2 ^ ceiling(log2(max(x))) } + +MiB = 2^20 +GiB = 2^30 +TiB = 2^40 +powers_of_two = 2^(0:40) + +byte_labels_raw = label_bytes(units = "auto_binary") +byte_labels = function(x) { + l = byte_labels_raw(x) + l = gsub(" iB", " B", l) # Why would you call them "iB" + gsub(" kiB", " KiB", l) # That K should be capitalized +} +bytes_per_second_labels = function(x) { + paste0(byte_labels(x), "/sec") +} + +## Load the data +data = 
jsonlite::stream_in(file(filename), verbose = FALSE) + +## Reorder and rename the implementation factor +data$impl = fct_relevel(data$impl, "rust", "c", "c-scalar", "c-neon", "c-sse2", "c-avx2") +impl_names = c("rust" = "Rust", "c" = "C", "c-scalar" = "C (scalar)", "c-neon" = "C (NEON)", "c-sse2" = "C (SSE2)" , "c-avx2" = "C (AVX2)") +impl_name = function(n) { impl_names[n] } + +cpus = c(aarch64 = "Apple M1 Max", x86_64 = "AMD Ryzen 9 3950X") + +common_theme = theme(legend.position = "inside", legend.position.inside = c(0.8, 0.2), plot.margin = unit(c(0.1, 1, 0.1, 0.1), 'cm')) + +for (algo in c("xxhash64", "xxhash3_64")) { + message("# ", algo) + + algo_data = data[data$algo == algo,] + + all_tiny_data = algo_data[algo_data$bench == "tiny_data",] + all_oneshot = algo_data[algo_data$bench == "oneshot",] + all_streaming = algo_data[algo_data$bench == "streaming",] + + ## Convert to a duration type + all_tiny_data$mean_estimate = lubridate::dnanoseconds(all_tiny_data$mean_estimate) + + ## Get bytes per second; the time estimate is in nanoseconds + all_oneshot$throughput = all_oneshot$size/(all_oneshot$mean_estimate / 1e9) + + ## Get bytes per second; the time estimate is in nanoseconds + all_streaming$throughput = all_streaming$size / (all_streaming$mean_estimate / 1e9) + + tiny_data_y_limits = c(min(all_tiny_data$mean_estimate), max(all_tiny_data$mean_estimate)) + oneshot_y_limits = c(log2min(all_oneshot$throughput), log2max(all_oneshot$throughput)) + streaming_y_limits = c(log2min(all_streaming$throughput), log2max(all_streaming$throughput)) + + for (arch in c("aarch64", "x86_64")) { + message("## ", arch) + + oneshot = all_oneshot[all_oneshot$arch == arch,] + tiny_data = all_tiny_data[all_tiny_data$arch == arch,] + streaming = all_streaming[all_streaming$arch == arch,] + + cpu = cpus[arch] + subtitle = paste0(arch, " (", cpu, ")") + + if (nrow(tiny_data) != 0) { + message("### Tiny data") + + title = paste0("[", algo, "] Hashing small amounts of bytes (lower is 
better)") + + p = ggplot(tiny_data, aes(x = size, y = mean_estimate, colour = impl)) + + geom_point(alpha = 0.7) + + geom_line(alpha = 0.3) + + scale_x_continuous(labels = byte_labels) + + scale_y_time(labels = label_timespan(), limits = tiny_data_y_limits) + + scale_colour_brewer(labels = impl_name, palette = "Set1") + + labs(title = title, subtitle = subtitle, x = "Size", y = "Time", colour = "Implementation") + + common_theme + + output_filename = make_filename(algo = algo, bench = "tiny_data", arch = arch) + ggsave(output_filename, width = 3000, height = 2000, units = "px", scale = 1.5) + } + + if (nrow(oneshot) != 0) { + message("### Oneshot") + + fit = lmList(throughput ~ size | impl, data = oneshot, pool = FALSE, na.action = na.pass) + coef = as.data.frame(t(sapply(fit, coefficients))) + speeds = round(coef$"(Intercept)" / GiB, digits = 1) + names(speeds) = rownames(coef) + + impl_name_and_speed = function(n) { + name = impl_name(n) + paste(name, "—", speeds[n], "GiB/sec") + } + + title = paste0("[", algo, "] Throughput to hash a buffer (higher is better)") + + p = ggplot(oneshot, aes(x = size, y = throughput, colour = impl)) + + geom_point(alpha = 0.7) + + geom_line(alpha = 0.3) + + scale_x_continuous(transform = transform_log2(), labels = byte_labels, minor_breaks = NULL) + + scale_y_continuous(transform = transform_log2(), labels = bytes_per_second_labels, breaks = powers_of_two, minor_breaks = NULL, limits = oneshot_y_limits) + + scale_colour_brewer(labels = impl_name_and_speed, palette = "Set1") + + labs(title = title, subtitle = subtitle, x = "Buffer Size", y = "Throughput", colour = "Implementation") + + common_theme + + output_filename = make_filename(algo = algo, bench = "oneshot", arch = arch) + ggsave(output_filename, width = 3000, height = 2000, units = "px", scale = 1.5) + + speeds_table = data.frame(speeds) + rownames(speeds_table) = impl_names[rownames(speeds_table)] + print(speeds_table) + } + + if (nrow(streaming) != 0) { + message("### 
Streaming") + + title = paste0("[", algo, "] Throughput of a 1 MiB buffer by chunk size (higher is better)") + + p = ggplot(streaming, aes(x = chunk_size, y = throughput, colour = impl)) + + geom_point(alpha = 0.7) + + geom_line(alpha = 0.3) + + scale_x_continuous(transform = transform_log2(), labels = byte_labels, breaks = powers_of_two, minor_breaks = NULL) + + scale_y_continuous(transform = transform_log2(), labels = bytes_per_second_labels, breaks = powers_of_two, minor_breaks = NULL, limits = streaming_y_limits) + + scale_colour_brewer(palette = "Set1", labels = impl_name) + + labs(title = title , subtitle = subtitle, x = "Chunk Size", y = "Throughput", colour = "Implementation") + + common_theme + + output_filename = make_filename(algo = algo, bench = "streaming", arch = arch) + ggsave(output_filename, width = 3000, height = 2000, units = "px", scale = 1.5) + } + } +} + +warnings() diff --git a/compare/prepare-data.jq b/compare/prepare-data.jq new file mode 100755 index 000000000..99795add5 --- /dev/null +++ b/compare/prepare-data.jq @@ -0,0 +1,21 @@ +#!/usr/bin/env jq --from-file --compact-output + +select(.reason == "benchmark-complete") | + # Split the ID string into separate fields + ( + .id + | split("/") + | map(split("-") | { key: .[0], value: .[1:] | join("-")}) + | from_entries + # Clean up the separate fields + | .size |= tonumber + | if .chunk_size then .chunk_size |= tonumber end + ) + + + + + # Add the benchmark numbers + { + throughput: .throughput[0].per_iteration, + mean_estimate: .mean.estimate, + } diff --git a/compare/results/xxhash3_64-streaming-aarch64.svg b/compare/results/xxhash3_64-streaming-aarch64.svg new file mode 100644 index 000000000..8e21742b4 --- /dev/null +++ b/compare/results/xxhash3_64-streaming-aarch64.svg @@ -0,0 +1,174 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + 8 MiB/sec + 16 MiB/sec + 32 MiB/sec + 64 MiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + 64 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + + + + + + + Rust + C + C (scalar) + C (NEON) + aarch64 (Apple M1 Max) + [xxhash3_64] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/compare/results/xxhash3_64-streaming-x86_64.svg b/compare/results/xxhash3_64-streaming-x86_64.svg new file mode 100644 index 000000000..38428ce59 --- /dev/null +++ b/compare/results/xxhash3_64-streaming-x86_64.svg @@ -0,0 +1,200 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec + 16 MiB/sec + 32 MiB/sec + 64 MiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + 64 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + + + + + + + + + + Rust + C + C (scalar) + C (SSE2) + C (AVX2) + x86_64 (AMD Ryzen 9 3950X) + [xxhash3_64] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/compare/results/xxhash3_64-tiny_data-aarch64.svg b/compare/results/xxhash3_64-tiny_data-aarch64.svg new file mode 100644 index 000000000..50e2a7f14 --- /dev/null +++ b/compare/results/xxhash3_64-tiny_data-aarch64.svg @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4ns + 8ns + 12ns + 16ns + 20ns + 24ns + + 0 B + 50 B + 100 B + 150 B + 200 B + Size + Time + + Implementation + + + + + + + + + + + + + Rust + C + C (scalar) + C (NEON) + aarch64 (Apple M1 Max) + [xxhash3_64] Hashing small amounts of bytes (lower is better) + + diff --git a/compare/results/xxhash3_64-tiny_data-x86_64.svg b/compare/results/xxhash3_64-tiny_data-x86_64.svg new file mode 100644 index 000000000..671c7e0d7 --- /dev/null +++ b/compare/results/xxhash3_64-tiny_data-x86_64.svg @@ -0,0 +1,146 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4ns + 8ns + 12ns + 16ns + 20ns + 24ns + + 0 B + 50 B + 100 B + 150 B + 200 B + Size + Time + + Implementation + + + + + + + + + + + + + + + + Rust + C + C (scalar) + C (SSE2) + C (AVX2) + x86_64 (AMD Ryzen 9 3950X) + [xxhash3_64] Hashing small amounts of bytes (lower is better) + + diff --git a/compare/results/xxhash64-streaming-aarch64.svg b/compare/results/xxhash64-streaming-aarch64.svg new file mode 100644 index 000000000..effd1c847 --- /dev/null +++ b/compare/results/xxhash64-streaming-aarch64.svg @@ -0,0 +1,116 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + Rust + C + aarch64 (Apple M1 Max) + [xxhash64] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/compare/results/xxhash64-streaming-x86_64.svg 
b/compare/results/xxhash64-streaming-x86_64.svg new file mode 100644 index 000000000..636c7eafc --- /dev/null +++ b/compare/results/xxhash64-streaming-x86_64.svg @@ -0,0 +1,116 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + Rust + C + x86_64 (AMD Ryzen 9 3950X) + [xxhash64] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/compare/results/xxhash64-tiny_data-aarch64.svg b/compare/results/xxhash64-tiny_data-aarch64.svg new file mode 100644 index 000000000..d732a33eb --- /dev/null +++ b/compare/results/xxhash64-tiny_data-aarch64.svg @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 2ns + 4ns + 6ns + 8ns + + 0 B + 10 B + 20 B + 30 B + Size + Time + + Implementation + + + + + + + Rust + C + aarch64 (Apple M1 Max) + [xxhash64] Hashing small amounts of bytes (lower is better) + + diff --git a/compare/results/xxhash64-tiny_data-x86_64.svg b/compare/results/xxhash64-tiny_data-x86_64.svg new file mode 100644 index 000000000..cdbb6396d --- /dev/null +++ b/compare/results/xxhash64-tiny_data-x86_64.svg @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 2ns + 4ns + 6ns + 8ns + + 0 B + 10 B + 20 B + 30 B + Size + Time + + Implementation + + + + + + + Rust + C + x86_64 (AMD Ryzen 9 3950X) + [xxhash64] Hashing small amounts 
of bytes (lower is better) + + diff --git a/compare/svgo.config.js b/compare/svgo.config.js new file mode 100644 index 000000000..1a7c31349 --- /dev/null +++ b/compare/svgo.config.js @@ -0,0 +1,12 @@ +module.exports = { + plugins: [ + { + name: 'preset-default', + params: { + overrides: { + minifyStyles: false, + }, + }, + }, + ], +}; From b0b21c7b8a9acc316ddb6a2cbee80ec4ea6b4189 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 26 Aug 2024 21:04:54 -0400 Subject: [PATCH 142/166] test secret and seed --- compare/src/lib.rs | 45 +++++++++++++++++++++++++++++++++++++++++- src/xxhash3_64.rs | 32 ++++++++++++++++++++++++------ xx_hash-sys/src/lib.rs | 37 ++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 7 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 44e171c45..a4bdbe0bd 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -237,10 +237,15 @@ mod xxhash3_64 { } #[test] - fn oneshot_with_a_secret(secret in prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024), data: Vec) { + fn oneshot_with_a_secret(secret in secret(), data: Vec) { oneshot_with_secret_impl(&secret, &data)?; } + #[test] + fn oneshot_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { + oneshot_with_seed_and_secret_impl(seed, &secret, &data)?; + } + #[test] fn streaming_one_chunk(seed: u64, data: Vec) { streaming_one_chunk_impl(seed, &data)?; @@ -250,6 +255,11 @@ mod xxhash3_64 { fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { streaming_one_chunk_impl(seed, &data[offset..])?; } + + #[test] + fn streaming_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { + streaming_with_seed_and_secret_impl(seed, &secret, &data)?; + } } fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { @@ -298,6 +308,14 @@ mod xxhash3_64 { Ok(()) } + fn oneshot_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = 
c::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data); + let rust = rust::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data).unwrap(); + + prop_assert_eq!(native, rust); + Ok(()) + } + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = { let mut hasher = c::XxHash3_64::with_seed(seed); @@ -314,6 +332,31 @@ mod xxhash3_64 { prop_assert_eq!(native, rust); Ok(()) } + + fn streaming_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_64::with_seed_and_secret(seed, secret); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_64::with_seed_and_secret(seed, secret).unwrap(); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn secret() -> impl Strategy> { + prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024) + } } fn vec_and_index() -> impl Strategy, usize)> { diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 504f9b0fe..217ebce56 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -40,6 +40,8 @@ const PRIME64_5: u64 = 0x27D4EB2F165667C5; const PRIME_MX1: u64 = 0x165667919E3779F9; const PRIME_MX2: u64 = 0x9FB21C651E98DF25; +const CUTOFF: usize = 240; + const DEFAULT_SEED: u64 = 0; /// The length of the default secret. @@ -91,7 +93,7 @@ impl Hasher { // We know that the secret will only be used if we have more // than 240 bytes, so don't waste time computing it otherwise. - if input.len() > 240 { + if input.len() > CUTOFF { derive_secret(seed, &mut secret); } @@ -100,14 +102,32 @@ impl Hasher { impl_oneshot(secret, seed, input) } - /// Hash all data at once using the provided secret. If you can - /// use this function, you may see noticable speed gains for - /// certain types of input. 
+ /// Hash all data at once using the provided secret and the + /// default seed. If you can use this function, you may see + /// noticable speed gains for certain types of input. #[inline] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result { let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; Ok(impl_oneshot(secret, DEFAULT_SEED, input)) } + + /// Hash all data at once using the provided seed and secret. If + /// you can use this function, you may see noticable speed gains + /// for certain types of input. + #[inline] + pub fn oneshot_with_seed_and_secret( + seed: u64, + secret: &[u8], + input: &[u8], + ) -> Result { + let secret = if input.len() > CUTOFF { + Secret::new(secret).map_err(OneshotWithSecretError)? + } else { + DEFAULT_SECRET + }; + + Ok(impl_oneshot(secret, seed, input)) + } } /// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] @@ -129,7 +149,7 @@ const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; type Buffer = [u8; BUFFERED_BYTES]; // Ensure that a full buffer always implies we are in the 241+ byte case. -const _: () = assert!(BUFFERED_BYTES > 240); +const _: () = assert!(BUFFERED_BYTES > CUTOFF); /// A buffer containing the secret bytes. /// @@ -762,7 +782,7 @@ where assert_unchecked(buffer_usage <= buffer.len()) }; - if total_bytes >= 241 { + if total_bytes > CUTOFF { let input = &buffer[..buffer_usage]; // Ingest final stripes diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 6cf194ea1..cbe655c40 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -158,6 +158,13 @@ macro_rules! 
xxh3_64b_template { secret: *const libc::c_void, secret_length: libc::size_t, ) -> crate::XXH64_hash_t; + fn [<$prefix _64bits_withSecretandSeed>]( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH64_hash_t; fn [<$prefix _createState>]() -> *mut crate::XXH3_state_t; fn [<$prefix _64bits_reset>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; @@ -165,6 +172,12 @@ macro_rules! xxh3_64b_template { state: *mut crate::XXH3_state_t, seed: crate::XXH64_hash_t, ) -> crate::XXH_errorcode; + fn [<$prefix _64bits_reset_withSecretandSeed>]( + state: *mut crate::XXH3_state_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH_errorcode; fn [<$prefix _64bits_update>]( state: *mut crate::XXH3_state_t, buffer: *const libc::c_void, @@ -199,6 +212,19 @@ macro_rules! xxh3_64b_template { } } + #[inline] + pub fn oneshot_with_seed_and_secret(seed: u64, secret: &[u8], data: &[u8]) -> u64 { + unsafe { + [<$prefix _64bits_withSecretandSeed>]( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + seed, + ) + } + } + #[inline] pub fn new() -> Self { let state = unsafe { @@ -221,6 +247,17 @@ macro_rules! 
xxh3_64b_template { Self(state) } + #[inline] + pub fn with_seed_and_secret(seed: u64, secret: &[u8]) -> Self { + let state = unsafe { + let state = [<$prefix _createState>](); + [<$prefix _64bits_reset_withSecretandSeed>](state, secret.as_ptr().cast(), secret.len(), seed); + state + }; + + Self(state) + } + #[inline] pub fn write(&mut self, data: &[u8]) { let retval = From 1a2b6580e2011e256ff1d78f0b5eba1cda162ab6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 13:30:45 -0400 Subject: [PATCH 143/166] Rename xx-renu as twox-hash --- renu/Cargo.toml | 15 +++++++++++++-- renu/asmasm/Cargo.toml | 2 +- renu/asmasm/src/main.rs | 2 +- renu/compare/Cargo.toml | 3 +-- renu/compare/benches/benchmark.rs | 2 +- renu/compare/src/lib.rs | 4 ++-- renu/renu-sum/Cargo.toml | 2 +- renu/renu-sum/src/main.rs | 2 +- renu/src/lib.rs | 10 +++++----- 9 files changed, 26 insertions(+), 16 deletions(-) diff --git a/renu/Cargo.toml b/renu/Cargo.toml index 5183cadfb..e62d39cd4 100644 --- a/renu/Cargo.toml +++ b/renu/Cargo.toml @@ -1,8 +1,19 @@ [package] -name = "xx-renu" -version = "0.1.0" +name = "twox-hash" +version = "1.6.3" +authors = ["Jake Goulding "] edition = "2021" +description = "A Rust implementation of the XXHash and XXH3 algorithms" +readme = "README.md" +keywords = ["hash", "hasher", "xxhash", "xxh3"] +categories = ["algorithms"] + +repository = "https://github.com/shepmaster/twox-hash" +documentation = "https://docs.rs/twox-hash/" + +license = "MIT" + [workspace] members = [ "asmasm", diff --git a/renu/asmasm/Cargo.toml b/renu/asmasm/Cargo.toml index 511b782de..1f1f94fde 100644 --- a/renu/asmasm/Cargo.toml +++ b/renu/asmasm/Cargo.toml @@ -4,5 +4,5 @@ version = "0.1.0" edition = "2021" [dependencies] -xx-renu = { path = ".." } +twox-hash = { path = ".." 
} xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/renu/asmasm/src/main.rs b/renu/asmasm/src/main.rs index 0561ba274..db0f82d66 100644 --- a/renu/asmasm/src/main.rs +++ b/renu/asmasm/src/main.rs @@ -1,6 +1,6 @@ use std::{hash::Hasher as _, time::Instant}; +use twox_hash::XxHash3_64; use xx_hash_sys::XxHash3_64 as C; -use xx_renu::XxHash3_64; fn main() { let filename = std::env::args().nth(1).expect("filename"); diff --git a/renu/compare/Cargo.toml b/renu/compare/Cargo.toml index b6495a7b3..bc6b0ee81 100644 --- a/renu/compare/Cargo.toml +++ b/renu/compare/Cargo.toml @@ -14,6 +14,5 @@ harness = false criterion = { version = "0.5.1", features = [] } proptest = "1.5.0" rand = "0.8.5" -twox-hash = "1.6.3" -xx-renu = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } +twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/renu/compare/benches/benchmark.rs b/renu/compare/benches/benchmark.rs index acf5ab8ec..961465ee0 100644 --- a/renu/compare/benches/benchmark.rs +++ b/renu/compare/benches/benchmark.rs @@ -4,8 +4,8 @@ use criterion::{ use rand::{Rng, RngCore, SeedableRng}; use std::{env::consts::ARCH, hash::Hasher as _, iter}; +use twox_hash as rust; use xx_hash_sys as c; -use xx_renu as rust; const BIG_DATA_SIZE: usize = 4 * 1024 * 1024; const MIN_BIG_DATA_SIZE: usize = 256 * 1024; diff --git a/renu/compare/src/lib.rs b/renu/compare/src/lib.rs index a4bdbe0bd..24b28a85b 100644 --- a/renu/compare/src/lib.rs +++ b/renu/compare/src/lib.rs @@ -2,8 +2,8 @@ use proptest::{num, prelude::*}; +use twox_hash as rust; use xx_hash_sys as c; -use xx_renu as rust; mod xxhash32 { use proptest::{prelude::*, test_runner::TestCaseResult}; @@ -206,7 +206,7 @@ mod xxhash64 { mod xxhash3_64 { use proptest::{prelude::*, test_runner::TestCaseResult}; use std::hash::Hasher as _; - use xx_renu::xxhash3_64::SECRET_MINIMUM_LENGTH; + 
use twox_hash::xxhash3_64::SECRET_MINIMUM_LENGTH; use super::*; diff --git a/renu/renu-sum/Cargo.toml b/renu/renu-sum/Cargo.toml index 2677f2ea2..9de3c49a9 100644 --- a/renu/renu-sum/Cargo.toml +++ b/renu/renu-sum/Cargo.toml @@ -4,4 +4,4 @@ version = "0.1.0" edition = "2021" [dependencies] -xx-renu = { path = ".." } +twox-hash = { path = ".." } diff --git a/renu/renu-sum/src/main.rs b/renu/renu-sum/src/main.rs index b86a5e26c..08bbcfdc5 100644 --- a/renu/renu-sum/src/main.rs +++ b/renu/renu-sum/src/main.rs @@ -7,7 +7,7 @@ use std::{ sync::mpsc::{self, SendError}, thread, }; -use xx_renu::XxHash3_64; +use twox_hash::XxHash3_64; type Error = Box; type Result = std::result::Result; diff --git a/renu/src/lib.rs b/renu/src/lib.rs index 524a2cc5a..2ee51fb45 100644 --- a/renu/src/lib.rs +++ b/renu/src/lib.rs @@ -7,7 +7,7 @@ //! ### When all the data is available at once //! //! ```rust -//! use xx_renu::XxHash64; +//! use twox_hash::XxHash64; //! //! let seed = 1234; //! let hash = XxHash64::oneshot(seed, b"some bytes"); @@ -18,7 +18,7 @@ //! //! ```rust //! use std::hash::Hasher as _; -//! use xx_renu::XxHash64; +//! use twox_hash::XxHash64; //! //! let seed = 1234; //! let mut hasher = XxHash64::with_seed(seed); @@ -35,7 +35,7 @@ //! //! ```rust //! use std::{collections::HashMap, hash::BuildHasherDefault}; -//! use xx_renu::XxHash64; +//! use twox_hash::XxHash64; //! //! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); //! hash.insert(42, "the answer"); @@ -46,7 +46,7 @@ //! //! ```rust //! use std::collections::HashMap; -//! use xx_renu::xxhash64; +//! use twox_hash::xxhash64; //! //! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); //! hash.insert(42, "the answer"); @@ -57,7 +57,7 @@ //! //! ```rust //! use std::collections::HashMap; -//! use xx_renu::xxhash64; +//! use twox_hash::xxhash64; //! //! let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); //! 
hash.insert(42, "the answer"); From 58c05665dfdf5022034853e3a892eb3172f5b204 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 13:56:32 -0400 Subject: [PATCH 144/166] Format code that slipped through --- renu/compare/src/lib.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/renu/compare/src/lib.rs b/renu/compare/src/lib.rs index 24b28a85b..7701a8314 100644 --- a/renu/compare/src/lib.rs +++ b/renu/compare/src/lib.rs @@ -333,7 +333,11 @@ mod xxhash3_64 { Ok(()) } - fn streaming_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + fn streaming_with_seed_and_secret_impl( + seed: u64, + secret: &[u8], + data: &[u8], + ) -> TestCaseResult { let native = { let mut hasher = c::XxHash3_64::with_seed_and_secret(seed, secret); for chunk in data.chunks(256) { From 1497bfcbc94ddc34d46b26460f2643ae5c875c98 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 14:15:29 -0400 Subject: [PATCH 145/166] Set minimum Rust version --- renu/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/renu/Cargo.toml b/renu/Cargo.toml index e62d39cd4..837af1d8e 100644 --- a/renu/Cargo.toml +++ b/renu/Cargo.toml @@ -3,6 +3,7 @@ name = "twox-hash" version = "1.6.3" authors = ["Jake Goulding "] edition = "2021" +rust-version = "1.81" description = "A Rust implementation of the XXHash and XXH3 algorithms" readme = "README.md" From 9e3510bc7d3c2480c487e4ffffa688cd9c423a2c Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 14:30:35 -0400 Subject: [PATCH 146/166] Avoid inline assembly when testing with Miri --- renu/src/xxhash3_64/scalar.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renu/src/xxhash3_64/scalar.rs b/renu/src/xxhash3_64/scalar.rs index 3a91464b1..b7a996de5 100644 --- a/renu/src/xxhash3_64/scalar.rs +++ b/renu/src/xxhash3_64/scalar.rs @@ -33,7 +33,7 @@ impl Vector for Impl { } #[inline] -#[cfg(not(target_arch = "aarch64"))] +#[cfg(any(miri, 
not(target_arch = "aarch64")))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use super::IntoU64; @@ -47,7 +47,7 @@ fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 // https://github.com/llvm/llvm-project/issues/98481 -#[cfg(target_arch = "aarch64")] +#[cfg(all(not(miri), target_arch = "aarch64"))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { let res; From c7e348c69521dc862091de9ca3ce620b51769bd5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 14:32:28 -0400 Subject: [PATCH 147/166] Feature `std` implies `alloc` --- renu/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renu/Cargo.toml b/renu/Cargo.toml index 837af1d8e..b0a4ce722 100644 --- a/renu/Cargo.toml +++ b/renu/Cargo.toml @@ -24,7 +24,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "alloc", "std"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] random = ["dep:rand"] @@ -34,8 +34,8 @@ xxhash32 = [] xxhash64 = [] xxhash3_64 = [] -alloc = [] std = ["alloc"] +alloc = [] [lints.rust.unexpected_cfgs] level = "warn" From b664dd08b9a7c77b26b5d64eab89629cd53ea2b2 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 14:40:10 -0400 Subject: [PATCH 148/166] Use correct functions for big- and little-endian --- renu/src/xxhash3_64.rs | 22 +++++++++++----------- renu/src/xxhash3_64/scalar.rs | 6 +++--- renu/src/xxhash3_64/secret.rs | 8 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/renu/src/xxhash3_64.rs b/renu/src/xxhash3_64.rs index 217ebce56..70c84d337 100644 --- a/renu/src/xxhash3_64.rs +++ b/renu/src/xxhash3_64.rs @@ -833,14 +833,14 @@ fn derive_secret(seed: u64, secret: &mut DefaultSecret) { let (pairs, _) = words.bp_as_chunks_mut(); for [a_p, b_p] in pairs { - let a = 
u64::from_ne_bytes(*a_p); - let b = u64::from_ne_bytes(*b_p); + let a = u64::from_le_bytes(*a_p); + let b = u64::from_le_bytes(*b_p); let a = a.wrapping_add(seed); let b = b.wrapping_sub(seed); - *a_p = a.to_ne_bytes(); - *b_p = b.to_ne_bytes(); + *a_p = a.to_le_bytes(); + *b_p = b.to_le_bytes(); } } @@ -1006,7 +1006,7 @@ fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { #[inline] fn to_u64s(bytes: &[u8; 16]) -> [u64; 2] { let (pair, _) = bytes.bp_as_chunks::<8>(); - [pair[0], pair[1]].map(u64::from_ne_bytes) + [pair[0], pair[1]].map(u64::from_le_bytes) } let data_words = to_u64s(data); @@ -1160,8 +1160,8 @@ where for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result let mul_result = { - let sa = u64::from_ne_bytes(secrets[i * 2]); - let sb = u64::from_ne_bytes(secrets[i * 2 + 1]); + let sa = u64::from_le_bytes(secrets[i * 2]); + let sb = u64::from_le_bytes(secrets[i * 2 + 1]); let a = (acc[i * 2] ^ sa).into_u128(); let b = (acc[i * 2 + 1] ^ sb).into_u128(); @@ -1253,22 +1253,22 @@ trait U8SliceExt { impl U8SliceExt for [u8] { #[inline] fn first_u32(&self) -> Option { - self.first_chunk().copied().map(u32::from_ne_bytes) + self.first_chunk().copied().map(u32::from_le_bytes) } #[inline] fn last_u32(&self) -> Option { - self.last_chunk().copied().map(u32::from_ne_bytes) + self.last_chunk().copied().map(u32::from_le_bytes) } #[inline] fn first_u64(&self) -> Option { - self.first_chunk().copied().map(u64::from_ne_bytes) + self.first_chunk().copied().map(u64::from_le_bytes) } #[inline] fn last_u64(&self) -> Option { - self.last_chunk().copied().map(u64::from_ne_bytes) + self.last_chunk().copied().map(u64::from_le_bytes) } } diff --git a/renu/src/xxhash3_64/scalar.rs b/renu/src/xxhash3_64/scalar.rs index b7a996de5..64f6f9fa4 100644 --- a/renu/src/xxhash3_64/scalar.rs +++ b/renu/src/xxhash3_64/scalar.rs @@ -7,7 +7,7 @@ impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { let (last, _) 
= secret_end.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); + let last = last.iter().copied().map(u64::from_le_bytes); for (acc, secret) in acc.iter_mut().zip(last) { *acc ^= *acc >> 47; @@ -22,8 +22,8 @@ impl Vector for Impl { let (secret, _) = secret.bp_as_chunks(); for i in 0..8 { - let stripe = u64::from_ne_bytes(stripe[i]); - let secret = u64::from_ne_bytes(secret[i]); + let stripe = u64::from_le_bytes(stripe[i]); + let secret = u64::from_le_bytes(secret[i]); let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); diff --git a/renu/src/xxhash3_64/secret.rs b/renu/src/xxhash3_64/secret.rs index 9741fabb0..e7210b977 100644 --- a/renu/src/xxhash3_64/secret.rs +++ b/renu/src/xxhash3_64/secret.rs @@ -38,7 +38,7 @@ impl Secret { self.reassert_preconditions(); let (q, _) = self.0[56..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_ne_bytes) + [q[0], q[1]].map(u64::from_le_bytes) } #[inline] @@ -46,7 +46,7 @@ impl Secret { self.reassert_preconditions(); let (q, _) = self.0.bp_as_chunks(); - [q[0], q[1]].map(u32::from_ne_bytes) + [q[0], q[1]].map(u32::from_le_bytes) } #[inline] @@ -54,7 +54,7 @@ impl Secret { self.reassert_preconditions(); let (q, _) = self.0[8..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_ne_bytes) + [q[0], q[1]].map(u64::from_le_bytes) } #[inline] @@ -62,7 +62,7 @@ impl Secret { self.reassert_preconditions(); let (q, _) = self.0[24..].bp_as_chunks(); - [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) + [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) } #[inline] From 39d1b3962d25ac5904ad99c7e39024f387f69e06 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 9 Oct 2024 13:06:02 -0400 Subject: [PATCH 149/166] Overwrite original version with refresh --- .gitignore | 4 +- .gitmodules | 8 +- Cargo.toml | 51 +- {renu/asmasm => asmasm}/Cargo.toml | 0 {renu/asmasm => asmasm}/src/main.rs | 0 renu/clippy.toml => clippy.toml | 0 {renu/compare => comparison}/.gitignore | 0 comparison/Cargo.toml | 25 +- 
comparison/README.md | 207 +- comparison/benches/bench.rs | 105 -- .../benches/benchmark.rs | 0 {renu/compare => comparison}/benchmark.sh | 0 comparison/build.rs | 3 - {renu/compare => comparison}/generate-graph.R | 0 {renu/compare => comparison}/prepare-data.jq | 0 .../results/xxhash3_64-streaming-aarch64.svg | 0 .../results/xxhash3_64-streaming-x86_64.svg | 0 .../results/xxhash3_64-tiny_data-aarch64.svg | 0 .../results/xxhash3_64-tiny_data-x86_64.svg | 0 .../results/xxhash64-streaming-aarch64.svg | 0 .../results/xxhash64-streaming-x86_64.svg | 0 .../results/xxhash64-tiny_data-aarch64.svg | 0 .../results/xxhash64-tiny_data-x86_64.svg | 0 comparison/src/c_xxhash.rs | 52 - comparison/src/lib.rs | 416 +++- {renu/compare => comparison}/svgo.config.js | 0 comparison/xxHash | 1 - compatibility-tests/digest_0_10/Cargo.toml | 9 - compatibility-tests/digest_0_10/src/lib.rs | 142 -- compatibility-tests/digest_0_8/Cargo.toml | 9 - compatibility-tests/digest_0_8/src/lib.rs | 130 -- compatibility-tests/digest_0_9/Cargo.toml | 9 - compatibility-tests/digest_0_9/src/lib.rs | 142 -- renu/.gitignore | 2 - renu/Cargo.toml | 57 - renu/README.md | 16 - renu/compare/Cargo.toml | 18 - renu/compare/README.md | 202 -- renu/compare/src/lib.rs | 378 ---- renu/src/lib.rs | 143 -- src/bin/hash_file.rs | 28 - src/digest_0_10_support.rs | 92 - src/digest_0_9_support.rs | 179 -- src/digest_support.rs | 179 -- src/lib.rs | 196 +- src/sixty_four.rs | 413 ---- src/std_support.rs | 113 -- src/thirty_two.rs | 416 ---- src/xxh3.rs | 1666 ----------------- {renu/src => src}/xxhash32.rs | 0 {renu/src => src}/xxhash3_64.rs | 0 {renu/src => src}/xxhash3_64/avx2.rs | 0 {renu/src => src}/xxhash3_64/neon.rs | 0 {renu/src => src}/xxhash3_64/scalar.rs | 0 {renu/src => src}/xxhash3_64/secret.rs | 0 {renu/src => src}/xxhash3_64/sse2.rs | 0 {renu/src => src}/xxhash64.rs | 0 {renu/renu-sum => twox-hash-sum}/.gitignore | 0 {renu/renu-sum => twox-hash-sum}/Cargo.toml | 2 +- {renu/renu-sum => 
twox-hash-sum}/src/main.rs | 0 {renu/xx_hash-sys => xx_hash-sys}/.gitignore | 0 {renu/xx_hash-sys => xx_hash-sys}/Cargo.toml | 0 {renu/xx_hash-sys => xx_hash-sys}/build.rs | 0 {renu/xx_hash-sys => xx_hash-sys}/src/lib.rs | 0 {renu/xx_hash-sys => xx_hash-sys}/xxHash | 0 65 files changed, 708 insertions(+), 4705 deletions(-) rename {renu/asmasm => asmasm}/Cargo.toml (100%) rename {renu/asmasm => asmasm}/src/main.rs (100%) rename renu/clippy.toml => clippy.toml (100%) rename {renu/compare => comparison}/.gitignore (100%) delete mode 100644 comparison/benches/bench.rs rename {renu/compare => comparison}/benches/benchmark.rs (100%) rename {renu/compare => comparison}/benchmark.sh (100%) delete mode 100644 comparison/build.rs rename {renu/compare => comparison}/generate-graph.R (100%) rename {renu/compare => comparison}/prepare-data.jq (100%) rename {renu/compare => comparison}/results/xxhash3_64-streaming-aarch64.svg (100%) rename {renu/compare => comparison}/results/xxhash3_64-streaming-x86_64.svg (100%) rename {renu/compare => comparison}/results/xxhash3_64-tiny_data-aarch64.svg (100%) rename {renu/compare => comparison}/results/xxhash3_64-tiny_data-x86_64.svg (100%) rename {renu/compare => comparison}/results/xxhash64-streaming-aarch64.svg (100%) rename {renu/compare => comparison}/results/xxhash64-streaming-x86_64.svg (100%) rename {renu/compare => comparison}/results/xxhash64-tiny_data-aarch64.svg (100%) rename {renu/compare => comparison}/results/xxhash64-tiny_data-x86_64.svg (100%) delete mode 100644 comparison/src/c_xxhash.rs rename {renu/compare => comparison}/svgo.config.js (100%) delete mode 160000 comparison/xxHash delete mode 100644 compatibility-tests/digest_0_10/Cargo.toml delete mode 100644 compatibility-tests/digest_0_10/src/lib.rs delete mode 100644 compatibility-tests/digest_0_8/Cargo.toml delete mode 100644 compatibility-tests/digest_0_8/src/lib.rs delete mode 100644 compatibility-tests/digest_0_9/Cargo.toml delete mode 100644 
compatibility-tests/digest_0_9/src/lib.rs delete mode 100644 renu/.gitignore delete mode 100644 renu/Cargo.toml delete mode 100644 renu/README.md delete mode 100644 renu/compare/Cargo.toml delete mode 100644 renu/compare/README.md delete mode 100644 renu/compare/src/lib.rs delete mode 100644 renu/src/lib.rs delete mode 100644 src/bin/hash_file.rs delete mode 100644 src/digest_0_10_support.rs delete mode 100644 src/digest_0_9_support.rs delete mode 100644 src/digest_support.rs delete mode 100644 src/sixty_four.rs delete mode 100644 src/std_support.rs delete mode 100644 src/thirty_two.rs delete mode 100644 src/xxh3.rs rename {renu/src => src}/xxhash32.rs (100%) rename {renu/src => src}/xxhash3_64.rs (100%) rename {renu/src => src}/xxhash3_64/avx2.rs (100%) rename {renu/src => src}/xxhash3_64/neon.rs (100%) rename {renu/src => src}/xxhash3_64/scalar.rs (100%) rename {renu/src => src}/xxhash3_64/secret.rs (100%) rename {renu/src => src}/xxhash3_64/sse2.rs (100%) rename {renu/src => src}/xxhash64.rs (100%) rename {renu/renu-sum => twox-hash-sum}/.gitignore (100%) rename {renu/renu-sum => twox-hash-sum}/Cargo.toml (79%) rename {renu/renu-sum => twox-hash-sum}/src/main.rs (100%) rename {renu/xx_hash-sys => xx_hash-sys}/.gitignore (100%) rename {renu/xx_hash-sys => xx_hash-sys}/Cargo.toml (100%) rename {renu/xx_hash-sys => xx_hash-sys}/build.rs (100%) rename {renu/xx_hash-sys => xx_hash-sys}/src/lib.rs (100%) rename {renu/xx_hash-sys => xx_hash-sys}/xxHash (100%) diff --git a/.gitignore b/.gitignore index a9d37c560..1b72444ae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ -target -Cargo.lock +/Cargo.lock +/target diff --git a/.gitmodules b/.gitmodules index 7ae1d298f..532b0627b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,3 @@ -[submodule "comparison/xxHash"] - path = comparison/xxHash - url = https://github.com/Cyan4973/xxHash.git - -[submodule "renu/xx_hash-sys/xxHash"] - path = renu/xx_hash-sys/xxHash +[submodule "xxHash"] + path = 
xx_hash-sys/xxHash url = https://github.com/Cyan4973/xxHash.git diff --git a/Cargo.toml b/Cargo.toml index 57937abd9..426cd074e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,8 @@ name = "twox-hash" version = "1.6.3" authors = ["Jake Goulding "] -edition = "2018" +edition = "2021" +rust-version = "1.81" description = "A Rust implementation of the XXHash and XXH3 algorithms" readme = "README.md" @@ -14,19 +15,43 @@ documentation = "https://docs.rs/twox-hash/" license = "MIT" +[workspace] +members = [ + "asmasm", + "comparison", + "twox-hash-sum", + "xx_hash-sys", +] + +[features] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] + +random = ["dep:rand"] + +serialize = ["dep:serde"] + +xxhash32 = [] +xxhash64 = [] +xxhash3_64 = [] + +std = ["alloc"] +alloc = [] + +[lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = [ + 'cfg(_internal_xxhash3_force_scalar)', + 'cfg(_internal_xxhash3_force_neon)', + 'cfg(_internal_xxhash3_force_sse2)', + 'cfg(_internal_xxhash3_force_avx2)', +] + [dependencies] -cfg-if = { version = ">= 0.1, < 2", default-features = false } -static_assertions = { version = "1.0", default-features = false } -rand = { version = ">= 0.3.10, < 0.9", optional = true } -serde = { version = "1.0", features = ["derive"], optional = true} -digest = { package = "digest", version = "0.8", default-features = false, optional = true } -digest_0_9 = { package = "digest", version = "0.9", default-features = false, optional = true } -digest_0_10 = { package = "digest", version = "0.10", default-features = false, optional = true } +rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } +serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } [dev-dependencies] -serde_json = "1.0" +serde_json = "1.0.117" -[features] -default = ["std"] -serialize = ["serde"] -std = ["rand"] +[package.metadata.docs.rs] +all-features = true diff --git 
a/renu/asmasm/Cargo.toml b/asmasm/Cargo.toml similarity index 100% rename from renu/asmasm/Cargo.toml rename to asmasm/Cargo.toml diff --git a/renu/asmasm/src/main.rs b/asmasm/src/main.rs similarity index 100% rename from renu/asmasm/src/main.rs rename to asmasm/src/main.rs diff --git a/renu/clippy.toml b/clippy.toml similarity index 100% rename from renu/clippy.toml rename to clippy.toml diff --git a/renu/compare/.gitignore b/comparison/.gitignore similarity index 100% rename from renu/compare/.gitignore rename to comparison/.gitignore diff --git a/comparison/Cargo.toml b/comparison/Cargo.toml index b4168647d..80cc76cb6 100644 --- a/comparison/Cargo.toml +++ b/comparison/Cargo.toml @@ -1,23 +1,18 @@ [package] -authors = ["Jake Goulding "] name = "comparison" version = "0.1.0" -edition = "2018" +edition = "2021" + +[lib] +bench = false [[bench]] -name = "bench" +name = "benchmark" harness = false [dependencies] -fnv = "1.0" -rand = "0.6.5" -twox-hash = { path = ".." } -libc = "0.2.53" -criterion = "0.2.11" -proptest = "0.9.3" - -[features] -bench = [] - -[build-dependencies] -cc = "1.0.36" +criterion = { version = "0.5.1", features = [] } +proptest = "1.5.0" +rand = "0.8.5" +twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } +xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/comparison/README.md b/comparison/README.md index 68c29ee51..efb868e2b 100644 --- a/comparison/README.md +++ b/comparison/README.md @@ -1,7 +1,202 @@ -This is just a crate for sanity checks and performance tests. Pay no -attention to the man behind the curtain. +# Overview -``` -cargo test -cargo bench -``` +Tests compare calling [the reference implementation in +C](https://xxhash.com) against equivalent functions in this crate. No +link-time optimization (LTO) is used, so the C performance numbers +have additional overhead for each function call. + +Click any graph to see it full-size. 
+ +# XXHash64 + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. + +### aarch64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 13.4 | +| C | 13.4 | + +## x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 15.7 | +| C | 15.8 | + + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + XXHash64, streaming data, on an aarch64 processor + + +### x86_64 + + + XXHash64, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 32 bytes of data. + +### aarch64 + + + XXHash64, small data, on an aarch64 processor + + +### x86_64 + + + XXHash64, small data, on an x86_64 processor + + + +# XXHash3 (64-bit) + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. + +### aarch64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 34.8 | +| C | 34.8 | +| C (scalar) | 21.0 | +| C (NEON) | 34.7 | + +### x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 58.3 | +| C | 25.0 | +| C (scalar) | 7.5 | +| C (SSE2) | 25.1 | +| C (AVX2) | 58.1 | + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + XXHash3, 64-bit, streaming data, on an aarch64 processor + + +### x86_64 + + + XXHash3, 64-bit, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 230 bytes of +data. 
Representative samples are taken from similar times to avoid +cluttering the graph and wasting benchmarking time. + +### aarch64 + + + XXHash3, 64-bit, small data, on an aarch64 processor + + +### x86_64 + + + XXHash3, 64-bit, small data, on an x86_64 processor + + +# Benchmark machines + +## Overview + +| CPU | Memory | C compiler | +|-------------------|--------|--------------------| +| Apple M1 Max | 64 GiB | clang 15.0.0 | +| AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | + +Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. + +## Details + +### aarch64 + + + + + + + + + + + + + + + + +
CPUApple M1 Max
Memory64 GiB
C compilerApple clang version 15.0.0 (clang-1500.3.9.4)
+ +### x86_64 + + + + + + + + + + + + + + + + +
CPUAMD Ryzen 9 3950X 16-Core Processor, 3501 Mhz, 16 Core(s), 32 Logical Processor(s)
Memory32 GiB (3600 MT/s)
C compilerMicrosoft (R) C/C++ Optimizing Compiler Version 19.41.34120 for x86
diff --git a/comparison/benches/bench.rs b/comparison/benches/bench.rs deleted file mode 100644 index 5deee9790..000000000 --- a/comparison/benches/bench.rs +++ /dev/null @@ -1,105 +0,0 @@ -#![deny(rust_2018_idioms)] - -use comparison::{ - c_xxhash::{hash32, hash64, xxh3_hash128, xxh3_hash64}, - hash_once, -}; -use criterion::{ - criterion_group, criterion_main, AxisScale, Bencher, Criterion, ParameterizedBenchmark, - PlotConfiguration, Throughput, -}; -use fnv::FnvHasher; -use rand::{distributions::Standard, rngs::StdRng, Rng, SeedableRng}; -use std::{collections::hash_map::DefaultHasher, env, fmt, hash::Hasher, ops}; -use twox_hash::{xxh3, XxHash32, XxHash64}; - -const INPUT_SIZES: &[usize] = &[0, 1, 4, 16, 23, 32, 47, 128, 256, 512, 1024, 1024 * 1024]; -const OFFSETS: &[usize] = &[0, 1]; - -fn bench_hasher(hasher: impl Fn() -> H) -> impl FnMut(&mut Bencher, &Data) -where - H: Hasher, -{ - move |b, data| b.iter(|| hash_once(hasher(), data)) -} - -fn bench_c(hasher: impl Fn(&[u8]) -> R) -> impl FnMut(&mut Bencher, &Data) { - move |b, data| b.iter(|| hasher(data)) -} - -fn bench_everything(c: &mut Criterion) { - let seed: u64 = env::var("RANDOM_SEED") - .ok() - .and_then(|v| v.parse().ok()) - .unwrap_or_else(rand::random); - eprintln!("Using RANDOM_SEED={}", seed); - - let data: Vec<_> = OFFSETS - .iter() - .flat_map(|&o| { - INPUT_SIZES - .iter() - .map(|&l| Data::new(l, seed, o)) - .collect::>() - }) - .collect(); - - let plot_config = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic); - - let bench = - ParameterizedBenchmark::new("XxHash64", bench_hasher(|| XxHash64::with_seed(0)), data) - .with_function("XxHash32", bench_hasher(|| XxHash32::with_seed(0))) - .with_function("XxHash64 (C)", bench_c(|d| hash64(d, 0))) - .with_function("XxHash32 (C)", bench_c(|d| hash32(d, 0))) - .with_function("xxh3::Hash64", bench_hasher(|| xxh3::Hash64::with_seed(0))) - .with_function( - "xxh3::Hash128", - bench_hasher(|| xxh3::Hash128::with_seed(0)), - ) - 
.with_function("xxh3::hash64", bench_c(|d| xxh3::hash64(d))) - .with_function("xxh3::hash128", bench_c(|d| xxh3::hash128(d))) - .with_function("xxh3::hash64 (C)", bench_c(|d| xxh3_hash64(d, 0))) - .with_function("xxh3::hash128 (C)", bench_c(|d| xxh3_hash128(d, 0))) - .with_function("DefaultHasher", bench_hasher(|| DefaultHasher::new())) - .with_function("FnvHasher", bench_hasher(|| FnvHasher::default())) - .throughput(|data| Throughput::Elements(data.0.len() as u32)) - .plot_config(plot_config); - - c.bench("All Hashers", bench); -} - -struct Data(Vec, usize); - -impl Data { - fn new(len: usize, seed: u64, offset: usize) -> Self { - let mut rng = StdRng::seed_from_u64(seed); - let data = rng.sample_iter(&Standard).take(len + offset).collect(); - Self(data, offset) - } - - #[inline] - fn len(&self) -> usize { - self.0.len() - self.offset() - } - - #[inline] - fn offset(&self) -> usize { - self.1 - } -} - -impl ops::Deref for Data { - type Target = [u8]; - fn deref(&self) -> &[u8] { - &self.0[self.offset()..] 
- } -} - -impl fmt::Debug for Data { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{} bytes/offset {}", self.len(), self.offset()) - } -} - -criterion_group!(benches, bench_everything); -criterion_main!(benches); diff --git a/renu/compare/benches/benchmark.rs b/comparison/benches/benchmark.rs similarity index 100% rename from renu/compare/benches/benchmark.rs rename to comparison/benches/benchmark.rs diff --git a/renu/compare/benchmark.sh b/comparison/benchmark.sh similarity index 100% rename from renu/compare/benchmark.sh rename to comparison/benchmark.sh diff --git a/comparison/build.rs b/comparison/build.rs deleted file mode 100644 index b8c1ef7a5..000000000 --- a/comparison/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - cc::Build::new().file("xxHash/xxhash.c").compile("xxhash"); -} diff --git a/renu/compare/generate-graph.R b/comparison/generate-graph.R similarity index 100% rename from renu/compare/generate-graph.R rename to comparison/generate-graph.R diff --git a/renu/compare/prepare-data.jq b/comparison/prepare-data.jq similarity index 100% rename from renu/compare/prepare-data.jq rename to comparison/prepare-data.jq diff --git a/renu/compare/results/xxhash3_64-streaming-aarch64.svg b/comparison/results/xxhash3_64-streaming-aarch64.svg similarity index 100% rename from renu/compare/results/xxhash3_64-streaming-aarch64.svg rename to comparison/results/xxhash3_64-streaming-aarch64.svg diff --git a/renu/compare/results/xxhash3_64-streaming-x86_64.svg b/comparison/results/xxhash3_64-streaming-x86_64.svg similarity index 100% rename from renu/compare/results/xxhash3_64-streaming-x86_64.svg rename to comparison/results/xxhash3_64-streaming-x86_64.svg diff --git a/renu/compare/results/xxhash3_64-tiny_data-aarch64.svg b/comparison/results/xxhash3_64-tiny_data-aarch64.svg similarity index 100% rename from renu/compare/results/xxhash3_64-tiny_data-aarch64.svg rename to comparison/results/xxhash3_64-tiny_data-aarch64.svg diff --git 
a/renu/compare/results/xxhash3_64-tiny_data-x86_64.svg b/comparison/results/xxhash3_64-tiny_data-x86_64.svg similarity index 100% rename from renu/compare/results/xxhash3_64-tiny_data-x86_64.svg rename to comparison/results/xxhash3_64-tiny_data-x86_64.svg diff --git a/renu/compare/results/xxhash64-streaming-aarch64.svg b/comparison/results/xxhash64-streaming-aarch64.svg similarity index 100% rename from renu/compare/results/xxhash64-streaming-aarch64.svg rename to comparison/results/xxhash64-streaming-aarch64.svg diff --git a/renu/compare/results/xxhash64-streaming-x86_64.svg b/comparison/results/xxhash64-streaming-x86_64.svg similarity index 100% rename from renu/compare/results/xxhash64-streaming-x86_64.svg rename to comparison/results/xxhash64-streaming-x86_64.svg diff --git a/renu/compare/results/xxhash64-tiny_data-aarch64.svg b/comparison/results/xxhash64-tiny_data-aarch64.svg similarity index 100% rename from renu/compare/results/xxhash64-tiny_data-aarch64.svg rename to comparison/results/xxhash64-tiny_data-aarch64.svg diff --git a/renu/compare/results/xxhash64-tiny_data-x86_64.svg b/comparison/results/xxhash64-tiny_data-x86_64.svg similarity index 100% rename from renu/compare/results/xxhash64-tiny_data-x86_64.svg rename to comparison/results/xxhash64-tiny_data-x86_64.svg diff --git a/comparison/src/c_xxhash.rs b/comparison/src/c_xxhash.rs deleted file mode 100644 index 0fa9284c7..000000000 --- a/comparison/src/c_xxhash.rs +++ /dev/null @@ -1,52 +0,0 @@ -mod ffi { - use libc::{c_void, size_t}; - - #[allow(non_camel_case_types)] - type XXH32_hash_t = u32; - - #[allow(non_camel_case_types)] - type XXH64_hash_t = u64; - - #[allow(non_camel_case_types)] - #[repr(C)] - #[derive(Debug, Copy, Clone)] - pub struct XXH128_hash_t { - pub low64: XXH64_hash_t, - pub high64: XXH64_hash_t, - } - - extern "C" { - pub fn XXH32(input: *const c_void, length: size_t, seed: u32) -> XXH32_hash_t; - pub fn XXH64(input: *const c_void, length: size_t, seed: u64) -> XXH64_hash_t; - 
pub fn XXH3_64bits_withSeed( - data: *const ::std::os::raw::c_void, - len: usize, - seed: ::std::os::raw::c_ulonglong, - ) -> XXH64_hash_t; - pub fn XXH3_128bits_withSeed( - data: *const ::std::os::raw::c_void, - len: usize, - seed: ::std::os::raw::c_ulonglong, - ) -> XXH128_hash_t; - } -} - -pub fn hash32(data: &[u8], seed: u32) -> u32 { - unsafe { ffi::XXH32(data.as_ptr() as *const libc::c_void, data.len(), seed) } -} - -pub fn hash64(data: &[u8], seed: u64) -> u64 { - unsafe { ffi::XXH64(data.as_ptr() as *const libc::c_void, data.len(), seed) } -} - -pub fn xxh3_hash64(data: &[u8], seed: u64) -> u64 { - unsafe { ffi::XXH3_64bits_withSeed(data.as_ptr() as *const libc::c_void, data.len(), seed) } -} - -pub fn xxh3_hash128(data: &[u8], seed: u64) -> u128 { - let hash = unsafe { - ffi::XXH3_128bits_withSeed(data.as_ptr() as *const libc::c_void, data.len(), seed) - }; - - u128::from(hash.low64) + (u128::from(hash.high64) << 64) -} diff --git a/comparison/src/lib.rs b/comparison/src/lib.rs index 4a0022ba3..7701a8314 100644 --- a/comparison/src/lib.rs +++ b/comparison/src/lib.rs @@ -1,104 +1,378 @@ -#![deny(rust_2018_idioms)] +#![cfg(test)] -use proptest::{collection::vec as propvec, prelude::*}; -use std::hash::Hasher; -#[cfg(test)] -use twox_hash::{XxHash32, XxHash64}; +use proptest::{num, prelude::*}; -pub mod c_xxhash; +use twox_hash as rust; +use xx_hash_sys as c; -pub fn hash_once(mut hasher: impl Hasher, data: &[u8]) -> u64 { - hasher.write(&data); - hasher.finish() -} +mod xxhash32 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher as _; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u32, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed: u32, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + + #[test] + fn oneshot(seed: u32, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_one_chunk(seed: u32, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } -#[cfg(test)] -fn hash_by_chunks(mut hasher: impl Hasher, mut data: &[u8], chunk_sizes: &[usize]) -> u64 { - for &chunk_size in chunk_sizes { - let (this_chunk, remaining) = data.split_at(chunk_size); - hasher.write(this_chunk); - data = remaining; + #[test] + fn streaming_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } } - hasher.finish() -} + fn oneshot_same_as_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash32::oneshot(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish_32() + }; -prop_compose! 
{ - fn data_and_offset - () - (data in any::>()) - (index in 0..=data.len(), data in Just(data)) - -> (Vec, usize) - { - (data, index) + prop_assert_eq!(oneshot, one_chunk); + Ok(()) + } + + fn oneshot_same_as_many_chunks_impl( + seed: u32, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash32::oneshot(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash32::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish_32() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) + } + + fn oneshot_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let native = c::XxHash32::oneshot(seed, data); + let rust = rust::XxHash32::oneshot(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) } -} -prop_compose! { - fn data_and_chunk_sizes - () - (sizes in propvec(1..=256usize, 0..=100)) - (data in propvec(any::(), sizes.iter().sum::()), sizes in Just(sizes)) - -> (Vec, Vec) - { - (data, sizes) + fn streaming_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish_32() + }; + + prop_assert_eq!(native, rust); + Ok(()) } } -proptest! { - #![proptest_config(ProptestConfig::with_cases(100_000))] +mod xxhash64 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher as _; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + + #[test] + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } - #[test] - fn same_results_as_c_for_64_bit(seed: u64, data: Vec) { - let our_result = hash_once(XxHash64::with_seed(seed), &data); - let their_result = c_xxhash::hash64(&data, seed); + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } - prop_assert_eq!(our_result, their_result); + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } } - #[test] - fn same_results_as_c_with_offset_for_64_bit(seed: u64, (data, offset) in data_and_offset()) { - let data = &data[offset..]; - let our_result = hash_once(XxHash64::with_seed(seed), data); - let their_result = c_xxhash::hash64(data, seed); + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash64::oneshot(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; - prop_assert_eq!(our_result, their_result); + prop_assert_eq!(oneshot, one_chunk); + Ok(()) } - #[test] - fn same_results_as_c_for_32_bit(seed: u32, data: Vec) { - let our_result = hash_once(XxHash32::with_seed(seed), &data); - let their_result = c_xxhash::hash32(&data, seed); + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + 
chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash64::oneshot(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; - prop_assert_eq!(our_result, their_result as u64); + prop_assert_eq!(oneshot, many_chunks); + Ok(()) } - #[test] - fn same_results_as_c_with_offset_for_32_bit(seed: u32, (data, offset) in data_and_offset()) { - let data = &data[offset..]; - let our_result = hash_once(XxHash32::with_seed(seed), data); - let their_result = c_xxhash::hash32(data, seed); + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash64::oneshot(seed, data); + let rust = rust::XxHash64::oneshot(seed, data); - prop_assert_eq!(our_result, their_result as u64); + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) } } -proptest! { - #![proptest_config(ProptestConfig::with_cases(1_000))] +mod xxhash3_64 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher as _; + use twox_hash::xxhash3_64::SECRET_MINIMUM_LENGTH; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } - #[test] - fn same_results_with_many_chunks_as_one_for_64_bit(seed: u64, (data, chunk_sizes) in data_and_chunk_sizes()) { - let chunked_result = hash_by_chunks(XxHash64::with_seed(seed), &data, &chunk_sizes); - let monolithic_result = hash_once(XxHash64::with_seed(seed), &data); + #[test] + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; + } - prop_assert_eq!(chunked_result, monolithic_result); + #[test] + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_with_a_secret(secret in secret(), data: Vec) { + oneshot_with_secret_impl(&secret, &data)?; + } + + #[test] + fn oneshot_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { + oneshot_with_seed_and_secret_impl(seed, &secret, &data)?; + } + + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } + + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { + streaming_with_seed_and_secret_impl(seed, &secret, &data)?; + } + } + + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(oneshot, 
one_chunk); + Ok(()) } - #[test] - fn same_results_with_many_chunks_as_one_for_32_bit(seed: u32, (data, chunk_sizes) in data_and_chunk_sizes()) { - let chunked_result = hash_by_chunks(XxHash32::with_seed(seed), &data, &chunk_sizes); - let monolithic_result = hash_once(XxHash32::with_seed(seed), &data); + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; - prop_assert_eq!(chunked_result, monolithic_result); + prop_assert_eq!(oneshot, many_chunks); + Ok(()) } + + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot_with_seed(seed, data); + let rust = rust::XxHash3_64::oneshot_with_seed(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot_with_secret(secret, data); + let rust = rust::XxHash3_64::oneshot_with_secret(secret, data).unwrap(); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn oneshot_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data); + let rust = rust::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data).unwrap(); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_with_seed_and_secret_impl( + seed: u64, + secret: &[u8], + data: &[u8], + 
) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_64::with_seed_and_secret(seed, secret); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_64::with_seed_and_secret(seed, secret).unwrap(); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn secret() -> impl Strategy> { + prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024) + } +} + +fn vec_and_index() -> impl Strategy, usize)> { + prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| { + let len = vec.len(); + (Just(vec), 0..len) + }) +} + +fn data_and_chunks() -> impl Strategy, Vec>)> { + prop::collection::vec(prop::collection::vec(num::u8::ANY, 0..100), 0..100).prop_map(|vs| { + let data = vs.iter().flatten().copied().collect(); + (data, vs) + }) } diff --git a/renu/compare/svgo.config.js b/comparison/svgo.config.js similarity index 100% rename from renu/compare/svgo.config.js rename to comparison/svgo.config.js diff --git a/comparison/xxHash b/comparison/xxHash deleted file mode 160000 index d7f47bc3b..000000000 --- a/comparison/xxHash +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d7f47bc3bf1ca767b82eda6ada557ba02dc36e83 diff --git a/compatibility-tests/digest_0_10/Cargo.toml b/compatibility-tests/digest_0_10/Cargo.toml deleted file mode 100644 index 65eaadfd4..000000000 --- a/compatibility-tests/digest_0_10/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "digest_0_10" -version = "0.1.0" -authors = ["Jake Goulding "] -edition = "2018" - -[dependencies] -digest = "0.10" -twox-hash = { path = "../..", features = ["digest_0_10"] } diff --git a/compatibility-tests/digest_0_10/src/lib.rs b/compatibility-tests/digest_0_10/src/lib.rs deleted file mode 100644 index 66f336519..000000000 --- a/compatibility-tests/digest_0_10/src/lib.rs +++ /dev/null @@ -1,142 +0,0 @@ -#![cfg(test)] - -use 
digest::Digest; -use twox_hash::{XxHash32, XxHash64}; - -#[test] -fn it_implements_digest() { - fn implements_digest() {} - - implements_digest::(); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_64() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash64::new(); - for byte in bytes.chunks(1) { - byte_by_byte.update(byte); - } - - let mut one_chunk = XxHash64::new(); - one_chunk.update(&bytes); - - assert_eq!(byte_by_byte.finalize(), one_chunk.finalize()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.update(&[]); - assert_eq!( - hasher.finalize()[..], - 0xef46_db37_51d8_e999_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.update(&[42]); - assert_eq!( - hasher.finalize()[..], - 0x0a9e_dece_beb0_3ae4_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_64() { - assert_eq!( - XxHash64::digest(b"Hello, world!\0")[..], - 0x7b06_c531_ea43_e89f_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!( - XxHash64::digest(&bytes)[..], - 0x6ac1_e580_3216_6597_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_64() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.update(&[]); - assert_eq!( - hasher.finalize()[..], - 0x4b6a_04fc_df7a_4672_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.update(&bytes); - assert_eq!( - hasher.finalize()[..], - 0x567e_355e_0682_e1f1_u64.to_be_bytes() - ); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_32() { - let bytes: Vec<_> = 
(0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash32::new(); - for byte in bytes.chunks(1) { - byte_by_byte.update(byte); - } - - let mut one_chunk = XxHash32::new(); - one_chunk.update(&bytes); - - assert_eq!(byte_by_byte.finalize(), one_chunk.finalize()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.update(&[]); - assert_eq!(hasher.finalize()[..], 0x02cc_5d05_u32.to_be_bytes()); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.update(&[42]); - assert_eq!(hasher.finalize()[..], 0xe0fe_705f_u32.to_be_bytes()); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_32() { - assert_eq!( - XxHash32::digest(b"Hello, world!\0")[..], - 0x9e5e_7e93_u32.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!(XxHash32::digest(&bytes)[..], 0x7f89_ba44_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_32() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.update(&[]); - assert_eq!(hasher.finalize()[..], 0xd6bf_8459_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.update(&bytes); - assert_eq!(hasher.finalize()[..], 0x6d2f_6c17_u32.to_be_bytes()); -} diff --git a/compatibility-tests/digest_0_8/Cargo.toml b/compatibility-tests/digest_0_8/Cargo.toml deleted file mode 100644 index 3b5d3d6ae..000000000 --- a/compatibility-tests/digest_0_8/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "digest_0_8" -version = "0.1.0" -authors = ["Jake Goulding "] -edition = "2018" - -[dependencies] -digest = "0.8" -twox-hash = { path = "../..", features = ["digest"] } diff --git a/compatibility-tests/digest_0_8/src/lib.rs 
b/compatibility-tests/digest_0_8/src/lib.rs deleted file mode 100644 index 59585a049..000000000 --- a/compatibility-tests/digest_0_8/src/lib.rs +++ /dev/null @@ -1,130 +0,0 @@ -#![cfg(test)] - -use digest::Digest; -use twox_hash::{XxHash32, XxHash64}; - -#[test] -fn it_implements_digest() { - fn implements_digest() {} - - implements_digest::(); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_64() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash64::new(); - for byte in bytes.chunks(1) { - byte_by_byte.input(byte); - } - - let mut one_chunk = XxHash64::new(); - one_chunk.input(&bytes); - - assert_eq!(byte_by_byte.result(), one_chunk.result()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.input(&[]); - assert_eq!(hasher.result()[..], 0xef46_db37_51d8_e999_u64.to_be_bytes()); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.input(&[42]); - assert_eq!(hasher.result()[..], 0x0a9e_dece_beb0_3ae4_u64.to_be_bytes()); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_64() { - assert_eq!( - XxHash64::digest(b"Hello, world!\0")[..], - 0x7b06_c531_ea43_e89f_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!( - XxHash64::digest(&bytes)[..], - 0x6ac1_e580_3216_6597_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_64() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.input(&[]); - assert_eq!(hasher.result()[..], 0x4b6a_04fc_df7a_4672_u64.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.input(&bytes); - assert_eq!(hasher.result()[..], 
0x567e_355e_0682_e1f1_u64.to_be_bytes()); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_32() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash32::new(); - for byte in bytes.chunks(1) { - byte_by_byte.input(byte); - } - - let mut one_chunk = XxHash32::new(); - one_chunk.input(&bytes); - - assert_eq!(byte_by_byte.result(), one_chunk.result()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.input(&[]); - assert_eq!(hasher.result()[..], 0x02cc_5d05_u32.to_be_bytes()); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.input(&[42]); - assert_eq!(hasher.result()[..], 0xe0fe_705f_u32.to_be_bytes()); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_32() { - assert_eq!( - XxHash32::digest(b"Hello, world!\0")[..], - 0x9e5e_7e93_u32.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!(XxHash32::digest(&bytes)[..], 0x7f89_ba44_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_32() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.input(&[]); - assert_eq!(hasher.result()[..], 0xd6bf_8459_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.input(&bytes); - assert_eq!(hasher.result()[..], 0x6d2f_6c17_u32.to_be_bytes()); -} diff --git a/compatibility-tests/digest_0_9/Cargo.toml b/compatibility-tests/digest_0_9/Cargo.toml deleted file mode 100644 index 7d072eaf3..000000000 --- a/compatibility-tests/digest_0_9/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "digest_0_9" -version = "0.1.0" -authors = ["Jake Goulding "] -edition = "2018" - -[dependencies] 
-digest = "0.9" -twox-hash = { path = "../..", features = ["digest_0_9"] } diff --git a/compatibility-tests/digest_0_9/src/lib.rs b/compatibility-tests/digest_0_9/src/lib.rs deleted file mode 100644 index 66f336519..000000000 --- a/compatibility-tests/digest_0_9/src/lib.rs +++ /dev/null @@ -1,142 +0,0 @@ -#![cfg(test)] - -use digest::Digest; -use twox_hash::{XxHash32, XxHash64}; - -#[test] -fn it_implements_digest() { - fn implements_digest() {} - - implements_digest::(); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_64() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash64::new(); - for byte in bytes.chunks(1) { - byte_by_byte.update(byte); - } - - let mut one_chunk = XxHash64::new(); - one_chunk.update(&bytes); - - assert_eq!(byte_by_byte.finalize(), one_chunk.finalize()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.update(&[]); - assert_eq!( - hasher.finalize()[..], - 0xef46_db37_51d8_e999_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.update(&[42]); - assert_eq!( - hasher.finalize()[..], - 0x0a9e_dece_beb0_3ae4_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_64() { - assert_eq!( - XxHash64::digest(b"Hello, world!\0")[..], - 0x7b06_c531_ea43_e89f_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!( - XxHash64::digest(&bytes)[..], - 0x6ac1_e580_3216_6597_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_64() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.update(&[]); - assert_eq!( - hasher.finalize()[..], - 0x4b6a_04fc_df7a_4672_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_64() 
{ - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.update(&bytes); - assert_eq!( - hasher.finalize()[..], - 0x567e_355e_0682_e1f1_u64.to_be_bytes() - ); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_32() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash32::new(); - for byte in bytes.chunks(1) { - byte_by_byte.update(byte); - } - - let mut one_chunk = XxHash32::new(); - one_chunk.update(&bytes); - - assert_eq!(byte_by_byte.finalize(), one_chunk.finalize()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.update(&[]); - assert_eq!(hasher.finalize()[..], 0x02cc_5d05_u32.to_be_bytes()); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.update(&[42]); - assert_eq!(hasher.finalize()[..], 0xe0fe_705f_u32.to_be_bytes()); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_32() { - assert_eq!( - XxHash32::digest(b"Hello, world!\0")[..], - 0x9e5e_7e93_u32.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!(XxHash32::digest(&bytes)[..], 0x7f89_ba44_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_32() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.update(&[]); - assert_eq!(hasher.finalize()[..], 0xd6bf_8459_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.update(&bytes); - assert_eq!(hasher.finalize()[..], 0x6d2f_6c17_u32.to_be_bytes()); -} diff --git a/renu/.gitignore b/renu/.gitignore deleted file mode 100644 index 1b72444ae..000000000 --- a/renu/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ 
-/Cargo.lock -/target diff --git a/renu/Cargo.toml b/renu/Cargo.toml deleted file mode 100644 index b0a4ce722..000000000 --- a/renu/Cargo.toml +++ /dev/null @@ -1,57 +0,0 @@ -[package] -name = "twox-hash" -version = "1.6.3" -authors = ["Jake Goulding "] -edition = "2021" -rust-version = "1.81" - -description = "A Rust implementation of the XXHash and XXH3 algorithms" -readme = "README.md" -keywords = ["hash", "hasher", "xxhash", "xxh3"] -categories = ["algorithms"] - -repository = "https://github.com/shepmaster/twox-hash" -documentation = "https://docs.rs/twox-hash/" - -license = "MIT" - -[workspace] -members = [ - "asmasm", - "compare", - "renu-sum", - "xx_hash-sys", -] - -[features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] - -random = ["dep:rand"] - -serialize = ["dep:serde"] - -xxhash32 = [] -xxhash64 = [] -xxhash3_64 = [] - -std = ["alloc"] -alloc = [] - -[lints.rust.unexpected_cfgs] -level = "warn" -check-cfg = [ - 'cfg(_internal_xxhash3_force_scalar)', - 'cfg(_internal_xxhash3_force_neon)', - 'cfg(_internal_xxhash3_force_sse2)', - 'cfg(_internal_xxhash3_force_avx2)', -] - -[dependencies] -rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } -serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } - -[dev-dependencies] -serde_json = "1.0.117" - -[package.metadata.docs.rs] -all-features = true diff --git a/renu/README.md b/renu/README.md deleted file mode 100644 index ab987c651..000000000 --- a/renu/README.md +++ /dev/null @@ -1,16 +0,0 @@ -cargo test # unit tests -cargo test -p comparison # proptests -cargo miri test # unsafe -cargo miri test --target s390x-unknown-linux-gnu # big-endian - -cargo -Z profile-rustflags --config 'profile.test.package.xx-renu.rustflags=["--cfg=_internal_xxhash3_force_scalar"]' test - -minimal versions -no-features -all-features - -features for 32 / 64 / xx3 - - -rand feature instead of `std`? 
-remove digest as we aren't crypto? diff --git a/renu/compare/Cargo.toml b/renu/compare/Cargo.toml deleted file mode 100644 index bc6b0ee81..000000000 --- a/renu/compare/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -name = "compare" -version = "0.1.0" -edition = "2021" - -[lib] -bench = false - -[[bench]] -name = "benchmark" -harness = false - -[dependencies] -criterion = { version = "0.5.1", features = [] } -proptest = "1.5.0" -rand = "0.8.5" -twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } -xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/renu/compare/README.md b/renu/compare/README.md deleted file mode 100644 index efb868e2b..000000000 --- a/renu/compare/README.md +++ /dev/null @@ -1,202 +0,0 @@ -# Overview - -Tests compare calling [the reference implementation in -C](https://xxhash.com) against equivalent functions in this crate. No -link-time optimization (LTO) is used, so the C performance numbers -have additional overhead for each function call. - -Click any graph to see it full-size. - -# XXHash64 - -## Oneshot hashing - -Compares the **speed** of hashing an entire buffer of data in one -function call. Data sizes from 256 KiB to 4 MiB are tested. These -graphs are boring flat lines, so a table is used instead. - -### aarch64 - -| Implementation | Throughput (GiB/s) | -|----------------|--------------------| -| Rust | 13.4 | -| C | 13.4 | - -## x86_64 - -| Implementation | Throughput (GiB/s) | -|----------------|--------------------| -| Rust | 15.7 | -| C | 15.8 | - - -## Streaming data - -Compares the **speed** of hashing a 1 MiB buffer of data split into -various chunk sizes. - -### aarch64 - - - XXHash64, streaming data, on an aarch64 processor - - -### x86_64 - - - XXHash64, streaming data, on an x86_64 processor - - -## Small amounts of data - -Compares the **time taken** to hash 0 to 32 bytes of data. 
- -### aarch64 - - - XXHash64, small data, on an aarch64 processor - - -### x86_64 - - - XXHash64, small data, on an x86_64 processor - - - -# XXHash3 (64-bit) - -## Oneshot hashing - -Compares the **speed** of hashing an entire buffer of data in one -function call. Data sizes from 256 KiB to 4 MiB are tested. These -graphs are boring flat lines, so a table is used instead. - -### aarch64 - -| Implementation | Throughput (GiB/s) | -|----------------|--------------------| -| Rust | 34.8 | -| C | 34.8 | -| C (scalar) | 21.0 | -| C (NEON) | 34.7 | - -### x86_64 - -| Implementation | Throughput (GiB/s) | -|----------------|--------------------| -| Rust | 58.3 | -| C | 25.0 | -| C (scalar) | 7.5 | -| C (SSE2) | 25.1 | -| C (AVX2) | 58.1 | - -## Streaming data - -Compares the **speed** of hashing a 1 MiB buffer of data split into -various chunk sizes. - -### aarch64 - - - XXHash3, 64-bit, streaming data, on an aarch64 processor - - -### x86_64 - - - XXHash3, 64-bit, streaming data, on an x86_64 processor - - -## Small amounts of data - -Compares the **time taken** to hash 0 to 230 bytes of -data. Representative samples are taken from similar times to avoid -cluttering the graph and wasting benchmarking time. - -### aarch64 - - - XXHash3, 64-bit, small data, on an aarch64 processor - - -### x86_64 - - - XXHash3, 64-bit, small data, on an x86_64 processor - - -# Benchmark machines - -## Overview - -| CPU | Memory | C compiler | -|-------------------|--------|--------------------| -| Apple M1 Max | 64 GiB | clang 15.0.0 | -| AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | - -Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. - -## Details - -### aarch64 - - - - - - - - - - - - - - - - -
CPUApple M1 Max
Memory64 GiB
C compilerApple clang version 15.0.0 (clang-1500.3.9.4)
- -### x86_64 - - - - - - - - - - - - - - - - -
CPUAMD Ryzen 9 3950X 16-Core Processor, 3501 Mhz, 16 Core(s), 32 Logical Processor(s)
Memory32 GiB (3600 MT/s)
C compilerMicrosoft (R) C/C++ Optimizing Compiler Version 19.41.34120 for x86
diff --git a/renu/compare/src/lib.rs b/renu/compare/src/lib.rs deleted file mode 100644 index 7701a8314..000000000 --- a/renu/compare/src/lib.rs +++ /dev/null @@ -1,378 +0,0 @@ -#![cfg(test)] - -use proptest::{num, prelude::*}; - -use twox_hash as rust; -use xx_hash_sys as c; - -mod xxhash32 { - use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher as _; - - use super::*; - - proptest! { - #[test] - fn oneshot_same_as_one_chunk(seed: u32, data: Vec) { - oneshot_same_as_one_chunk_impl(seed, &data)?; - } - - #[test] - fn oneshot_same_as_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { - oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - } - - #[test] - fn oneshot_same_as_many_chunks(seed: u32, (data, chunks) in data_and_chunks()) { - oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; - } - - #[test] - fn oneshot(seed: u32, data: Vec) { - oneshot_impl(seed, &data)?; - } - - #[test] - fn oneshot_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { - oneshot_impl(seed, &data[offset..])?; - } - - #[test] - fn streaming_one_chunk(seed: u32, data: Vec) { - streaming_one_chunk_impl(seed, &data)?; - } - - #[test] - fn streaming_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { - streaming_one_chunk_impl(seed, &data[offset..])?; - } - } - - fn oneshot_same_as_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { - let oneshot = rust::XxHash32::oneshot(seed, data); - let one_chunk = { - let mut hasher = rust::XxHash32::with_seed(seed); - hasher.write(data); - hasher.finish_32() - }; - - prop_assert_eq!(oneshot, one_chunk); - Ok(()) - } - - fn oneshot_same_as_many_chunks_impl( - seed: u32, - data: &[u8], - chunks: &[Vec], - ) -> TestCaseResult { - let oneshot = rust::XxHash32::oneshot(seed, data); - let many_chunks = { - let mut hasher = rust::XxHash32::with_seed(seed); - for chunk in chunks { - hasher.write(chunk); - } - hasher.finish_32() - }; - - prop_assert_eq!(oneshot, 
many_chunks); - Ok(()) - } - - fn oneshot_impl(seed: u32, data: &[u8]) -> TestCaseResult { - let native = c::XxHash32::oneshot(seed, data); - let rust = rust::XxHash32::oneshot(seed, data); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn streaming_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { - let native = { - let mut hasher = c::XxHash32::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - let rust = { - let mut hasher = rust::XxHash32::with_seed(seed); - hasher.write(data); - hasher.finish_32() - }; - - prop_assert_eq!(native, rust); - Ok(()) - } -} - -mod xxhash64 { - use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher as _; - - use super::*; - - proptest! { - #[test] - fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { - oneshot_same_as_one_chunk_impl(seed, &data)?; - } - - #[test] - fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - } - - #[test] - fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { - oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; - } - - #[test] - fn oneshot(seed: u64, data: Vec) { - oneshot_impl(seed, &data)?; - } - - #[test] - fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_impl(seed, &data[offset..])?; - } - - #[test] - fn streaming_one_chunk(seed: u64, data: Vec) { - streaming_one_chunk_impl(seed, &data)?; - } - - #[test] - fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - streaming_one_chunk_impl(seed, &data[offset..])?; - } - } - - fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let oneshot = rust::XxHash64::oneshot(seed, data); - let one_chunk = { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(oneshot, one_chunk); - Ok(()) - } - - fn 
oneshot_same_as_many_chunks_impl( - seed: u64, - data: &[u8], - chunks: &[Vec], - ) -> TestCaseResult { - let oneshot = rust::XxHash64::oneshot(seed, data); - let many_chunks = { - let mut hasher = rust::XxHash64::with_seed(seed); - for chunk in chunks { - hasher.write(chunk); - } - hasher.finish() - }; - - prop_assert_eq!(oneshot, many_chunks); - Ok(()) - } - - fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = c::XxHash64::oneshot(seed, data); - let rust = rust::XxHash64::oneshot(seed, data); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = { - let mut hasher = c::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - let rust = { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(native, rust); - Ok(()) - } -} - -mod xxhash3_64 { - use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher as _; - use twox_hash::xxhash3_64::SECRET_MINIMUM_LENGTH; - - use super::*; - - proptest! 
{ - #[test] - fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { - oneshot_same_as_one_chunk_impl(seed, &data)?; - } - - #[test] - fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - } - - #[test] - fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { - oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; - } - - #[test] - fn oneshot(seed: u64, data: Vec) { - oneshot_impl(seed, &data)?; - } - - #[test] - fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_impl(seed, &data[offset..])?; - } - - #[test] - fn oneshot_with_a_secret(secret in secret(), data: Vec) { - oneshot_with_secret_impl(&secret, &data)?; - } - - #[test] - fn oneshot_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { - oneshot_with_seed_and_secret_impl(seed, &secret, &data)?; - } - - #[test] - fn streaming_one_chunk(seed: u64, data: Vec) { - streaming_one_chunk_impl(seed, &data)?; - } - - #[test] - fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - streaming_one_chunk_impl(seed, &data[offset..])?; - } - - #[test] - fn streaming_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { - streaming_with_seed_and_secret_impl(seed, &secret, &data)?; - } - } - - fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); - let one_chunk = { - let mut hasher = rust::XxHash3_64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(oneshot, one_chunk); - Ok(()) - } - - fn oneshot_same_as_many_chunks_impl( - seed: u64, - data: &[u8], - chunks: &[Vec], - ) -> TestCaseResult { - let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); - let many_chunks = { - let mut hasher = rust::XxHash3_64::with_seed(seed); - for chunk in chunks { - hasher.write(chunk); - } - 
hasher.finish() - }; - - prop_assert_eq!(oneshot, many_chunks); - Ok(()) - } - - fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = c::XxHash3_64::oneshot_with_seed(seed, data); - let rust = rust::XxHash3_64::oneshot_with_seed(seed, data); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { - let native = c::XxHash3_64::oneshot_with_secret(secret, data); - let rust = rust::XxHash3_64::oneshot_with_secret(secret, data).unwrap(); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn oneshot_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { - let native = c::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data); - let rust = rust::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data).unwrap(); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = { - let mut hasher = c::XxHash3_64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - let rust = { - let mut hasher = rust::XxHash3_64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn streaming_with_seed_and_secret_impl( - seed: u64, - secret: &[u8], - data: &[u8], - ) -> TestCaseResult { - let native = { - let mut hasher = c::XxHash3_64::with_seed_and_secret(seed, secret); - for chunk in data.chunks(256) { - hasher.write(chunk); - } - hasher.finish() - }; - - let rust = { - let mut hasher = rust::XxHash3_64::with_seed_and_secret(seed, secret).unwrap(); - for chunk in data.chunks(256) { - hasher.write(chunk); - } - hasher.finish() - }; - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn secret() -> impl Strategy> { - prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024) - } -} - -fn vec_and_index() -> impl Strategy, usize)> { - prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| 
{ - let len = vec.len(); - (Just(vec), 0..len) - }) -} - -fn data_and_chunks() -> impl Strategy, Vec>)> { - prop::collection::vec(prop::collection::vec(num::u8::ANY, 0..100), 0..100).prop_map(|vs| { - let data = vs.iter().flatten().copied().collect(); - (data, vs) - }) -} diff --git a/renu/src/lib.rs b/renu/src/lib.rs deleted file mode 100644 index 2ee51fb45..000000000 --- a/renu/src/lib.rs +++ /dev/null @@ -1,143 +0,0 @@ -//! A Rust implementation of the [XXHash][] algorithm. -//! -//! [XXHash]: https://github.com/Cyan4973/xxHash -//! -//! ## Hashing arbitrary data -//! -//! ### When all the data is available at once -//! -//! ```rust -//! use twox_hash::XxHash64; -//! -//! let seed = 1234; -//! let hash = XxHash64::oneshot(seed, b"some bytes"); -//! assert_eq!(0xeab5_5659_a496_d78b, hash); -//! ``` -//! -//! ### When the data is streaming -//! -//! ```rust -//! use std::hash::Hasher as _; -//! use twox_hash::XxHash64; -//! -//! let seed = 1234; -//! let mut hasher = XxHash64::with_seed(seed); -//! hasher.write(b"some"); -//! hasher.write(b" "); -//! hasher.write(b"bytes"); -//! let hash = hasher.finish(); -//! assert_eq!(0xeab5_5659_a496_d78b, hash); -//! ``` -//! -//! ## In a [`HashMap`](std::collections::HashMap) -//! -//! ### With a default seed -//! -//! ```rust -//! use std::{collections::HashMap, hash::BuildHasherDefault}; -//! use twox_hash::XxHash64; -//! -//! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` -//! -//! ### With a random seed -//! -//! ```rust -//! use std::collections::HashMap; -//! use twox_hash::xxhash64; -//! -//! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` -//! -//! ### With a fixed seed -//! -//! ```rust -//! use std::collections::HashMap; -//! use twox_hash::xxhash64; -//! -//! 
let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` - -#![deny(rust_2018_idioms)] -#![deny(missing_docs)] -#![cfg_attr(not(feature = "std"), no_std)] -#![cfg_attr(docsrs, feature(doc_cfg))] - -#[cfg(feature = "alloc")] -extern crate alloc; - -#[cfg(any(feature = "std", doc, test))] -extern crate std; - -#[cfg(feature = "xxhash32")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] -pub mod xxhash32; - -#[cfg(feature = "xxhash32")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] -pub use xxhash32::Hasher as XxHash32; - -#[cfg(feature = "xxhash64")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] -pub mod xxhash64; - -#[cfg(feature = "xxhash64")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] -pub use xxhash64::Hasher as XxHash64; - -#[cfg(feature = "xxhash3_64")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] -pub mod xxhash3_64; - -#[cfg(feature = "xxhash3_64")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] -pub use xxhash3_64::Hasher as XxHash3_64; - -trait IntoU32 { - fn into_u32(self) -> u32; -} - -impl IntoU32 for u8 { - fn into_u32(self) -> u32 { - self.into() - } -} - -trait IntoU64 { - fn into_u64(self) -> u64; -} - -impl IntoU64 for u8 { - fn into_u64(self) -> u64 { - self.into() - } -} - -impl IntoU64 for u32 { - fn into_u64(self) -> u64 { - self.into() - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -impl IntoU64 for usize { - fn into_u64(self) -> u64 { - self as u64 - } -} - -trait IntoU128 { - fn into_u128(self) -> u128; -} - -impl IntoU128 for u64 { - fn into_u128(self) -> u128 { - u128::from(self) - } -} diff --git a/src/bin/hash_file.rs b/src/bin/hash_file.rs deleted file mode 100644 index 509b48d68..000000000 --- a/src/bin/hash_file.rs +++ /dev/null @@ -1,28 +0,0 @@ -use std::env; -use std::fs::File; -use std::hash::Hasher; -use std::io::{BufRead, BufReader}; 
-use twox_hash::XxHash64; - -fn main() { - for arg in env::args().skip(1) { - let f = File::open(&arg).unwrap(); - let mut f = BufReader::new(f); - - let mut hasher = XxHash64::with_seed(0); - - loop { - let consumed = { - let bytes = f.fill_buf().unwrap(); - if bytes.is_empty() { - break; - } - hasher.write(bytes); - bytes.len() - }; - f.consume(consumed); - } - - println!("{:16x} {}", hasher.finish(), arg); - } -} diff --git a/src/digest_0_10_support.rs b/src/digest_0_10_support.rs deleted file mode 100644 index 935c09692..000000000 --- a/src/digest_0_10_support.rs +++ /dev/null @@ -1,92 +0,0 @@ -use core::hash::Hasher; - -use digest_0_10::{ - generic_array::typenum::consts::{U16, U4, U8}, - FixedOutput, HashMarker, Output, OutputSizeUser, Update, -}; - -use crate::{xxh3, XxHash32, XxHash64}; - -// ---------- - -impl Update for XxHash32 { - fn update(&mut self, data: &[u8]) { - self.write(data); - } -} - -impl OutputSizeUser for XxHash32 { - type OutputSize = U4; -} - -impl FixedOutput for XxHash32 { - fn finalize_into(self, out: &mut Output) { - let tmp: &mut [u8; 4] = out.as_mut(); - *tmp = self.finish().to_be_bytes(); - } -} - -impl HashMarker for XxHash32 {} - -// ---------- - -impl Update for XxHash64 { - fn update(&mut self, data: &[u8]) { - self.write(data); - } -} - -impl OutputSizeUser for XxHash64 { - type OutputSize = U8; -} - -impl FixedOutput for XxHash64 { - fn finalize_into(self, out: &mut Output) { - let tmp: &mut [u8; 8] = out.as_mut(); - *tmp = self.finish().to_be_bytes(); - } -} - -impl HashMarker for XxHash64 {} - -// ---------- - -impl Update for xxh3::Hash64 { - fn update(&mut self, data: &[u8]) { - self.write(data); - } -} - -impl OutputSizeUser for xxh3::Hash64 { - type OutputSize = U8; -} - -impl FixedOutput for xxh3::Hash64 { - fn finalize_into(self, out: &mut Output) { - let tmp: &mut [u8; 8] = out.as_mut(); - *tmp = self.finish().to_be_bytes(); - } -} - -impl HashMarker for xxh3::Hash64 {} - -// ---------- - -impl Update for 
xxh3::Hash128 { - fn update(&mut self, data: &[u8]) { - self.write(data); - } -} - -impl OutputSizeUser for xxh3::Hash128 { - type OutputSize = U16; -} - -impl FixedOutput for xxh3::Hash128 { - fn finalize_into(self, out: &mut Output) { - let tmp: &mut [u8; 16] = out.as_mut(); - *tmp = xxh3::HasherExt::finish_ext(&self).to_be_bytes(); - } -} - -impl HashMarker for xxh3::Hash128 {} diff --git a/src/digest_0_9_support.rs b/src/digest_0_9_support.rs deleted file mode 100644 index 67788cd6c..000000000 --- a/src/digest_0_9_support.rs +++ /dev/null @@ -1,179 +0,0 @@ -use core::hash::Hasher; - -use digest_0_9::{ - generic_array::{ - typenum::consts::{U16, U4, U8}, - GenericArray, - }, - Digest, -}; - -use crate::{xxh3, XxHash32, XxHash64}; - -impl Digest for XxHash32 { - type OutputSize = U4; - - fn new() -> Self { - Self::default() - } - - fn update(&mut self, data: impl AsRef<[u8]>) { - self.write(data.as_ref()); - } - - fn chain(mut self, data: impl AsRef<[u8]>) -> Self - where - Self: Sized, - { - self.update(data); - self - } - - fn finalize(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn finalize_reset(&mut self) -> GenericArray { - let result = self.finalize(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 4 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).finalize() - } -} - -impl Digest for XxHash64 { - type OutputSize = U8; - - fn new() -> Self { - Self::default() - } - - fn update(&mut self, data: impl AsRef<[u8]>) { - self.write(data.as_ref()); - } - - fn chain(mut self, data: impl AsRef<[u8]>) -> Self - where - Self: Sized, - { - self.update(data); - self - } - - fn finalize(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn finalize_reset(&mut self) -> GenericArray { - let result = self.finalize(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 
8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).finalize() - } -} - -impl Digest for xxh3::Hash64 { - type OutputSize = U8; - - fn new() -> Self { - Self::default() - } - - fn update(&mut self, data: impl AsRef<[u8]>) { - self.write(data.as_ref()); - } - - fn chain(mut self, data: impl AsRef<[u8]>) -> Self - where - Self: Sized, - { - self.update(data); - self - } - - fn finalize(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn finalize_reset(&mut self) -> GenericArray { - let result = self.clone().finalize(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).finalize() - } -} - -impl Digest for xxh3::Hash128 { - type OutputSize = U16; - - fn new() -> Self { - Self::default() - } - - fn update(&mut self, data: impl AsRef<[u8]>) { - self.write(data.as_ref()); - } - - fn chain(mut self, data: impl AsRef<[u8]>) -> Self - where - Self: Sized, - { - self.update(data); - self - } - - fn finalize(self) -> GenericArray { - xxh3::HasherExt::finish_ext(&self).to_be_bytes().into() - } - - fn finalize_reset(&mut self) -> GenericArray { - let result = self.clone().finalize(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).finalize() - } -} diff --git a/src/digest_support.rs b/src/digest_support.rs deleted file mode 100644 index 7b00b9d80..000000000 --- a/src/digest_support.rs +++ /dev/null @@ -1,179 +0,0 @@ -use core::hash::Hasher; - -use digest::{ - generic_array::{ - typenum::consts::{U16, U4, U8}, - GenericArray, - }, - Digest, -}; - -use crate::{xxh3, XxHash32, XxHash64}; - -impl Digest for XxHash32 { - type OutputSize = U4; - - fn new() -> Self { - Self::default() - } - - fn input>(&mut self, data: B) { - 
self.write(data.as_ref()); - } - - fn chain>(mut self, data: B) -> Self - where - Self: Sized, - { - self.input(data); - self - } - - fn result(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn result_reset(&mut self) -> GenericArray { - let result = self.result(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 4 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).result() - } -} - -impl Digest for XxHash64 { - type OutputSize = U8; - - fn new() -> Self { - Self::default() - } - - fn input>(&mut self, data: B) { - self.write(data.as_ref()); - } - - fn chain>(mut self, data: B) -> Self - where - Self: Sized, - { - self.input(data); - self - } - - fn result(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn result_reset(&mut self) -> GenericArray { - let result = self.result(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).result() - } -} - -impl Digest for xxh3::Hash64 { - type OutputSize = U8; - - fn new() -> Self { - Self::default() - } - - fn input>(&mut self, data: B) { - self.write(data.as_ref()); - } - - fn chain>(mut self, data: B) -> Self - where - Self: Sized, - { - self.input(data); - self - } - - fn result(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn result_reset(&mut self) -> GenericArray { - let result = self.clone().result(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).result() - } -} - -impl Digest for xxh3::Hash128 { - type OutputSize = U16; - - fn new() -> Self { - Self::default() - } - - fn input>(&mut self, data: B) { - self.write(data.as_ref()); - } - - fn chain>(mut self, data: B) -> 
Self - where - Self: Sized, - { - self.input(data); - self - } - - fn result(self) -> GenericArray { - xxh3::HasherExt::finish_ext(&self).to_be_bytes().into() - } - - fn result_reset(&mut self) -> GenericArray { - let result = self.clone().result(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).result() - } -} diff --git a/src/lib.rs b/src/lib.rs index 414dc8d42..2ee51fb45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,43 @@ -//! A Rust implementation of the [XXHash] algorithm. +//! A Rust implementation of the [XXHash][] algorithm. //! //! [XXHash]: https://github.com/Cyan4973/xxHash //! -//! ### With a fixed seed +//! ## Hashing arbitrary data +//! +//! ### When all the data is available at once //! //! ```rust -//! use std::hash::BuildHasherDefault; -//! use std::collections::HashMap; //! use twox_hash::XxHash64; //! -//! let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); +//! let seed = 1234; +//! let hash = XxHash64::oneshot(seed, b"some bytes"); +//! assert_eq!(0xeab5_5659_a496_d78b, hash); +//! ``` +//! +//! ### When the data is streaming +//! +//! ```rust +//! use std::hash::Hasher as _; +//! use twox_hash::XxHash64; +//! +//! let seed = 1234; +//! let mut hasher = XxHash64::with_seed(seed); +//! hasher.write(b"some"); +//! hasher.write(b" "); +//! hasher.write(b"bytes"); +//! let hash = hasher.finish(); +//! assert_eq!(0xeab5_5659_a496_d78b, hash); +//! ``` +//! +//! ## In a [`HashMap`](std::collections::HashMap) +//! +//! ### With a default seed +//! +//! ```rust +//! use std::{collections::HashMap, hash::BuildHasherDefault}; +//! use twox_hash::XxHash64; +//! +//! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` @@ -18,104 +46,98 @@ //! //! ```rust //! 
use std::collections::HashMap; -//! use twox_hash::RandomXxHashBuilder64; +//! use twox_hash::xxhash64; +//! +//! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` +//! +//! ### With a fixed seed +//! +//! ```rust +//! use std::collections::HashMap; +//! use twox_hash::xxhash64; //! -//! let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); +//! let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` -#![no_std] +#![deny(rust_2018_idioms)] +#![deny(missing_docs)] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#[cfg(feature = "alloc")] extern crate alloc; -#[cfg(test)] +#[cfg(any(feature = "std", doc, test))] extern crate std; -use core::{marker::PhantomData, mem}; - -mod sixty_four; -mod thirty_two; -pub mod xxh3; - -#[cfg(feature = "std")] -mod std_support; -#[cfg(feature = "std")] -pub use std_support::sixty_four::RandomXxHashBuilder64; -#[cfg(feature = "std")] -pub use std_support::thirty_two::RandomXxHashBuilder32; -#[cfg(feature = "std")] -pub use std_support::xxh3::{ - RandomHashBuilder128 as RandomXxh3HashBuilder128, - RandomHashBuilder64 as RandomXxh3HashBuilder64, -}; - -#[cfg(feature = "digest")] -mod digest_support; - -#[cfg(feature = "digest_0_9")] -mod digest_0_9_support; - -#[cfg(feature = "digest_0_10")] -mod digest_0_10_support; - -pub use crate::sixty_four::XxHash64; -pub use crate::thirty_two::XxHash32; -pub use crate::xxh3::{Hash128 as Xxh3Hash128, Hash64 as Xxh3Hash64}; - -/// A backwards compatibility type alias. Consider directly using -/// `XxHash64` instead. -pub type XxHash = XxHash64; - -#[cfg(feature = "std")] -/// A backwards compatibility type alias. Consider directly using -/// `RandomXxHashBuilder64` instead. 
-pub type RandomXxHashBuilder = RandomXxHashBuilder64; - -/// An unaligned buffer with iteration support for `UnalignedItem`. -struct UnalignedBuffer<'a, T> { - buf: &'a [u8], - phantom: PhantomData, +#[cfg(feature = "xxhash32")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] +pub mod xxhash32; + +#[cfg(feature = "xxhash32")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] +pub use xxhash32::Hasher as XxHash32; + +#[cfg(feature = "xxhash64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] +pub mod xxhash64; + +#[cfg(feature = "xxhash64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] +pub use xxhash64::Hasher as XxHash64; + +#[cfg(feature = "xxhash3_64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] +pub mod xxhash3_64; + +#[cfg(feature = "xxhash3_64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] +pub use xxhash3_64::Hasher as XxHash3_64; + +trait IntoU32 { + fn into_u32(self) -> u32; } -/// Types implementing this trait must be transmutable from a `*const -/// u8` to `*const Self` at any possible alignment. -/// -/// The intent is to use this with only primitive integer types (and -/// tightly-packed arrays of those integers). 
-#[allow(clippy::missing_safety_doc)] -unsafe trait UnalignedItem {} - -unsafe impl UnalignedItem for [u64; 4] {} -unsafe impl UnalignedItem for [u32; 4] {} -unsafe impl UnalignedItem for u64 {} -unsafe impl UnalignedItem for u32 {} - -impl<'a, T: UnalignedItem> UnalignedBuffer<'a, T> { - #[inline] - fn new(buf: &'a [u8]) -> Self { - Self { - buf, - phantom: PhantomData, - } +impl IntoU32 for u8 { + fn into_u32(self) -> u32 { + self.into() } +} - #[inline] - fn remaining(&self) -> &[u8] { - self.buf +trait IntoU64 { + fn into_u64(self) -> u64; +} + +impl IntoU64 for u8 { + fn into_u64(self) -> u64 { + self.into() + } +} + +impl IntoU64 for u32 { + fn into_u64(self) -> u64 { + self.into() } } -impl<'a, T: UnalignedItem> Iterator for UnalignedBuffer<'a, T> { - type Item = T; - - fn next(&mut self) -> Option { - let size = mem::size_of::(); - self.buf.get(size..).map(|remaining| { - // `self.buf` has at least `size` bytes that can be read as `T`. - let result = unsafe { (self.buf.as_ptr() as *const T).read_unaligned() }; - self.buf = remaining; - result - }) +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl IntoU64 for usize { + fn into_u64(self) -> u64 { + self as u64 + } +} + +trait IntoU128 { + fn into_u128(self) -> u128; +} + +impl IntoU128 for u64 { + fn into_u128(self) -> u128 { + u128::from(self) } } diff --git a/src/sixty_four.rs b/src/sixty_four.rs deleted file mode 100644 index c15158693..000000000 --- a/src/sixty_four.rs +++ /dev/null @@ -1,413 +0,0 @@ -use crate::UnalignedBuffer; -use core::{cmp, hash::Hasher}; - -#[cfg(feature = "serialize")] -use serde::{Deserialize, Serialize}; - -const CHUNK_SIZE: usize = 32; - -pub const PRIME_1: u64 = 11_400_714_785_074_694_791; -pub const PRIME_2: u64 = 14_029_467_366_897_019_727; -pub const PRIME_3: u64 = 1_609_587_929_392_839_161; -pub const PRIME_4: u64 = 9_650_029_242_287_828_579; -pub const PRIME_5: u64 = 2_870_177_450_012_600_261; - -#[cfg_attr(feature = "serialize", 
derive(Deserialize, Serialize))] -#[derive(Copy, Clone, PartialEq)] -struct XxCore { - v1: u64, - v2: u64, - v3: u64, - v4: u64, -} - -/// Calculates the 64-bit hash. -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct XxHash64 { - total_len: u64, - seed: u64, - core: XxCore, - #[cfg_attr(feature = "serialize", serde(flatten))] - buffer: Buffer, -} - -impl XxCore { - fn with_seed(seed: u64) -> XxCore { - XxCore { - v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), - v2: seed.wrapping_add(PRIME_2), - v3: seed, - v4: seed.wrapping_sub(PRIME_1), - } - } - - #[inline(always)] - fn ingest_chunks(&mut self, values: I) - where - I: IntoIterator, - { - #[inline(always)] - fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 { - value = value.wrapping_mul(PRIME_2); - current_value = current_value.wrapping_add(value); - current_value = current_value.rotate_left(31); - current_value.wrapping_mul(PRIME_1) - } - - // By drawing these out, we can avoid going back and forth to - // memory. It only really helps for large files, when we need - // to iterate multiple times here. - - let mut v1 = self.v1; - let mut v2 = self.v2; - let mut v3 = self.v3; - let mut v4 = self.v4; - - for [n1, n2, n3, n4] in values { - v1 = ingest_one_number(v1, n1.to_le()); - v2 = ingest_one_number(v2, n2.to_le()); - v3 = ingest_one_number(v3, n3.to_le()); - v4 = ingest_one_number(v4, n4.to_le()); - } - - self.v1 = v1; - self.v2 = v2; - self.v3 = v3; - self.v4 = v4; - } - - #[inline(always)] - fn finish(&self) -> u64 { - // The original code pulls out local vars for v[1234] - // here. Performance tests did not show that to be effective - // here, presumably because this method is not called in a - // tight loop. 
- - #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel - let mut hash; - - hash = self.v1.rotate_left(1); - hash = hash.wrapping_add(self.v2.rotate_left(7)); - hash = hash.wrapping_add(self.v3.rotate_left(12)); - hash = hash.wrapping_add(self.v4.rotate_left(18)); - - #[inline(always)] - fn mix_one(mut hash: u64, mut value: u64) -> u64 { - value = value.wrapping_mul(PRIME_2); - value = value.rotate_left(31); - value = value.wrapping_mul(PRIME_1); - hash ^= value; - hash = hash.wrapping_mul(PRIME_1); - hash.wrapping_add(PRIME_4) - } - - hash = mix_one(hash, self.v1); - hash = mix_one(hash, self.v2); - hash = mix_one(hash, self.v3); - hash = mix_one(hash, self.v4); - - hash - } -} - -impl core::fmt::Debug for XxCore { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { - write!( - f, - "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", - self.v1, self.v2, self.v3, self.v4 - ) - } -} - -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, Default, PartialEq)] -#[repr(align(8))] -#[cfg_attr(feature = "serialize", serde(transparent))] -struct AlignToU64(T); - -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, Default, PartialEq)] -struct Buffer { - #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] - data: AlignToU64<[u8; CHUNK_SIZE]>, - #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] - len: usize, -} - -impl Buffer { - fn data(&self) -> &[u8] { - &self.data.0[..self.len] - } - - /// Consumes as much of the parameter as it can, returning the unused part. 
- fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { - let to_use = cmp::min(self.available(), data.len()); - let (data, remaining) = data.split_at(to_use); - self.data.0[self.len..][..to_use].copy_from_slice(data); - self.len += to_use; - remaining - } - - fn set_data(&mut self, data: &[u8]) { - debug_assert!(self.is_empty()); - debug_assert!(data.len() < CHUNK_SIZE); - self.data.0[..data.len()].copy_from_slice(data); - self.len = data.len(); - } - - fn available(&self) -> usize { - CHUNK_SIZE - self.len - } - - fn is_empty(&self) -> bool { - self.len == 0 - } - - fn is_full(&self) -> bool { - self.len == CHUNK_SIZE - } -} - -impl XxHash64 { - /// Constructs the hash with an initial seed - pub fn with_seed(seed: u64) -> XxHash64 { - XxHash64 { - total_len: 0, - seed, - core: XxCore::with_seed(seed), - buffer: Buffer::default(), - } - } - - pub(crate) fn write(&mut self, bytes: &[u8]) { - let remaining = self.maybe_consume_bytes(bytes); - if !remaining.is_empty() { - let mut remaining = UnalignedBuffer::new(remaining); - self.core.ingest_chunks(&mut remaining); - self.buffer.set_data(remaining.remaining()); - } - self.total_len += bytes.len() as u64; - } - - // Consume bytes and try to make `self.buffer` empty. - // If there are not enough bytes, `self.buffer` can be non-empty, and this - // function returns an empty slice. 
- fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { - if self.buffer.is_empty() { - data - } else { - let data = self.buffer.consume(data); - if self.buffer.is_full() { - let mut u64s = UnalignedBuffer::new(self.buffer.data()); - self.core.ingest_chunks(&mut u64s); - debug_assert!(u64s.remaining().is_empty()); - self.buffer.len = 0; - } - data - } - } - - pub(crate) fn finish(&self) -> u64 { - let mut hash = if self.total_len >= CHUNK_SIZE as u64 { - // We have processed at least one full chunk - self.core.finish() - } else { - self.seed.wrapping_add(PRIME_5) - }; - - hash = hash.wrapping_add(self.total_len); - - let mut buffered_u64s = UnalignedBuffer::::new(self.buffer.data()); - for buffered_u64 in &mut buffered_u64s { - let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2); - k1 = k1.rotate_left(31); - k1 = k1.wrapping_mul(PRIME_1); - hash ^= k1; - hash = hash.rotate_left(27); - hash = hash.wrapping_mul(PRIME_1); - hash = hash.wrapping_add(PRIME_4); - } - - let mut buffered_u32s = UnalignedBuffer::::new(buffered_u64s.remaining()); - for buffered_u32 in &mut buffered_u32s { - let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1); - hash ^= k1; - hash = hash.rotate_left(23); - hash = hash.wrapping_mul(PRIME_2); - hash = hash.wrapping_add(PRIME_3); - } - - let buffered_u8s = buffered_u32s.remaining(); - for &buffered_u8 in buffered_u8s { - let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5); - hash ^= k1; - hash = hash.rotate_left(11); - hash = hash.wrapping_mul(PRIME_1); - } - - // The final intermixing - hash ^= hash >> 33; - hash = hash.wrapping_mul(PRIME_2); - hash ^= hash >> 29; - hash = hash.wrapping_mul(PRIME_3); - hash ^= hash >> 32; - - hash - } - - pub fn seed(&self) -> u64 { - self.seed - } - - pub fn total_len(&self) -> u64 { - self.total_len - } -} - -impl Default for XxHash64 { - fn default() -> XxHash64 { - XxHash64::with_seed(0) - } -} - -impl Hasher for XxHash64 { - fn finish(&self) -> u64 { - XxHash64::finish(self) 
- } - - fn write(&mut self, bytes: &[u8]) { - XxHash64::write(self, bytes) - } -} - -#[cfg(feature = "std")] -pub use crate::std_support::sixty_four::RandomXxHashBuilder64; - -#[cfg(test)] -mod test { - use super::{RandomXxHashBuilder64, XxHash64}; - use std::collections::HashMap; - use std::hash::BuildHasherDefault; - use std::prelude::v1::*; - - #[test] - fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash64::with_seed(0); - for byte in bytes.chunks(1) { - byte_by_byte.write(byte); - } - - let mut one_chunk = XxHash64::with_seed(0); - one_chunk.write(&bytes); - - assert_eq!(byte_by_byte.core, one_chunk.core); - } - - #[test] - fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); - } - - #[test] - fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(&[42]); - assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); - } - - #[test] - fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); - } - - #[test] - fn hash_of_multiple_chunks_matches_c_implementation() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); - } - - #[test] - fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); - } - - #[test] - fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 
0x567e_355e_0682_e1f1); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[cfg(feature = "serialize")] - type TestResult = Result>; - - #[cfg(feature = "serialize")] - #[test] - fn test_serialization_cycle() -> TestResult { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash64 = serde_json::from_str(&serialized)?; - assert_eq!(hasher, unserialized); - Ok(()) - } - - #[cfg(feature = "serialize")] - #[test] - fn test_serialization_stability() -> TestResult { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = r#"{ - "total_len": 14, - "seed": 0, - "core": { - "v1": 6983438078262162902, - "v2": 14029467366897019727, - "v3": 0, - "v4": 7046029288634856825 - }, - "buffer": [ - 72, 101, 108, 108, 111, 44, 32, 119, - 111, 114, 108, 100, 33, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - ], - "buffer_usage": 14 - }"#; - - let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); - assert_eq!(hasher, unserialized); - Ok(()) - } -} diff --git a/src/std_support.rs b/src/std_support.rs deleted file mode 100644 index d79085e26..000000000 --- a/src/std_support.rs +++ /dev/null @@ -1,113 +0,0 @@ -pub mod sixty_four { - use crate::XxHash64; - use core::hash::BuildHasher; - use rand::{self, Rng}; - - #[derive(Clone)] - /// Constructs a randomized seed and reuses it for multiple hasher instances. 
- pub struct RandomXxHashBuilder64(u64); - - impl RandomXxHashBuilder64 { - fn new() -> RandomXxHashBuilder64 { - RandomXxHashBuilder64(rand::thread_rng().gen()) - } - } - - impl Default for RandomXxHashBuilder64 { - fn default() -> RandomXxHashBuilder64 { - RandomXxHashBuilder64::new() - } - } - - impl BuildHasher for RandomXxHashBuilder64 { - type Hasher = XxHash64; - - fn build_hasher(&self) -> XxHash64 { - XxHash64::with_seed(self.0) - } - } -} - -pub mod thirty_two { - use crate::XxHash32; - use core::hash::BuildHasher; - use rand::{self, Rng}; - - #[derive(Clone)] - /// Constructs a randomized seed and reuses it for multiple hasher instances. See the usage warning on `XxHash32`. - pub struct RandomXxHashBuilder32(u32); - - impl RandomXxHashBuilder32 { - fn new() -> RandomXxHashBuilder32 { - RandomXxHashBuilder32(rand::thread_rng().gen()) - } - } - - impl Default for RandomXxHashBuilder32 { - fn default() -> RandomXxHashBuilder32 { - RandomXxHashBuilder32::new() - } - } - - impl BuildHasher for RandomXxHashBuilder32 { - type Hasher = XxHash32; - - fn build_hasher(&self) -> XxHash32 { - XxHash32::with_seed(self.0) - } - } -} - -pub mod xxh3 { - use crate::xxh3::{Hash128, Hash64}; - use core::hash::BuildHasher; - use rand::{self, Rng}; - - #[derive(Clone)] - /// Constructs a randomized seed and reuses it for multiple hasher instances. - pub struct RandomHashBuilder64(u64); - - impl RandomHashBuilder64 { - fn new() -> RandomHashBuilder64 { - RandomHashBuilder64(rand::thread_rng().gen()) - } - } - - impl Default for RandomHashBuilder64 { - fn default() -> RandomHashBuilder64 { - RandomHashBuilder64::new() - } - } - - impl BuildHasher for RandomHashBuilder64 { - type Hasher = Hash64; - - fn build_hasher(&self) -> Hash64 { - Hash64::with_seed(self.0) - } - } - - #[derive(Clone)] - /// Constructs a randomized seed and reuses it for multiple hasher instances. 
- pub struct RandomHashBuilder128(u64); - - impl RandomHashBuilder128 { - fn new() -> RandomHashBuilder128 { - RandomHashBuilder128(rand::thread_rng().gen()) - } - } - - impl Default for RandomHashBuilder128 { - fn default() -> RandomHashBuilder128 { - RandomHashBuilder128::new() - } - } - - impl BuildHasher for RandomHashBuilder128 { - type Hasher = Hash128; - - fn build_hasher(&self) -> Hash128 { - Hash128::with_seed(self.0) - } - } -} diff --git a/src/thirty_two.rs b/src/thirty_two.rs deleted file mode 100644 index cfa44cdbc..000000000 --- a/src/thirty_two.rs +++ /dev/null @@ -1,416 +0,0 @@ -use crate::UnalignedBuffer; -use core::{cmp, hash::Hasher}; - -#[cfg(feature = "serialize")] -use serde::{Deserialize, Serialize}; - -const CHUNK_SIZE: usize = 16; - -pub const PRIME_1: u32 = 2_654_435_761; -pub const PRIME_2: u32 = 2_246_822_519; -pub const PRIME_3: u32 = 3_266_489_917; -pub const PRIME_4: u32 = 668_265_263; -pub const PRIME_5: u32 = 374_761_393; - -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Copy, Clone, PartialEq)] -struct XxCore { - v1: u32, - v2: u32, - v3: u32, - v4: u32, -} - -/// Calculates the 32-bit hash. Care should be taken when using this -/// hash. -/// -/// Although this struct implements `Hasher`, it only calculates a -/// 32-bit number, leaving the upper bits as 0. This means it is -/// unlikely to be correct to use this in places like a `HashMap`. 
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct XxHash32 { - total_len: u64, - seed: u32, - core: XxCore, - #[cfg_attr(feature = "serialize", serde(flatten))] - buffer: Buffer, -} - -impl XxCore { - fn with_seed(seed: u32) -> XxCore { - XxCore { - v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), - v2: seed.wrapping_add(PRIME_2), - v3: seed, - v4: seed.wrapping_sub(PRIME_1), - } - } - - #[inline(always)] - fn ingest_chunks(&mut self, values: I) - where - I: IntoIterator, - { - #[inline(always)] - fn ingest_one_number(mut current_value: u32, mut value: u32) -> u32 { - value = value.wrapping_mul(PRIME_2); - current_value = current_value.wrapping_add(value); - current_value = current_value.rotate_left(13); - current_value.wrapping_mul(PRIME_1) - } - - // By drawing these out, we can avoid going back and forth to - // memory. It only really helps for large files, when we need - // to iterate multiple times here. - - let mut v1 = self.v1; - let mut v2 = self.v2; - let mut v3 = self.v3; - let mut v4 = self.v4; - - for [n1, n2, n3, n4] in values { - v1 = ingest_one_number(v1, n1.to_le()); - v2 = ingest_one_number(v2, n2.to_le()); - v3 = ingest_one_number(v3, n3.to_le()); - v4 = ingest_one_number(v4, n4.to_le()); - } - - self.v1 = v1; - self.v2 = v2; - self.v3 = v3; - self.v4 = v4; - } - - #[inline(always)] - fn finish(&self) -> u32 { - // The original code pulls out local vars for v[1234] - // here. Performance tests did not show that to be effective - // here, presumably because this method is not called in a - // tight loop. 
- - #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel - let mut hash; - - hash = self.v1.rotate_left(1); - hash = hash.wrapping_add(self.v2.rotate_left(7)); - hash = hash.wrapping_add(self.v3.rotate_left(12)); - hash = hash.wrapping_add(self.v4.rotate_left(18)); - - hash - } -} - -impl core::fmt::Debug for XxCore { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { - write!( - f, - "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", - self.v1, self.v2, self.v3, self.v4 - ) - } -} - -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, Default, PartialEq)] -#[repr(align(4))] -#[cfg_attr(feature = "serialize", serde(transparent))] -struct AlignToU32(T); - -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, Default, PartialEq)] -struct Buffer { - #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] - data: AlignToU32<[u8; CHUNK_SIZE]>, - #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] - len: usize, -} - -impl Buffer { - fn data(&self) -> &[u8] { - &self.data.0[..self.len] - } - - /// Consumes as much of the parameter as it can, returning the unused part. 
- fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { - let to_use = cmp::min(self.available(), data.len()); - let (data, remaining) = data.split_at(to_use); - self.data.0[self.len..][..to_use].copy_from_slice(data); - self.len += to_use; - remaining - } - - fn set_data(&mut self, data: &[u8]) { - debug_assert!(self.is_empty()); - debug_assert!(data.len() < CHUNK_SIZE); - self.data.0[..data.len()].copy_from_slice(data); - self.len = data.len(); - } - - fn available(&self) -> usize { - CHUNK_SIZE - self.len - } - - fn is_empty(&self) -> bool { - self.len == 0 - } - - fn is_full(&self) -> bool { - self.len == CHUNK_SIZE - } -} - -impl XxHash32 { - /// Constructs the hash with an initial seed - pub fn with_seed(seed: u32) -> XxHash32 { - XxHash32 { - total_len: 0, - seed, - core: XxCore::with_seed(seed), - buffer: Buffer::default(), - } - } - - pub(crate) fn write(&mut self, bytes: &[u8]) { - let remaining = self.maybe_consume_bytes(bytes); - if !remaining.is_empty() { - let mut remaining = UnalignedBuffer::new(remaining); - self.core.ingest_chunks(&mut remaining); - self.buffer.set_data(remaining.remaining()); - } - self.total_len += bytes.len() as u64; - } - - // Consume bytes and try to make `self.buffer` empty. - // If there are not enough bytes, `self.buffer` can be non-empty, and this - // function returns an empty slice. 
- fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { - if self.buffer.is_empty() { - data - } else { - let data = self.buffer.consume(data); - if self.buffer.is_full() { - let mut u32s = UnalignedBuffer::new(self.buffer.data()); - self.core.ingest_chunks(&mut u32s); - debug_assert!(u32s.remaining().is_empty()); - self.buffer.len = 0; - } - data - } - } - - pub(crate) fn finish(&self) -> u32 { - let mut hash = if self.total_len >= CHUNK_SIZE as u64 { - // We have processed at least one full chunk - self.core.finish() - } else { - self.seed.wrapping_add(PRIME_5) - }; - - hash = hash.wrapping_add(self.total_len as u32); - - let mut buffered_u32s = UnalignedBuffer::::new(self.buffer.data()); - for buffered_u32 in &mut buffered_u32s { - let k1 = buffered_u32.to_le().wrapping_mul(PRIME_3); - hash = hash.wrapping_add(k1); - hash = hash.rotate_left(17); - hash = hash.wrapping_mul(PRIME_4); - } - - let buffered_u8s = buffered_u32s.remaining(); - for &buffered_u8 in buffered_u8s { - let k1 = u32::from(buffered_u8).wrapping_mul(PRIME_5); - hash = hash.wrapping_add(k1); - hash = hash.rotate_left(11); - hash = hash.wrapping_mul(PRIME_1); - } - - // The final intermixing - hash ^= hash >> 15; - hash = hash.wrapping_mul(PRIME_2); - hash ^= hash >> 13; - hash = hash.wrapping_mul(PRIME_3); - hash ^= hash >> 16; - - hash - } - - pub fn seed(&self) -> u32 { - self.seed - } - - /// Get the total number of bytes hashed, truncated to 32 bits. - /// For the full 64-bit byte count, use `total_len_64` - pub fn total_len(&self) -> u32 { - self.total_len as u32 - } - - /// Get the total number of bytes hashed. 
- pub fn total_len_64(&self) -> u64 { - self.total_len - } -} - -impl Default for XxHash32 { - fn default() -> XxHash32 { - XxHash32::with_seed(0) - } -} - -impl Hasher for XxHash32 { - fn finish(&self) -> u64 { - u64::from(XxHash32::finish(self)) - } - - fn write(&mut self, bytes: &[u8]) { - XxHash32::write(self, bytes) - } -} - -#[cfg(feature = "std")] -pub use crate::std_support::thirty_two::RandomXxHashBuilder32; - -#[cfg(test)] -mod test { - use super::{RandomXxHashBuilder32, XxHash32}; - use std::collections::HashMap; - use std::hash::BuildHasherDefault; - use std::prelude::v1::*; - - #[test] - fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash32::with_seed(0); - for byte in bytes.chunks(1) { - byte_by_byte.write(byte); - } - - let mut one_chunk = XxHash32::with_seed(0); - one_chunk.write(&bytes); - - assert_eq!(byte_by_byte.core, one_chunk.core); - } - - #[test] - fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0x02cc_5d05); - } - - #[test] - fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); - hasher.write(&[42]); - assert_eq!(hasher.finish(), 0xe0fe_705f); - } - - #[test] - fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); - hasher.write(b"Hello, world!\0"); - assert_eq!(hasher.finish(), 0x9e5e_7e93); - } - - #[test] - fn hash_of_multiple_chunks_matches_c_implementation() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x7f89_ba44); - } - - #[test] - fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0xd6bf_8459); - } - - #[test] - fn 
hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x6d2f_6c17); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHashBuilder32> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[cfg(feature = "serialize")] - type TestResult = Result>; - - #[cfg(feature = "serialize")] - #[test] - fn test_serialization_cycle() -> TestResult { - let mut hasher = XxHash32::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash32 = serde_json::from_str(&serialized)?; - assert_eq!(hasher, unserialized); - Ok(()) - } - - #[cfg(feature = "serialize")] - #[test] - fn test_serialization_stability() -> TestResult { - let mut hasher = XxHash32::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = r#"{ - "total_len": 14, - "seed": 0, - "core": { - "v1": 606290984, - "v2": 2246822519, - "v3": 0, - "v4": 1640531535 - }, - "buffer": [ - 72, 101, 108, 108, 111, 44, 32, 119, - 111, 114, 108, 100, 33, 0, 0, 0 - ], - "buffer_usage": 14 - }"#; - - let unserialized: XxHash32 = serde_json::from_str(serialized).unwrap(); - assert_eq!(hasher, unserialized); - Ok(()) - } - - // This test validates wraparound/truncation behavior for very large inputs - // of a 32-bit hash, but runs very slowly in the normal "cargo test" - // build config since it hashes 4.3GB of data. It runs reasonably quick - // under "cargo test --release". 
- /* - #[test] - fn len_overflow_32bit() { - // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. - let bytes200: Vec = (0..200).collect(); - let mut hasher = XxHash32::with_seed(0); - for _ in 0..(4_300_000_000u64 / 200u64) { - hasher.write(&bytes200); - } - assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00); - assert_eq!(hasher.total_len(), 0x004c_cb00); - // retult is tested against the C implementation - assert_eq!(hasher.finish(), 0x1522_4ca7); - } - */ -} diff --git a/src/xxh3.rs b/src/xxh3.rs deleted file mode 100644 index 0ffc54189..000000000 --- a/src/xxh3.rs +++ /dev/null @@ -1,1666 +0,0 @@ -//! The in-progress XXH3 algorithm. -//! -//! Please read [the notes in original implementation][warning] to -//! learn about when to use these algorithms. Specifically, the -//! version of code this crate reproduces says: -//! -//! > The algorithm is currently in development, meaning its return -//! values might still change in future versions. However, the API -//! is stable, and can be used in production, typically for -//! generation of ephemeral hashes (produced and consumed in same -//! session). -//! -//! 
[warning]: https://github.com/Cyan4973/xxHash#new-hash-algorithms - -use alloc::vec::Vec; - -use core::convert::TryInto; -use core::hash::Hasher; -use core::mem; -use core::ops::{Deref, DerefMut}; -use core::slice; - -#[cfg(target_arch = "x86")] -use core::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64::*; - -use cfg_if::cfg_if; -use static_assertions::{const_assert, const_assert_eq}; - -#[cfg(feature = "serialize")] -use serde::{Deserialize, Serialize}; - -use crate::sixty_four::{ - PRIME_1 as PRIME64_1, PRIME_2 as PRIME64_2, PRIME_3 as PRIME64_3, PRIME_4 as PRIME64_4, - PRIME_5 as PRIME64_5, -}; -use crate::thirty_two::{PRIME_1 as PRIME32_1, PRIME_2 as PRIME32_2, PRIME_3 as PRIME32_3}; - -#[cfg(feature = "std")] -pub use crate::std_support::xxh3::{RandomHashBuilder128, RandomHashBuilder64}; - -#[inline(always)] -pub fn hash64(data: &[u8]) -> u64 { - hash64_with_seed(data, 0) -} - -#[inline(always)] -pub fn hash64_with_seed(data: &[u8], seed: u64) -> u64 { - let len = data.len(); - - if len <= 16 { - hash_len_0to16_64bits(data, len, &SECRET, seed) - } else if len <= 128 { - hash_len_17to128_64bits(data, len, &SECRET, seed) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_64bits(data, len, &SECRET, seed) - } else { - hash_long_64bits_with_seed(data, len, seed) - } -} - -#[inline(always)] -pub fn hash64_with_secret(data: &[u8], secret: &[u8]) -> u64 { - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let len = data.len(); - - if len <= 16 { - hash_len_0to16_64bits(data, len, secret, 0) - } else if len <= 128 { - hash_len_17to128_64bits(data, len, secret, 0) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_64bits(data, len, secret, 0) - } else { - hash_long_64bits_with_secret(data, len, secret) - } -} - -#[inline(always)] -pub fn hash128(data: &[u8]) -> u128 { - hash128_with_seed(data, 0) -} - -#[inline(always)] -pub fn hash128_with_seed(data: &[u8], seed: u64) -> u128 { - let len = data.len(); - - if len <= 16 { - 
hash_len_0to16_128bits(data, len, &SECRET, seed) - } else if len <= 128 { - hash_len_17to128_128bits(data, len, &SECRET, seed) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_128bits(data, len, &SECRET, seed) - } else { - hash_long_128bits_with_seed(data, len, seed) - } -} - -#[inline(always)] -pub fn hash128_with_secret(data: &[u8], secret: &[u8]) -> u128 { - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let len = data.len(); - - if len <= 16 { - hash_len_0to16_128bits(data, len, secret, 0) - } else if len <= 128 { - hash_len_17to128_128bits(data, len, secret, 0) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_128bits(data, len, secret, 0) - } else { - hash_long_128bits_with_secret(data, len, secret) - } -} - -/// Calculates the 64-bit hash. -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Clone, Default)] -pub struct Hash64(State); - -impl Hash64 { - pub fn with_seed(seed: u64) -> Self { - Self(State::with_seed(seed)) - } - - pub fn with_secret>>(secret: S) -> Self { - Self(State::with_secret(secret)) - } -} - -impl Hasher for Hash64 { - #[inline(always)] - fn finish(&self) -> u64 { - self.0.digest64() - } - - #[inline(always)] - fn write(&mut self, bytes: &[u8]) { - self.0.update(bytes, AccWidth::Acc64Bits) - } -} - -/// Calculates the 128-bit hash. 
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Clone, Default)] -pub struct Hash128(State); - -impl Hash128 { - pub fn with_seed(seed: u64) -> Self { - Self(State::with_seed(seed)) - } - - pub fn with_secret>>(secret: S) -> Self { - Self(State::with_secret(secret)) - } -} - -impl Hasher for Hash128 { - #[inline(always)] - fn finish(&self) -> u64 { - self.0.digest128() as u64 - } - - #[inline(always)] - fn write(&mut self, bytes: &[u8]) { - self.0.update(bytes, AccWidth::Acc128Bits) - } -} - -pub trait HasherExt: Hasher { - fn finish_ext(&self) -> u128; -} - -impl HasherExt for Hash128 { - #[inline(always)] - fn finish_ext(&self) -> u128 { - self.0.digest128() - } -} - -/* ========================================== - * XXH3 default settings - * ========================================== */ - -const SECRET_DEFAULT_SIZE: usize = 192; -const SECRET_SIZE_MIN: usize = 136; - -const SECRET: Secret = Secret([ - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, - 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 
0x7a, 0xd0, 0x31, 0xce, - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, -]); - -#[repr(align(64))] -#[derive(Clone)] -struct Secret([u8; SECRET_DEFAULT_SIZE]); - -const_assert_eq!(mem::size_of::() % 16, 0); - -impl Default for Secret { - #[inline(always)] - fn default() -> Self { - SECRET - } -} - -impl Deref for Secret { - type Target = [u8]; - - #[inline(always)] - fn deref(&self) -> &Self::Target { - &self.0[..] - } -} - -cfg_if! { - if #[cfg(feature = "serialize")] { - impl Serialize for Secret { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serializer.serialize_bytes(self) - } - } - - impl<'de> Deserialize<'de> for Secret { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - deserializer.deserialize_bytes(SecretVisitor) - } - } - - struct SecretVisitor; - - impl<'de> serde::de::Visitor<'de> for SecretVisitor { - type Value = Secret; - - fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result { - formatter.write_str("secret with a bytes array") - } - - fn visit_bytes(self, v: &[u8]) -> Result - where - E: serde::de::Error, - { - if v.len() == SECRET_DEFAULT_SIZE { - let mut secret = [0; SECRET_DEFAULT_SIZE]; - - secret.copy_from_slice(v); - - Ok(Secret(secret)) - } else { - Err(E::custom("incomplete secret data")) - } - } - } - } -} - -impl Secret { - #[inline(always)] - pub fn with_seed(seed: u64) -> Self { - let mut secret = [0; SECRET_DEFAULT_SIZE]; - - for off in (0..SECRET_DEFAULT_SIZE).step_by(16) { - secret[off..].write_u64_le(SECRET[off..].read_u64_le().wrapping_add(seed)); - secret[off + 8..].write_u64_le(SECRET[off + 8..].read_u64_le().wrapping_sub(seed)); - } - - Secret(secret) - } -} - -cfg_if! 
{ - if #[cfg(target_feature = "avx2")] { - #[repr(align(32))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } else if #[cfg(target_feature = "sse2")] { - #[repr(align(16))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } else { - #[repr(align(8))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } -} - -const ACC_SIZE: usize = mem::size_of::(); - -const_assert_eq!(ACC_SIZE, 64); - -impl Default for Acc { - #[inline(always)] - fn default() -> Self { - Acc([ - u64::from(PRIME32_3), - PRIME64_1, - PRIME64_2, - PRIME64_3, - PRIME64_4, - u64::from(PRIME32_2), - PRIME64_5, - u64::from(PRIME32_1), - ]) - } -} - -impl Deref for Acc { - type Target = [u64]; - - #[inline(always)] - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for Acc { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -trait Buf { - fn read_u32_le(&self) -> u32; - - fn read_u64_le(&self) -> u64; -} - -trait BufMut { - fn write_u32_le(&mut self, n: u32); - - fn write_u64_le(&mut self, n: u64); -} - -impl Buf for [u8] { - #[inline(always)] - fn read_u32_le(&self) -> u32 { - let buf = &self[..mem::size_of::()]; - u32::from_le_bytes(buf.try_into().unwrap()) - } - - #[inline(always)] - fn read_u64_le(&self) -> u64 { - let buf = &self[..mem::size_of::()]; - u64::from_le_bytes(buf.try_into().unwrap()) - } -} - -impl BufMut for [u8] { - #[inline(always)] - fn write_u32_le(&mut self, n: u32) { - self[..mem::size_of::()].copy_from_slice(&n.to_le_bytes()[..]); - } - - #[inline(always)] - fn write_u64_le(&mut self, n: u64) { - self[..mem::size_of::()].copy_from_slice(&n.to_le_bytes()[..]); - } -} - -/* ========================================== - * Short keys - * ========================================== */ - -#[inline(always)] -fn 
hash_len_0to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { - debug_assert!(len <= 16); - - if len > 8 { - hash_len_9to16_64bits(data, len, key, seed) - } else if len >= 4 { - hash_len_4to8_64bits(data, len, key, seed) - } else if len > 0 { - hash_len_1to3_64bits(data, len, key, seed) - } else { - 0 - } -} - -#[inline(always)] -fn hash_len_9to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { - debug_assert!((9..=16).contains(&len)); - - let ll1 = data.read_u64_le() ^ key.read_u64_le().wrapping_add(seed); - let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed); - let acc = (len as u64) - .wrapping_add(ll1) - .wrapping_add(ll2) - .wrapping_add(mul128_fold64(ll1, ll2)); - - avalanche(acc) -} - -#[inline(always)] -fn hash_len_4to8_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { - debug_assert!((4..=8).contains(&len)); - - let in1 = u64::from(data.read_u32_le()); - let in2 = u64::from(data[len - 4..].read_u32_le()); - let in64 = in1.wrapping_add(in2 << 32); - let keyed = in64 ^ key.read_u64_le().wrapping_add(seed); - let mix64 = - (len as u64).wrapping_add((keyed ^ (keyed >> 51)).wrapping_mul(u64::from(PRIME32_1))); - - avalanche((mix64 ^ (mix64 >> 47)).wrapping_mul(PRIME64_2)) -} - -#[inline(always)] -fn hash_len_1to3_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { - debug_assert!((1..=3).contains(&len)); - - let c1 = u32::from(data[0]); - let c2 = u32::from(data[len >> 1]); - let c3 = u32::from(data[len - 1]); - let combined = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24); - let keyed = u64::from(combined) ^ u64::from(key.read_u32_le()).wrapping_add(seed); - let mixed = keyed.wrapping_mul(PRIME64_1); - - avalanche(mixed) -} - -#[inline(always)] -fn hash_len_17to128_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { - debug_assert!((17..=128).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let mut acc = PRIME64_1.wrapping_mul(len 
as u64); - - if len > 32 { - if len > 64 { - if len > 96 { - acc = acc - .wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed)) - .wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed)); - } - acc = acc - .wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed)) - .wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed)); - } - - acc = acc - .wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed)) - .wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed)); - } - - acc = acc - .wrapping_add(mix_16bytes(data, secret, seed)) - .wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed)); - - avalanche(acc) -} - -const MIDSIZE_MAX: usize = 240; -const MIDSIZE_STARTOFFSET: usize = 3; -const MIDSIZE_LASTOFFSET: usize = 17; - -#[inline(always)] -fn hash_len_129to240_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { - debug_assert!((129..=MIDSIZE_MAX).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let acc = (len as u64).wrapping_mul(PRIME64_1); - let acc = (0..8).fold(acc, |acc, i| { - acc.wrapping_add(mix_16bytes(&data[16 * i..], &secret[16 * i..], seed)) - }); - let acc = avalanche(acc); - - let nb_rounds = len / 16; - debug_assert!(nb_rounds >= 8); - - let acc = (8..nb_rounds).fold(acc, |acc, i| { - acc.wrapping_add(mix_16bytes( - &data[16 * i..], - &secret[16 * (i - 8) + MIDSIZE_STARTOFFSET..], - seed, - )) - }); - - avalanche(acc.wrapping_add(mix_16bytes( - &data[len - 16..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..], - seed, - ))) -} - -/* ========================================== - * Long keys - * ========================================== */ - -const STRIPE_LEN: usize = 64; -const SECRET_CONSUME_RATE: usize = 8; // nb of secret bytes consumed at each accumulation -const SECRET_MERGEACCS_START: usize = 11; // do not align on 8, so that secret is different from accumulator -const SECRET_LASTACC_START: usize = 7; // do not align on 8, so that secret is different from 
scrambler -const ACC_NB: usize = STRIPE_LEN / mem::size_of::(); - -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) enum AccWidth { - Acc64Bits, - Acc128Bits, -} - -#[inline(always)] -fn hash_long_64bits_with_default_secret(data: &[u8], len: usize) -> u64 { - hash_long_internal(data, len, &SECRET) -} - -#[inline(always)] -fn hash_long_64bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u64 { - hash_long_internal(data, len, secret) -} - -/// Generate a custom key, based on alteration of default kSecret with the seed, -/// and then use this key for long mode hashing. -/// -/// This operation is decently fast but nonetheless costs a little bit of time. -/// Try to avoid it whenever possible (typically when `seed.is_none()`). -#[inline(always)] -fn hash_long_64bits_with_seed(data: &[u8], len: usize, seed: u64) -> u64 { - if seed == 0 { - hash_long_64bits_with_default_secret(data, len) - } else { - let secret = Secret::with_seed(seed); - - hash_long_internal(data, len, &secret) - } -} - -#[inline(always)] -fn hash_long_internal(data: &[u8], len: usize, secret: &[u8]) -> u64 { - let mut acc = Acc::default(); - - hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc64Bits); - - merge_accs( - &acc, - &secret[SECRET_MERGEACCS_START..], - (len as u64).wrapping_mul(PRIME64_1), - ) -} - -#[inline(always)] -fn hash_long_internal_loop( - acc: &mut [u64], - data: &[u8], - len: usize, - secret: &[u8], - acc_width: AccWidth, -) { - let secret_len = secret.len(); - let nb_rounds = (secret_len - STRIPE_LEN) / SECRET_CONSUME_RATE; - let block_len = STRIPE_LEN * nb_rounds; - - debug_assert!(secret_len >= SECRET_SIZE_MIN); - - let mut chunks = data.chunks_exact(block_len); - - for chunk in &mut chunks { - accumulate(acc, chunk, secret, nb_rounds, acc_width); - unsafe { - scramble_acc(acc, &secret[secret_len - STRIPE_LEN..]); - } - } - - /* last partial block */ - debug_assert!(len > STRIPE_LEN); - - let nb_stripes = (len % block_len) / STRIPE_LEN; - - 
debug_assert!(nb_stripes < (secret_len / SECRET_CONSUME_RATE)); - - accumulate(acc, chunks.remainder(), secret, nb_stripes, acc_width); - - /* last stripe */ - if (len & (STRIPE_LEN - 1)) != 0 { - unsafe { - accumulate512( - acc, - &data[len - STRIPE_LEN..], - &secret[secret_len - STRIPE_LEN - SECRET_LASTACC_START..], - acc_width, - ); - } - } -} - -#[inline(always)] -fn accumulate(acc: &mut [u64], data: &[u8], secret: &[u8], nb_stripes: usize, acc_width: AccWidth) { - for n in 0..nb_stripes { - unsafe { - accumulate512( - acc, - &data[n * STRIPE_LEN..], - &secret[n * SECRET_CONSUME_RATE..], - acc_width, - ); - } - } -} - -#[inline(always)] -const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 { - ((z << 6) | (y << 4) | (x << 2) | w) as i32 -} - -#[cfg(target_feature = "avx2")] -mod avx2 { - use super::*; - - #[target_feature(enable = "avx2")] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - keys: &[u8], - acc_width: AccWidth, - ) { - let xacc = acc.as_mut_ptr() as *mut __m256i; - let xdata = data.as_ptr() as *const __m256i; - let xkey = keys.as_ptr() as *const __m256i; - - for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() { - let d = _mm256_loadu_si256(xdata.add(i)); - let k = _mm256_loadu_si256(xkey.add(i)); - let dk = _mm256_xor_si256(d, k); // uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} - let mul = _mm256_mul_epu32(dk, _mm256_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} - - xacc.add(i).write(if acc_width == AccWidth::Acc128Bits { - let dswap = _mm256_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2)); - let add = _mm256_add_epi64(xacc.add(i).read(), dswap); - _mm256_add_epi64(mul, add) - } else { - let add = _mm256_add_epi64(xacc.add(i).read(), d); - _mm256_add_epi64(mul, add) - }) - } - } - - #[target_feature(enable = "avx2")] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { - let xacc = acc.as_mut_ptr() as *mut __m256i; - let xkey = key.as_ptr() as *const __m256i; - let prime32 = 
_mm256_set1_epi32(PRIME32_1 as i32); - - for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() { - let data = xacc.add(i).read(); - let shifted = _mm256_srli_epi64(data, 47); - let data = _mm256_xor_si256(data, shifted); - - let k = _mm256_loadu_si256(xkey.add(i)); - let dk = _mm256_xor_si256(data, k); /* U32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ - let dk1 = _mm256_mul_epu32(dk, prime32); - - let d2 = _mm256_shuffle_epi32(dk, 0x31); - let dk2 = _mm256_mul_epu32(d2, prime32); - let dk2h = _mm256_slli_epi64(dk2, 32); - - xacc.add(i).write(_mm256_add_epi64(dk1, dk2h)); - } - } -} - -#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))] -mod sse2 { - use super::*; - - #[target_feature(enable = "sse2")] - #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - keys: &[u8], - acc_width: AccWidth, - ) { - let xacc = acc.as_mut_ptr() as *mut __m128i; - let xdata = data.as_ptr() as *const __m128i; - let xkey = keys.as_ptr() as *const __m128i; - - for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() { - let d = _mm_loadu_si128(xdata.add(i)); - let k = _mm_loadu_si128(xkey.add(i)); - let dk = _mm_xor_si128(d, k); // uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ - let mul = _mm_mul_epu32(dk, _mm_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} */ - xacc.add(i).write(if acc_width == AccWidth::Acc128Bits { - let dswap = _mm_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2)); - let add = _mm_add_epi64(xacc.add(i).read(), dswap); - _mm_add_epi64(mul, add) - } else { - let add = _mm_add_epi64(xacc.add(i).read(), d); - _mm_add_epi64(mul, add) - }) - } - } - - #[target_feature(enable = "sse2")] - #[allow(clippy::cast_ptr_alignment)] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { - let xacc = acc.as_mut_ptr() as *mut __m128i; - let xkey = key.as_ptr() as *const __m128i; - let prime32 = _mm_set1_epi32(PRIME32_1 as i32); - - for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() { - let data = 
xacc.add(i).read(); - let shifted = _mm_srli_epi64(data, 47); - let data = _mm_xor_si128(data, shifted); - - let k = _mm_loadu_si128(xkey.add(i)); - let dk = _mm_xor_si128(data, k); - - let dk1 = _mm_mul_epu32(dk, prime32); - - let d2 = _mm_shuffle_epi32(dk, 0x31); - let dk2 = _mm_mul_epu32(d2, prime32); - let dk2h = _mm_slli_epi64(dk2, 32); - - xacc.add(i).write(_mm_add_epi64(dk1, dk2h)); - } - } -} - -#[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))] -mod generic { - use super::*; - - #[inline(always)] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - key: &[u8], - acc_width: AccWidth, - ) { - for i in (0..ACC_NB).step_by(2) { - let in1 = data[8 * i..].read_u64_le(); - let in2 = data[8 * (i + 1)..].read_u64_le(); - let key1 = key[8 * i..].read_u64_le(); - let key2 = key[8 * (i + 1)..].read_u64_le(); - let data_key1 = key1 ^ in1; - let data_key2 = key2 ^ in2; - acc[i] = acc[i].wrapping_add(mul32_to64(data_key1, data_key1 >> 32)); - acc[i + 1] = acc[i + 1].wrapping_add(mul32_to64(data_key2, data_key2 >> 32)); - - if acc_width == AccWidth::Acc128Bits { - acc[i] = acc[i].wrapping_add(in2); - acc[i + 1] = acc[i + 1].wrapping_add(in1); - } else { - acc[i] = acc[i].wrapping_add(in1); - acc[i + 1] = acc[i + 1].wrapping_add(in2); - } - } - } - - #[inline(always)] - fn mul32_to64(a: u64, b: u64) -> u64 { - (a & 0xFFFFFFFF).wrapping_mul(b & 0xFFFFFFFF) - } - - #[inline(always)] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { - for i in 0..ACC_NB { - let key64 = key[8 * i..].read_u64_le(); - let mut acc64 = acc[i]; - acc64 ^= acc64 >> 47; - acc64 ^= key64; - acc64 = acc64.wrapping_mul(u64::from(PRIME32_1)); - acc[i] = acc64; - } - } -} - -cfg_if! 
{ - if #[cfg(target_feature = "avx2")] { - use avx2::{accumulate512, scramble_acc}; - } else if #[cfg(target_feature = "sse2")] { - use sse2::{accumulate512, scramble_acc}; - } else { - use generic::{accumulate512, scramble_acc}; - } -} - -#[inline(always)] -fn merge_accs(acc: &[u64], secret: &[u8], start: u64) -> u64 { - avalanche( - start - .wrapping_add(mix2accs(acc, secret)) - .wrapping_add(mix2accs(&acc[2..], &secret[16..])) - .wrapping_add(mix2accs(&acc[4..], &secret[32..])) - .wrapping_add(mix2accs(&acc[6..], &secret[48..])), - ) -} - -#[inline(always)] -fn mix2accs(acc: &[u64], secret: &[u8]) -> u64 { - mul128_fold64( - acc[0] ^ secret.read_u64_le(), - acc[1] ^ secret[8..].read_u64_le(), - ) -} - -#[inline(always)] -fn mix_16bytes(data: &[u8], key: &[u8], seed: u64) -> u64 { - let ll1 = data.read_u64_le(); - let ll2 = data[8..].read_u64_le(); - - mul128_fold64( - ll1 ^ key.read_u64_le().wrapping_add(seed), - ll2 ^ key[8..].read_u64_le().wrapping_sub(seed), - ) -} - -#[inline(always)] -fn mul128_fold64(ll1: u64, ll2: u64) -> u64 { - let lll = u128::from(ll1).wrapping_mul(u128::from(ll2)); - - (lll as u64) ^ ((lll >> 64) as u64) -} - -#[inline(always)] -fn avalanche(mut h64: u64) -> u64 { - h64 ^= h64 >> 37; - h64 = h64.wrapping_mul(PRIME64_3); - h64 ^ (h64 >> 32) -} - -/* === XXH3 streaming === */ - -const INTERNAL_BUFFER_SIZE: usize = 256; -const INTERNAL_BUFFER_STRIPES: usize = INTERNAL_BUFFER_SIZE / STRIPE_LEN; - -const_assert!(INTERNAL_BUFFER_SIZE >= MIDSIZE_MAX); -const_assert_eq!(INTERNAL_BUFFER_SIZE % STRIPE_LEN, 0); - -#[repr(align(64))] -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Clone)] -struct State { - acc: Acc, - secret: With, - buf: Vec, - seed: u64, - total_len: usize, - nb_stripes_so_far: usize, -} - -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Clone)] -enum With { - Default(Secret), - Custom(Secret), - Ref(Vec), -} - -impl Deref for With { - type Target = [u8]; - - fn 
deref(&self) -> &Self::Target { - match self { - With::Default(secret) | With::Custom(secret) => &secret.0[..], - With::Ref(secret) => secret, - } - } -} - -impl Default for State { - fn default() -> Self { - Self::new(0, With::Default(Secret::default())) - } -} - -impl State { - fn new(seed: u64, secret: With) -> Self { - State { - acc: Acc::default(), - secret, - buf: Vec::with_capacity(INTERNAL_BUFFER_SIZE), - seed, - total_len: 0, - nb_stripes_so_far: 0, - } - } - - fn with_seed(seed: u64) -> Self { - Self::new(seed, With::Custom(Secret::with_seed(seed))) - } - - fn with_secret>>(secret: S) -> State { - let secret = secret.into(); - - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - Self::new(0, With::Ref(secret)) - } - - #[inline(always)] - fn secret_limit(&self) -> usize { - self.secret.len() - STRIPE_LEN - } - - #[inline(always)] - fn nb_stripes_per_block(&self) -> usize { - self.secret_limit() / SECRET_CONSUME_RATE - } - - #[inline(always)] - fn update(&mut self, mut input: &[u8], acc_width: AccWidth) { - let len = input.len(); - - if len == 0 { - return; - } - - self.total_len += len; - - if self.buf.len() + len <= self.buf.capacity() { - self.buf.extend_from_slice(input); - return; - } - - let nb_stripes_per_block = self.nb_stripes_per_block(); - let secret_limit = self.secret_limit(); - - if !self.buf.is_empty() { - // some data within internal buffer: fill then consume it - let (load, rest) = input.split_at(self.buf.capacity() - self.buf.len()); - self.buf.extend_from_slice(load); - input = rest; - self.nb_stripes_so_far = consume_stripes( - &mut self.acc, - self.nb_stripes_so_far, - nb_stripes_per_block, - &self.buf, - INTERNAL_BUFFER_STRIPES, - &self.secret, - secret_limit, - acc_width, - ); - self.buf.clear(); - } - - // consume input by full buffer quantities - let mut chunks = input.chunks_exact(INTERNAL_BUFFER_SIZE); - - for chunk in &mut chunks { - self.nb_stripes_so_far = consume_stripes( - &mut self.acc, - self.nb_stripes_so_far, - 
nb_stripes_per_block, - chunk, - INTERNAL_BUFFER_STRIPES, - &self.secret, - secret_limit, - acc_width, - ); - } - - // some remaining input data : buffer it - self.buf.extend_from_slice(chunks.remainder()) - } - - #[inline(always)] - fn digest_long(&self, acc_width: AccWidth) -> Acc { - let mut acc = self.acc.clone(); - let secret_limit = self.secret_limit(); - - if self.buf.len() >= STRIPE_LEN { - // digest locally, state remains unaltered, and can continue ingesting more data afterwards - let total_nb_stripes = self.buf.len() / STRIPE_LEN; - let _nb_stripes_so_far = consume_stripes( - &mut acc, - self.nb_stripes_so_far, - self.nb_stripes_per_block(), - &self.buf, - total_nb_stripes, - &self.secret, - secret_limit, - acc_width, - ); - if (self.buf.len() % STRIPE_LEN) != 0 { - unsafe { - accumulate512( - &mut acc, - &self.buf[self.buf.len() - STRIPE_LEN..], - &self.secret[secret_limit - SECRET_LASTACC_START..], - acc_width, - ); - } - } - } else if !self.buf.is_empty() { - // one last stripe - let mut last_stripe = [0u8; STRIPE_LEN]; - let catchup_size = STRIPE_LEN - self.buf.len(); - - last_stripe[..catchup_size].copy_from_slice(unsafe { - slice::from_raw_parts( - self.buf.as_ptr().add(self.buf.capacity() - catchup_size), - catchup_size, - ) - }); - last_stripe[catchup_size..].copy_from_slice(&self.buf); - - unsafe { - accumulate512( - &mut acc, - &last_stripe[..], - &self.secret[secret_limit - SECRET_LASTACC_START..], - acc_width, - ); - } - } - - acc - } - - #[inline(always)] - fn digest64(&self) -> u64 { - if self.total_len > MIDSIZE_MAX { - let acc = self.digest_long(AccWidth::Acc64Bits); - - merge_accs( - &acc, - &self.secret[SECRET_MERGEACCS_START..], - (self.total_len as u64).wrapping_mul(PRIME64_1), - ) - } else if self.seed != 0 { - hash64_with_seed(&self.buf, self.seed) - } else { - hash64_with_secret(&self.buf, &self.secret[..self.secret_limit() + STRIPE_LEN]) - } - } - - #[inline(always)] - fn digest128(&self) -> u128 { - let secret_limit = 
self.secret_limit(); - - if self.total_len > MIDSIZE_MAX { - let acc = self.digest_long(AccWidth::Acc128Bits); - - debug_assert!(secret_limit + STRIPE_LEN >= ACC_SIZE + SECRET_MERGEACCS_START); - - let total_len = self.total_len as u64; - - let low64 = merge_accs( - &acc, - &self.secret[SECRET_MERGEACCS_START..], - total_len.wrapping_mul(PRIME64_1), - ); - let high64 = merge_accs( - &acc, - &self.secret[secret_limit + STRIPE_LEN - ACC_SIZE - SECRET_MERGEACCS_START..], - !total_len.wrapping_mul(PRIME64_2), - ); - - u128::from(low64) + (u128::from(high64) << 64) - } else if self.seed != 0 { - hash128_with_seed(&self.buf, self.seed) - } else { - hash128_with_secret(&self.buf, &self.secret[..secret_limit + STRIPE_LEN]) - } - } -} - -#[inline(always)] -#[allow(clippy::too_many_arguments)] -fn consume_stripes( - acc: &mut [u64], - nb_stripes_so_far: usize, - nb_stripes_per_block: usize, - data: &[u8], - total_stripes: usize, - secret: &[u8], - secret_limit: usize, - acc_width: AccWidth, -) -> usize { - debug_assert!(nb_stripes_so_far < nb_stripes_per_block); - - if nb_stripes_per_block - nb_stripes_so_far <= total_stripes { - let nb_stripes = nb_stripes_per_block - nb_stripes_so_far; - - accumulate( - acc, - data, - &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..], - nb_stripes, - acc_width, - ); - unsafe { - scramble_acc(acc, &secret[secret_limit..]); - } - accumulate( - acc, - &data[nb_stripes * STRIPE_LEN..], - secret, - total_stripes - nb_stripes, - acc_width, - ); - - total_stripes - nb_stripes - } else { - accumulate( - acc, - data, - &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..], - total_stripes, - acc_width, - ); - - nb_stripes_so_far + total_stripes - } -} - -/* ========================================== - * XXH3 128 bits (=> XXH128) - * ========================================== */ - -#[inline(always)] -fn hash_len_0to16_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { - debug_assert!(len <= 16); - - if len > 8 { - 
hash_len_9to16_128bits(data, len, secret, seed) - } else if len >= 4 { - hash_len_4to8_128bits(data, len, secret, seed) - } else if len > 0 { - hash_len_1to3_128bits(data, len, secret, seed) - } else { - 0 - } -} - -#[inline(always)] -fn hash_len_1to3_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { - debug_assert!((1..=3).contains(&len)); - - let c1 = u32::from(data[0]); - let c2 = u32::from(data[len >> 1]); - let c3 = u32::from(data[len - 1]); - let combinedl = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24); - let combinedh = combinedl.swap_bytes(); - let keyedl = u64::from(combinedl) ^ u64::from(key.read_u32_le()).wrapping_add(seed); - let keyedh = u64::from(combinedh) ^ u64::from(key[4..].read_u32_le()).wrapping_sub(seed); - let mixedl = keyedl.wrapping_mul(PRIME64_1); - let mixedh = keyedh.wrapping_mul(PRIME64_2); - - u128::from(avalanche(mixedl)) + (u128::from(avalanche(mixedh)) << 64) -} - -#[inline(always)] -fn hash_len_4to8_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { - debug_assert!((4..=8).contains(&len)); - - let in1 = u64::from(data.read_u32_le()); - let in2 = u64::from(data[len - 4..].read_u32_le()); - let in64l = in1.wrapping_add(in2 << 32); - let in64h = in64l.swap_bytes(); - let keyedl = in64l ^ key.read_u64_le().wrapping_add(seed); - let keyedh = in64h ^ key[8..].read_u64_le().wrapping_sub(seed); - let mix64l1 = - (len as u64).wrapping_add((keyedl ^ (keyedl >> 51)).wrapping_mul(u64::from(PRIME32_1))); - let mix64l2 = (mix64l1 ^ (mix64l1 >> 47)).wrapping_mul(PRIME64_2); - let mix64h1 = (keyedh ^ (keyedh >> 47)) - .wrapping_mul(PRIME64_1) - .wrapping_sub(len as u64); - let mix64h2 = (mix64h1 ^ (mix64h1 >> 43)).wrapping_mul(PRIME64_4); - - u128::from(avalanche(mix64l2)) + (u128::from(avalanche(mix64h2)) << 64) -} - -#[inline(always)] -fn hash_len_9to16_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { - debug_assert!((9..=16).contains(&len)); - - let ll1 = data.read_u64_le() ^ 
key.read_u64_le().wrapping_add(seed); - let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed); - let inlow = ll1 ^ ll2; - - let m128 = u128::from(inlow).wrapping_mul(u128::from(PRIME64_1)); - let high64 = ((m128 >> 64) as u64).wrapping_add(ll2.wrapping_mul(PRIME64_1)); - let low64 = (m128 as u64) ^ (high64 >> 32); - - let h128 = u128::from(low64).wrapping_mul(u128::from(PRIME64_2)); - let high64 = ((h128 >> 64) as u64).wrapping_add(high64.wrapping_mul(PRIME64_2)); - let low64 = h128 as u64; - - u128::from(avalanche(low64)) + (u128::from(avalanche(high64)) << 64) -} - -#[inline(always)] -fn hash_len_17to128_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { - debug_assert!((17..=128).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let mut acc1 = PRIME64_1.wrapping_mul(len as u64); - let mut acc2 = 0u64; - - if len > 32 { - if len > 64 { - if len > 96 { - acc1 = acc1.wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed)); - } - acc1 = acc1.wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed)); - } - - acc1 = acc1.wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed)); - } - - acc1 = acc1.wrapping_add(mix_16bytes(data, secret, seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed)); - - let low64 = acc1.wrapping_add(acc2); - let high64 = acc1 - .wrapping_mul(PRIME64_1) - .wrapping_add(acc2.wrapping_mul(PRIME64_4)) - .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2)); - - u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64) -} - -#[inline(always)] -fn hash_len_129to240_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { - 
debug_assert!((129..=MIDSIZE_MAX).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let acc1 = (len as u64).wrapping_mul(PRIME64_1); - let acc2 = 0u64; - - let (acc1, acc2) = (0..4).fold((acc1, acc2), |(acc1, acc2), i| { - ( - acc1.wrapping_add(mix_16bytes(&data[32 * i..], &secret[32 * i..], seed)), - acc2.wrapping_add(mix_16bytes( - &data[32 * i + 16..], - &secret[32 * i + 16..], - 0u64.wrapping_sub(seed), - )), - ) - }); - let acc1 = avalanche(acc1); - let acc2 = avalanche(acc2); - - let nb_rounds = len / 32; - debug_assert!(nb_rounds >= 4); - - let (acc1, acc2) = (4..nb_rounds).fold((acc1, acc2), |(acc1, acc2), i| { - ( - acc1.wrapping_add(mix_16bytes( - &data[32 * i..], - &secret[32 * (i - 4) + MIDSIZE_STARTOFFSET..], - seed, - )), - acc2.wrapping_add(mix_16bytes( - &data[32 * i + 16..], - &secret[32 * (i - 4) + 16 + MIDSIZE_STARTOFFSET..], - 0u64.wrapping_sub(seed), - )), - ) - }); - - // last bytes - let acc1 = acc1.wrapping_add(mix_16bytes( - &data[len - 16..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..], - seed, - )); - let acc2 = acc2.wrapping_add(mix_16bytes( - &data[len - 32..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET - 16..], - 0u64.wrapping_sub(seed), - )); - - let low64 = acc1.wrapping_add(acc2); - let high64 = acc1 - .wrapping_mul(PRIME64_1) - .wrapping_add(acc2.wrapping_mul(PRIME64_4)) - .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2)); - - u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64) -} - -#[inline] -fn hash_long_128bits_with_default_secret(data: &[u8], len: usize) -> u128 { - hash_long_128bits_internal(data, len, &SECRET) -} - -#[inline] -fn hash_long_128bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u128 { - hash_long_128bits_internal(data, len, secret) -} - -#[inline] -fn hash_long_128bits_with_seed(data: &[u8], len: usize, seed: u64) -> u128 { - if seed == 0 { - hash_long_128bits_with_default_secret(data, len) - } else { - let secret 
= Secret::with_seed(seed); - - hash_long_128bits_internal(data, len, &secret) - } -} - -#[inline(always)] -fn hash_long_128bits_internal(data: &[u8], len: usize, secret: &[u8]) -> u128 { - let mut acc = Acc::default(); - - hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc128Bits); - - debug_assert!(secret.len() >= acc.len() + SECRET_MERGEACCS_START); - - let low64 = merge_accs( - &acc, - &secret[SECRET_MERGEACCS_START..], - (len as u64).wrapping_mul(PRIME64_1), - ); - let high64 = merge_accs( - &acc, - &secret[secret.len() - ACC_SIZE - SECRET_MERGEACCS_START..], - !(len as u64).wrapping_mul(PRIME64_2), - ); - - u128::from(low64) + (u128::from(high64) << 64) -} - -/* === XXH3 128-bit streaming === */ - -/* all the functions are actually the same as for 64-bit streaming variant, -just the reset one is different (different initial acc values for 0,5,6,7), -and near the end of the digest function */ - -#[cfg(test)] -mod tests { - use alloc::vec; - - use super::*; - - const PRIME: u64 = 2654435761; - const PRIME64: u64 = 11400714785074694797; - const SANITY_BUFFER_SIZE: usize = 2243; - - fn sanity_buffer() -> [u8; SANITY_BUFFER_SIZE] { - let mut buf = [0; SANITY_BUFFER_SIZE]; - let mut byte_gen: u64 = PRIME; - - for b in buf.iter_mut() { - *b = (byte_gen >> 56) as u8; - byte_gen = byte_gen.wrapping_mul(PRIME64); - } - - buf - } - - #[test] - fn hash_64bits_sanity_check() { - let buf = sanity_buffer(); - - let test_cases = vec![ - (&[][..], 0, 0), /* zero-length hash is always 0 */ - (&[][..], PRIME64, 0), - (&buf[..1], 0, 0x7198D737CFE7F386), /* 1 - 3 */ - (&buf[..1], PRIME64, 0xB70252DB7161C2BD), /* 1 - 3 */ - (&buf[..6], 0, 0x22CBF5F3E1F6257C), /* 4 - 8 */ - (&buf[..6], PRIME64, 0x6398631C12AB94CE), /* 4 - 8 */ - (&buf[..12], 0, 0xD5361CCEEBB5A0CC), /* 9 - 16 */ - (&buf[..12], PRIME64, 0xC4C125E75A808C3D), /* 9 - 16 */ - (&buf[..24], 0, 0x46796F3F78B20F6B), /* 17 - 32 */ - (&buf[..24], PRIME64, 0x60171A7CD0A44C10), /* 17 - 32 */ - (&buf[..48], 0, 
0xD8D4D3590D136E11), /* 33 - 64 */ - (&buf[..48], PRIME64, 0x05441F2AEC2A1296), /* 33 - 64 */ - (&buf[..80], 0, 0xA1DC8ADB3145B86A), /* 65 - 96 */ - (&buf[..80], PRIME64, 0xC9D55256965B7093), /* 65 - 96 */ - (&buf[..112], 0, 0xE43E5717A61D3759), /* 97 -128 */ - (&buf[..112], PRIME64, 0x5A5F89A3FECE44A5), /* 97 -128 */ - (&buf[..195], 0, 0x6F747739CBAC22A5), /* 129-240 */ - (&buf[..195], PRIME64, 0x33368E23C7F95810), /* 129-240 */ - (&buf[..403], 0, 0x4834389B15D981E8), /* one block, last stripe is overlapping */ - (&buf[..403], PRIME64, 0x85CE5DFFC7B07C87), /* one block, last stripe is overlapping */ - (&buf[..512], 0, 0x6A1B982631F059A8), /* one block, finishing at stripe boundary */ - (&buf[..512], PRIME64, 0x10086868CF0ADC99), /* one block, finishing at stripe boundary */ - (&buf[..2048], 0, 0xEFEFD4449323CDD4), /* 2 blocks, finishing at block boundary */ - (&buf[..2048], PRIME64, 0x01C85E405ECA3F6E), /* 2 blocks, finishing at block boundary */ - (&buf[..2240], 0, 0x998C0437486672C7), /* 3 blocks, finishing at stripe boundary */ - (&buf[..2240], PRIME64, 0x4ED38056B87ABC7F), /* 3 blocks, finishing at stripe boundary */ - (&buf[..2243], 0, 0xA559D20581D742D3), /* 3 blocks, last stripe is overlapping */ - (&buf[..2243], PRIME64, 0x96E051AB57F21FC8), /* 3 blocks, last stripe is overlapping */ - ]; - - for (buf, seed, result) in test_cases { - { - let hash = hash64_with_seed(buf, seed); - - assert_eq!( - hash, - result, - "hash64_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - // streaming API test - - // single ingestio - { - let mut hasher = Hash64::with_seed(seed); - hasher.write(buf); - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - if buf.len() > 3 { - // 2 ingestions - let mut hasher = Hash64::with_seed(seed); - hasher.write(&buf[..3]); - 
hasher.write(&buf[3..]); - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - // byte by byte ingestion - { - let mut hasher = Hash64::with_seed(seed); - - for chunk in buf.chunks(1) { - hasher.write(chunk); - } - - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - } - } - - #[test] - fn hash_64bits_with_secret_sanity_check() { - let buf = sanity_buffer(); - let secret = &buf[7..7 + SECRET_SIZE_MIN + 11]; - - let test_cases = vec![ - (&[][..], secret, 0), /* zero-length hash is always 0 */ - (&buf[..1], secret, 0x7F69735D618DB3F0), /* 1 - 3 */ - (&buf[..6], secret, 0xBFCC7CB1B3554DCE), /* 6 - 8 */ - (&buf[..12], secret, 0x8C50DC90AC9206FC), /* 9 - 16 */ - (&buf[..24], secret, 0x1CD2C2EE9B9A0928), /* 17 - 32 */ - (&buf[..48], secret, 0xA785256D9D65D514), /* 33 - 64 */ - (&buf[..80], secret, 0x6F3053360D21BBB7), /* 65 - 96 */ - (&buf[..112], secret, 0x560E82D25684154C), /* 97 -128 */ - (&buf[..195], secret, 0xBA5BDDBC5A767B11), /* 129-240 */ - (&buf[..403], secret, 0xFC3911BBA656DB58), /* one block, last stripe is overlapping */ - (&buf[..512], secret, 0x306137DD875741F1), /* one block, finishing at stripe boundary */ - (&buf[..2048], secret, 0x2836B83880AD3C0C), /* > one block, at least one scrambling */ - (&buf[..2243], secret, 0x3446E248A00CB44A), /* > one block, at least one scrambling, last stripe unaligned */ - ]; - - for (buf, secret, result) in test_cases { - { - let hash = hash64_with_secret(buf, secret); - - assert_eq!( - hash, - result, - "hash64_with_secret(&buf[..{}], secret) failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - hash, - result - ); - } - - // streaming API test - - // single ingestio - { - let mut hasher = Hash64::with_secret(secret); - 
hasher.write(buf); - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}]) with secret failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - hash, - result - ); - } - - // byte by byte ingestion - { - let mut hasher = Hash64::with_secret(secret); - - for chunk in buf.chunks(1) { - hasher.write(chunk); - } - - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}].chunks(1)) with secret failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - hash, - result - ); - } - } - } - - #[test] - fn hash_128bits_sanity_check() { - let buf = sanity_buffer(); - - let test_cases = vec![ - (&[][..], 0, 0u64, 0u64), /* zero-length hash is { seed, -seed } by default */ - (&[][..], PRIME, 0, 0), - (&buf[..1], 0, 0x7198D737CFE7F386, 0x3EE70EA338F3F1E8), /* 1-3 */ - (&buf[..1], PRIME, 0x8E05996EC27C0F46, 0x90DFC659A8BDCC0C), /* 1-3 */ - (&buf[..6], 0, 0x22CBF5F3E1F6257C, 0xD4E6C2B94FFC3BFA), /* 4-8 */ - (&buf[..6], PRIME, 0x97B28D3079F8541F, 0xEFC0B954298E6555), /* 4-8 */ - (&buf[..12], 0, 0x0E0CD01F05AC2F0D, 0x2B55C95951070D4B), /* 9-16 */ - (&buf[..12], PRIME, 0xA9DE561CA04CDF37, 0x609E31FDC00A43C9), /* 9-16 */ - (&buf[..24], 0, 0x46796F3F78B20F6B, 0x58FF55C3926C13FA), /* 17-32 */ - (&buf[..24], PRIME, 0x30D5C4E9EB415C55, 0x8868344B3A4645D0), /* 17-32 */ - (&buf[..48], 0, 0xD8D4D3590D136E11, 0x5527A42843020A62), /* 33-64 */ - (&buf[..48], PRIME, 0x1D8834E1A5407A1C, 0x44375B9FB060F541), /* 33-64 */ - (&buf[..81], 0, 0x4B9B448ED8DFD3DD, 0xE805A6D1A43D70E5), /* 65-96 */ - (&buf[..81], PRIME, 0xD2D6B075945617BA, 0xE58BE5736F6E7550), /* 65-96 */ - (&buf[..103], 0, 0xC5A9F97B29EFA44E, 0x254DB7BE881E125C), /* 97-128 */ - (&buf[..103], PRIME, 0xFA2086367CDB177F, 0x0AEDEA68C988B0C0), /* 97-128 */ - (&buf[..192], 0, 0xC3142FDDD9102A3F, 0x06F1747E77185F97), /* 129-240 */ - (&buf[..192], PRIME, 0xA89F07B35987540F, 0xCF1B35FB2C557F54), /* 129-240 */ - (&buf[..222], 0, 0xA61AC4EB3295F86B, 0x33FA7B7598C28A07), /* 129-240 */ - 
(&buf[..222], PRIME, 0x54135EB88AD8B75E, 0xBC45CE6AE50BCF53), /* 129-240 */ - (&buf[..403], 0, 0xB0C48E6D18E9D084, 0xB16FC17E992FF45D), /* one block, last stripe is overlapping */ - (&buf[..403], PRIME64, 0x0A1D320C9520871D, 0xCE11CB376EC93252), /* one block, last stripe is overlapping */ - (&buf[..512], 0, 0xA03428558AC97327, 0x4ECF51281BA406F7), /* one block, finishing at stripe boundary */ - (&buf[..512], PRIME64, 0xAF67A482D6C893F2, 0x1382D92F25B84D90), /* one block, finishing at stripe boundary */ - (&buf[..2048], 0, 0x21901B416B3B9863, 0x212AF8E6326F01E0), /* two blocks, finishing at block boundary */ - (&buf[..2048], PRIME, 0xBDBB2282577DADEC, 0xF78CDDC2C9A9A692), /* two blocks, finishing at block boundary */ - (&buf[..2240], 0, 0x00AD52FA9385B6FE, 0xC705BAD3356CE302), /* two blocks, ends at stripe boundary */ - (&buf[..2240], PRIME, 0x10FD0072EC68BFAA, 0xE1312F3458817F15), /* two blocks, ends at stripe boundary */ - (&buf[..2237], 0, 0x970C91411533862C, 0x4BBD06FF7BFF0AB1), /* two blocks, ends at stripe boundary */ - (&buf[..2237], PRIME, 0xD80282846D814431, 0x14EBB157B84D9785), /* two blocks, ends at stripe boundary */ - ]; - - for (buf, seed, lo, hi) in test_cases { - let result = u128::from(lo) + (u128::from(hi) << 64); - - { - let hash = hash128_with_seed(buf, seed); - - assert_eq!( - hash, - result, - "hash128_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - // streaming API test - - // single ingestio - { - let mut hasher = Hash128::with_seed(seed); - hasher.write(buf); - let hash = hasher.finish_ext(); - - assert_eq!( - hash, - result, - "Hash128::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - if buf.len() > 3 { - // 2 ingestions - let mut hasher = Hash128::with_seed(seed); - hasher.write(&buf[..3]); - hasher.write(&buf[3..]); - let hash = hasher.finish_ext(); - - assert_eq!( - hash, - result, - 
"Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - // byte by byte ingestion - { - let mut hasher = Hash128::with_seed(seed); - - for chunk in buf.chunks(1) { - hasher.write(chunk); - } - - let hash = hasher.finish_ext(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - } - } -} diff --git a/renu/src/xxhash32.rs b/src/xxhash32.rs similarity index 100% rename from renu/src/xxhash32.rs rename to src/xxhash32.rs diff --git a/renu/src/xxhash3_64.rs b/src/xxhash3_64.rs similarity index 100% rename from renu/src/xxhash3_64.rs rename to src/xxhash3_64.rs diff --git a/renu/src/xxhash3_64/avx2.rs b/src/xxhash3_64/avx2.rs similarity index 100% rename from renu/src/xxhash3_64/avx2.rs rename to src/xxhash3_64/avx2.rs diff --git a/renu/src/xxhash3_64/neon.rs b/src/xxhash3_64/neon.rs similarity index 100% rename from renu/src/xxhash3_64/neon.rs rename to src/xxhash3_64/neon.rs diff --git a/renu/src/xxhash3_64/scalar.rs b/src/xxhash3_64/scalar.rs similarity index 100% rename from renu/src/xxhash3_64/scalar.rs rename to src/xxhash3_64/scalar.rs diff --git a/renu/src/xxhash3_64/secret.rs b/src/xxhash3_64/secret.rs similarity index 100% rename from renu/src/xxhash3_64/secret.rs rename to src/xxhash3_64/secret.rs diff --git a/renu/src/xxhash3_64/sse2.rs b/src/xxhash3_64/sse2.rs similarity index 100% rename from renu/src/xxhash3_64/sse2.rs rename to src/xxhash3_64/sse2.rs diff --git a/renu/src/xxhash64.rs b/src/xxhash64.rs similarity index 100% rename from renu/src/xxhash64.rs rename to src/xxhash64.rs diff --git a/renu/renu-sum/.gitignore b/twox-hash-sum/.gitignore similarity index 100% rename from renu/renu-sum/.gitignore rename to twox-hash-sum/.gitignore diff --git a/renu/renu-sum/Cargo.toml b/twox-hash-sum/Cargo.toml similarity index 79% rename from 
renu/renu-sum/Cargo.toml rename to twox-hash-sum/Cargo.toml index 9de3c49a9..a175a8119 100644 --- a/renu/renu-sum/Cargo.toml +++ b/twox-hash-sum/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "renu-sum" +name = "twox-hash-sum" version = "0.1.0" edition = "2021" diff --git a/renu/renu-sum/src/main.rs b/twox-hash-sum/src/main.rs similarity index 100% rename from renu/renu-sum/src/main.rs rename to twox-hash-sum/src/main.rs diff --git a/renu/xx_hash-sys/.gitignore b/xx_hash-sys/.gitignore similarity index 100% rename from renu/xx_hash-sys/.gitignore rename to xx_hash-sys/.gitignore diff --git a/renu/xx_hash-sys/Cargo.toml b/xx_hash-sys/Cargo.toml similarity index 100% rename from renu/xx_hash-sys/Cargo.toml rename to xx_hash-sys/Cargo.toml diff --git a/renu/xx_hash-sys/build.rs b/xx_hash-sys/build.rs similarity index 100% rename from renu/xx_hash-sys/build.rs rename to xx_hash-sys/build.rs diff --git a/renu/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs similarity index 100% rename from renu/xx_hash-sys/src/lib.rs rename to xx_hash-sys/src/lib.rs diff --git a/renu/xx_hash-sys/xxHash b/xx_hash-sys/xxHash similarity index 100% rename from renu/xx_hash-sys/xxHash rename to xx_hash-sys/xxHash From 19b73deaf2ee7061a28240d0e2574e314bbc58c8 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 9 Oct 2024 14:12:53 -0400 Subject: [PATCH 150/166] Restore tests to working condition when serialization is enabled Primarily, serde_json adds some inference failures. Some traits were missing and some other warnings had popped up. 
--- src/xxhash32.rs | 12 ++++++++---- src/xxhash3_64.rs | 14 ++++++++------ src/xxhash64.rs | 12 ++++++++---- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 097a4c129..67dca2cc5 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -389,6 +389,8 @@ mod test { use super::*; + const EMPTY_BYTES: [u8; 0] = []; + #[test] fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0; 32]; @@ -409,7 +411,7 @@ mod test { #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = Hasher::with_seed(0); - hasher.write(&[]); + hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0x02cc_5d05); } @@ -438,7 +440,7 @@ mod test { #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = Hasher::with_seed(0x42c9_1977); - hasher.write(&[]); + hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0xd6bf_8459); } @@ -628,6 +630,8 @@ mod serialize_impl { #[cfg(test)] mod test { + use std::hash::Hasher as _; + use super::*; type Result = core::result::Result; @@ -636,7 +640,7 @@ mod serialize_impl { fn test_serialization_cycle() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); - hasher.finish(); + let _ = hasher.finish(); let serialized = serde_json::to_string(&hasher)?; let unserialized: Hasher = serde_json::from_str(&serialized)?; @@ -648,7 +652,7 @@ mod serialize_impl { fn test_serialization_stability() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); - hasher.finish(); + let _ = hasher.finish(); let expected_serialized = r#"{ "total_len": 14, diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 70c84d337..5ca1a7e22 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1327,6 +1327,8 @@ mod test { use super::*; + const EMPTY_BYTES: [u8; 0] = []; + #[test] fn default_secret_is_valid() { assert!(DEFAULT_SECRET.is_valid()) @@ -1377,13 +1379,13 @@ mod test { #[test] fn oneshot_empty() { - let hash 
= Hasher::oneshot(&[]); + let hash = Hasher::oneshot(&EMPTY_BYTES); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } #[test] fn streaming_empty() { - let hash = hash_byte_by_byte(&[]); + let hash = hash_byte_by_byte(&EMPTY_BYTES); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } @@ -1614,7 +1616,7 @@ mod test { let (a, b) = x.bp_as_chunks::<1>(); assert_eq!(a, &[[1], [2], [3], [4], [5]]); - assert_eq!(b, &[]); + assert_eq!(b, &[] as &[i32]); let (a, b) = x.bp_as_chunks::<2>(); assert_eq!(a, &[[1, 2], [3, 4]]); @@ -1630,7 +1632,7 @@ mod test { let (a, b) = x.bp_as_chunks::<5>(); assert_eq!(a, &[[1, 2, 3, 4, 5]]); - assert_eq!(b, &[]); + assert_eq!(b, &[] as &[i32]); let (a, b) = x.bp_as_chunks::<6>(); assert_eq!(a, &[] as &[[i32; 6]]); @@ -1642,7 +1644,7 @@ mod test { let x = [1, 2, 3, 4, 5]; let (a, b) = x.bp_as_rchunks::<1>(); - assert_eq!(a, &[]); + assert_eq!(a, &[] as &[i32]); assert_eq!(b, &[[1], [2], [3], [4], [5]]); let (a, b) = x.bp_as_rchunks::<2>(); @@ -1658,7 +1660,7 @@ mod test { assert_eq!(b, &[[2, 3, 4, 5]]); let (a, b) = x.bp_as_rchunks::<5>(); - assert_eq!(a, &[]); + assert_eq!(a, &[] as &[i32]); assert_eq!(b, &[[1, 2, 3, 4, 5]]); let (a, b) = x.bp_as_rchunks::<6>(); diff --git a/src/xxhash64.rs b/src/xxhash64.rs index f488e429f..58569cf84 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -400,6 +400,8 @@ mod test { use super::*; + const EMPTY_BYTES: [u8; 0] = []; + #[test] fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0x9c; 32]; @@ -420,7 +422,7 @@ mod test { #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = Hasher::with_seed(0); - hasher.write(&[]); + hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); } @@ -449,7 +451,7 @@ mod test { #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&[]); + hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); } @@ -617,6 +619,8 @@ 
mod serialize_impl { #[cfg(test)] mod test { + use std::hash::Hasher as _; + use super::*; type Result = core::result::Result; @@ -625,7 +629,7 @@ mod serialize_impl { fn test_serialization_cycle() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); - hasher.finish(); + let _ = hasher.finish(); let serialized = serde_json::to_string(&hasher)?; let unserialized: Hasher = serde_json::from_str(&serialized)?; @@ -637,7 +641,7 @@ mod serialize_impl { fn test_serialization_stability() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); - hasher.finish(); + let _ = hasher.finish(); let expected_serialized = r#"{ "total_len": 14, From f480537a50aa4f07f74fea6e95e9c9926cd23d6b Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 9 Oct 2024 14:29:27 -0400 Subject: [PATCH 151/166] Move Box-specific trait impls behind the feature gate --- src/xxhash3_64.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 5ca1a7e22..c4a351b1c 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -182,12 +182,6 @@ unsafe impl FixedBuffer for &mut [u8; N] {} // Safety: An array will never change size. unsafe impl FixedMutBuffer for &mut [u8; N] {} -// Safety: A plain slice will never change size. -unsafe impl FixedBuffer for Box<[u8]> {} - -// Safety: A plain slice will never change size. -unsafe impl FixedMutBuffer for Box<[u8]> {} - /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. pub struct SecretBuffer { @@ -273,6 +267,12 @@ mod with_alloc { use super::*; + // Safety: A plain slice will never change size. + unsafe impl FixedBuffer for Box<[u8]> {} + + // Safety: A plain slice will never change size. + unsafe impl FixedMutBuffer for Box<[u8]> {} + impl Hasher { /// Constructs the hasher using the default seed and secret values. 
pub fn new() -> Self { From c0b5ca550cdc38c24063cfdbe297c05cc7b87742 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 9 Oct 2024 14:36:03 -0400 Subject: [PATCH 152/166] Don't warn when we use one of the implementation forcing cfgs --- src/xxhash3_64.rs | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c4a351b1c..654ad0422 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -616,26 +616,30 @@ macro_rules! dispatch { #[cfg(_internal_xxhash3_force_scalar)] return do_scalar($($arg_name),*); - #[cfg(all(target_arch = "aarch64", feature = "std"))] + // This code can be unreachable if one of the `*_force_*` cfgs + // are set above, but that's the point. + #[allow(unreachable_code)] { - if std::arch::is_aarch64_feature_detected!("neon") { - // Safety: We just ensured we have the NEON feature - return unsafe { do_neon($($arg_name),*) }; + #[cfg(all(target_arch = "aarch64", feature = "std"))] + { + if std::arch::is_aarch64_feature_detected!("neon") { + // Safety: We just ensured we have the NEON feature + return unsafe { do_neon($($arg_name),*) }; + } } - } - #[cfg(all(target_arch = "x86_64", feature = "std"))] - { - if is_x86_feature_detected!("avx2") { - // Safety: We just ensured we have the AVX2 feature - return unsafe { do_avx2($($arg_name),*) }; - } else if is_x86_feature_detected!("sse2") { - // Safety: We just ensured we have the SSE2 feature - return unsafe { do_sse2($($arg_name),*) }; + #[cfg(all(target_arch = "x86_64", feature = "std"))] + { + if is_x86_feature_detected!("avx2") { + // Safety: We just ensured we have the AVX2 feature + return unsafe { do_avx2($($arg_name),*) }; + } else if is_x86_feature_detected!("sse2") { + // Safety: We just ensured we have the SSE2 feature + return unsafe { do_sse2($($arg_name),*) }; + } } + do_scalar($($arg_name),*) } - - do_scalar($($arg_name),*) }; } From 98e3aa6ff7d31ec1fc4a0136d6e0cbb5eab7689f Mon Sep 17 00:00:00 
2001 From: Jake Goulding Date: Wed, 9 Oct 2024 13:51:44 -0400 Subject: [PATCH 153/166] Update CI configuration --- .github/workflows/ci.yml | 124 ++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 66 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 566f1e2c2..cea7e7f56 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,106 +2,98 @@ on: push name: Continuous integration +env: + RUSTFLAGS: -D warnings + RUSTDOCFLAGS: -D warnings + jobs: library: - runs-on: ubuntu-latest strategy: matrix: + platform: + - ubuntu-latest + rust: - stable - beta - nightly - - 1.37.0 # MSRV + - 1.81.0 # MSRV - steps: - - uses: actions/checkout@v2 + include: + - platform: macos-latest # This serves as our aarch64 / arm64 runner + rust: stable - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: ${{ matrix.rust }} - override: true - components: rustfmt, clippy + - platform: windows-latest + rust: stable - - uses: actions-rs/cargo@v1 - with: - command: build + runs-on: ${{ matrix.platform }} - - uses: actions-rs/cargo@v1 - with: - command: test + steps: + - uses: actions/checkout@v4 - - uses: actions-rs/cargo@v1 - with: - command: test - args: --all-features + - run: git submodule update --init --recursive - - uses: actions-rs/cargo@v1 + - uses: dtolnay/rust-toolchain@master with: - command: fmt - args: --all -- --check - if: ${{ matrix.rust == 'stable' }} + toolchain: ${{ matrix.rust }} - - uses: actions-rs/cargo@v1 - with: - command: clippy - args: --all-features -- -D warnings - if: ${{ matrix.rust == 'stable' }} + - name: Unit Tests + run: cargo test --all-features - no-std: + - name: Property Tests + run: cargo test -p comparison --all-features + + miri: runs-on: ubuntu-latest + env: + MIRIFLAGS: --cfg _internal_xxhash3_force_scalar steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 + - uses: dtolnay/rust-toolchain@master with: - 
profile: minimal - toolchain: stable - target: thumbv6m-none-eabi - override: true + toolchain: nightly + components: miri - - uses: actions-rs/cargo@v1 - with: - command: build - args: --no-default-features --target thumbv6m-none-eabi --lib + - name: Unsafe Code + run: cargo miri test --all-features - compatibility-tests: + - name: Big Endian Platform + run: cargo miri test --all-features --target s390x-unknown-linux-gnu + + lints: runs-on: ubuntu-latest - strategy: - matrix: - test: - - digest_0_8 - - digest_0_9 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + + - run: git submodule update --init --recursive - - uses: actions-rs/toolchain@v1 + - uses: dtolnay/rust-toolchain@master with: - profile: minimal toolchain: stable - override: true + components: rustfmt, clippy - - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path "compatibility-tests/${{ matrix.test }}/Cargo.toml" + - run: cargo fmt --all + + - run: cargo clippy --all --all-targets --all-features - big_endian: + - run: cargo doc --all-features + + no-std: runs-on: ubuntu-latest + steps: - - name: Checkout code - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + - uses: dtolnay/rust-toolchain@master with: - platforms: s390x + toolchain: stable + targets: thumbv6m-none-eabi - - name: Cross test - uses: actions-rs/cargo@v1 - with: - use-cross: true - command: test - args: --target s390x-unknown-linux-gnu + - run: > + cargo build + --no-default-features + --features=xxhash32,xxhash64,xxhash3_64 + --target thumbv6m-none-eabi From b4ee2a5aea050ce3b45602ff0a902e6487a03698 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 16 Oct 2024 13:22:19 -0400 Subject: [PATCH 154/166] Ignore dead-code warnings for our integer conversion traits These are tiny and are used in a fun mix of the algorithms. Nothing much will be lost if we accidentally stop using them. 
--- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 2ee51fb45..970d8d8fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -99,6 +99,7 @@ pub mod xxhash3_64; #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub use xxhash3_64::Hasher as XxHash3_64; +#[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU32 { fn into_u32(self) -> u32; } @@ -109,6 +110,7 @@ impl IntoU32 for u8 { } } +#[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU64 { fn into_u64(self) -> u64; } @@ -132,6 +134,7 @@ impl IntoU64 for usize { } } +#[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU128 { fn into_u128(self) -> u128; } From cfab4eb3516ab69cdefe6d825cedb5a2cfa923e0 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 08:55:07 -0400 Subject: [PATCH 155/166] Place SIMD code behind the `std` feature We can only detect enabled features via the standard library's macros (for now), so gate them to avoid warnings. --- src/lib.rs | 2 +- src/xxhash3_64.rs | 6 +++--- src/xxhash3_64/avx2.rs | 1 + src/xxhash3_64/neon.rs | 1 + src/xxhash3_64/sse2.rs | 1 + 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 970d8d8fb..832950509 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,7 +69,7 @@ #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] -#[cfg(feature = "alloc")] +#[cfg(all(feature = "alloc", feature = "xxhash3_64"))] extern crate alloc; #[cfg(any(feature = "std", doc, test))] diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 654ad0422..7bd227ed7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -559,7 +559,7 @@ macro_rules! dispatch { /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", feature = "std"))] unsafe fn do_neon<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? 
where $($wheres)* @@ -575,7 +575,7 @@ macro_rules! dispatch { /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] - #[cfg(target_arch = "x86_64")] + #[cfg(all(target_arch = "x86_64", feature = "std"))] unsafe fn do_avx2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? where $($wheres)* @@ -591,7 +591,7 @@ macro_rules! dispatch { /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] - #[cfg(target_arch = "x86_64")] + #[cfg(all(target_arch = "x86_64", feature = "std"))] unsafe fn do_sse2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? where $($wheres)* diff --git a/src/xxhash3_64/avx2.rs b/src/xxhash3_64/avx2.rs index 8cfb54f15..752d7aa77 100644 --- a/src/xxhash3_64/avx2.rs +++ b/src/xxhash3_64/avx2.rs @@ -10,6 +10,7 @@ impl Impl { /// /// You must ensure that the CPU has the AVX2 feature #[inline] + #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Impl { Impl(()) } diff --git a/src/xxhash3_64/neon.rs b/src/xxhash3_64/neon.rs index 372bca749..f86da1522 100644 --- a/src/xxhash3_64/neon.rs +++ b/src/xxhash3_64/neon.rs @@ -10,6 +10,7 @@ impl Impl { /// /// You must ensure that the CPU has the NEON feature #[inline] + #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Self { Self(()) } diff --git a/src/xxhash3_64/sse2.rs b/src/xxhash3_64/sse2.rs index 0290038e4..29a9c2ae9 100644 --- a/src/xxhash3_64/sse2.rs +++ b/src/xxhash3_64/sse2.rs @@ -10,6 +10,7 @@ impl Impl { /// /// You must ensure that the CPU has the SSE2 feature #[inline] + #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Impl { Impl(()) } From 36975a07379f1f63c6a9aedd67befa0e5c443d4e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 16 Oct 2024 16:14:56 -0400 Subject: [PATCH 156/166] Implement `Clone` for the hashers and states --- src/xxhash32.rs | 22 ++++++++++++++++++---- src/xxhash3_64.rs | 8 ++++++++ src/xxhash64.rs | 21 +++++++++++++++++---- 3 files changed, 43 insertions(+), 8 
deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 67dca2cc5..42ff7a19b 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -21,7 +21,7 @@ type Bytes = [u8; 16]; const BYTES_IN_LANE: usize = mem::size_of::(); -#[derive(PartialEq)] +#[derive(Clone, PartialEq)] struct BufferData(Lanes); impl BufferData { @@ -48,7 +48,7 @@ impl fmt::Debug for BufferData { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] struct Buffer { offset: usize, data: BufferData, @@ -126,7 +126,7 @@ impl Buffer { } } -#[derive(PartialEq)] +#[derive(Clone, PartialEq)] struct Accumulators(Lanes); impl Accumulators { @@ -199,7 +199,7 @@ impl fmt::Debug for Accumulators { /// Although this struct implements [`hash::Hasher`][], it only calculates a /// 32-bit number, leaving the upper bits as 0. This means it is /// unlikely to be correct to use this in places like a [`HashMap`][std::collections::HashMap]. -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct Hasher { seed: u32, accumulators: Accumulators, @@ -362,6 +362,7 @@ const fn round(mut acc: u32, lane: u32) -> u32 { /// Constructs [`Hasher`][] for multiple hasher instances. See /// the [usage warning][Hasher#caution]. +#[derive(Clone)] pub struct State(u32); impl State { @@ -389,6 +390,12 @@ mod test { use super::*; + const _TRAITS: () = { + const fn is_clone() {} + is_clone::(); + is_clone::(); + }; + const EMPTY_BYTES: [u8; 0] = []; #[test] @@ -505,6 +512,7 @@ mod random_impl { /// Constructs a randomized seed and reuses it for multiple hasher /// instances. See the [usage warning][Hasher#caution]. 
+ #[derive(Clone)] pub struct RandomState(State); impl Default for RandomState { @@ -533,6 +541,12 @@ mod random_impl { use super::*; + const _: () = { + const fn is_clone() {} + is_clone::(); + is_clone::(); + }; + #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 7bd227ed7..390c0ec65 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -68,6 +68,7 @@ const DEFAULT_SECRET_RAW: DefaultSecret = [ const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; /// Calculates the 64-bit hash. +#[derive(Clone)] pub struct Hasher { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, @@ -184,6 +185,7 @@ unsafe impl FixedMutBuffer for &mut [u8; N] {} /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. +#[derive(Clone)] pub struct SecretBuffer { seed: u64, secret: S, @@ -516,6 +518,7 @@ impl StripeAccumulator { /// usages may desire more flexibility. This type, combined with /// [`SecretBuffer`][], offer that flexibility at the cost of a /// generic type. 
+#[derive(Clone)] pub struct RawHasher { secret_buffer: SecretBuffer, buffer_usage: usize, @@ -1331,6 +1334,11 @@ mod test { use super::*; + const _: () = { + const fn is_clone() {} + is_clone::(); + }; + const EMPTY_BYTES: [u8; 0] = []; #[test] diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 58569cf84..d72d57dd8 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -21,7 +21,7 @@ type Bytes = [u8; 32]; const BYTES_IN_LANE: usize = mem::size_of::(); -#[derive(PartialEq)] +#[derive(Clone, PartialEq)] struct BufferData(Lanes); impl BufferData { @@ -48,7 +48,7 @@ impl fmt::Debug for BufferData { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] struct Buffer { offset: usize, data: BufferData, @@ -126,7 +126,7 @@ impl Buffer { } } -#[derive(PartialEq)] +#[derive(Clone, PartialEq)] struct Accumulators(Lanes); impl Accumulators { @@ -210,7 +210,7 @@ impl fmt::Debug for Accumulators { } /// Calculates the 64-bit hash. -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct Hasher { seed: u64, accumulators: Accumulators, @@ -373,6 +373,7 @@ const fn round(mut acc: u64, lane: u64) -> u64 { } /// Constructs [`Hasher`][] for multiple hasher instances. +#[derive(Clone)] pub struct State(u64); impl State { @@ -400,6 +401,12 @@ mod test { use super::*; + const _TRAITS: () = { + const fn is_clone() {} + is_clone::(); + is_clone::(); + }; + const EMPTY_BYTES: [u8; 0] = []; #[test] @@ -494,6 +501,7 @@ mod random_impl { /// Constructs a randomized seed and reuses it for multiple hasher /// instances. 
+ #[derive(Clone)] pub struct RandomState(State); impl Default for RandomState { @@ -522,6 +530,11 @@ mod random_impl { use super::*; + const _TRAITS: () = { + const fn is_clone() {} + is_clone::(); + }; + #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); From e039f1314a22602013ef0f86900ca9c77a92810a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 16 Oct 2024 17:32:43 -0400 Subject: [PATCH 157/166] CI: features --- .github/workflows/ci.yml | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cea7e7f56..59b165acd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,3 +97,48 @@ jobs: --no-default-features --features=xxhash32,xxhash64,xxhash3_64 --target thumbv6m-none-eabi + + features: + runs-on: ubuntu-latest + + env: + IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64 + FEATURE_SET: random serialize std alloc + + steps: + - uses: actions/checkout@v4 + + - run: git submodule update --init --recursive + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + + - name: Compute Powerset + shell: "ruby {0}" + run: | + features = ENV['FEATURE_SET'] + .split(' ') + .reduce([[]]) { |ps, i| ps + ps.map { |e| e + [i] } } + .map { |s| s.join(',') } + .join(" ") + + File.open(ENV['GITHUB_ENV'], 'a') { |f| f.write("FEATURES=#{features}") } + + - name: Check implementations with features + run: | + for impl in ${IMPLEMENTATIONS}; do + echo "::group::Implementation ${impl}" + + # Check the implementation by itself + cargo check --no-default-features --features="${impl}" + + # And with extra features + for feature in ${FEATURES}; do + echo "::group::Features ${feature}" + cargo check --no-default-features --features="${impl},${feature}" + echo "::endgroup::" + done + + echo ::endgroup:: + done From 0d1105f0c36d7529410382aff8defd46bb3e2fb1 Mon Sep 17 00:00:00 2001 From: Jake 
Goulding Date: Thu, 17 Oct 2024 12:33:04 -0400 Subject: [PATCH 158/166] Pin to xxHash 0.8.2 --- xx_hash-sys/xxHash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xx_hash-sys/xxHash b/xx_hash-sys/xxHash index 805c00b68..bbb27a5ef 160000 --- a/xx_hash-sys/xxHash +++ b/xx_hash-sys/xxHash @@ -1 +1 @@ -Subproject commit 805c00b68fa754200ada0c207ffeaa7a4409377c +Subproject commit bbb27a5efb85b92a0486cf361a8635715a53f6ba From 28f0d836b78a00b1e1804d45c5cb9d5d1300267a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 12:59:41 -0400 Subject: [PATCH 159/166] Unify the README and crate documentation --- README.md | 140 ++++++++++++++++++++++++++--------------------------- src/lib.rs | 66 +------------------------ 2 files changed, 69 insertions(+), 137 deletions(-) diff --git a/README.md b/README.md index d8656f327..7d0b96f33 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,59 @@ -# TwoX-Hash +A Rust implementation of the [xxHash] algorithm. -A Rust implementation of the [XXHash] algorithm. 
+[![Crates.io][crates-badge]][crates-url] +[![Documentation][docs-badge]][docs-url] +[![Build Status][actions-badge]][actions-url] -[![Build Status](https://travis-ci.org/shepmaster/twox-hash.svg)](https://travis-ci.org/shepmaster/twox-hash) [![Current Version](https://img.shields.io/crates/v/twox-hash.svg)](https://crates.io/crates/twox-hash) +[xxHash]: https://github.com/Cyan4973/xxHash -[Documentation](https://docs.rs/twox-hash/) +[crates-badge]: https://img.shields.io/crates/v/twox-hash.svg +[crates-url]: https://crates.io/crates/twox-hash +[docs-badge]: https://img.shields.io/docsrs/twox-hash +[docs-url]: https://docs.rs/twox-hash/ +[actions-badge]: https://github.com/shepmaster/twox-hash/actions/workflows/ci.yml/badge.svg?branch=main +[actions-url]: https://github.com/shepmaster/twox-hash/actions/workflows/ci.yml?query=branch%3Amain -[XXHash]: https://github.com/Cyan4973/xxHash +# Examples -## Examples +These examples use [`XxHash64`](XxHash64) but the same ideas can be +used for [`XxHash32`](XxHash32) or [`XxHash3_64`](XxHash3_64). 
-### With a fixed seed +## Hashing arbitrary data + +### When all the data is available at once ```rust -use std::hash::BuildHasherDefault; -use std::collections::HashMap; use twox_hash::XxHash64; -let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); +let seed = 1234; +let hash = XxHash64::oneshot(seed, b"some bytes"); +assert_eq!(0xeab5_5659_a496_d78b, hash); +``` + +### When the data is streaming + +```rust +use std::hash::Hasher as _; +use twox_hash::XxHash64; + +let seed = 1234; +let mut hasher = XxHash64::with_seed(seed); +hasher.write(b"some"); +hasher.write(b" "); +hasher.write(b"bytes"); +let hash = hasher.finish(); +assert_eq!(0xeab5_5659_a496_d78b, hash); +``` + +## In a [`HashMap`](std::collections::HashMap) + +### With a default seed + +```rust +use std::{collections::HashMap, hash::BuildHasherDefault}; +use twox_hash::XxHash64; + +let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` @@ -26,73 +62,33 @@ assert_eq!(hash.get(&42), Some(&"the answer")); ```rust use std::collections::HashMap; -use twox_hash::RandomXxHashBuilder64; +use twox_hash::xxhash64; + +let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); +hash.insert(42, "the answer"); +assert_eq!(hash.get(&42), Some(&"the answer")); +``` + +### With a fixed seed + +```rust +use std::collections::HashMap; +use twox_hash::xxhash64; -let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); +let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` -## Benchmarks - -### 64-bit - -| Bytes | SipHasher (MB/s) | XXHash (MB/s) | Ratio | -|---------|------------------|---------------|-------| -| 1 | 52 | 38 | 73% | -| 4 | 210 | 148 | 70% | -| 16 | 615 | 615 | 100% | -| 32 | 914 | 1391 | 152% | -| 128 | 1347 | 3657 | 271% | -| 256 | 1414 | 5019 | 355% | -| 512 | 1546 | 
6168 | 399% | -| 1024 | 1565 | 6206 | 397% | -| 1048576 | 1592 | 7564 | 475% | - -| Bytes | [FnvHasher][fnv] (MB/s) | XXHash (MB/s) | Ratio | -|---------|-------------------------|---------------|-------| -| 1 | 1000 | 38 | 4% | -| 4 | 800 | 148 | 19% | -| 16 | 761 | 615 | 81% | -| 32 | 761 | 1391 | 183% | -| 128 | 727 | 3657 | 503% | -| 256 | 759 | 5019 | 661% | -| 512 | 745 | 6168 | 828% | -| 1024 | 741 | 6206 | 838% | -| 1048576 | 745 | 7564 | 1015% | - -### 32-bit - -| Bytes | SipHasher (MB/s) | XXHash32 (MB/s) | Ratio | -|---------|------------------|-----------------|-------| -| 1 | 52 | 55 | 106% | -| 4 | 210 | 210 | 100% | -| 16 | 615 | 1230 | 200% | -| 32 | 914 | 1882 | 206% | -| 128 | 1347 | 3282 | 244% | -| 256 | 1414 | 3459 | 245% | -| 512 | 1546 | 3792 | 245% | -| 1024 | 1565 | 3938 | 252% | -| 1048576 | 1592 | 4127 | 259% | - -| Bytes | [FnvHasher][fnv] (MB/s) | XXHash32 (MB/s) | Ratio | -|---------|-------------------------|-----------------|-------| -| 1 | 1000 | 55 | 6% | -| 4 | 800 | 210 | 26% | -| 16 | 761 | 1230 | 162% | -| 32 | 761 | 1882 | 247% | -| 128 | 727 | 3282 | 451% | -| 256 | 759 | 3459 | 456% | -| 512 | 745 | 3792 | 509% | -| 1024 | 741 | 3938 | 531% | -| 1048576 | 745 | 4127 | 554% | - - -[fnv]: https://github.com/servo/rust-fnv - -## Contributing - -1. Fork it ( https://github.com/shepmaster/twox-hash/fork ) +# Benchmarks + +See benchmarks in the [comparison][] README. + +[comparison]: https://github.com/shepmaster/twox-hash/tree/main/comparison + +# Contributing + +1. Fork it () 2. Create your feature branch (`git checkout -b my-new-feature`) 3. Add a failing test. 4. Add code to pass the test. diff --git a/src/lib.rs b/src/lib.rs index 832950509..156cafd29 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,68 +1,4 @@ -//! A Rust implementation of the [XXHash][] algorithm. -//! -//! [XXHash]: https://github.com/Cyan4973/xxHash -//! -//! ## Hashing arbitrary data -//! -//! 
### When all the data is available at once -//! -//! ```rust -//! use twox_hash::XxHash64; -//! -//! let seed = 1234; -//! let hash = XxHash64::oneshot(seed, b"some bytes"); -//! assert_eq!(0xeab5_5659_a496_d78b, hash); -//! ``` -//! -//! ### When the data is streaming -//! -//! ```rust -//! use std::hash::Hasher as _; -//! use twox_hash::XxHash64; -//! -//! let seed = 1234; -//! let mut hasher = XxHash64::with_seed(seed); -//! hasher.write(b"some"); -//! hasher.write(b" "); -//! hasher.write(b"bytes"); -//! let hash = hasher.finish(); -//! assert_eq!(0xeab5_5659_a496_d78b, hash); -//! ``` -//! -//! ## In a [`HashMap`](std::collections::HashMap) -//! -//! ### With a default seed -//! -//! ```rust -//! use std::{collections::HashMap, hash::BuildHasherDefault}; -//! use twox_hash::XxHash64; -//! -//! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` -//! -//! ### With a random seed -//! -//! ```rust -//! use std::collections::HashMap; -//! use twox_hash::xxhash64; -//! -//! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` -//! -//! ### With a fixed seed -//! -//! ```rust -//! use std::collections::HashMap; -//! use twox_hash::xxhash64; -//! -//! let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! 
``` +#![doc = include_str!("../README.md")] #![deny(rust_2018_idioms)] #![deny(missing_docs)] From 45f21b028f88801af5af9b198f0c14108e784300 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 13:12:25 -0400 Subject: [PATCH 160/166] Document the feature flags --- README.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7d0b96f33..6939cf498 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,8 @@ A Rust implementation of the [xxHash] algorithm. # Examples -These examples use [`XxHash64`](XxHash64) but the same ideas can be -used for [`XxHash32`](XxHash32) or [`XxHash3_64`](XxHash3_64). +These examples use [`XxHash64`][] but the same ideas can be +used for [`XxHash32`][] or [`XxHash3_64`][]. ## Hashing arbitrary data @@ -45,7 +45,7 @@ let hash = hasher.finish(); assert_eq!(0xeab5_5659_a496_d78b, hash); ``` -## In a [`HashMap`](std::collections::HashMap) +## In a [`HashMap`][] ### With a default seed @@ -80,6 +80,18 @@ hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` +# Feature Flags + +| name | description | +|------------|---------------------------------------------------------------------------------------------------------| +| xxhash32 | Include the [`XxHash32`][] algorithm | +| xxhash64 | Include the [`XxHash64`][] algorithm | +| xxhash3_64 | Include the [`XxHash3_64`][] algorithm | +| random | Create random instances of the hashers | +| serialize | Serialize and deserialize hasher state with Serde | +| std | Use the Rust standard library. Enable this if you want SIMD support in [`XxHash3_64`][] | +| alloc | Use the Rust allocator library. Enable this if you want to create [`XxHash3_64`][] with dynamic secrets | + # Benchmarks See benchmarks in the [comparison][] README. @@ -96,3 +108,9 @@ See benchmarks in the [comparison][] README. 6. Ensure tests pass. 7. Push to the branch (`git push origin my-new-feature`) 8. 
Create a new Pull Request + + +[`Hashmap`]: std::collections::HashMap +[`XxHash32`]: crate::XxHash32 +[`XxHash64`]: crate::XxHash64 +[`XxHash3_64`]: crate::XxHash3_64 From 4c577a33d4957e79d6f5d9db38652562251edcc6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 13:53:35 -0400 Subject: [PATCH 161/166] Test for minimal dependency versions --- .github/workflows/ci.yml | 30 ++++++++++++++++++++++++++++++ Cargo.toml | 2 ++ 2 files changed, 32 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 59b165acd..f7806408b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -142,3 +142,33 @@ jobs: echo ::endgroup:: done + + minimal-versions: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - run: git submodule update --init --recursive + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: 1.81.0 # MSRV + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + + - name: Remove non-essential dependencies + run: | + # Remove workspace dependencies + sed -i '/\[workspace]/,/#END-\[workspace]/d' Cargo.toml + + # Remove dev-dependencies + sed -i '/\[dev-dependencies]/,/#END-\[dev-dependencies]/d' Cargo.toml + + - name: Downgrade to minimal dependencies + run: | + cargo +nightly -Z minimal-versions update + + - run: cargo +1.81.0 build --all-features diff --git a/Cargo.toml b/Cargo.toml index 426cd074e..cf7483bfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ members = [ "twox-hash-sum", "xx_hash-sys", ] +#END-[workspace] [features] default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] @@ -52,6 +53,7 @@ serde = { version = "1.0.0", optional = true, default-features = false, features [dev-dependencies] serde_json = "1.0.117" +#END-[dev-dependencies] [package.metadata.docs.rs] all-features = true From 9bd194326fa76a9cd5f2ec8d48526c88640428fb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 14:19:38 -0400 Subject: [PATCH 162/166] 
Don't create empty ranges in proptests --- comparison/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comparison/src/lib.rs b/comparison/src/lib.rs index 7701a8314..2e1c04aa9 100644 --- a/comparison/src/lib.rs +++ b/comparison/src/lib.rs @@ -366,7 +366,7 @@ mod xxhash3_64 { fn vec_and_index() -> impl Strategy<Value = (Vec<u8>, usize)> { prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| { let len = vec.len(); - (Just(vec), 0..len) + (Just(vec), 0..=len) }) } From 5ce7f4b071a7ad45dd1314320b6afbfe04302db9 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 14:21:01 -0400 Subject: [PATCH 163/166] Introduce a changelog --- CHANGELOG.md | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..6db2c9b23 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,85 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [2.0.0] - Unreleased + +[2.0.0]: https://github.com/shepmaster/twox-hash/tree/v2.0.0 + +This release is a complete rewrite of the crate, including +reorganization of the code. The XXH3 algorithm now matches the 0.8 +release of the reference C xxHash implementation. + +### Added + +- `XxHash32::oneshot` and `XxHash64::oneshot` can perform hashing with + zero allocation and generally improved performance. If you have code + that creates a hasher and hashes a slice of bytes exactly once, you + are strongly encouraged to use the new functions. 
This might look + like: + + ```rust + // Before + let mut hasher = XxHash64::new(); // or XxHash32, or with seeds + some_bytes.hash(&mut hasher); + let hash = hasher.finish(); + + // After + let hash = XxHash64::oneshot(some_bytes); + ``` + +- There is a feature flag for each hashing implementation. It is + recommended that you opt-out of the crate's default features and + only select the implementations you need to improve compile speed. + +### Changed + +- The crate's minimum supported Rust version (MSRV) is now 1.81. + +- Functional and performance comparisons are made against the + reference C xxHash library version 0.8.2, which includes a stable + XXH3 algorithm. + +- Support for randomly-generated hasher instances is now behind the + `random` feature flag. It was previously combined with the `std` + feature flag. + +### Removed + +- The deprecated type aliases `XxHash` and `RandomXxHashBuilder` have + been removed. Replace them with `XxHash64` and + `xxhash64::RandomState` respectively. + +- `RandomXxHashBuilder32` and `RandomXxHashBuilder64` are no longer + available at the top-level of the crate. Replace them with + `xxhash32::RandomState` and `xxhash64::RandomState` respectively. + +- `Xxh3Hash64` and `xx3::Hash64` have been renamed to `XxHash3_64` and + `xxhash3_64::Hasher` respectively. + +- The free functions `xxh3::hash64`, `xxh3::hash64_with_seed`, and + `xxh3::hash64_with_secret` are now associated functions of + `xxhash3_64::Hasher`: `oneshot`, `oneshot_with_seed` and + `oneshot_with_secret`. Note that the argument order has changed. + +- Support for the [digest][] crate has been removed. The digest crate + is for **cryptographic** hash functions and xxHash is + **non-cryptographic**. + +- `XxHash32` and `XxHash64` no longer implement `Copy`. This prevents + accidentally mutating a duplicate instance of the state instead of + the original state. `Clone` is still implemented so you can make + deliberate duplicates. 
+ +- The XXH3 128-bit variant is not yet re-written. Work is in progress + for this. + +- We no longer provide support for randomly-generated instances of the + XXH3 64-bit variant. The XXH3 algorithm takes both a seed and a + secret as input and deciding what to randomize is non-trivial and + can have negative impacts on performance. + +[digest]: https://docs.rs/digest/latest/digest/ From a635afe6fc4b1e4464940c5805b52bce48cb2e9d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 14:33:34 -0400 Subject: [PATCH 164/166] Tweaks to get benchmarking running again after renaming --- comparison/benchmark.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/comparison/benchmark.sh b/comparison/benchmark.sh index 145cabbcd..af793ef55 100755 --- a/comparison/benchmark.sh +++ b/comparison/benchmark.sh @@ -11,7 +11,9 @@ function capture() { raw_data="${temp_dir}/raw-data.streaming-json" - cargo criterion -p compare --message-format=json -- "${subset}" > "${raw_data}" + echo "Benchmarking with $(rustc --version)" + + cargo criterion -p comparison --message-format=json -- "${subset}" > "${raw_data}" echo "Raw benchmark data captured to ${raw_data}" echo "Next, run \`${SCRIPT_INVOKED_AS} analyze ${raw_data}\`" @@ -50,7 +52,8 @@ case "${mode}" in ;; *) - echo "Unknown command '${mode}'" + echo "Unknown command '${mode}'" >&2 + echo "Known commands: capture, analyze" >&2 exit 1 ;; esac From 979e71b5f94d1fe01e62b3b70084993eed0fc9b6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 18 Oct 2024 08:33:38 -0400 Subject: [PATCH 165/166] Update benchmarks for Rust 1.81 / xxHash 0.8.2 --- comparison/README.md | 24 +- .../results/xxhash3_64-streaming-aarch64.svg | 174 +++++++------- .../results/xxhash3_64-streaming-x86_64.svg | 216 +++++++++--------- .../results/xxhash3_64-tiny_data-aarch64.svg | 143 ++++++------ .../results/xxhash3_64-tiny_data-x86_64.svg | 175 +++++++------- .../results/xxhash64-streaming-aarch64.svg | 107 ++++----- 
.../results/xxhash64-streaming-x86_64.svg | 107 ++++----- .../results/xxhash64-tiny_data-aarch64.svg | 152 ++++++------ .../results/xxhash64-tiny_data-x86_64.svg | 150 ++++++------ 9 files changed, 624 insertions(+), 624 deletions(-) diff --git a/comparison/README.md b/comparison/README.md index efb868e2b..69a16a924 100644 --- a/comparison/README.md +++ b/comparison/README.md @@ -19,15 +19,15 @@ graphs are boring flat lines, so a table is used instead. | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 13.4 | -| C | 13.4 | +| Rust | 13.5 | +| C | 13.5 | ## x86_64 | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 15.7 | -| C | 15.8 | +| Rust | 16.5 | +| C | 16.5 | ## Streaming data @@ -88,20 +88,20 @@ graphs are boring flat lines, so a table is used instead. | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 34.8 | -| C | 34.8 | -| C (scalar) | 21.0 | -| C (NEON) | 34.7 | +| Rust | 35.2 | +| C | 35.0 | +| C (scalar) | 21.2 | +| C (NEON) | 35.1 | ### x86_64 | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 58.3 | +| Rust | 58.6 | | C | 25.0 | | C (scalar) | 7.5 | | C (SSE2) | 25.1 | -| C (AVX2) | 58.1 | +| C (AVX2) | 57.8 | ## Streaming data @@ -156,7 +156,7 @@ cluttering the graph and wasting benchmarking time. | CPU | Memory | C compiler | |-------------------|--------|--------------------| -| Apple M1 Max | 64 GiB | clang 15.0.0 | +| Apple M1 Max | 64 GiB | clang 16.0.0 | | AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. @@ -178,7 +178,7 @@ Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. 
C compiler - Apple clang version 15.0.0 (clang-1500.3.9.4) + Apple clang version 16.0.0 (clang-1600.0.26.3) diff --git a/comparison/results/xxhash3_64-streaming-aarch64.svg b/comparison/results/xxhash3_64-streaming-aarch64.svg index 8e21742b4..dc46b30db 100644 --- a/comparison/results/xxhash3_64-streaming-aarch64.svg +++ b/comparison/results/xxhash3_64-streaming-aarch64.svg @@ -21,94 +21,94 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec diff --git a/comparison/results/xxhash3_64-streaming-x86_64.svg b/comparison/results/xxhash3_64-streaming-x86_64.svg index 38428ce59..6a3997ed9 100644 --- a/comparison/results/xxhash3_64-streaming-x86_64.svg +++ b/comparison/results/xxhash3_64-streaming-x86_64.svg @@ -22,115 +22,115 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec diff --git a/comparison/results/xxhash3_64-tiny_data-aarch64.svg b/comparison/results/xxhash3_64-tiny_data-aarch64.svg index 50e2a7f14..753a36cc5 100644 --- a/comparison/results/xxhash3_64-tiny_data-aarch64.svg +++ b/comparison/results/xxhash3_64-tiny_data-aarch64.svg @@ -20,81 +20,80 @@ - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + - 4ns - 8ns - 12ns - 16ns - 20ns - 24ns - + 4ns + 8ns + 12ns + 16ns + 20ns + 0 B 50 B 100 B diff --git a/comparison/results/xxhash3_64-tiny_data-x86_64.svg b/comparison/results/xxhash3_64-tiny_data-x86_64.svg index 671c7e0d7..81b4fc317 100644 --- a/comparison/results/xxhash3_64-tiny_data-x86_64.svg +++ b/comparison/results/xxhash3_64-tiny_data-x86_64.svg @@ -20,97 +20,96 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + - 4ns - 8ns - 12ns - 16ns - 20ns - 24ns - + 4ns + 8ns + 12ns + 16ns + 20ns + 0 B 50 B 100 B diff --git a/comparison/results/xxhash64-streaming-aarch64.svg b/comparison/results/xxhash64-streaming-aarch64.svg index effd1c847..1f58161a6 100644 --- a/comparison/results/xxhash64-streaming-aarch64.svg +++ b/comparison/results/xxhash64-streaming-aarch64.svg @@ -20,62 +20,63 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 256 MiB/sec - 512 MiB/sec - 1 GiB/sec - 2 GiB/sec - 4 GiB/sec - 8 GiB/sec - 16 GiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec 32 GiB/sec - + 1 B 2 B 4 B diff --git a/comparison/results/xxhash64-streaming-x86_64.svg b/comparison/results/xxhash64-streaming-x86_64.svg index 636c7eafc..064188466 100644 --- a/comparison/results/xxhash64-streaming-x86_64.svg +++ b/comparison/results/xxhash64-streaming-x86_64.svg @@ -20,62 +20,63 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 256 MiB/sec - 512 MiB/sec - 1 
GiB/sec - 2 GiB/sec - 4 GiB/sec - 8 GiB/sec - 16 GiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec 32 GiB/sec - + 1 B 2 B 4 B diff --git a/comparison/results/xxhash64-tiny_data-aarch64.svg b/comparison/results/xxhash64-tiny_data-aarch64.svg index d732a33eb..df15121a7 100644 --- a/comparison/results/xxhash64-tiny_data-aarch64.svg +++ b/comparison/results/xxhash64-tiny_data-aarch64.svg @@ -20,84 +20,84 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 0 - 2ns - 4ns - 6ns - 8ns - + 0 + 2ns + 4ns + 6ns + 8ns + 0 B 10 B 20 B diff --git a/comparison/results/xxhash64-tiny_data-x86_64.svg b/comparison/results/xxhash64-tiny_data-x86_64.svg index cdbb6396d..ca5185f8c 100644 --- a/comparison/results/xxhash64-tiny_data-x86_64.svg +++ b/comparison/results/xxhash64-tiny_data-x86_64.svg @@ -20,84 +20,84 @@ - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 0 - 2ns - 4ns - 6ns - 8ns - + 0 + 2ns + 4ns + 6ns + 8ns + 0 B 10 B 20 B From 6d4ffd4e846325ed3e51a05524747616a2d251e8 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 18 Oct 2024 09:01:27 -0400 Subject: [PATCH 166/166] Remove vestigial comment --- src/xxhash3_64.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 390c0ec65..336ba7aaf 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -250,8 +250,6 @@ impl SecretBuffer { impl SecretBuffer<&'static [u8; DEFAULT_SECRET_LENGTH]> { /// Use the default seed and secret values while allocating nothing. 
- /// - /// Note that this type may take up a surprising amount of stack space. #[inline] pub const fn default() -> Self { SecretBuffer {