From 95b7080acd5bde4a87533669beed57fdeac18903 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 08:40:24 -0400 Subject: [PATCH 001/166] Initial commit --- .gitignore | 2 + Cargo.toml | 7 ++ src/lib.rs | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 266 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 src/lib.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..96ef6c0b9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..197471fea --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "xx-renu" +version = "0.1.0" +edition = "2021" + +[dev-dependencies] +proptest = "1.4.0" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000..ca11d623b --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,257 @@ +#![deny(rust_2018_idioms)] + +use core::mem; + +const PRIME64_1: u64 = 0x9E3779B185EBCA87; +const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; +const PRIME64_3: u64 = 0x165667B19E3779F9; +const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; +const PRIME64_5: u64 = 0x27D4EB2F165667C5; + +#[derive(Default)] +#[repr(align(32))] +struct AlignedData([u8; 32]); + +impl AlignedData { + fn as_u64s(&self) -> &[u64; 4] { + // SAFETY: We are guaranteed to be aligned + unsafe { mem::transmute(&self.0) } + } +} + +#[derive(Default)] +struct Buffer { + offset: usize, + data: AlignedData, +} + +impl Buffer { + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { + if self.offset == 0 { + return (None, data); + }; + + let (_filled, empty) = self.data.0.split_at_mut(self.offset); // todo unchecked? 
+ let n_to_copy = usize::min(empty.len(), data.len()); + + let dst = &mut empty[..n_to_copy]; + let (src, rest) = data.split_at(n_to_copy); + + dst.copy_from_slice(src); + self.offset += n_to_copy; + + if self.offset == self.data.0.len() { + (Some(self.data.as_u64s()), rest) + } else { + (None, rest) + } + } + + fn set(&mut self, data: &[u8]) { + let n_to_copy = data.len(); + debug_assert!(n_to_copy < self.data.0.len()); + self.data.0[..n_to_copy].copy_from_slice(data); + self.offset = data.len(); + } + + fn remaining(&self) -> &[u8] { + &self.data.0[..self.offset] + } +} + +pub struct XxHash64 { + seed: u64, + accumulators: [u64; 4], + buffer: Buffer, + length: u64, +} + +impl XxHash64 { + pub fn with_seed(seed: u64) -> Self { + // Step 1. Initialize internal accumulators + let accumulators = [ + seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), + seed.wrapping_add(PRIME64_2), + seed, + seed.wrapping_sub(PRIME64_1), + ]; + + Self { + seed, + accumulators, + buffer: Buffer::default(), + length: 0, + } + } + + pub fn write(&mut self, data: &[u8]) { + let len = data.len(); + + // Step 2. Process stripes + // todo: dereference? 
+ let [acc1, acc2, acc3, acc4] = &mut self.accumulators; + + let (check, data) = self.buffer.extend(data); + + if let Some(&[lane1, lane2, lane3, lane4]) = check { + // todo: little-endian transform + + *acc1 = Self::round(*acc1, lane1); + *acc2 = Self::round(*acc2, lane2); + *acc3 = Self::round(*acc3, lane3); + *acc4 = Self::round(*acc4, lane4); + } + + let mut data = data; + while let Some((chunk, rest)) = data.split_first_chunk::<32>() { + let [lane1, lane2, lane3, lane4] = + unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + // todo: little-endian transform + + *acc1 = Self::round(*acc1, lane1); + *acc2 = Self::round(*acc2, lane2); + *acc3 = Self::round(*acc3, lane3); + *acc4 = Self::round(*acc4, lane4); + + data = rest; + } + let data = data; + + self.buffer.set(data); + + self.length += len.into_u64(); + } + + pub fn finish(&mut self) -> u64 { + // Step 3. Accumulator convergence + let mut acc = if self.length < 32 { + self.seed.wrapping_add(PRIME64_5) + } else { + let [acc1, acc2, acc3, acc4] = self.accumulators; + + let mut acc = { + let acc1 = acc1.rotate_left(1); + let acc2 = acc2.rotate_left(7); + let acc3 = acc3.rotate_left(12); + let acc4 = acc4.rotate_left(18); + + acc1.wrapping_add(acc2) + .wrapping_add(acc3) + .wrapping_add(acc4) + }; + + acc = Self::merge_accumulator(acc, acc1); + acc = Self::merge_accumulator(acc, acc2); + acc = Self::merge_accumulator(acc, acc3); + acc = Self::merge_accumulator(acc, acc4); + + acc + }; + + // Step 4. Add input length + acc += self.length; + + // Step 5. 
Consume remaining input + let mut remaining = self.buffer.remaining(); + + while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { + let lane = u64::from_ne_bytes(*chunk); + // todo: little-endian + + acc ^= Self::round(0, lane); + acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); + acc = acc.wrapping_add(PRIME64_4); + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { + let lane = u32::from_ne_bytes(*chunk).into_u64(); + // todo: little-endian + + acc ^= lane.wrapping_mul(PRIME64_1); + acc = acc.rotate_left(23).wrapping_mul(PRIME64_2); + acc = acc.wrapping_add(PRIME64_3); + + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<1>() { + let lane = chunk[0].into_u64(); + + acc ^= lane.wrapping_mul(PRIME64_5); + acc = acc.rotate_left(11).wrapping_mul(PRIME64_1); + + remaining = rest; + } + + // Step 6. Final mix (avalanche) + acc ^= acc >> 33; + acc = acc.wrapping_mul(PRIME64_2); + acc ^= acc >> 29; + acc = acc.wrapping_mul(PRIME64_3); + acc ^= acc >> 32; + + acc + } + + fn round(mut acc: u64, lane: u64) -> u64 { + acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); + acc = acc.rotate_left(31); + acc.wrapping_mul(PRIME64_1) + } + + fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { + acc ^= Self::round(0, acc_n); + acc = acc.wrapping_mul(PRIME64_1); + acc.wrapping_add(PRIME64_4) + } +} + +trait IntoU64 { + fn into_u64(self) -> u64; +} + +impl IntoU64 for u8 { + fn into_u64(self) -> u64 { + self.into() + } +} + +impl IntoU64 for u32 { + fn into_u64(self) -> u64 { + self.into() + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl IntoU64 for usize { + fn into_u64(self) -> u64 { + self as u64 + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn hash_of_nothing_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); + } + + #[test] + 
fn hash_of_single_byte_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[42]); + assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); + } + + #[test] + fn hash_of_exactly_32_bytes() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[0; 32]); + assert_eq!(hasher.finish(), 0xf6e9_be5d_7063_2cf5); + } +} From ccb57232a31e88f0165657f3007a7733c4d2485e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 09:49:18 -0400 Subject: [PATCH 002/166] Add comparison to native implementation --- .gitignore | 2 +- .gitmodules | 6 ++++ compare/.gitignore | 2 ++ compare/Cargo.toml | 9 ++++++ compare/src/lib.rs | 39 +++++++++++++++++++++++ compare/xx_hash-sys/.gitignore | 2 ++ compare/xx_hash-sys/Cargo.toml | 8 +++++ compare/xx_hash-sys/build.rs | 10 ++++++ compare/xx_hash-sys/src/lib.rs | 58 ++++++++++++++++++++++++++++++++++ compare/xx_hash-sys/xxHash | 1 + 10 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 .gitmodules create mode 100644 compare/.gitignore create mode 100644 compare/Cargo.toml create mode 100644 compare/src/lib.rs create mode 100644 compare/xx_hash-sys/.gitignore create mode 100644 compare/xx_hash-sys/Cargo.toml create mode 100644 compare/xx_hash-sys/build.rs create mode 100644 compare/xx_hash-sys/src/lib.rs create mode 160000 compare/xx_hash-sys/xxHash diff --git a/.gitignore b/.gitignore index 96ef6c0b9..1b72444ae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ +/Cargo.lock /target -Cargo.lock diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..52476fd22 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "xxHash"] + path = compare/xxHash + url = git@github.com:Cyan4973/xxHash.git +[submodule "compare/xx_hash-sys/xxHash"] + path = compare/xx_hash-sys/xxHash + url = git@github.com:Cyan4973/xxHash.git diff --git a/compare/.gitignore b/compare/.gitignore new file mode 100644 index 000000000..1b72444ae --- /dev/null +++ 
b/compare/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/compare/Cargo.toml b/compare/Cargo.toml new file mode 100644 index 000000000..ac6bb09cb --- /dev/null +++ b/compare/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "compare" +version = "0.1.0" +edition = "2021" + +[dependencies] +proptest = "1.5.0" +xx-renu = { path = ".." } +xx_hash-sys = { path = "xx_hash-sys" } diff --git a/compare/src/lib.rs b/compare/src/lib.rs new file mode 100644 index 000000000..02947ab45 --- /dev/null +++ b/compare/src/lib.rs @@ -0,0 +1,39 @@ +#![cfg(test)] + +use proptest::{num, prelude::*, test_runner::TestCaseResult}; + +proptest! { + #[test] + fn it_works(seed: u64, data: Vec) { + it_works_impl(seed, &data)?; + } + + #[test] + fn it_works_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + it_works_impl(seed, &data[offset..])?; + } +} + +fn it_works_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = xx_hash_sys::Stream::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = xx_renu::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) +} + +fn vec_and_index() -> impl Strategy, usize)> { + prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| { + let len = vec.len(); + (Just(vec), 0..len) + }) +} diff --git a/compare/xx_hash-sys/.gitignore b/compare/xx_hash-sys/.gitignore new file mode 100644 index 000000000..1b72444ae --- /dev/null +++ b/compare/xx_hash-sys/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/compare/xx_hash-sys/Cargo.toml b/compare/xx_hash-sys/Cargo.toml new file mode 100644 index 000000000..d385daf66 --- /dev/null +++ b/compare/xx_hash-sys/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "xx_hash-sys" +version = "0.0.0" +edition = "2021" +publish = false + +[dependencies] +libc = { version = "0.2.155", default-features = false } diff --git a/compare/xx_hash-sys/build.rs 
b/compare/xx_hash-sys/build.rs new file mode 100644 index 000000000..cdc31eb97 --- /dev/null +++ b/compare/xx_hash-sys/build.rs @@ -0,0 +1,10 @@ +use std::{env, path::PathBuf}; + +fn main() { + let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); + let base: PathBuf = base.into(); + let xxhash = base.join("xxHash"); + + println!("cargo::rustc-link-lib=static=xxhash"); + println!("cargo::rustc-link-search={}", xxhash.display()); +} diff --git a/compare/xx_hash-sys/src/lib.rs b/compare/xx_hash-sys/src/lib.rs new file mode 100644 index 000000000..6728d92b8 --- /dev/null +++ b/compare/xx_hash-sys/src/lib.rs @@ -0,0 +1,58 @@ +#![allow(non_camel_case_types)] + +type XXH64_hash_t = u64; +#[repr(C)] +pub struct XXH64_state_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +type XXH_errorcode = libc::c_int; +const XXH_OK: XXH_errorcode = 0; + +extern "C" { + fn XXH64(input: *const libc::c_void, length: libc::size_t, seed: XXH64_hash_t) -> XXH64_hash_t; + + fn XXH64_createState() -> *mut XXH64_state_t; + fn XXH64_reset(state: *mut XXH64_state_t, seed: XXH64_hash_t) -> XXH_errorcode; + fn XXH64_update( + state: *mut XXH64_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn XXH64_digest(state: *mut XXH64_state_t) -> XXH64_hash_t; + fn XXH64_freeState(state: *mut XXH64_state_t); +} + +pub fn oneshot(seed: u64, data: &[u8]) -> u64 { + unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } +} + +pub struct Stream(*mut XXH64_state_t); + +impl Stream { + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = XXH64_createState(); + XXH64_reset(state, seed); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { XXH64_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { XXH64_digest(self.0) } + } +} + +impl Drop for Stream { + fn drop(&mut 
self) { + unsafe { XXH64_freeState(self.0) } + } +} diff --git a/compare/xx_hash-sys/xxHash b/compare/xx_hash-sys/xxHash new file mode 160000 index 000000000..805c00b68 --- /dev/null +++ b/compare/xx_hash-sys/xxHash @@ -0,0 +1 @@ +Subproject commit 805c00b68fa754200ada0c207ffeaa7a4409377c From e61bdb7617c5ae97a7d9fe16984e23f4b7261828 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 14:20:55 -0400 Subject: [PATCH 003/166] Add benchmarks --- compare/Cargo.toml | 6 +++ compare/benches/benchmark.rs | 95 ++++++++++++++++++++++++++++++++++ compare/xx_hash-sys/src/lib.rs | 8 +-- src/lib.rs | 6 +++ 4 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 compare/benches/benchmark.rs diff --git a/compare/Cargo.toml b/compare/Cargo.toml index ac6bb09cb..cb14ba84f 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -3,7 +3,13 @@ name = "compare" version = "0.1.0" edition = "2021" +[[bench]] +name = "benchmark" +harness = false + [dependencies] +criterion = "0.5.1" proptest = "1.5.0" +rand = "0.8.5" xx-renu = { path = ".." 
} xx_hash-sys = { path = "xx_hash-sys" } diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs new file mode 100644 index 000000000..58fce0ebc --- /dev/null +++ b/compare/benches/benchmark.rs @@ -0,0 +1,95 @@ +use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +use rand::{Rng, RngCore, SeedableRng}; +use std::{hint::black_box, iter}; +use xx_hash_sys::Stream; +use xx_renu::XxHash64; + +fn oneshot(c: &mut Criterion) { + let (seed, data) = gen_data(); + let mut g = c.benchmark_group("oneshot"); + + for size in half_sizes(&data).take(10) { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("xxHash/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = Stream::oneshot(seed, &data); + black_box(hash); + }) + }); + + let id = format!("renu/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = XxHash64::oneshot(seed, &data); + black_box(hash); + }) + }); + } + + g.finish(); +} + +fn streaming_one_chunk(c: &mut Criterion) { + let (seed, data) = gen_data(); + let mut g = c.benchmark_group("streaming_one_chunk"); + + for size in half_sizes(&data).take(10) { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("xxHash/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = Stream::with_seed(seed); + hasher.write(&data); + let hash = hasher.finish(); + black_box(hash); + }) + }); + + let id = format!("renu/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = XxHash64::with_seed(seed); + hasher.write(&data); + let hash = hasher.finish(); + black_box(hash); + }) + }); + } + + g.finish(); +} + +const SEED: u64 = 0xc651_4843_1995_363f; +const DATA_SIZE: usize = 100 * 1024 * 1024; + +fn gen_data() -> (u64, Vec) { + let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); + + let seed = rng.gen(); + + let mut data = vec![0; DATA_SIZE]; + rng.fill_bytes(&mut data); + + (seed, data) +} + +fn 
half_sizes(data: &[u8]) -> impl Iterator { + iter::successors( + Some(data.len()), + |&v| { + if v == 1 { + None + } else { + Some(v / 2) + } + }, + ) +} + +criterion_group!(benches, oneshot, streaming_one_chunk); +criterion_main!(benches); diff --git a/compare/xx_hash-sys/src/lib.rs b/compare/xx_hash-sys/src/lib.rs index 6728d92b8..2c3ed6e8a 100644 --- a/compare/xx_hash-sys/src/lib.rs +++ b/compare/xx_hash-sys/src/lib.rs @@ -24,13 +24,13 @@ extern "C" { fn XXH64_freeState(state: *mut XXH64_state_t); } -pub fn oneshot(seed: u64, data: &[u8]) -> u64 { - unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } -} - pub struct Stream(*mut XXH64_state_t); impl Stream { + pub fn oneshot(seed: u64, data: &[u8]) -> u64 { + unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } + } + pub fn with_seed(seed: u64) -> Self { let state = unsafe { let state = XXH64_createState(); diff --git a/src/lib.rs b/src/lib.rs index ca11d623b..f3e904706 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,6 +67,12 @@ pub struct XxHash64 { } impl XxHash64 { + pub fn oneshot(seed: u64, data: &[u8]) -> u64 { + let mut this = Self::with_seed(seed); + this.write(data); + this.finish() + } + pub fn with_seed(seed: u64) -> Self { // Step 1. 
Initialize internal accumulators let accumulators = [ From c7eed0e8280e960933d6c9567b6fac6dd2ed9161 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 21:24:45 -0400 Subject: [PATCH 004/166] Add commandline sum tool --- renu-sum/.gitignore | 2 ++ renu-sum/Cargo.toml | 7 +++++++ renu-sum/src/main.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 renu-sum/.gitignore create mode 100644 renu-sum/Cargo.toml create mode 100644 renu-sum/src/main.rs diff --git a/renu-sum/.gitignore b/renu-sum/.gitignore new file mode 100644 index 000000000..1b72444ae --- /dev/null +++ b/renu-sum/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/renu-sum/Cargo.toml b/renu-sum/Cargo.toml new file mode 100644 index 000000000..2677f2ea2 --- /dev/null +++ b/renu-sum/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "renu-sum" +version = "0.1.0" +edition = "2021" + +[dependencies] +xx-renu = { path = ".." } diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs new file mode 100644 index 000000000..08d1e1413 --- /dev/null +++ b/renu-sum/src/main.rs @@ -0,0 +1,40 @@ +use std::{ + env, + fs::File, + io::Read, + path::{Path, PathBuf}, +}; +use xx_renu::XxHash64; + +type Result> = std::result::Result; + +fn main() -> Result<()> { + let mut buffer = vec![0; 32 * 1024 * 1024]; + + for path in env::args_os().skip(1) { + let path = PathBuf::from(path); + let hash = hash_one_file(&path, &mut buffer)?; + eprintln!("{hash:x} {}", path.display()); + } + + Ok(()) +} + +fn hash_one_file(path: &Path, buffer: &mut [u8]) -> Result { + let mut file = File::open(path)?; + let mut hasher = XxHash64::with_seed(0); + + loop { + let n_bytes = file.read(buffer)?; + if n_bytes == 0 { + break; + } + + let valid = &buffer[..n_bytes]; + + hasher.write(valid); + } + + let hash = hasher.finish(); + Ok(hash) +} From c5150c9e9bbcb00d67a86351721563d0c88793c2 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 23 Jun 2024 21:30:18 -0400 Subject: 
[PATCH 005/166] Move to a workspace --- .gitmodules | 2 +- Cargo.toml | 8 ++++++-- compare/.gitignore | 1 - compare/Cargo.toml | 2 +- renu-sum/.gitignore | 1 - {compare/xx_hash-sys => xx_hash-sys}/.gitignore | 1 - {compare/xx_hash-sys => xx_hash-sys}/Cargo.toml | 0 {compare/xx_hash-sys => xx_hash-sys}/build.rs | 0 {compare/xx_hash-sys => xx_hash-sys}/src/lib.rs | 0 {compare/xx_hash-sys => xx_hash-sys}/xxHash | 0 10 files changed, 8 insertions(+), 7 deletions(-) rename {compare/xx_hash-sys => xx_hash-sys}/.gitignore (60%) rename {compare/xx_hash-sys => xx_hash-sys}/Cargo.toml (100%) rename {compare/xx_hash-sys => xx_hash-sys}/build.rs (100%) rename {compare/xx_hash-sys => xx_hash-sys}/src/lib.rs (100%) rename {compare/xx_hash-sys => xx_hash-sys}/xxHash (100%) diff --git a/.gitmodules b/.gitmodules index 52476fd22..5a68a159b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,5 +2,5 @@ path = compare/xxHash url = git@github.com:Cyan4973/xxHash.git [submodule "compare/xx_hash-sys/xxHash"] - path = compare/xx_hash-sys/xxHash + path = xx_hash-sys/xxHash url = git@github.com:Cyan4973/xxHash.git diff --git a/Cargo.toml b/Cargo.toml index 197471fea..09a2d4f94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,5 +3,9 @@ name = "xx-renu" version = "0.1.0" edition = "2021" -[dev-dependencies] -proptest = "1.4.0" +[workspace] +members = [ + "compare", + "renu-sum", + "xx_hash-sys", +] diff --git a/compare/.gitignore b/compare/.gitignore index 1b72444ae..5a44eef09 100644 --- a/compare/.gitignore +++ b/compare/.gitignore @@ -1,2 +1 @@ /Cargo.lock -/target diff --git a/compare/Cargo.toml b/compare/Cargo.toml index cb14ba84f..f91e12f2a 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -12,4 +12,4 @@ criterion = "0.5.1" proptest = "1.5.0" rand = "0.8.5" xx-renu = { path = ".." 
} -xx_hash-sys = { path = "xx_hash-sys" } +xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/renu-sum/.gitignore b/renu-sum/.gitignore index 1b72444ae..5a44eef09 100644 --- a/renu-sum/.gitignore +++ b/renu-sum/.gitignore @@ -1,2 +1 @@ /Cargo.lock -/target diff --git a/compare/xx_hash-sys/.gitignore b/xx_hash-sys/.gitignore similarity index 60% rename from compare/xx_hash-sys/.gitignore rename to xx_hash-sys/.gitignore index 1b72444ae..5a44eef09 100644 --- a/compare/xx_hash-sys/.gitignore +++ b/xx_hash-sys/.gitignore @@ -1,2 +1 @@ /Cargo.lock -/target diff --git a/compare/xx_hash-sys/Cargo.toml b/xx_hash-sys/Cargo.toml similarity index 100% rename from compare/xx_hash-sys/Cargo.toml rename to xx_hash-sys/Cargo.toml diff --git a/compare/xx_hash-sys/build.rs b/xx_hash-sys/build.rs similarity index 100% rename from compare/xx_hash-sys/build.rs rename to xx_hash-sys/build.rs diff --git a/compare/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs similarity index 100% rename from compare/xx_hash-sys/src/lib.rs rename to xx_hash-sys/src/lib.rs diff --git a/compare/xx_hash-sys/xxHash b/xx_hash-sys/xxHash similarity index 100% rename from compare/xx_hash-sys/xxHash rename to xx_hash-sys/xxHash From e9bc13664f304a2ebf11a51c328364e3ef87d115 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 08:11:24 -0400 Subject: [PATCH 006/166] Adjust submodule --- .gitmodules | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.gitmodules b/.gitmodules index 5a68a159b..451faf0f9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "xxHash"] - path = compare/xxHash - url = git@github.com:Cyan4973/xxHash.git -[submodule "compare/xx_hash-sys/xxHash"] +[submodule "xx_hash-sys/xxHash"] path = xx_hash-sys/xxHash - url = git@github.com:Cyan4973/xxHash.git + url = https://github.com/Cyan4973/xxHash.git From 198fe0b8fb5ce2fbe3477cb2be86134fe542822d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 
11:01:07 -0400 Subject: [PATCH 007/166] Extract a helper --- src/lib.rs | 129 ++++++++++++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f3e904706..eadf5c4c8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ const PRIME64_5: u64 = 0x27D4EB2F165667C5; struct AlignedData([u8; 32]); impl AlignedData { - fn as_u64s(&self) -> &[u64; 4] { + const fn as_u64s(&self) -> &[u64; 4] { // SAFETY: We are guaranteed to be aligned unsafe { mem::transmute(&self.0) } } @@ -59,9 +59,61 @@ impl Buffer { } } +struct Accumulators([u64; 4]); + +impl Accumulators { + const fn new(seed: u64) -> Self { + Self([ + seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), + seed.wrapping_add(PRIME64_2), + seed, + seed.wrapping_sub(PRIME64_1), + ]) + } + + fn write(&mut self, lanes: [u64; 4]) { + let [acc1, acc2, acc3, acc4] = &mut self.0; + let [lane1, lane2, lane3, lane4] = lanes; + // todo: little-endian transform + + *acc1 = round(*acc1, lane1); + *acc2 = round(*acc2, lane2); + *acc3 = round(*acc3, lane3); + *acc4 = round(*acc4, lane4); + } + + const fn finish(&self) -> u64 { + let [acc1, acc2, acc3, acc4] = self.0; + + let mut acc = { + let acc1 = acc1.rotate_left(1); + let acc2 = acc2.rotate_left(7); + let acc3 = acc3.rotate_left(12); + let acc4 = acc4.rotate_left(18); + + acc1.wrapping_add(acc2) + .wrapping_add(acc3) + .wrapping_add(acc4) + }; + + acc = Self::merge_accumulator(acc, acc1); + acc = Self::merge_accumulator(acc, acc2); + acc = Self::merge_accumulator(acc, acc3); + acc = Self::merge_accumulator(acc, acc4); + + acc + } + + const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { + acc ^= round(0, acc_n); + acc = acc.wrapping_mul(PRIME64_1); + acc.wrapping_add(PRIME64_4) + } +} + pub struct XxHash64 { seed: u64, - accumulators: [u64; 4], + accumulators: Accumulators, buffer: Buffer, length: u64, } @@ -75,16 +127,10 @@ impl XxHash64 { pub fn with_seed(seed: u64) -> Self { // Step 
1. Initialize internal accumulators - let accumulators = [ - seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), - seed.wrapping_add(PRIME64_2), - seed, - seed.wrapping_sub(PRIME64_1), - ]; Self { seed, - accumulators, + accumulators: Accumulators::new(seed), buffer: Buffer::default(), length: 0, } @@ -94,31 +140,18 @@ impl XxHash64 { let len = data.len(); // Step 2. Process stripes - // todo: dereference? - let [acc1, acc2, acc3, acc4] = &mut self.accumulators; - - let (check, data) = self.buffer.extend(data); - - if let Some(&[lane1, lane2, lane3, lane4]) = check { - // todo: little-endian transform + let (buffered_lanes, data) = self.buffer.extend(data); - *acc1 = Self::round(*acc1, lane1); - *acc2 = Self::round(*acc2, lane2); - *acc3 = Self::round(*acc3, lane3); - *acc4 = Self::round(*acc4, lane4); + if let Some(&lanes) = buffered_lanes { + self.accumulators.write(lanes); } let mut data = data; while let Some((chunk, rest)) = data.split_first_chunk::<32>() { - let [lane1, lane2, lane3, lane4] = - unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; - // todo: little-endian transform - - *acc1 = Self::round(*acc1, lane1); - *acc2 = Self::round(*acc2, lane2); - *acc3 = Self::round(*acc3, lane3); - *acc4 = Self::round(*acc4, lane4); - + // SAFETY: We have the right number of bytes and are + // handling the unaligned case. 
+ let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + self.accumulators.write(lanes); data = rest; } let data = data; @@ -133,25 +166,7 @@ impl XxHash64 { let mut acc = if self.length < 32 { self.seed.wrapping_add(PRIME64_5) } else { - let [acc1, acc2, acc3, acc4] = self.accumulators; - - let mut acc = { - let acc1 = acc1.rotate_left(1); - let acc2 = acc2.rotate_left(7); - let acc3 = acc3.rotate_left(12); - let acc4 = acc4.rotate_left(18); - - acc1.wrapping_add(acc2) - .wrapping_add(acc3) - .wrapping_add(acc4) - }; - - acc = Self::merge_accumulator(acc, acc1); - acc = Self::merge_accumulator(acc, acc2); - acc = Self::merge_accumulator(acc, acc3); - acc = Self::merge_accumulator(acc, acc4); - - acc + self.accumulators.finish() }; // Step 4. Add input length @@ -164,7 +179,7 @@ impl XxHash64 { let lane = u64::from_ne_bytes(*chunk); // todo: little-endian - acc ^= Self::round(0, lane); + acc ^= round(0, lane); acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); acc = acc.wrapping_add(PRIME64_4); remaining = rest; @@ -199,18 +214,12 @@ impl XxHash64 { acc } +} - fn round(mut acc: u64, lane: u64) -> u64 { - acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); - acc = acc.rotate_left(31); - acc.wrapping_mul(PRIME64_1) - } - - fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { - acc ^= Self::round(0, acc_n); - acc = acc.wrapping_mul(PRIME64_1); - acc.wrapping_add(PRIME64_4) - } +const fn round(mut acc: u64, lane: u64) -> u64 { + acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); + acc = acc.rotate_left(31); + acc.wrapping_mul(PRIME64_1) } trait IntoU64 { From f9ff61c0bdd12f8ef452a09758b76550ee0bf66f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 11:01:30 -0400 Subject: [PATCH 008/166] Use threads for the CLI --- renu-sum/src/main.rs | 67 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 13 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 08d1e1413..1fe00770e 
100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -3,38 +3,79 @@ use std::{ fs::File, io::Read, path::{Path, PathBuf}, + sync::mpsc, + thread, }; use xx_renu::XxHash64; -type Result> = std::result::Result; +type Error = Box; +type Result = std::result::Result; -fn main() -> Result<()> { - let mut buffer = vec![0; 32 * 1024 * 1024]; +const BUFFER_SIZE: usize = 16 * 1024; +const BUFFER_COUNT: usize = 8; +fn main() -> Result<()> { for path in env::args_os().skip(1) { let path = PathBuf::from(path); - let hash = hash_one_file(&path, &mut buffer)?; + let hash = hash_one_file(&path)?; eprintln!("{hash:x} {}", path.display()); } Ok(()) } -fn hash_one_file(path: &Path, buffer: &mut [u8]) -> Result { +fn hash_one_file(path: &Path) -> Result { let mut file = File::open(path)?; let mut hasher = XxHash64::with_seed(0); - loop { - let n_bytes = file.read(buffer)?; - if n_bytes == 0 { - break; - } + let (tx, rx) = mpsc::sync_channel(BUFFER_COUNT); + let (tx2, rx2) = mpsc::sync_channel(BUFFER_COUNT); - let valid = &buffer[..n_bytes]; - - hasher.write(valid); + for _ in 0..BUFFER_COUNT { + tx.send(vec![0; BUFFER_SIZE]) + .expect("Must be able to populate initial buffers"); } + thread::scope(|scope| { + let t1 = scope.spawn(move || { + while let Ok(mut buffer) = rx.recv() { + let n_bytes = file.read(&mut buffer)?; + + if n_bytes == 0 { + break; + } + + tx2.send((buffer, n_bytes))?; + } + + Ok::<_, Error>(()) + }); + + let t2 = scope.spawn({ + let hasher = &mut hasher; + move || { + while let Ok((buffer, n_bytes)) = rx2.recv() { + let valid = &buffer[..n_bytes]; + + hasher.write(valid); + + if tx.send(buffer).is_err() { + // The reading thread has exited and there's + // nowhere to return this buffer to. 
+ continue; + } + } + + Ok::<_, Error>(()) + } + }); + + t1.join().unwrap()?; + t2.join().unwrap()?; + + Ok::<_, Error>(()) + })?; + let hash = hasher.finish(); Ok(hash) } From 7fa13e66413f161abeab0edc8baa7d0d7d460f03 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 11:36:47 -0400 Subject: [PATCH 009/166] Parameterize and tune the buffer size and count --- renu-sum/src/main.rs | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 1fe00770e..54d0d699c 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -11,28 +11,54 @@ use xx_renu::XxHash64; type Error = Box; type Result = std::result::Result; -const BUFFER_SIZE: usize = 16 * 1024; +const BUFFER_SIZE: usize = 128 * 1024; const BUFFER_COUNT: usize = 8; +struct Config { + buffer_size: usize, + buffer_count: usize, +} + +impl Config { + fn from_env() -> Self { + let buffer_size = env::var("BUFFER_SIZE") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(BUFFER_SIZE); + + let buffer_count = env::var("BUFFER_COUNT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(BUFFER_COUNT); + + Self { + buffer_size, + buffer_count, + } + } +} + fn main() -> Result<()> { + let config = Config::from_env(); + for path in env::args_os().skip(1) { let path = PathBuf::from(path); - let hash = hash_one_file(&path)?; + let hash = hash_one_file(&config, &path)?; eprintln!("{hash:x} {}", path.display()); } Ok(()) } -fn hash_one_file(path: &Path) -> Result { +fn hash_one_file(config: &Config, path: &Path) -> Result { let mut file = File::open(path)?; let mut hasher = XxHash64::with_seed(0); - let (tx, rx) = mpsc::sync_channel(BUFFER_COUNT); - let (tx2, rx2) = mpsc::sync_channel(BUFFER_COUNT); + let (tx, rx) = mpsc::sync_channel(config.buffer_count); + let (tx2, rx2) = mpsc::sync_channel(config.buffer_count); - for _ in 0..BUFFER_COUNT { - tx.send(vec![0; BUFFER_SIZE]) + for _ in 0..config.buffer_count { + 
tx.send(vec![0; config.buffer_size]) .expect("Must be able to populate initial buffers"); } From 51beb3f12b790b828de796e9f7b0fe8e1df95148 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 12:04:12 -0400 Subject: [PATCH 010/166] Reduce allocation count --- renu-sum/src/main.rs | 55 +++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 54d0d699c..409ee35a7 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -3,7 +3,7 @@ use std::{ fs::File, io::Read, path::{Path, PathBuf}, - sync::mpsc, + sync::mpsc::{self, SendError}, thread, }; use xx_renu::XxHash64; @@ -41,63 +41,66 @@ impl Config { fn main() -> Result<()> { let config = Config::from_env(); + let mut buffer = vec![0; config.buffer_count * config.buffer_size]; + for path in env::args_os().skip(1) { let path = PathBuf::from(path); - let hash = hash_one_file(&config, &path)?; + let hash = hash_one_file(&config, &path, &mut buffer)?; eprintln!("{hash:x} {}", path.display()); } Ok(()) } -fn hash_one_file(config: &Config, path: &Path) -> Result { +fn hash_one_file(config: &Config, path: &Path, buffer: &mut [u8]) -> Result { let mut file = File::open(path)?; let mut hasher = XxHash64::with_seed(0); - let (tx, rx) = mpsc::sync_channel(config.buffer_count); - let (tx2, rx2) = mpsc::sync_channel(config.buffer_count); + let (tx_empty, rx_empty) = mpsc::channel(); + let (tx_filled, rx_filled) = mpsc::channel(); - for _ in 0..config.buffer_count { - tx.send(vec![0; config.buffer_size]) + for buffer in buffer.chunks_mut(config.buffer_size) { + tx_empty + .send(buffer) .expect("Must be able to populate initial buffers"); } thread::scope(|scope| { - let t1 = scope.spawn(move || { - while let Ok(mut buffer) = rx.recv() { - let n_bytes = file.read(&mut buffer)?; + let thread_reader = scope.spawn(move || { + while let Ok(buffer) = rx_empty.recv() { + let n_bytes = file.read(buffer)?; if n_bytes == 0 { 
break; } - tx2.send((buffer, n_bytes))?; + tx_filled + .send((buffer, n_bytes)) + .map_err(|_| SendError(()))?; } Ok::<_, Error>(()) }); - let t2 = scope.spawn({ - let hasher = &mut hasher; - move || { - while let Ok((buffer, n_bytes)) = rx2.recv() { - let valid = &buffer[..n_bytes]; + let hasher = &mut hasher; + let thread_hasher = scope.spawn(move || { + while let Ok((buffer, n_bytes)) = rx_filled.recv() { + let valid = &buffer[..n_bytes]; - hasher.write(valid); + hasher.write(valid); - if tx.send(buffer).is_err() { - // The reading thread has exited and there's - // nowhere to return this buffer to. - continue; - } + if tx_empty.send(buffer).is_err() { + // The reading thread has exited and there's + // nowhere to return this buffer to. + continue; } - - Ok::<_, Error>(()) } + + Ok::<_, Error>(()) }); - t1.join().unwrap()?; - t2.join().unwrap()?; + thread_reader.join().unwrap()?; + thread_hasher.join().unwrap()?; Ok::<_, Error>(()) })?; From 41b899c7855c0dd94f03e2544457344ef942a60d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 13:08:47 -0400 Subject: [PATCH 011/166] twox-hash bench --- compare/Cargo.toml | 1 + compare/benches/benchmark.rs | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/compare/Cargo.toml b/compare/Cargo.toml index f91e12f2a..44ef771ee 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -11,5 +11,6 @@ harness = false criterion = "0.5.1" proptest = "1.5.0" rand = "0.8.5" +twox-hash = "1.6.3" xx-renu = { path = ".." 
} xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 58fce0ebc..ae0ea4964 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,6 +1,8 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use rand::{Rng, RngCore, SeedableRng}; +use std::hash::Hasher; use std::{hint::black_box, iter}; +use twox_hash::XxHash64 as Old; use xx_hash_sys::Stream; use xx_renu::XxHash64; @@ -59,6 +61,16 @@ fn streaming_one_chunk(c: &mut Criterion) { black_box(hash); }) }); + + let id = format!("twox-hash/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = Old::with_seed(seed); + hasher.write(&data); + let hash = hasher.finish(); + black_box(hash); + }) + }); } g.finish(); From 19848d0bb76dc04a95ea664a60d90aea16558962 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 13:09:03 -0400 Subject: [PATCH 012/166] const it more --- src/lib.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index eadf5c4c8..fd939ba67 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![no_std] #![deny(rust_2018_idioms)] use core::mem; @@ -8,24 +9,33 @@ const PRIME64_3: u64 = 0x165667B19E3779F9; const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; -#[derive(Default)] #[repr(align(32))] struct AlignedData([u8; 32]); impl AlignedData { + const fn new() -> Self { + Self([0; 32]) + } + const fn as_u64s(&self) -> &[u64; 4] { // SAFETY: We are guaranteed to be aligned unsafe { mem::transmute(&self.0) } } } -#[derive(Default)] struct Buffer { offset: usize, data: AlignedData, } impl Buffer { + const fn new() -> Self { + Self { + offset: 0, + data: AlignedData::new(), + } + } + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { if self.offset == 0 { return (None, data); @@ -49,7 +59,9 @@ impl Buffer { fn set(&mut self, data: &[u8]) { let 
n_to_copy = data.len(); + debug_assert!(n_to_copy < self.data.0.len()); + self.data.0[..n_to_copy].copy_from_slice(data); self.offset = data.len(); } @@ -125,13 +137,13 @@ impl XxHash64 { this.finish() } - pub fn with_seed(seed: u64) -> Self { + pub const fn with_seed(seed: u64) -> Self { // Step 1. Initialize internal accumulators Self { seed, accumulators: Accumulators::new(seed), - buffer: Buffer::default(), + buffer: Buffer::new(), length: 0, } } From c4fdb7f7dc8a4149302110d2b4a5dc697dd804e4 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 14:54:49 -0400 Subject: [PATCH 013/166] Document unchecked decision --- src/lib.rs | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fd939ba67..1235ea1c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,11 +10,17 @@ const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; #[repr(align(32))] -struct AlignedData([u8; 32]); +struct AlignedData([u8; Self::LEN]); impl AlignedData { + const LEN: usize = 32; + const fn new() -> Self { - Self([0; 32]) + Self([0; Self::LEN]) + } + + const fn len(&self) -> usize { + Self::LEN } const fn as_u64s(&self) -> &[u64; 4] { @@ -37,20 +43,34 @@ impl Buffer { } fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { + // Most of the slice methods we use here have `_unchecked` variants, but + // + // 1. this method is called one time per `XxHash64::write` call + // 2. this method early exits if we don't have anything in the buffer + // + // Because of this, removing the panics via `unsafe` doesn't + // have much benefit other than reducing code size by a tiny + // bit. + + debug_assert!(self.offset <= self.data.len()); + if self.offset == 0 { return (None, data); }; - let (_filled, empty) = self.data.0.split_at_mut(self.offset); // todo unchecked? 
+ let empty = &mut self.data.0[self.offset..]; let n_to_copy = usize::min(empty.len(), data.len()); let dst = &mut empty[..n_to_copy]; + let (src, rest) = data.split_at(n_to_copy); dst.copy_from_slice(src); self.offset += n_to_copy; - if self.offset == self.data.0.len() { + debug_assert!(self.offset <= self.data.len()); + + if self.offset == self.data.len() { (Some(self.data.as_u64s()), rest) } else { (None, rest) @@ -58,9 +78,11 @@ impl Buffer { } fn set(&mut self, data: &[u8]) { + debug_assert!([0, self.data.len()].contains(&self.offset)); + let n_to_copy = data.len(); - debug_assert!(n_to_copy < self.data.0.len()); + debug_assert!(n_to_copy < self.data.len()); self.data.0[..n_to_copy].copy_from_slice(data); self.offset = data.len(); From ebebd1b0eb7a694254c4a97f1b07bc8d19d590f1 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 16:45:43 -0400 Subject: [PATCH 014/166] cleaning --- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 1235ea1c6..8479acb0d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ use core::mem; +// Keeping these constants in this form to match the C code. const PRIME64_1: u64 = 0x9E3779B185EBCA87; const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; const PRIME64_3: u64 = 0x165667B19E3779F9; @@ -153,12 +154,14 @@ pub struct XxHash64 { } impl XxHash64 { + #[must_use] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { let mut this = Self::with_seed(seed); this.write(data); this.finish() } + #[must_use] pub const fn with_seed(seed: u64) -> Self { // Step 1. 
Initialize internal accumulators From bd9f19246fe1129a02674f51388ebf1bb0fbd524 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 16:49:06 -0400 Subject: [PATCH 015/166] Proptest oneshot methods --- compare/src/lib.rs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 02947ab45..be6a7c411 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -4,17 +4,35 @@ use proptest::{num, prelude::*, test_runner::TestCaseResult}; proptest! { #[test] - fn it_works(seed: u64, data: Vec) { - it_works_impl(seed, &data)?; + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; } #[test] - fn it_works_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - it_works_impl(seed, &data[offset..])?; + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; } + + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } + + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } +} + +fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = xx_hash_sys::Stream::oneshot(seed, data); + let rust = xx_renu::XxHash64::oneshot(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) } -fn it_works_impl(seed: u64, data: &[u8]) -> TestCaseResult { +fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = { let mut hasher = xx_hash_sys::Stream::with_seed(seed); hasher.write(data); From addb9acfd242dd6800e3d7bbaec686db543d8a4d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 24 Jun 2024 21:08:26 -0400 Subject: [PATCH 016/166] moar tests --- src/lib.rs | 87 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8479acb0d..c21b48e57 
100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,10 @@ #![no_std] #![deny(rust_2018_idioms)] -use core::mem; +#[cfg(test)] +extern crate std; + +use core::{fmt, mem, hash::Hasher}; // Keeping these constants in this form to match the C code. const PRIME64_1: u64 = 0x9E3779B185EBCA87; @@ -30,6 +33,13 @@ impl AlignedData { } } +impl fmt::Debug for AlignedData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.0.iter()).finish() + } +} + +#[derive(Debug)] struct Buffer { offset: usize, data: AlignedData, @@ -72,6 +82,7 @@ impl Buffer { debug_assert!(self.offset <= self.data.len()); if self.offset == self.data.len() { + self.offset = 0; (Some(self.data.as_u64s()), rest) } else { (None, rest) @@ -79,7 +90,11 @@ impl Buffer { } fn set(&mut self, data: &[u8]) { - debug_assert!([0, self.data.len()].contains(&self.offset)); + if data.is_empty() { + return; + } + + debug_assert_eq!(self.offset, 0); let n_to_copy = data.len(); @@ -146,6 +161,19 @@ impl Accumulators { } } +impl fmt::Debug for Accumulators { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let [acc1, acc2, acc3, acc4] = self.0; + f.debug_struct("Accumulators") + .field("acc1", &acc1) + .field("acc2", &acc2) + .field("acc3", &acc3) + .field("acc4", &acc4) + .finish() + } +} + +#[derive(Debug)] pub struct XxHash64 { seed: u64, accumulators: Accumulators, @@ -172,8 +200,10 @@ impl XxHash64 { length: 0, } } +} - pub fn write(&mut self, data: &[u8]) { +impl Hasher for XxHash64 { + fn write(&mut self, data: &[u8]) { let len = data.len(); // Step 2. Process stripes @@ -198,7 +228,8 @@ impl XxHash64 { self.length += len.into_u64(); } - pub fn finish(&mut self) -> u64 { + #[must_use] + fn finish(&self) -> u64 { // Step 3. 
Accumulator convergence let mut acc = if self.length < 32 { self.seed.wrapping_add(PRIME64_5) @@ -284,8 +315,27 @@ impl IntoU64 for usize { #[cfg(test)] mod test { + use core::array; + use super::*; + #[test] + fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { + let bytes = [0x9c; 32]; + + let mut byte_by_byte = XxHash64::with_seed(0); + for byte in bytes.chunks(1) { + byte_by_byte.write(byte); + } + let byte_by_byte = byte_by_byte.finish(); + + let mut one_chunk = XxHash64::with_seed(0); + one_chunk.write(&bytes); + let one_chunk = one_chunk.finish(); + + assert_eq!(byte_by_byte, one_chunk); + } + #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = XxHash64::with_seed(0); @@ -301,9 +351,32 @@ mod test { } #[test] - fn hash_of_exactly_32_bytes() { + fn hash_of_multiple_bytes_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); + } + + #[test] + fn hash_of_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); let mut hasher = XxHash64::with_seed(0); - hasher.write(&[0; 32]); - assert_eq!(hasher.finish(), 0xf6e9_be5d_7063_2cf5); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); + } + + #[test] + fn hash_with_different_seed_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); + } + + #[test] + fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } } From 537f5f81d296ca70a6a2ded7d6da10524ad161fd Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 15:06:07 -0400 Subject: [PATCH 017/166] Simplify oneshot --- 
compare/src/lib.rs | 23 +++++++++++ src/lib.rs | 98 ++++++++++++++++++++++++++++------------------ 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index be6a7c411..3f5654e91 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -1,8 +1,19 @@ #![cfg(test)] use proptest::{num, prelude::*, test_runner::TestCaseResult}; +use std::hash::Hasher; proptest! { + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + #[test] fn oneshot(seed: u64, data: Vec) { oneshot_impl(seed, &data)?; @@ -24,6 +35,18 @@ proptest! { } } +fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = xx_renu::XxHash64::oneshot(seed, data); + let one_chunk = { + let mut hasher = xx_renu::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) +} + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = xx_hash_sys::Stream::oneshot(seed, data); let rust = xx_renu::XxHash64::oneshot(seed, data); diff --git a/src/lib.rs b/src/lib.rs index c21b48e57..2fbda2761 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ #[cfg(test)] extern crate std; -use core::{fmt, mem, hash::Hasher}; +use core::{fmt, hash::Hasher, mem}; // Keeping these constants in this form to match the C code. const PRIME64_1: u64 = 0x9E3779B185EBCA87; @@ -132,6 +132,17 @@ impl Accumulators { *acc4 = round(*acc4, lane4); } + fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { + while let Some((chunk, rest)) = data.split_first_chunk::<32>() { + // SAFETY: We have the right number of bytes and are + // handling the unaligned case. 
+ let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + self.write(lanes); + data = rest; + } + data + } + const fn finish(&self) -> u64 { let [acc1, acc2, acc3, acc4] = self.0; @@ -181,12 +192,22 @@ pub struct XxHash64 { length: u64, } +impl Default for XxHash64 { + fn default() -> Self { + Self::with_seed(0) + } +} + impl XxHash64 { #[must_use] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { - let mut this = Self::with_seed(seed); - this.write(data); - this.finish() + let len = data.len(); + + let mut accumulators = Accumulators::new(seed); + + let data = accumulators.write_many(data); + + Self::finish_with(seed, len.into_u64(), &accumulators, data) } #[must_use] @@ -200,49 +221,19 @@ impl XxHash64 { length: 0, } } -} -impl Hasher for XxHash64 { - fn write(&mut self, data: &[u8]) { - let len = data.len(); - - // Step 2. Process stripes - let (buffered_lanes, data) = self.buffer.extend(data); - - if let Some(&lanes) = buffered_lanes { - self.accumulators.write(lanes); - } - - let mut data = data; - while let Some((chunk, rest)) = data.split_first_chunk::<32>() { - // SAFETY: We have the right number of bytes and are - // handling the unaligned case. - let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; - self.accumulators.write(lanes); - data = rest; - } - let data = data; - - self.buffer.set(data); - - self.length += len.into_u64(); - } - - #[must_use] - fn finish(&self) -> u64 { + fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. Accumulator convergence - let mut acc = if self.length < 32 { - self.seed.wrapping_add(PRIME64_5) + let mut acc = if len < 32 { + seed.wrapping_add(PRIME64_5) } else { - self.accumulators.finish() + accumulators.finish() }; // Step 4. Add input length - acc += self.length; + acc += len; // Step 5. 
Consume remaining input - let mut remaining = self.buffer.remaining(); - while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { let lane = u64::from_ne_bytes(*chunk); // todo: little-endian @@ -284,6 +275,35 @@ impl Hasher for XxHash64 { } } +impl Hasher for XxHash64 { + fn write(&mut self, data: &[u8]) { + let len = data.len(); + + // Step 2. Process stripes + let (buffered_lanes, data) = self.buffer.extend(data); + + if let Some(&lanes) = buffered_lanes { + self.accumulators.write(lanes); + } + + let data = self.accumulators.write_many(data); + + self.buffer.set(data); + + self.length += len.into_u64(); + } + + #[must_use] + fn finish(&self) -> u64 { + Self::finish_with( + self.seed, + self.length, + &self.accumulators, + self.buffer.remaining(), + ) + } +} + const fn round(mut acc: u64, lane: u64) -> u64 { acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); acc = acc.rotate_left(31); From 8012bef79f47fed1bb748d9b0b9036f44c2721ea Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 16:05:17 -0400 Subject: [PATCH 018/166] inline it --- compare/benches/benchmark.rs | 76 ++++++++++++++++++++++++++++++------ src/lib.rs | 9 ++++- 2 files changed, 73 insertions(+), 12 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index ae0ea4964..6cb801573 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -6,8 +6,63 @@ use twox_hash::XxHash64 as Old; use xx_hash_sys::Stream; use xx_renu::XxHash64; +const TINY_DATA_SIZE: usize = 32; +const BIG_DATA_SIZE: usize = 100 * 1024 * 1024; + +fn tiny_data(c: &mut Criterion) { + let (seed, data) = gen_data(TINY_DATA_SIZE); + let mut g = c.benchmark_group("tiny_data"); + + for size in 0..=data.len() { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("xxHash/oneshot/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = Stream::oneshot(seed, data); + black_box(hash); + }) + }); + + let 
id = format!("xxHash/streaming/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = { + let mut hasher = Stream::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + black_box(hash); + }) + }); + + let id = format!("renu/oneshot/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = XxHash64::oneshot(seed, data); + black_box(hash); + }) + }); + + let id = format!("renu/streaming/{size}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = { + let mut hasher = XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + black_box(hash); + }) + }); + } + + g.finish(); +} + fn oneshot(c: &mut Criterion) { - let (seed, data) = gen_data(); + let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("oneshot"); for size in half_sizes(&data).take(10) { @@ -17,7 +72,7 @@ fn oneshot(c: &mut Criterion) { let id = format!("xxHash/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = Stream::oneshot(seed, &data); + let hash = Stream::oneshot(seed, data); black_box(hash); }) }); @@ -25,7 +80,7 @@ fn oneshot(c: &mut Criterion) { let id = format!("renu/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = XxHash64::oneshot(seed, &data); + let hash = XxHash64::oneshot(seed, data); black_box(hash); }) }); @@ -35,7 +90,7 @@ fn oneshot(c: &mut Criterion) { } fn streaming_one_chunk(c: &mut Criterion) { - let (seed, data) = gen_data(); + let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("streaming_one_chunk"); for size in half_sizes(&data).take(10) { @@ -46,7 +101,7 @@ fn streaming_one_chunk(c: &mut Criterion) { g.bench_function(id, |b| { b.iter(|| { let mut hasher = Stream::with_seed(seed); - hasher.write(&data); + hasher.write(data); let hash = hasher.finish(); black_box(hash); }) @@ -56,7 +111,7 @@ fn streaming_one_chunk(c: &mut Criterion) { g.bench_function(id, |b| { b.iter(|| { let mut hasher = XxHash64::with_seed(seed); - hasher.write(&data); + hasher.write(data); let hash 
= hasher.finish(); black_box(hash); }) @@ -66,7 +121,7 @@ fn streaming_one_chunk(c: &mut Criterion) { g.bench_function(id, |b| { b.iter(|| { let mut hasher = Old::with_seed(seed); - hasher.write(&data); + hasher.write(data); let hash = hasher.finish(); black_box(hash); }) @@ -77,14 +132,13 @@ fn streaming_one_chunk(c: &mut Criterion) { } const SEED: u64 = 0xc651_4843_1995_363f; -const DATA_SIZE: usize = 100 * 1024 * 1024; -fn gen_data() -> (u64, Vec) { +fn gen_data(length: usize) -> (u64, Vec) { let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); let seed = rng.gen(); - let mut data = vec![0; DATA_SIZE]; + let mut data = vec![0; length]; rng.fill_bytes(&mut data); (seed, data) @@ -103,5 +157,5 @@ fn half_sizes(data: &[u8]) -> impl Iterator { ) } -criterion_group!(benches, oneshot, streaming_one_chunk); +criterion_group!(benches, tiny_data, oneshot, streaming_one_chunk); criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index 2fbda2761..9386c68e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -199,10 +199,16 @@ impl Default for XxHash64 { } impl XxHash64 { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. #[must_use] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { let len = data.len(); + // Notably, since we know that there's no more data coming, we + // don't need to construct the intermediate buffers or copy + // data to / from them. + let mut accumulators = Accumulators::new(seed); let data = accumulators.write_many(data); @@ -213,7 +219,6 @@ impl XxHash64 { #[must_use] pub const fn with_seed(seed: u64) -> Self { // Step 1. Initialize internal accumulators - Self { seed, accumulators: Accumulators::new(seed), @@ -222,6 +227,8 @@ impl XxHash64 { } } + #[must_use] + #[inline(always)] fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. 
Accumulator convergence let mut acc = if len < 32 { From 8233b3669bf6c49ada4697d16ec5901e94f3ce37 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 16:07:12 -0400 Subject: [PATCH 019/166] add little endian --- src/lib.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9386c68e0..c89c430bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -124,12 +124,11 @@ impl Accumulators { fn write(&mut self, lanes: [u64; 4]) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; - // todo: little-endian transform - *acc1 = round(*acc1, lane1); - *acc2 = round(*acc2, lane2); - *acc3 = round(*acc3, lane3); - *acc4 = round(*acc4, lane4); + *acc1 = round(*acc1, lane1.to_le()); + *acc2 = round(*acc2, lane2.to_le()); + *acc3 = round(*acc3, lane3.to_le()); + *acc4 = round(*acc4, lane4.to_le()); } fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { @@ -242,8 +241,7 @@ impl XxHash64 { // Step 5. 
Consume remaining input while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { - let lane = u64::from_ne_bytes(*chunk); - // todo: little-endian + let lane = u64::from_ne_bytes(*chunk).to_le(); acc ^= round(0, lane); acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); @@ -252,8 +250,7 @@ impl XxHash64 { } while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { - let lane = u32::from_ne_bytes(*chunk).into_u64(); - // todo: little-endian + let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); acc ^= lane.wrapping_mul(PRIME64_1); acc = acc.rotate_left(23).wrapping_mul(PRIME64_2); From 542a9cf06f734eb42ac13dbc7c0c1ae75f9aff2d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 16:11:00 -0400 Subject: [PATCH 020/166] std and serialize impls --- Cargo.toml | 14 ++++ src/lib.rs | 183 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 195 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 09a2d4f94..818b3be61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,3 +9,17 @@ members = [ "renu-sum", "xx_hash-sys", ] + +[features] +default = ["std"] + +std = ["dep:rand"] + +serialize = ["dep:serde"] + +[dependencies] +rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } +serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } + +[dev-dependencies] +serde_json = "1.0.117" diff --git a/src/lib.rs b/src/lib.rs index c89c430bf..8a9ee551d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ const PRIME64_3: u64 = 0x165667B19E3779F9; const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; +#[derive(PartialEq)] #[repr(align(32))] struct AlignedData([u8; Self::LEN]); @@ -39,7 +40,7 @@ impl fmt::Debug for AlignedData { } } -#[derive(Debug)] +#[derive(Debug, PartialEq)] struct Buffer { offset: usize, data: AlignedData, @@ -109,6 +110,7 @@ impl Buffer { } } +#[derive(PartialEq)] 
struct Accumulators([u64; 4]); impl Accumulators { @@ -183,7 +185,7 @@ impl fmt::Debug for Accumulators { } } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct XxHash64 { seed: u64, accumulators: Accumulators, @@ -404,3 +406,180 @@ mod test { assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } } + +#[cfg(feature = "std")] +mod std_impl { + use core::hash::BuildHasher; + + use super::*; + + pub struct RandomXxHashBuilder64(u64); + + impl Default for RandomXxHashBuilder64 { + fn default() -> Self { + Self::new() + } + } + + impl RandomXxHashBuilder64 { + fn new() -> Self { + Self(rand::random()) + } + } + + impl BuildHasher for RandomXxHashBuilder64 { + type Hasher = XxHash64; + + fn build_hasher(&self) -> Self::Hasher { + XxHash64::with_seed(self.0) + } + } + + #[cfg(test)] + mod test { + use core::hash::BuildHasherDefault; + use std::collections::HashMap; + + use super::*; + + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_random_seed() { + let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + } +} + +#[cfg(feature = "serialize")] +mod serialize_impl { + use serde::{Deserialize, Serialize}; + + use super::*; + + impl<'de> Deserialize<'de> for XxHash64 { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let shim = Deserialize::deserialize(deserializer)?; + + let Shim { + total_len, + seed, + core, + buffer, + buffer_usage, + } = shim; + let Core { v1, v2, v3, v4 } = core; + + Ok(XxHash64 { + seed, + accumulators: Accumulators([v1, v2, v3, v4]), + buffer: Buffer { + offset: buffer_usage, + data: AlignedData(buffer), + }, + length: total_len, + }) + } + } + + impl Serialize for XxHash64 { + fn 
serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let XxHash64 { + seed, + ref accumulators, + ref buffer, + length, + } = *self; + let [v1, v2, v3, v4] = accumulators.0; + let Buffer { offset, ref data } = *buffer; + + let shim = Shim { + total_len: length, + seed, + core: Core { v1, v2, v3, v4 }, + buffer: data.0, + buffer_usage: offset, + }; + + shim.serialize(serializer) + } + } + + #[derive(Serialize, Deserialize)] + struct Shim { + total_len: u64, + seed: u64, + core: Core, + buffer: [u8; 32], + buffer_usage: usize, + } + + #[derive(Serialize, Deserialize)] + struct Core { + v1: u64, + v2: u64, + v3: u64, + v4: u64, + } + + #[cfg(test)] + mod test { + use super::*; + + type Result = core::result::Result; + + #[test] + fn test_serialization_cycle() -> Result { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = serde_json::to_string(&hasher)?; + let unserialized: XxHash64 = serde_json::from_str(&serialized)?; + assert_eq!(hasher, unserialized); + Ok(()) + } + + #[test] + fn test_serialization_stability() -> Result { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = r#"{ + "total_len": 14, + "seed": 0, + "core": { + "v1": 6983438078262162902, + "v2": 14029467366897019727, + "v3": 0, + "v4": 7046029288634856825 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; + + let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); + assert_eq!(hasher, unserialized); + Ok(()) + } + } +} From 678f579feac3cbea8c0d0bd726b7ec9d1499679a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 16:49:08 -0400 Subject: [PATCH 021/166] moar --- src/lib.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 
8a9ee551d..a0b6cd48f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; #[derive(PartialEq)] -#[repr(align(32))] +#[repr(align(8))] struct AlignedData([u8; Self::LEN]); impl AlignedData { @@ -203,12 +203,13 @@ impl XxHash64 { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] + #[inline] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { let len = data.len(); - // Notably, since we know that there's no more data coming, we - // don't need to construct the intermediate buffers or copy - // data to / from them. + // Since we know that there's no more data coming, we don't + // need to construct the intermediate buffers or copy data to + // or from the buffers. let mut accumulators = Accumulators::new(seed); @@ -458,6 +459,9 @@ mod std_impl { } } +#[cfg(feature = "std")] +pub use std_impl::RandomXxHashBuilder64; + #[cfg(feature = "serialize")] mod serialize_impl { use serde::{Deserialize, Serialize}; From 006bd68c0ff5c4ef2543c4eac748a7cc7ed7f529 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 21:11:55 -0400 Subject: [PATCH 022/166] tweaks --- compare/src/lib.rs | 26 +++++++++++++++++++++++ src/lib.rs | 53 ++++++++++++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 3f5654e91..c717eff7b 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -14,6 +14,11 @@ proptest! 
{ oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; } + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + #[test] fn oneshot(seed: u64, data: Vec) { oneshot_impl(seed, &data)?; @@ -47,6 +52,20 @@ fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { Ok(()) } +fn oneshot_same_as_many_chunks_impl(seed: u64, data: &[u8], chunks: &[Vec]) -> TestCaseResult { + let oneshot = xx_renu::XxHash64::oneshot(seed, data); + let many_chunks = { + let mut hasher = xx_renu::XxHash64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) +} + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = xx_hash_sys::Stream::oneshot(seed, data); let rust = xx_renu::XxHash64::oneshot(seed, data); @@ -78,3 +97,10 @@ fn vec_and_index() -> impl Strategy, usize)> { (Just(vec), 0..len) }) } + +fn data_and_chunks() -> impl Strategy, Vec>)> { + prop::collection::vec(prop::collection::vec(num::u8::ANY, 90..=100), 90..=100).prop_map(|vs| { + let data = vs.iter().flatten().copied().collect(); + (data, vs) + }) +} diff --git a/src/lib.rs b/src/lib.rs index a0b6cd48f..d0eb2ec73 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,27 +14,27 @@ const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; #[derive(PartialEq)] -#[repr(align(8))] -struct AlignedData([u8; Self::LEN]); - -impl AlignedData { - const LEN: usize = 32; +struct BufferData([u64; 4]); +impl BufferData { const fn new() -> Self { - Self([0; Self::LEN]) + Self([0; 4]) } - const fn len(&self) -> usize { - Self::LEN + fn bytes(&self) -> &[u8; 32] { + const { assert!(mem::align_of::() <= mem::align_of::()) } + // SAFETY[bytes]: The alignment of `u64` is at least that of + // `u8` and all the values are initialized. 
+ unsafe { &*self.0.as_ptr().cast() } } - const fn as_u64s(&self) -> &[u64; 4] { - // SAFETY: We are guaranteed to be aligned - unsafe { mem::transmute(&self.0) } + fn bytes_mut(&mut self) -> &mut [u8; 32] { + // SAFETY: See SAFETY[bytes] + unsafe { &mut *self.0.as_mut_ptr().cast() } } } -impl fmt::Debug for AlignedData { +impl fmt::Debug for BufferData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.0.iter()).finish() } @@ -43,14 +43,14 @@ impl fmt::Debug for AlignedData { #[derive(Debug, PartialEq)] struct Buffer { offset: usize, - data: AlignedData, + data: BufferData, } impl Buffer { const fn new() -> Self { Self { offset: 0, - data: AlignedData::new(), + data: BufferData::new(), } } @@ -64,13 +64,14 @@ impl Buffer { // have much benefit other than reducing code size by a tiny // bit. - debug_assert!(self.offset <= self.data.len()); - if self.offset == 0 { return (None, data); }; - let empty = &mut self.data.0[self.offset..]; + let bytes = self.data.bytes_mut(); + debug_assert!(self.offset <= bytes.len()); + + let empty = &mut bytes[self.offset..]; let n_to_copy = usize::min(empty.len(), data.len()); let dst = &mut empty[..n_to_copy]; @@ -80,11 +81,11 @@ impl Buffer { dst.copy_from_slice(src); self.offset += n_to_copy; - debug_assert!(self.offset <= self.data.len()); + debug_assert!(self.offset <= bytes.len()); - if self.offset == self.data.len() { + if self.offset == bytes.len() { self.offset = 0; - (Some(self.data.as_u64s()), rest) + (Some(&self.data.0), rest) } else { (None, rest) } @@ -99,14 +100,15 @@ impl Buffer { let n_to_copy = data.len(); - debug_assert!(n_to_copy < self.data.len()); + let bytes = self.data.bytes_mut(); + debug_assert!(n_to_copy < bytes.len()); - self.data.0[..n_to_copy].copy_from_slice(data); + bytes[..n_to_copy].copy_from_slice(data); self.offset = data.len(); } fn remaining(&self) -> &[u8] { - &self.data.0[..self.offset] + &self.data.bytes()[..self.offset] } } @@ -203,6 +205,10 @@ impl 
XxHash64 { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] + // RATIONALE[inline]: In one case [1], this `inline` helps unlock a + // speedup from ~900µs to ~200µs. + // + // [1]: https://github.com/apache/datafusion-comet/pull/575 #[inline] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { let len = data.len(); @@ -230,6 +236,7 @@ impl XxHash64 { } #[must_use] + // RATIONALE: See RATIONALE[inline] #[inline(always)] fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. Accumulator convergence From efd13bd29d1dd0e42fc05a0b522fcb2ba24b6358 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 25 Jun 2024 21:13:08 -0400 Subject: [PATCH 023/166] simpelr --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index d0eb2ec73..db413b9bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,7 @@ impl BufferData { } fn bytes(&self) -> &[u8; 32] { - const { assert!(mem::align_of::() <= mem::align_of::()) } + const _: () = assert!(mem::align_of::() <= mem::align_of::()); // SAFETY[bytes]: The alignment of `u64` is at least that of // `u8` and all the values are initialized. 
unsafe { &*self.0.as_ptr().cast() } From 3f722df12e64d06521a147b0f488ba9a52b5b88e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 14:25:08 -0400 Subject: [PATCH 024/166] to-test --- README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..1eca71ca2 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +cargo test # unit tests +cargo test -p comparison # proptests +cargo miri test # unsafe +cargo miri test --target s390x-unknown-linux-gnu # big-endian From ae7b3884443c584dc0a84806f773fea09c0c004e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 14:30:52 -0400 Subject: [PATCH 025/166] moar --- README.md | 6 ++++++ compare/src/lib.rs | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1eca71ca2..f2f88dd86 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,9 @@ cargo test # unit tests cargo test -p comparison # proptests cargo miri test # unsafe cargo miri test --target s390x-unknown-linux-gnu # big-endian + +minimal versions +no-features +all-features + +features for 32 / 64 / xx3 diff --git a/compare/src/lib.rs b/compare/src/lib.rs index c717eff7b..6e1a4e8be 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -99,7 +99,7 @@ fn vec_and_index() -> impl Strategy, usize)> { } fn data_and_chunks() -> impl Strategy, Vec>)> { - prop::collection::vec(prop::collection::vec(num::u8::ANY, 90..=100), 90..=100).prop_map(|vs| { + prop::collection::vec(prop::collection::vec(num::u8::ANY, 0..100), 0..100).prop_map(|vs| { let data = vs.iter().flatten().copied().collect(); (data, vs) }) From 5d455ff31a0ff398923d7cc40c0cf317ef3c6db9 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 18:38:39 -0400 Subject: [PATCH 026/166] move to new file --- Cargo.toml | 4 +- src/lib.rs | 593 +---------------------------------------------- src/xxhash64.rs | 594 ++++++++++++++++++++++++++++++++++++++++++++++++ 
3 files changed, 601 insertions(+), 590 deletions(-) create mode 100644 src/xxhash64.rs diff --git a/Cargo.toml b/Cargo.toml index 818b3be61..4e91fc7cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,12 +11,14 @@ members = [ ] [features] -default = ["std"] +default = ["std", "xxhash64"] std = ["dep:rand"] serialize = ["dep:serde"] +xxhash64 = [] + [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } diff --git a/src/lib.rs b/src/lib.rs index db413b9bd..fde3a8e96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,593 +4,8 @@ #[cfg(test)] extern crate std; -use core::{fmt, hash::Hasher, mem}; +#[cfg(feature = "xxhash64")] +mod xxhash64; -// Keeping these constants in this form to match the C code. -const PRIME64_1: u64 = 0x9E3779B185EBCA87; -const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; -const PRIME64_3: u64 = 0x165667B19E3779F9; -const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; -const PRIME64_5: u64 = 0x27D4EB2F165667C5; - -#[derive(PartialEq)] -struct BufferData([u64; 4]); - -impl BufferData { - const fn new() -> Self { - Self([0; 4]) - } - - fn bytes(&self) -> &[u8; 32] { - const _: () = assert!(mem::align_of::() <= mem::align_of::()); - // SAFETY[bytes]: The alignment of `u64` is at least that of - // `u8` and all the values are initialized. 
- unsafe { &*self.0.as_ptr().cast() } - } - - fn bytes_mut(&mut self) -> &mut [u8; 32] { - // SAFETY: See SAFETY[bytes] - unsafe { &mut *self.0.as_mut_ptr().cast() } - } -} - -impl fmt::Debug for BufferData { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.0.iter()).finish() - } -} - -#[derive(Debug, PartialEq)] -struct Buffer { - offset: usize, - data: BufferData, -} - -impl Buffer { - const fn new() -> Self { - Self { - offset: 0, - data: BufferData::new(), - } - } - - fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { - // Most of the slice methods we use here have `_unchecked` variants, but - // - // 1. this method is called one time per `XxHash64::write` call - // 2. this method early exits if we don't have anything in the buffer - // - // Because of this, removing the panics via `unsafe` doesn't - // have much benefit other than reducing code size by a tiny - // bit. - - if self.offset == 0 { - return (None, data); - }; - - let bytes = self.data.bytes_mut(); - debug_assert!(self.offset <= bytes.len()); - - let empty = &mut bytes[self.offset..]; - let n_to_copy = usize::min(empty.len(), data.len()); - - let dst = &mut empty[..n_to_copy]; - - let (src, rest) = data.split_at(n_to_copy); - - dst.copy_from_slice(src); - self.offset += n_to_copy; - - debug_assert!(self.offset <= bytes.len()); - - if self.offset == bytes.len() { - self.offset = 0; - (Some(&self.data.0), rest) - } else { - (None, rest) - } - } - - fn set(&mut self, data: &[u8]) { - if data.is_empty() { - return; - } - - debug_assert_eq!(self.offset, 0); - - let n_to_copy = data.len(); - - let bytes = self.data.bytes_mut(); - debug_assert!(n_to_copy < bytes.len()); - - bytes[..n_to_copy].copy_from_slice(data); - self.offset = data.len(); - } - - fn remaining(&self) -> &[u8] { - &self.data.bytes()[..self.offset] - } -} - -#[derive(PartialEq)] -struct Accumulators([u64; 4]); - -impl Accumulators { - const fn new(seed: u64) -> Self { - 
Self([ - seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), - seed.wrapping_add(PRIME64_2), - seed, - seed.wrapping_sub(PRIME64_1), - ]) - } - - fn write(&mut self, lanes: [u64; 4]) { - let [acc1, acc2, acc3, acc4] = &mut self.0; - let [lane1, lane2, lane3, lane4] = lanes; - - *acc1 = round(*acc1, lane1.to_le()); - *acc2 = round(*acc2, lane2.to_le()); - *acc3 = round(*acc3, lane3.to_le()); - *acc4 = round(*acc4, lane4.to_le()); - } - - fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { - while let Some((chunk, rest)) = data.split_first_chunk::<32>() { - // SAFETY: We have the right number of bytes and are - // handling the unaligned case. - let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; - self.write(lanes); - data = rest; - } - data - } - - const fn finish(&self) -> u64 { - let [acc1, acc2, acc3, acc4] = self.0; - - let mut acc = { - let acc1 = acc1.rotate_left(1); - let acc2 = acc2.rotate_left(7); - let acc3 = acc3.rotate_left(12); - let acc4 = acc4.rotate_left(18); - - acc1.wrapping_add(acc2) - .wrapping_add(acc3) - .wrapping_add(acc4) - }; - - acc = Self::merge_accumulator(acc, acc1); - acc = Self::merge_accumulator(acc, acc2); - acc = Self::merge_accumulator(acc, acc3); - acc = Self::merge_accumulator(acc, acc4); - - acc - } - - const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { - acc ^= round(0, acc_n); - acc = acc.wrapping_mul(PRIME64_1); - acc.wrapping_add(PRIME64_4) - } -} - -impl fmt::Debug for Accumulators { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let [acc1, acc2, acc3, acc4] = self.0; - f.debug_struct("Accumulators") - .field("acc1", &acc1) - .field("acc2", &acc2) - .field("acc3", &acc3) - .field("acc4", &acc4) - .finish() - } -} - -#[derive(Debug, PartialEq)] -pub struct XxHash64 { - seed: u64, - accumulators: Accumulators, - buffer: Buffer, - length: u64, -} - -impl Default for XxHash64 { - fn default() -> Self { - Self::with_seed(0) - } -} - -impl XxHash64 { - /// Hash all 
data at once. If you can use this function, you may - /// see noticable speed gains for certain types of input. - #[must_use] - // RATIONALE[inline]: In one case [1], this `inline` helps unlock a - // speedup from ~900µs to ~200µs. - // - // [1]: https://github.com/apache/datafusion-comet/pull/575 - #[inline] - pub fn oneshot(seed: u64, data: &[u8]) -> u64 { - let len = data.len(); - - // Since we know that there's no more data coming, we don't - // need to construct the intermediate buffers or copy data to - // or from the buffers. - - let mut accumulators = Accumulators::new(seed); - - let data = accumulators.write_many(data); - - Self::finish_with(seed, len.into_u64(), &accumulators, data) - } - - #[must_use] - pub const fn with_seed(seed: u64) -> Self { - // Step 1. Initialize internal accumulators - Self { - seed, - accumulators: Accumulators::new(seed), - buffer: Buffer::new(), - length: 0, - } - } - - #[must_use] - // RATIONALE: See RATIONALE[inline] - #[inline(always)] - fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { - // Step 3. Accumulator convergence - let mut acc = if len < 32 { - seed.wrapping_add(PRIME64_5) - } else { - accumulators.finish() - }; - - // Step 4. Add input length - acc += len; - - // Step 5. 
Consume remaining input - while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { - let lane = u64::from_ne_bytes(*chunk).to_le(); - - acc ^= round(0, lane); - acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); - acc = acc.wrapping_add(PRIME64_4); - remaining = rest; - } - - while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { - let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); - - acc ^= lane.wrapping_mul(PRIME64_1); - acc = acc.rotate_left(23).wrapping_mul(PRIME64_2); - acc = acc.wrapping_add(PRIME64_3); - - remaining = rest; - } - - while let Some((chunk, rest)) = remaining.split_first_chunk::<1>() { - let lane = chunk[0].into_u64(); - - acc ^= lane.wrapping_mul(PRIME64_5); - acc = acc.rotate_left(11).wrapping_mul(PRIME64_1); - - remaining = rest; - } - - // Step 6. Final mix (avalanche) - acc ^= acc >> 33; - acc = acc.wrapping_mul(PRIME64_2); - acc ^= acc >> 29; - acc = acc.wrapping_mul(PRIME64_3); - acc ^= acc >> 32; - - acc - } -} - -impl Hasher for XxHash64 { - fn write(&mut self, data: &[u8]) { - let len = data.len(); - - // Step 2. 
Process stripes - let (buffered_lanes, data) = self.buffer.extend(data); - - if let Some(&lanes) = buffered_lanes { - self.accumulators.write(lanes); - } - - let data = self.accumulators.write_many(data); - - self.buffer.set(data); - - self.length += len.into_u64(); - } - - #[must_use] - fn finish(&self) -> u64 { - Self::finish_with( - self.seed, - self.length, - &self.accumulators, - self.buffer.remaining(), - ) - } -} - -const fn round(mut acc: u64, lane: u64) -> u64 { - acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); - acc = acc.rotate_left(31); - acc.wrapping_mul(PRIME64_1) -} - -trait IntoU64 { - fn into_u64(self) -> u64; -} - -impl IntoU64 for u8 { - fn into_u64(self) -> u64 { - self.into() - } -} - -impl IntoU64 for u32 { - fn into_u64(self) -> u64 { - self.into() - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -impl IntoU64 for usize { - fn into_u64(self) -> u64 { - self as u64 - } -} - -#[cfg(test)] -mod test { - use core::array; - - use super::*; - - #[test] - fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { - let bytes = [0x9c; 32]; - - let mut byte_by_byte = XxHash64::with_seed(0); - for byte in bytes.chunks(1) { - byte_by_byte.write(byte); - } - let byte_by_byte = byte_by_byte.finish(); - - let mut one_chunk = XxHash64::with_seed(0); - one_chunk.write(&bytes); - let one_chunk = one_chunk.finish(); - - assert_eq!(byte_by_byte, one_chunk); - } - - #[test] - fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); - } - - #[test] - fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(&[42]); - assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); - } - - #[test] - fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - assert_eq!(hasher.finish(), 
0x7b06_c531_ea43_e89f); - } - - #[test] - fn hash_of_multiple_chunks_matches_c_implementation() { - let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash64::with_seed(0); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); - } - - #[test] - fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); - } - - #[test] - fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { - let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); - } -} - -#[cfg(feature = "std")] -mod std_impl { - use core::hash::BuildHasher; - - use super::*; - - pub struct RandomXxHashBuilder64(u64); - - impl Default for RandomXxHashBuilder64 { - fn default() -> Self { - Self::new() - } - } - - impl RandomXxHashBuilder64 { - fn new() -> Self { - Self(rand::random()) - } - } - - impl BuildHasher for RandomXxHashBuilder64 { - type Hasher = XxHash64; - - fn build_hasher(&self) -> Self::Hasher { - XxHash64::with_seed(self.0) - } - } - - #[cfg(test)] - mod test { - use core::hash::BuildHasherDefault; - use std::collections::HashMap; - - use super::*; - - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - } -} - -#[cfg(feature = "std")] -pub use std_impl::RandomXxHashBuilder64; - -#[cfg(feature = "serialize")] -mod serialize_impl { - use serde::{Deserialize, Serialize}; - - 
use super::*; - - impl<'de> Deserialize<'de> for XxHash64 { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let shim = Deserialize::deserialize(deserializer)?; - - let Shim { - total_len, - seed, - core, - buffer, - buffer_usage, - } = shim; - let Core { v1, v2, v3, v4 } = core; - - Ok(XxHash64 { - seed, - accumulators: Accumulators([v1, v2, v3, v4]), - buffer: Buffer { - offset: buffer_usage, - data: AlignedData(buffer), - }, - length: total_len, - }) - } - } - - impl Serialize for XxHash64 { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let XxHash64 { - seed, - ref accumulators, - ref buffer, - length, - } = *self; - let [v1, v2, v3, v4] = accumulators.0; - let Buffer { offset, ref data } = *buffer; - - let shim = Shim { - total_len: length, - seed, - core: Core { v1, v2, v3, v4 }, - buffer: data.0, - buffer_usage: offset, - }; - - shim.serialize(serializer) - } - } - - #[derive(Serialize, Deserialize)] - struct Shim { - total_len: u64, - seed: u64, - core: Core, - buffer: [u8; 32], - buffer_usage: usize, - } - - #[derive(Serialize, Deserialize)] - struct Core { - v1: u64, - v2: u64, - v3: u64, - v4: u64, - } - - #[cfg(test)] - mod test { - use super::*; - - type Result = core::result::Result; - - #[test] - fn test_serialization_cycle() -> Result { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash64 = serde_json::from_str(&serialized)?; - assert_eq!(hasher, unserialized); - Ok(()) - } - - #[test] - fn test_serialization_stability() -> Result { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = r#"{ - "total_len": 14, - "seed": 0, - "core": { - "v1": 6983438078262162902, - "v2": 14029467366897019727, - "v3": 0, - "v4": 7046029288634856825 - }, - "buffer": [ - 72, 101, 108, 108, 111, 44, 32, 
119, - 111, 114, 108, 100, 33, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - ], - "buffer_usage": 14 - }"#; - - let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); - assert_eq!(hasher, unserialized); - Ok(()) - } - } -} +#[cfg(feature = "xxhash64")] +pub use xxhash64::*; diff --git a/src/xxhash64.rs b/src/xxhash64.rs new file mode 100644 index 000000000..fbeaefc2e --- /dev/null +++ b/src/xxhash64.rs @@ -0,0 +1,594 @@ +use core::{fmt, hash::Hasher, mem}; + +// Keeping these constants in this form to match the C code. +const PRIME64_1: u64 = 0x9E3779B185EBCA87; +const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; +const PRIME64_3: u64 = 0x165667B19E3779F9; +const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; +const PRIME64_5: u64 = 0x27D4EB2F165667C5; + +#[derive(PartialEq)] +struct BufferData([u64; 4]); + +impl BufferData { + const fn new() -> Self { + Self([0; 4]) + } + + fn bytes(&self) -> &[u8; 32] { + const _: () = assert!(mem::align_of::() <= mem::align_of::()); + // SAFETY[bytes]: The alignment of `u64` is at least that of + // `u8` and all the values are initialized. + unsafe { &*self.0.as_ptr().cast() } + } + + fn bytes_mut(&mut self) -> &mut [u8; 32] { + // SAFETY: See SAFETY[bytes] + unsafe { &mut *self.0.as_mut_ptr().cast() } + } +} + +impl fmt::Debug for BufferData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.0.iter()).finish() + } +} + +#[derive(Debug, PartialEq)] +struct Buffer { + offset: usize, + data: BufferData, +} + +impl Buffer { + const fn new() -> Self { + Self { + offset: 0, + data: BufferData::new(), + } + } + + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { + // Most of the slice methods we use here have `_unchecked` variants, but + // + // 1. this method is called one time per `XxHash64::write` call + // 2. 
this method early exits if we don't have anything in the buffer + // + // Because of this, removing the panics via `unsafe` doesn't + // have much benefit other than reducing code size by a tiny + // bit. + + if self.offset == 0 { + return (None, data); + }; + + let bytes = self.data.bytes_mut(); + debug_assert!(self.offset <= bytes.len()); + + let empty = &mut bytes[self.offset..]; + let n_to_copy = usize::min(empty.len(), data.len()); + + let dst = &mut empty[..n_to_copy]; + + let (src, rest) = data.split_at(n_to_copy); + + dst.copy_from_slice(src); + self.offset += n_to_copy; + + debug_assert!(self.offset <= bytes.len()); + + if self.offset == bytes.len() { + self.offset = 0; + (Some(&self.data.0), rest) + } else { + (None, rest) + } + } + + fn set(&mut self, data: &[u8]) { + if data.is_empty() { + return; + } + + debug_assert_eq!(self.offset, 0); + + let n_to_copy = data.len(); + + let bytes = self.data.bytes_mut(); + debug_assert!(n_to_copy < bytes.len()); + + bytes[..n_to_copy].copy_from_slice(data); + self.offset = data.len(); + } + + fn remaining(&self) -> &[u8] { + &self.data.bytes()[..self.offset] + } +} + +#[derive(PartialEq)] +struct Accumulators([u64; 4]); + +impl Accumulators { + const fn new(seed: u64) -> Self { + Self([ + seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), + seed.wrapping_add(PRIME64_2), + seed, + seed.wrapping_sub(PRIME64_1), + ]) + } + + fn write(&mut self, lanes: [u64; 4]) { + let [acc1, acc2, acc3, acc4] = &mut self.0; + let [lane1, lane2, lane3, lane4] = lanes; + + *acc1 = round(*acc1, lane1.to_le()); + *acc2 = round(*acc2, lane2.to_le()); + *acc3 = round(*acc3, lane3.to_le()); + *acc4 = round(*acc4, lane4.to_le()); + } + + fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { + while let Some((chunk, rest)) = data.split_first_chunk::<32>() { + // SAFETY: We have the right number of bytes and are + // handling the unaligned case. 
+ let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + self.write(lanes); + data = rest; + } + data + } + + const fn finish(&self) -> u64 { + let [acc1, acc2, acc3, acc4] = self.0; + + let mut acc = { + let acc1 = acc1.rotate_left(1); + let acc2 = acc2.rotate_left(7); + let acc3 = acc3.rotate_left(12); + let acc4 = acc4.rotate_left(18); + + acc1.wrapping_add(acc2) + .wrapping_add(acc3) + .wrapping_add(acc4) + }; + + acc = Self::merge_accumulator(acc, acc1); + acc = Self::merge_accumulator(acc, acc2); + acc = Self::merge_accumulator(acc, acc3); + acc = Self::merge_accumulator(acc, acc4); + + acc + } + + const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { + acc ^= round(0, acc_n); + acc = acc.wrapping_mul(PRIME64_1); + acc.wrapping_add(PRIME64_4) + } +} + +impl fmt::Debug for Accumulators { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let [acc1, acc2, acc3, acc4] = self.0; + f.debug_struct("Accumulators") + .field("acc1", &acc1) + .field("acc2", &acc2) + .field("acc3", &acc3) + .field("acc4", &acc4) + .finish() + } +} + +#[derive(Debug, PartialEq)] +pub struct XxHash64 { + seed: u64, + accumulators: Accumulators, + buffer: Buffer, + length: u64, +} + +impl Default for XxHash64 { + fn default() -> Self { + Self::with_seed(0) + } +} + +impl XxHash64 { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. + #[must_use] + // RATIONALE[inline]: In one case [1], this `inline` helps unlock a + // speedup from ~900µs to ~200µs. + // + // [1]: https://github.com/apache/datafusion-comet/pull/575 + #[inline] + pub fn oneshot(seed: u64, data: &[u8]) -> u64 { + let len = data.len(); + + // Since we know that there's no more data coming, we don't + // need to construct the intermediate buffers or copy data to + // or from the buffers. 
+ + let mut accumulators = Accumulators::new(seed); + + let data = accumulators.write_many(data); + + Self::finish_with(seed, len.into_u64(), &accumulators, data) + } + + #[must_use] + pub const fn with_seed(seed: u64) -> Self { + // Step 1. Initialize internal accumulators + Self { + seed, + accumulators: Accumulators::new(seed), + buffer: Buffer::new(), + length: 0, + } + } + + #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline(always)] + fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { + // Step 3. Accumulator convergence + let mut acc = if len < 32 { + seed.wrapping_add(PRIME64_5) + } else { + accumulators.finish() + }; + + // Step 4. Add input length + acc += len; + + // Step 5. Consume remaining input + while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { + let lane = u64::from_ne_bytes(*chunk).to_le(); + + acc ^= round(0, lane); + acc = acc.rotate_left(27).wrapping_mul(PRIME64_1); + acc = acc.wrapping_add(PRIME64_4); + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { + let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); + + acc ^= lane.wrapping_mul(PRIME64_1); + acc = acc.rotate_left(23).wrapping_mul(PRIME64_2); + acc = acc.wrapping_add(PRIME64_3); + + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<1>() { + let lane = chunk[0].into_u64(); + + acc ^= lane.wrapping_mul(PRIME64_5); + acc = acc.rotate_left(11).wrapping_mul(PRIME64_1); + + remaining = rest; + } + + // Step 6. Final mix (avalanche) + acc ^= acc >> 33; + acc = acc.wrapping_mul(PRIME64_2); + acc ^= acc >> 29; + acc = acc.wrapping_mul(PRIME64_3); + acc ^= acc >> 32; + + acc + } +} + +impl Hasher for XxHash64 { + fn write(&mut self, data: &[u8]) { + let len = data.len(); + + // Step 2. 
Process stripes + let (buffered_lanes, data) = self.buffer.extend(data); + + if let Some(&lanes) = buffered_lanes { + self.accumulators.write(lanes); + } + + let data = self.accumulators.write_many(data); + + self.buffer.set(data); + + self.length += len.into_u64(); + } + + #[must_use] + fn finish(&self) -> u64 { + Self::finish_with( + self.seed, + self.length, + &self.accumulators, + self.buffer.remaining(), + ) + } +} + +const fn round(mut acc: u64, lane: u64) -> u64 { + acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); + acc = acc.rotate_left(31); + acc.wrapping_mul(PRIME64_1) +} + +trait IntoU64 { + fn into_u64(self) -> u64; +} + +impl IntoU64 for u8 { + fn into_u64(self) -> u64 { + self.into() + } +} + +impl IntoU64 for u32 { + fn into_u64(self) -> u64 { + self.into() + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl IntoU64 for usize { + fn into_u64(self) -> u64 { + self as u64 + } +} + +#[cfg(test)] +mod test { + use core::array; + + use super::*; + + #[test] + fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { + let bytes = [0x9c; 32]; + + let mut byte_by_byte = XxHash64::with_seed(0); + for byte in bytes.chunks(1) { + byte_by_byte.write(byte); + } + let byte_by_byte = byte_by_byte.finish(); + + let mut one_chunk = XxHash64::with_seed(0); + one_chunk.write(&bytes); + let one_chunk = one_chunk.finish(); + + assert_eq!(byte_by_byte, one_chunk); + } + + #[test] + fn hash_of_nothing_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); + } + + #[test] + fn hash_of_single_byte_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[42]); + assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); + } + + #[test] + fn hash_of_multiple_bytes_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + assert_eq!(hasher.finish(), 
0x7b06_c531_ea43_e89f); + } + + #[test] + fn hash_of_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash64::with_seed(0); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); + } + + #[test] + fn hash_with_different_seed_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); + } + + #[test] + fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); + } +} + +#[cfg(feature = "std")] +mod std_impl { + use core::hash::BuildHasher; + + use super::*; + + pub struct RandomXxHashBuilder64(u64); + + impl Default for RandomXxHashBuilder64 { + fn default() -> Self { + Self::new() + } + } + + impl RandomXxHashBuilder64 { + fn new() -> Self { + Self(rand::random()) + } + } + + impl BuildHasher for RandomXxHashBuilder64 { + type Hasher = XxHash64; + + fn build_hasher(&self) -> Self::Hasher { + XxHash64::with_seed(self.0) + } + } + + #[cfg(test)] + mod test { + use core::hash::BuildHasherDefault; + use std::collections::HashMap; + + use super::*; + + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_random_seed() { + let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + } +} + +#[cfg(feature = "std")] +pub use std_impl::RandomXxHashBuilder64; + +#[cfg(feature = "serialize")] +mod serialize_impl { + use serde::{Deserialize, Serialize}; + + 
use super::*; + + impl<'de> Deserialize<'de> for XxHash64 { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let shim = Deserialize::deserialize(deserializer)?; + + let Shim { + total_len, + seed, + core, + buffer, + buffer_usage, + } = shim; + let Core { v1, v2, v3, v4 } = core; + + let mut buffer_data = BufferData::new(); + buffer_data.bytes_mut().copy_from_slice(&buffer); + + Ok(XxHash64 { + seed, + accumulators: Accumulators([v1, v2, v3, v4]), + buffer: Buffer { + offset: buffer_usage, + data: buffer_data, + }, + length: total_len, + }) + } + } + + impl Serialize for XxHash64 { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let XxHash64 { + seed, + ref accumulators, + ref buffer, + length, + } = *self; + let [v1, v2, v3, v4] = accumulators.0; + let Buffer { offset, ref data } = *buffer; + let buffer = *data.bytes(); + + let shim = Shim { + total_len: length, + seed, + core: Core { v1, v2, v3, v4 }, + buffer, + buffer_usage: offset, + }; + + shim.serialize(serializer) + } + } + + #[derive(Serialize, Deserialize)] + struct Shim { + total_len: u64, + seed: u64, + core: Core, + buffer: [u8; 32], + buffer_usage: usize, + } + + #[derive(Serialize, Deserialize)] + struct Core { + v1: u64, + v2: u64, + v3: u64, + v4: u64, + } + + #[cfg(test)] + mod test { + use super::*; + + type Result = core::result::Result; + + #[test] + fn test_serialization_cycle() -> Result { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = serde_json::to_string(&hasher)?; + let unserialized: XxHash64 = serde_json::from_str(&serialized)?; + assert_eq!(hasher, unserialized); + Ok(()) + } + + #[test] + fn test_serialization_stability() -> Result { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = r#"{ + "total_len": 14, + "seed": 0, + "core": { + "v1": 6983438078262162902, + "v2": 
14029467366897019727, + "v3": 0, + "v4": 7046029288634856825 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; + + let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); + assert_eq!(hasher, unserialized); + Ok(()) + } + } +} From 6e3961cb2f9974ea9c472d7a9b3c03062d433b08 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 21:19:03 -0400 Subject: [PATCH 027/166] 32-bit too --- Cargo.toml | 3 +- src/lib.rs | 39 ++++++ src/xxhash32.rs | 337 ++++++++++++++++++++++++++++++++++++++++++++++++ src/xxhash64.rs | 49 +++---- 4 files changed, 395 insertions(+), 33 deletions(-) create mode 100644 src/xxhash32.rs diff --git a/Cargo.toml b/Cargo.toml index 4e91fc7cd..2c44331ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,12 +11,13 @@ members = [ ] [features] -default = ["std", "xxhash64"] +default = ["std", "xxhash32", "xxhash64"] std = ["dep:rand"] serialize = ["dep:serde"] +xxhash32 = [] xxhash64 = [] [dependencies] diff --git a/src/lib.rs b/src/lib.rs index fde3a8e96..64a4d3f3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,8 +4,47 @@ #[cfg(test)] extern crate std; +#[cfg(feature = "xxhash32")] +mod xxhash32; + +#[cfg(feature = "xxhash32")] +pub use xxhash32::*; + #[cfg(feature = "xxhash64")] mod xxhash64; #[cfg(feature = "xxhash64")] pub use xxhash64::*; + +trait IntoU32 { + fn into_u32(self) -> u32; +} + +impl IntoU32 for u8 { + fn into_u32(self) -> u32 { + self.into() + } +} + +trait IntoU64 { + fn into_u64(self) -> u64; +} + +impl IntoU64 for u8 { + fn into_u64(self) -> u64 { + self.into() + } +} + +impl IntoU64 for u32 { + fn into_u64(self) -> u64 { + self.into() + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl IntoU64 for usize { + fn into_u64(self) -> u64 { + self as u64 + } +} diff --git a/src/xxhash32.rs b/src/xxhash32.rs new file mode 100644 index 000000000..41bf7f11d 
--- /dev/null +++ b/src/xxhash32.rs @@ -0,0 +1,337 @@ +use core::{fmt, hash::Hasher, mem}; + +use crate::{IntoU32, IntoU64}; + +// Keeping these constants in this form to match the C code. +const PRIME32_1: u32 = 0x9E3779B1; +const PRIME32_2: u32 = 0x85EBCA77; +const PRIME32_3: u32 = 0xC2B2AE3D; +const PRIME32_4: u32 = 0x27D4EB2F; +const PRIME32_5: u32 = 0x165667B1; + +type Lane = u32; +type Lanes = [Lane; 4]; +type Bytes = [u8; 16]; + +const BYTES_IN_LANE: usize = mem::size_of::(); + +#[derive(PartialEq)] +struct BufferData(Lanes); + +impl BufferData { + const fn new() -> Self { + Self([0; 4]) + } + + const fn bytes(&self) -> &Bytes { + const _: () = assert!(mem::align_of::() <= mem::align_of::()); + // SAFETY[bytes]: The alignment of `u32` is at least that of + // `u8` and all the values are initialized. + unsafe { &*self.0.as_ptr().cast() } + } + + fn bytes_mut(&mut self) -> &mut Bytes { + // SAFETY: See SAFETY[bytes] + unsafe { &mut *self.0.as_mut_ptr().cast() } + } +} + +impl fmt::Debug for BufferData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.0.iter()).finish() + } +} + +#[derive(Debug, PartialEq)] +struct Buffer { + offset: usize, + data: BufferData, +} + +impl Buffer { + const fn new() -> Self { + Self { + offset: 0, + data: BufferData::new(), + } + } + + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { + // Most of the slice methods we use here have `_unchecked` variants, but + // + // 1. this method is called one time per `XxHash64::write` call + // 2. this method early exits if we don't have anything in the buffer + // + // Because of this, removing the panics via `unsafe` doesn't + // have much benefit other than reducing code size by a tiny + // bit. 
+ + if self.offset == 0 { + return (None, data); + }; + + let bytes = self.data.bytes_mut(); + debug_assert!(self.offset <= bytes.len()); + + let empty = &mut bytes[self.offset..]; + let n_to_copy = usize::min(empty.len(), data.len()); + + let dst = &mut empty[..n_to_copy]; + + let (src, rest) = data.split_at(n_to_copy); + + dst.copy_from_slice(src); + self.offset += n_to_copy; + + debug_assert!(self.offset <= bytes.len()); + + if self.offset == bytes.len() { + self.offset = 0; + (Some(&self.data.0), rest) + } else { + (None, rest) + } + } + + fn set(&mut self, data: &[u8]) { + if data.is_empty() { + return; + } + + debug_assert_eq!(self.offset, 0); + + let n_to_copy = data.len(); + + let bytes = self.data.bytes_mut(); + debug_assert!(n_to_copy < bytes.len()); + + bytes[..n_to_copy].copy_from_slice(data); + self.offset = data.len(); + } + + fn remaining(&self) -> &[u8] { + &self.data.bytes()[..self.offset] + } +} + +struct Accumulators(Lanes); + +impl Accumulators { + const fn new(seed: u32) -> Self { + Self([ + seed.wrapping_add(PRIME32_1).wrapping_add(PRIME32_2), + seed.wrapping_add(PRIME32_2), + seed, + seed.wrapping_sub(PRIME32_1), + ]) + } + + fn write(&mut self, lanes: Lanes) { + let [acc1, acc2, acc3, acc4] = &mut self.0; + let [lane1, lane2, lane3, lane4] = lanes; + + *acc1 = round(*acc1, lane1.to_le()); + *acc2 = round(*acc2, lane2.to_le()); + *acc3 = round(*acc3, lane3.to_le()); + *acc4 = round(*acc4, lane4.to_le()); + } + + fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { + while let Some((chunk, rest)) = data.split_first_chunk::() { + // SAFETY: We have the right number of bytes and are + // handling the unaligned case. 
+ let lanes = unsafe { chunk.as_ptr().cast::().read_unaligned() }; + self.write(lanes); + data = rest; + } + data + } + + const fn finish(&self) -> u32 { + let [acc1, acc2, acc3, acc4] = self.0; + + let acc1 = acc1.rotate_left(1); + let acc2 = acc2.rotate_left(7); + let acc3 = acc3.rotate_left(12); + let acc4 = acc4.rotate_left(18); + + acc1.wrapping_add(acc2) + .wrapping_add(acc3) + .wrapping_add(acc4) + } +} + +pub struct XxHash32 { + seed: u32, + accumulators: Accumulators, + buffer: Buffer, + length: u64, +} + +impl XxHash32 { + pub const fn with_seed(seed: u32) -> Self { + // Step 1. Initialize internal accumulators + Self { + seed, + accumulators: Accumulators::new(seed), + buffer: Buffer::new(), + length: 0, + } + } + + #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline(always)] + fn finish_32(&self) -> u32 { + Self::finish_with( + self.seed, + self.length, + &self.accumulators, + self.buffer.remaining(), + ) + } + + #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline(always)] + fn finish_with(seed: u32, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u32 { + // Step 3. Accumulator convergence + let mut acc = if len < BYTES_IN_LANE.into_u64() { + seed.wrapping_add(PRIME32_5) + } else { + accumulators.finish() + }; + + // Step 4. Add input length + // + // "Note that, if input length is so large that it requires + // more than 32-bits, only the lower 32-bits are added to the + // accumulator." + acc += len as u32; + + // Step 5. 
Consume remaining input + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + let lane = u32::from_ne_bytes(*chunk).to_le(); + + acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_3)); + acc = acc.rotate_left(17).wrapping_mul(PRIME32_4); + + remaining = rest; + } + + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + let lane = chunk[0].into_u32(); + + acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_5)); + acc = acc.rotate_left(11).wrapping_mul(PRIME32_1); + + remaining = rest; + } + + // Step 6. Final mix (avalanche) + acc ^= acc >> 15; + acc = acc.wrapping_mul(PRIME32_2); + acc ^= acc >> 13; + acc = acc.wrapping_mul(PRIME32_3); + acc ^= acc >> 16; + + acc + } +} + +impl Hasher for XxHash32 { + fn write(&mut self, data: &[u8]) { + let len = data.len(); + + // Step 2. Process stripes + let (buffered_lanes, data) = self.buffer.extend(data); + + if let Some(&lanes) = buffered_lanes { + self.accumulators.write(lanes); + } + + let data = self.accumulators.write_many(data); + + self.buffer.set(data); + + self.length += len.into_u64(); + } + + fn finish(&self) -> u64 { + XxHash32::finish_32(self).into() + } +} + +const fn round(mut acc: u32, lane: u32) -> u32 { + acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_2)); + acc = acc.rotate_left(13); + acc.wrapping_mul(PRIME32_1) +} + +#[cfg(test)] +mod test { + use core::array; + + use super::*; + + #[test] + fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { + let bytes = [0; 32]; + + let mut byte_by_byte = XxHash32::with_seed(0); + for byte in bytes.chunks(1) { + byte_by_byte.write(byte); + } + let byte_by_byte = byte_by_byte.finish_32(); + + let mut one_chunk = XxHash32::with_seed(0); + one_chunk.write(&bytes); + let one_chunk = one_chunk.finish_32(); + + assert_eq!(byte_by_byte, one_chunk); + } + + #[test] + fn hash_of_nothing_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(&[]); + 
assert_eq!(hasher.finish_32(), 0x02cc_5d05); + } + + #[test] + fn hash_of_single_byte_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(&[42]); + assert_eq!(hasher.finish_32(), 0xe0fe_705f); + } + + #[test] + fn hash_of_multiple_bytes_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + assert_eq!(hasher.finish_32(), 0x9e5e_7e93); + } + + #[test] + fn hash_of_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash32::with_seed(0); + hasher.write(&bytes); + assert_eq!(hasher.finish_32(), 0x7f89_ba44); + } + + #[test] + fn hash_with_different_seed_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0x42c9_1977); + hasher.write(&[]); + assert_eq!(hasher.finish_32(), 0xd6bf_8459); + } + + #[test] + fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { + let bytes: [u8; 100] = array::from_fn(|i| i as u8); + let mut hasher = XxHash32::with_seed(0x42c9_1977); + hasher.write(&bytes); + assert_eq!(hasher.finish_32(), 0x6d2f_6c17); + } +} diff --git a/src/xxhash64.rs b/src/xxhash64.rs index fbeaefc2e..9be69096b 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -1,5 +1,7 @@ use core::{fmt, hash::Hasher, mem}; +use crate::IntoU64; + // Keeping these constants in this form to match the C code. 
const PRIME64_1: u64 = 0x9E3779B185EBCA87; const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; @@ -7,22 +9,28 @@ const PRIME64_3: u64 = 0x165667B19E3779F9; const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; +type Lane = u64; +type Lanes = [Lane; 4]; +type Bytes = [u8; 32]; + +const BYTES_IN_LANE: usize = mem::size_of::(); + #[derive(PartialEq)] -struct BufferData([u64; 4]); +struct BufferData(Lanes); impl BufferData { const fn new() -> Self { Self([0; 4]) } - fn bytes(&self) -> &[u8; 32] { - const _: () = assert!(mem::align_of::() <= mem::align_of::()); + fn bytes(&self) -> &Bytes { + const _: () = assert!(mem::align_of::() <= mem::align_of::()); // SAFETY[bytes]: The alignment of `u64` is at least that of // `u8` and all the values are initialized. unsafe { &*self.0.as_ptr().cast() } } - fn bytes_mut(&mut self) -> &mut [u8; 32] { + fn bytes_mut(&mut self) -> &mut Bytes { // SAFETY: See SAFETY[bytes] unsafe { &mut *self.0.as_mut_ptr().cast() } } @@ -48,7 +56,7 @@ impl Buffer { } } - fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&[u64; 4]>, &'d [u8]) { + fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // // 1. this method is called one time per `XxHash64::write` call @@ -107,7 +115,7 @@ impl Buffer { } #[derive(PartialEq)] -struct Accumulators([u64; 4]); +struct Accumulators(Lanes); impl Accumulators { const fn new(seed: u64) -> Self { @@ -119,7 +127,7 @@ impl Accumulators { ]) } - fn write(&mut self, lanes: [u64; 4]) { + fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; @@ -133,7 +141,7 @@ impl Accumulators { while let Some((chunk, rest)) = data.split_first_chunk::<32>() { // SAFETY: We have the right number of bytes and are // handling the unaligned case. 
- let lanes = unsafe { chunk.as_ptr().cast::<[u64; 4]>().read_unaligned() }; + let lanes = unsafe { chunk.as_ptr().cast::().read_unaligned() }; self.write(lanes); data = rest; } @@ -234,7 +242,7 @@ impl XxHash64 { #[inline(always)] fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. Accumulator convergence - let mut acc = if len < 32 { + let mut acc = if len < BYTES_IN_LANE.into_u64() { seed.wrapping_add(PRIME64_5) } else { accumulators.finish() @@ -318,29 +326,6 @@ const fn round(mut acc: u64, lane: u64) -> u64 { acc.wrapping_mul(PRIME64_1) } -trait IntoU64 { - fn into_u64(self) -> u64; -} - -impl IntoU64 for u8 { - fn into_u64(self) -> u64 { - self.into() - } -} - -impl IntoU64 for u32 { - fn into_u64(self) -> u64 { - self.into() - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -impl IntoU64 for usize { - fn into_u64(self) -> u64 { - self as u64 - } -} - #[cfg(test)] mod test { use core::array; From a4eb4bd7b1269e94cc4337182782b779699a6c21 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 26 Jun 2024 21:33:42 -0400 Subject: [PATCH 028/166] align --- src/xxhash32.rs | 61 +++++++++++++++++++++++++++++++++++++++++-------- src/xxhash64.rs | 10 ++++---- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 41bf7f11d..65eeb9206 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -114,6 +114,7 @@ impl Buffer { } } +#[derive(PartialEq)] struct Accumulators(Lanes); impl Accumulators { @@ -161,6 +162,19 @@ impl Accumulators { } } +impl fmt::Debug for Accumulators { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let [acc1, acc2, acc3, acc4] = self.0; + f.debug_struct("Accumulators") + .field("acc1", &acc1) + .field("acc2", &acc2) + .field("acc3", &acc3) + .field("acc4", &acc4) + .finish() + } +} + +#[derive(Debug, PartialEq)] pub struct XxHash32 { seed: u32, accumulators: Accumulators, @@ -168,7 +182,35 @@ pub struct 
XxHash32 { length: u64, } +impl Default for XxHash32 { + fn default() -> Self { + Self::with_seed(0) + } +} + impl XxHash32 { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. + #[must_use] + // RATIONALE[inline]: Keeping parallel to the XxHash64 + // implementation, even though the performance gains for XxHash32 + // haven't been tested. + #[inline] + pub fn oneshot(seed: u32, data: &[u8]) -> u32 { + let len = data.len(); + + // Since we know that there's no more data coming, we don't + // need to construct the intermediate buffers or copy data to + // or from the buffers. + + let mut accumulators = Accumulators::new(seed); + + let data = accumulators.write_many(data); + + Self::finish_with(seed, len.into_u64(), &accumulators, data) + } + + #[must_use] pub const fn with_seed(seed: u32) -> Self { // Step 1. Initialize internal accumulators Self { @@ -182,7 +224,7 @@ impl XxHash32 { #[must_use] // RATIONALE: See RATIONALE[inline] #[inline(always)] - fn finish_32(&self) -> u32 { + pub fn finish_32(&self) -> u32 { Self::finish_with( self.seed, self.length, @@ -257,6 +299,7 @@ impl Hasher for XxHash32 { self.length += len.into_u64(); } + #[must_use] fn finish(&self) -> u64 { XxHash32::finish_32(self).into() } @@ -282,11 +325,11 @@ mod test { for byte in bytes.chunks(1) { byte_by_byte.write(byte); } - let byte_by_byte = byte_by_byte.finish_32(); + let byte_by_byte = byte_by_byte.finish(); let mut one_chunk = XxHash32::with_seed(0); one_chunk.write(&bytes); - let one_chunk = one_chunk.finish_32(); + let one_chunk = one_chunk.finish(); assert_eq!(byte_by_byte, one_chunk); } @@ -295,21 +338,21 @@ mod test { fn hash_of_nothing_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0); hasher.write(&[]); - assert_eq!(hasher.finish_32(), 0x02cc_5d05); + assert_eq!(hasher.finish(), 0x02cc_5d05); } #[test] fn hash_of_single_byte_matches_c_implementation() { let mut hasher = 
XxHash32::with_seed(0); hasher.write(&[42]); - assert_eq!(hasher.finish_32(), 0xe0fe_705f); + assert_eq!(hasher.finish(), 0xe0fe_705f); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0); hasher.write(b"Hello, world!\0"); - assert_eq!(hasher.finish_32(), 0x9e5e_7e93); + assert_eq!(hasher.finish(), 0x9e5e_7e93); } #[test] @@ -317,14 +360,14 @@ mod test { let bytes: [u8; 100] = array::from_fn(|i| i as u8); let mut hasher = XxHash32::with_seed(0); hasher.write(&bytes); - assert_eq!(hasher.finish_32(), 0x7f89_ba44); + assert_eq!(hasher.finish(), 0x7f89_ba44); } #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = XxHash32::with_seed(0x42c9_1977); hasher.write(&[]); - assert_eq!(hasher.finish_32(), 0xd6bf_8459); + assert_eq!(hasher.finish(), 0xd6bf_8459); } #[test] @@ -332,6 +375,6 @@ mod test { let bytes: [u8; 100] = array::from_fn(|i| i as u8); let mut hasher = XxHash32::with_seed(0x42c9_1977); hasher.write(&bytes); - assert_eq!(hasher.finish_32(), 0x6d2f_6c17); + assert_eq!(hasher.finish(), 0x6d2f_6c17); } } diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 9be69096b..cef2180f3 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -23,7 +23,7 @@ impl BufferData { Self([0; 4]) } - fn bytes(&self) -> &Bytes { + const fn bytes(&self) -> &Bytes { const _: () = assert!(mem::align_of::() <= mem::align_of::()); // SAFETY[bytes]: The alignment of `u64` is at least that of // `u8` and all the values are initialized. @@ -138,7 +138,7 @@ impl Accumulators { } fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { - while let Some((chunk, rest)) = data.split_first_chunk::<32>() { + while let Some((chunk, rest)) = data.split_first_chunk::() { // SAFETY: We have the right number of bytes and are // handling the unaligned case. let lanes = unsafe { chunk.as_ptr().cast::().read_unaligned() }; @@ -252,7 +252,7 @@ impl XxHash64 { acc += len; // Step 5. 
Consume remaining input - while let Some((chunk, rest)) = remaining.split_first_chunk::<8>() { + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { let lane = u64::from_ne_bytes(*chunk).to_le(); acc ^= round(0, lane); @@ -261,7 +261,7 @@ impl XxHash64 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<4>() { + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); acc ^= lane.wrapping_mul(PRIME64_1); @@ -271,7 +271,7 @@ impl XxHash64 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<1>() { + while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { let lane = chunk[0].into_u64(); acc ^= lane.wrapping_mul(PRIME64_5); From f6156b35ac408a459f14ee58316a215aa6a34ce7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 27 Jun 2024 09:13:21 -0400 Subject: [PATCH 029/166] more --- compare/benches/benchmark.rs | 51 +++---- compare/src/lib.rs | 248 +++++++++++++++++++++++++---------- xx_hash-sys/src/lib.rs | 72 +++++++++- 3 files changed, 266 insertions(+), 105 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 6cb801573..685f2f907 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,10 +1,9 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use rand::{Rng, RngCore, SeedableRng}; -use std::hash::Hasher; -use std::{hint::black_box, iter}; -use twox_hash::XxHash64 as Old; -use xx_hash_sys::Stream; -use xx_renu::XxHash64; +use std::{hash::Hasher, hint::black_box, iter}; + +use xx_hash_sys as c; +use xx_renu as rust; const TINY_DATA_SIZE: usize = 32; const BIG_DATA_SIZE: usize = 100 * 1024 * 1024; @@ -17,19 +16,19 @@ fn tiny_data(c: &mut Criterion) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = 
format!("xxHash/oneshot/{size}"); + let id = format!("c/oneshot/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = Stream::oneshot(seed, data); + let hash = c::XxHash64::oneshot(seed, data); black_box(hash); }) }); - let id = format!("xxHash/streaming/{size}"); + let id = format!("c/streaming/{size}"); g.bench_function(id, |b| { b.iter(|| { let hash = { - let mut hasher = Stream::with_seed(seed); + let mut hasher = c::XxHash64::with_seed(seed); hasher.write(data); hasher.finish() }; @@ -37,19 +36,19 @@ fn tiny_data(c: &mut Criterion) { }) }); - let id = format!("renu/oneshot/{size}"); + let id = format!("rust/oneshot/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = XxHash64::oneshot(seed, data); + let hash = rust::XxHash64::oneshot(seed, data); black_box(hash); }) }); - let id = format!("renu/streaming/{size}"); + let id = format!("rust/streaming/{size}"); g.bench_function(id, |b| { b.iter(|| { let hash = { - let mut hasher = XxHash64::with_seed(seed); + let mut hasher = rust::XxHash64::with_seed(seed); hasher.write(data); hasher.finish() }; @@ -69,18 +68,18 @@ fn oneshot(c: &mut Criterion) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("xxHash/{size}"); + let id = format!("c/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = Stream::oneshot(seed, data); + let hash = c::XxHash64::oneshot(seed, data); black_box(hash); }) }); - let id = format!("renu/{size}"); + let id = format!("rust/{size}"); g.bench_function(id, |b| { b.iter(|| { - let hash = XxHash64::oneshot(seed, data); + let hash = rust::XxHash64::oneshot(seed, data); black_box(hash); }) }); @@ -97,30 +96,20 @@ fn streaming_one_chunk(c: &mut Criterion) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("xxHash/{size}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = Stream::with_seed(seed); - hasher.write(data); - let hash = hasher.finish(); - black_box(hash); - }) - 
}); - - let id = format!("renu/{size}"); + let id = format!("c/{size}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = XxHash64::with_seed(seed); + let mut hasher = c::XxHash64::with_seed(seed); hasher.write(data); let hash = hasher.finish(); black_box(hash); }) }); - let id = format!("twox-hash/{size}"); + let id = format!("rust/{size}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = Old::with_seed(seed); + let mut hasher = rust::XxHash64::with_seed(seed); hasher.write(data); let hash = hasher.finish(); black_box(hash); diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 6e1a4e8be..f17554919 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -1,94 +1,206 @@ #![cfg(test)] -use proptest::{num, prelude::*, test_runner::TestCaseResult}; -use std::hash::Hasher; +use proptest::{num, prelude::*}; -proptest! { - #[test] - fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { - oneshot_same_as_one_chunk_impl(seed, &data)?; - } +use xx_hash_sys as c; +use xx_renu as rust; - #[test] - fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - } +mod xxhash32 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u32, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed: u32, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + + #[test] + fn oneshot(seed: u32, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_one_chunk(seed: u32, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } - #[test] - fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { - oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + #[test] + fn streaming_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } } - #[test] - fn oneshot(seed: u64, data: Vec) { - oneshot_impl(seed, &data)?; + fn oneshot_same_as_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash32::oneshot(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish_32() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) } - #[test] - fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_impl(seed, &data[offset..])?; + fn oneshot_same_as_many_chunks_impl( + seed: u32, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash32::oneshot(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash32::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish_32() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) } - #[test] - fn streaming_one_chunk(seed: u64, 
data: Vec) { - streaming_one_chunk_impl(seed, &data)?; + fn oneshot_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let native = c::XxHash32::oneshot(seed, data); + let rust = rust::XxHash32::oneshot(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) } - #[test] - fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - streaming_one_chunk_impl(seed, &data[offset..])?; + fn streaming_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish_32() + }; + + prop_assert_eq!(native, rust); + Ok(()) } } -fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let oneshot = xx_renu::XxHash64::oneshot(seed, data); - let one_chunk = { - let mut hasher = xx_renu::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; +mod xxhash64 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher; - prop_assert_eq!(oneshot, one_chunk); - Ok(()) -} + use super::*; -fn oneshot_same_as_many_chunks_impl(seed: u64, data: &[u8], chunks: &[Vec]) -> TestCaseResult { - let oneshot = xx_renu::XxHash64::oneshot(seed, data); - let many_chunks = { - let mut hasher = xx_renu::XxHash64::with_seed(seed); - for chunk in chunks { - hasher.write(chunk); + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; } - hasher.finish() - }; - prop_assert_eq!(oneshot, many_chunks); - Ok(()) -} + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } -fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = xx_hash_sys::Stream::oneshot(seed, data); - let rust = xx_renu::XxHash64::oneshot(seed, data); + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } - prop_assert_eq!(native, rust); - Ok(()) -} + #[test] + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } + + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } + } + + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash64::oneshot(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) + } + + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash64::oneshot(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) + } + + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash64::oneshot(seed, data); + 
let rust = rust::XxHash64::oneshot(seed, data); -fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = { - let mut hasher = xx_hash_sys::Stream::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - let rust = { - let mut hasher = xx_renu::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(native, rust); - Ok(()) + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } } fn vec_and_index() -> impl Strategy, usize)> { diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 2c3ed6e8a..0d691790f 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -1,15 +1,75 @@ #![allow(non_camel_case_types)] +type XXH_errorcode = libc::c_int; +const XXH_OK: XXH_errorcode = 0; + +// ---------- + +type XXH32_hash_t = u32; + +#[repr(C)] +pub struct XXH32_state_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +extern "C" { + fn XXH32(input: *const libc::c_void, length: libc::size_t, seed: XXH32_hash_t) -> XXH32_hash_t; + + fn XXH32_createState() -> *mut XXH32_state_t; + fn XXH32_reset(state: *mut XXH32_state_t, seed: XXH32_hash_t) -> XXH_errorcode; + fn XXH32_update( + state: *mut XXH32_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn XXH32_digest(state: *mut XXH32_state_t) -> XXH32_hash_t; + fn XXH32_freeState(state: *mut XXH32_state_t); +} + +pub struct XxHash32(*mut XXH32_state_t); + +impl XxHash32 { + pub fn oneshot(seed: u32, data: &[u8]) -> u32 { + unsafe { XXH32(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn with_seed(seed: u32) -> 
Self { + let state = unsafe { + let state = XXH32_createState(); + XXH32_reset(state, seed); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { XXH32_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u32 { + unsafe { XXH32_digest(self.0) } + } +} + +impl Drop for XxHash32 { + fn drop(&mut self) { + unsafe { XXH32_freeState(self.0) } + } +} + +// ---------- + type XXH64_hash_t = u64; + #[repr(C)] pub struct XXH64_state_t { _data: [u8; 0], _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } -type XXH_errorcode = libc::c_int; -const XXH_OK: XXH_errorcode = 0; - extern "C" { fn XXH64(input: *const libc::c_void, length: libc::size_t, seed: XXH64_hash_t) -> XXH64_hash_t; @@ -24,9 +84,9 @@ extern "C" { fn XXH64_freeState(state: *mut XXH64_state_t); } -pub struct Stream(*mut XXH64_state_t); +pub struct XxHash64(*mut XXH64_state_t); -impl Stream { +impl XxHash64 { pub fn oneshot(seed: u64, data: &[u8]) -> u64 { unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } } @@ -51,7 +111,7 @@ impl Stream { } } -impl Drop for Stream { +impl Drop for XxHash64 { fn drop(&mut self) { unsafe { XXH64_freeState(self.0) } } From c0fdd56a46145f5c213f4a4017938da9d164d102 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 27 Jun 2024 14:28:29 -0400 Subject: [PATCH 030/166] benchmark --- compare/benches/benchmark.rs | 114 ++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 47 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 685f2f907..2bce74e6c 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -6,7 +6,10 @@ use xx_hash_sys as c; use xx_renu as rust; const TINY_DATA_SIZE: usize = 32; -const BIG_DATA_SIZE: usize = 100 * 1024 * 1024; +const BIG_DATA_SIZE: usize = 4 * 1024 * 1024; +const MIN_BIG_DATA_SIZE: usize = 256 * 1024; +const MAX_CHUNKS: usize = 64; 
+const SEED: u64 = 0xc651_4843_1995_363f; fn tiny_data(c: &mut Criterion) { let (seed, data) = gen_data(TINY_DATA_SIZE); @@ -16,7 +19,7 @@ fn tiny_data(c: &mut Criterion) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("c/oneshot/{size}"); + let id = format!("impl-c/fn-oneshot/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { let hash = c::XxHash64::oneshot(seed, data); @@ -24,7 +27,7 @@ fn tiny_data(c: &mut Criterion) { }) }); - let id = format!("c/streaming/{size}"); + let id = format!("impl-c/fn-streaming/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { let hash = { @@ -36,7 +39,7 @@ fn tiny_data(c: &mut Criterion) { }) }); - let id = format!("rust/oneshot/{size}"); + let id = format!("impl-rust/fn-oneshot/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { let hash = rust::XxHash64::oneshot(seed, data); @@ -44,7 +47,7 @@ fn tiny_data(c: &mut Criterion) { }) }); - let id = format!("rust/streaming/{size}"); + let id = format!("impl-rust/fn-streaming/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { let hash = { @@ -64,11 +67,11 @@ fn oneshot(c: &mut Criterion) { let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("oneshot"); - for size in half_sizes(&data).take(10) { + for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("c/{size}"); + let id = format!("impl-c/size-{size:07}"); g.bench_function(id, |b| { b.iter(|| { let hash = c::XxHash64::oneshot(seed, data); @@ -76,7 +79,7 @@ fn oneshot(c: &mut Criterion) { }) }); - let id = format!("rust/{size}"); + let id = format!("impl-rust/size-{size:07}"); g.bench_function(id, |b| { b.iter(|| { let hash = rust::XxHash64::oneshot(seed, data); @@ -88,40 +91,43 @@ fn oneshot(c: &mut Criterion) { g.finish(); } -fn streaming_one_chunk(c: &mut Criterion) { - let (seed, data) = gen_data(BIG_DATA_SIZE); - let mut g = 
c.benchmark_group("streaming_one_chunk"); - - for size in half_sizes(&data).take(10) { - let data = &data[..size]; - g.throughput(Throughput::Bytes(data.len() as _)); +fn streaming(c: &mut Criterion) { + let mut g = c.benchmark_group("streaming_many_chunks"); - let id = format!("c/{size}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::XxHash64::with_seed(seed); - hasher.write(data); - let hash = hasher.finish(); - black_box(hash); - }) - }); + for size in half_sizes(BIG_DATA_SIZE).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { + for n_chunks in half_sizes(MAX_CHUNKS) { + let (seed, chunks) = gen_chunked_data(size, n_chunks); + g.throughput(Throughput::Bytes(size as _)); - let id = format!("rust/{size}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - let hash = hasher.finish(); - black_box(hash); - }) - }); + let id = format!("impl-c/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::XxHash64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + + let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = rust::XxHash64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + } } g.finish(); } -const SEED: u64 = 0xc651_4843_1995_363f; - fn gen_data(length: usize) -> (u64, Vec) { let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); @@ -133,18 +139,32 @@ fn gen_data(length: usize) -> (u64, Vec) { (seed, data) } -fn half_sizes(data: &[u8]) -> impl Iterator { - iter::successors( - Some(data.len()), - |&v| { - if v == 1 { - None - } else { - Some(v / 2) - } - }, - ) +fn gen_chunked_data(length: usize, n_chunks: usize) -> (u64, Vec>) { + assert!(length > n_chunks); + + let mut rng = 
rand::rngs::StdRng::seed_from_u64(SEED); + + let seed = rng.gen(); + + let chunk_size = length / n_chunks; + + let mut total = 0; + let mut chunks = Vec::with_capacity(2 * n_chunks); + + while total < length { + let mut data = vec![0; chunk_size]; + rng.fill_bytes(&mut data); + + total += data.len(); + chunks.push(data) + } + + (seed, chunks) +} + +fn half_sizes(max: usize) -> impl Iterator { + iter::successors(Some(max), |&v| if v == 1 { None } else { Some(v / 2) }) } -criterion_group!(benches, tiny_data, oneshot, streaming_one_chunk); +criterion_group!(benches, tiny_data, oneshot, streaming); criterion_main!(benches); From c1fc63c6dbce788086fb7cb4c7547ea204cb7f57 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 27 Jun 2024 14:47:00 -0400 Subject: [PATCH 031/166] faster --- src/xxhash64.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/xxhash64.rs b/src/xxhash64.rs index cef2180f3..7958a9c06 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -127,6 +127,11 @@ impl Accumulators { ]) } + // RATIONALE[inline2]: Inspecting the disassembly showed that + // these helper functions were not being inlined. Avoiding a few + // function calls wins us the tiniest performance increase, just + // enough so that we are neck-and-neck with the C code. 
+ #[inline] fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; @@ -137,6 +142,8 @@ impl Accumulators { *acc4 = round(*acc4, lane4.to_le()); } + // RATIONALE: See RATIONALE[inline2] + #[inline] fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { while let Some((chunk, rest)) = data.split_first_chunk::() { // SAFETY: We have the right number of bytes and are @@ -148,6 +155,8 @@ impl Accumulators { data } + // RATIONALE: See RATIONALE[inline2] + #[inline] const fn finish(&self) -> u64 { let [acc1, acc2, acc3, acc4] = self.0; @@ -170,6 +179,8 @@ impl Accumulators { acc } + // RATIONALE: See RATIONALE[inline2] + #[inline] const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { acc ^= round(0, acc_n); acc = acc.wrapping_mul(PRIME64_1); @@ -320,6 +331,8 @@ impl Hasher for XxHash64 { } } +// RATIONALE: See RATIONALE[inline2] +#[inline] const fn round(mut acc: u64, lane: u64) -> u64 { acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); acc = acc.rotate_left(31); From 926f257dda4d121922bbb3edcff79e50438beee7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 27 Jun 2024 14:47:24 -0400 Subject: [PATCH 032/166] more --- src/xxhash64.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 7958a9c06..28053d4f8 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -413,21 +413,21 @@ mod std_impl { use super::*; - pub struct RandomXxHashBuilder64(u64); + pub struct RandomXxHash64Builder(u64); - impl Default for RandomXxHashBuilder64 { + impl Default for RandomXxHash64Builder { fn default() -> Self { Self::new() } } - impl RandomXxHashBuilder64 { + impl RandomXxHash64Builder { fn new() -> Self { Self(rand::random()) } } - impl BuildHasher for RandomXxHashBuilder64 { + impl BuildHasher for RandomXxHash64Builder { type Hasher = XxHash64; fn build_hasher(&self) -> Self::Hasher { @@ -451,7 +451,7 @@ mod std_impl { 
#[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); + let mut hash: HashMap<_, _, RandomXxHash64Builder> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } @@ -459,7 +459,7 @@ mod std_impl { } #[cfg(feature = "std")] -pub use std_impl::RandomXxHashBuilder64; +pub use std_impl::RandomXxHash64Builder; #[cfg(feature = "serialize")] mod serialize_impl { @@ -566,7 +566,7 @@ mod serialize_impl { hasher.write(b"Hello, world!\0"); hasher.finish(); - let serialized = r#"{ + let expected_serialized = r#"{ "total_len": 14, "seed": 0, "core": { @@ -584,8 +584,13 @@ mod serialize_impl { "buffer_usage": 14 }"#; - let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); + let unserialized: XxHash64 = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); + + let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; + let actual_value = serde_json::to_value(&hasher)?; + assert_eq!(expected_value, actual_value); + Ok(()) } } From e568a2ed0bb8bdd5ba1f3059356f85a4780f7599 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 09:21:55 -0400 Subject: [PATCH 033/166] inline --- src/xxhash64.rs | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 28053d4f8..b35066407 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -56,6 +56,8 @@ impl Buffer { } } + // RATIONALE: See RATIONALE[inline] + #[inline] fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // @@ -93,6 +95,8 @@ impl Buffer { } } + // RATIONALE: See RATIONALE[inline] + #[inline] fn set(&mut self, data: &[u8]) { if data.is_empty() { return; @@ -109,6 +113,8 @@ impl Buffer { self.offset = data.len(); } + // RATIONALE: See 
RATIONALE[inline] + #[inline] fn remaining(&self) -> &[u8] { &self.data.bytes()[..self.offset] } @@ -127,10 +133,7 @@ impl Accumulators { ]) } - // RATIONALE[inline2]: Inspecting the disassembly showed that - // these helper functions were not being inlined. Avoiding a few - // function calls wins us the tiniest performance increase, just - // enough so that we are neck-and-neck with the C code. + // RATIONALE: See RATIONALE[inline] #[inline] fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; @@ -142,7 +145,7 @@ impl Accumulators { *acc4 = round(*acc4, lane4.to_le()); } - // RATIONALE: See RATIONALE[inline2] + // RATIONALE: See RATIONALE[inline] #[inline] fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { while let Some((chunk, rest)) = data.split_first_chunk::() { @@ -155,7 +158,7 @@ impl Accumulators { data } - // RATIONALE: See RATIONALE[inline2] + // RATIONALE: See RATIONALE[inline] #[inline] const fn finish(&self) -> u64 { let [acc1, acc2, acc3, acc4] = self.0; @@ -179,7 +182,7 @@ impl Accumulators { acc } - // RATIONALE: See RATIONALE[inline2] + // RATIONALE: See RATIONALE[inline] #[inline] const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { acc ^= round(0, acc_n); @@ -218,8 +221,19 @@ impl XxHash64 { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] - // RATIONALE[inline]: In one case [1], this `inline` helps unlock a - // speedup from ~900µs to ~200µs. + // RATIONALE[inline]: + // + // These `inline`s help unlock a speedup in one benchmark [1] from + // ~900µs to ~200µs. + // + // Further inspection of the disassembly showed that various + // helper functions were not being inlined. Avoiding these few + // function calls wins us the tiniest performance increase, just + // enough so that we are neck-and-neck with (or slightly faster + // than!) the C code. 
+ // + // This results in the entire hash computation being inlined at + // the call site. // // [1]: https://github.com/apache/datafusion-comet/pull/575 #[inline] @@ -250,7 +264,7 @@ impl XxHash64 { #[must_use] // RATIONALE: See RATIONALE[inline] - #[inline(always)] + #[inline] fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 { // Step 3. Accumulator convergence let mut acc = if len < BYTES_IN_LANE.into_u64() { @@ -303,6 +317,8 @@ impl XxHash64 { } impl Hasher for XxHash64 { + // RATIONALE: See RATIONALE[inline] + #[inline] fn write(&mut self, data: &[u8]) { let len = data.len(); @@ -320,7 +336,8 @@ impl Hasher for XxHash64 { self.length += len.into_u64(); } - #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline] fn finish(&self) -> u64 { Self::finish_with( self.seed, @@ -331,7 +348,7 @@ impl Hasher for XxHash64 { } } -// RATIONALE: See RATIONALE[inline2] +// RATIONALE: See RATIONALE[inline] #[inline] const fn round(mut acc: u64, lane: u64) -> u64 { acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2)); From 84220308e3ff5f2e15ae5d9ecac7ddbaaebbab4d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 09:22:02 -0400 Subject: [PATCH 034/166] offset --- src/xxhash64.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/xxhash64.rs b/src/xxhash64.rs index b35066407..6bff23ede 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -422,6 +422,22 @@ mod test { hasher.write(&bytes); assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } + + #[test] + fn hashes_with_different_offsets_are_the_same() { + let bytes = [0x7c; 4096]; + let expected = XxHash64::oneshot(0, &[0x7c; 64]); + + let the_same = bytes + .windows(64) + .map(|w| { + let mut hasher = XxHash64::with_seed(0); + hasher.write(w); + hasher.finish() + }) + .all(|h| h == expected); + assert!(the_same); + } } #[cfg(feature = "std")] From 59836ed06c7b95f40a5be3bf0ec923ba1b229115 Mon Sep 17 00:00:00 2001 From: Jake Goulding 
Date: Fri, 28 Jun 2024 09:31:27 -0400 Subject: [PATCH 035/166] simpla --- src/xxhash32.rs | 45 +++++++++++++++++++++++++++++++++++++-------- src/xxhash64.rs | 10 ++++------ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 65eeb9206..fdd55c443 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -56,6 +56,8 @@ impl Buffer { } } + // RATIONALE: See RATIONALE[inline] + #[inline] fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // @@ -93,6 +95,8 @@ impl Buffer { } } + // RATIONALE: See RATIONALE[inline] + #[inline] fn set(&mut self, data: &[u8]) { if data.is_empty() { return; @@ -109,6 +113,8 @@ impl Buffer { self.offset = data.len(); } + // RATIONALE: See RATIONALE[inline] + #[inline] fn remaining(&self) -> &[u8] { &self.data.bytes()[..self.offset] } @@ -127,6 +133,8 @@ impl Accumulators { ]) } + // RATIONALE: See RATIONALE[inline] + #[inline] fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; @@ -137,6 +145,8 @@ impl Accumulators { *acc4 = round(*acc4, lane4.to_le()); } + // RATIONALE: See RATIONALE[inline] + #[inline] fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { while let Some((chunk, rest)) = data.split_first_chunk::() { // SAFETY: We have the right number of bytes and are @@ -148,6 +158,8 @@ impl Accumulators { data } + // RATIONALE: See RATIONALE[inline] + #[inline] const fn finish(&self) -> u32 { let [acc1, acc2, acc3, acc4] = self.0; @@ -223,7 +235,7 @@ impl XxHash32 { #[must_use] // RATIONALE: See RATIONALE[inline] - #[inline(always)] + #[inline] pub fn finish_32(&self) -> u32 { Self::finish_with( self.seed, @@ -235,7 +247,7 @@ impl XxHash32 { #[must_use] // RATIONALE: See RATIONALE[inline] - #[inline(always)] + #[inline] fn finish_with(seed: u32, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u32 { // 
Step 3. Accumulator convergence let mut acc = if len < BYTES_IN_LANE.into_u64() { @@ -252,7 +264,7 @@ impl XxHash32 { acc += len as u32; // Step 5. Consume remaining input - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + while let Some((chunk, rest)) = remaining.split_first_chunk() { let lane = u32::from_ne_bytes(*chunk).to_le(); acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_3)); @@ -261,13 +273,11 @@ impl XxHash32 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { - let lane = chunk[0].into_u32(); + for &byte in remaining { + let lane = byte.into_u32(); acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_5)); acc = acc.rotate_left(11).wrapping_mul(PRIME32_1); - - remaining = rest; } // Step 6. Final mix (avalanche) @@ -282,6 +292,8 @@ impl XxHash32 { } impl Hasher for XxHash32 { + // RATIONALE: See RATIONALE[inline] + #[inline] fn write(&mut self, data: &[u8]) { let len = data.len(); @@ -299,7 +311,8 @@ impl Hasher for XxHash32 { self.length += len.into_u64(); } - #[must_use] + // RATIONALE: See RATIONALE[inline] + #[inline] fn finish(&self) -> u64 { XxHash32::finish_32(self).into() } @@ -377,4 +390,20 @@ mod test { hasher.write(&bytes); assert_eq!(hasher.finish(), 0x6d2f_6c17); } + + #[test] + fn hashes_with_different_offsets_are_the_same() { + let bytes = [0x7c; 4096]; + let expected = XxHash32::oneshot(0, &[0x7c; 64]); + + let the_same = bytes + .windows(64) + .map(|w| { + let mut hasher = XxHash32::with_seed(0); + hasher.write(w); + hasher.finish_32() + }) + .all(|h| h == expected); + assert!(the_same); + } } diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 6bff23ede..251e63985 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -277,7 +277,7 @@ impl XxHash64 { acc += len; // Step 5. 
Consume remaining input - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + while let Some((chunk, rest)) = remaining.split_first_chunk() { let lane = u64::from_ne_bytes(*chunk).to_le(); acc ^= round(0, lane); @@ -286,7 +286,7 @@ impl XxHash64 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { + while let Some((chunk, rest)) = remaining.split_first_chunk() { let lane = u32::from_ne_bytes(*chunk).to_le().into_u64(); acc ^= lane.wrapping_mul(PRIME64_1); @@ -296,13 +296,11 @@ impl XxHash64 { remaining = rest; } - while let Some((chunk, rest)) = remaining.split_first_chunk::<{ mem::size_of::() }>() { - let lane = chunk[0].into_u64(); + for &byte in remaining { + let lane = byte.into_u64(); acc ^= lane.wrapping_mul(PRIME64_5); acc = acc.rotate_left(11).wrapping_mul(PRIME64_1); - - remaining = rest; } // Step 6. Final mix (avalanche) From 3e30866e62c8d03a4b34b3fea6ce7cdcb05fab28 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 10:18:37 -0400 Subject: [PATCH 036/166] inline --- src/xxhash32.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index fdd55c443..19147d4c2 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -318,6 +318,8 @@ impl Hasher for XxHash32 { } } +// RATIONALE: See RATIONALE[inline] +#[inline] const fn round(mut acc: u32, lane: u32) -> u32 { acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_2)); acc = acc.rotate_left(13); From 5c1f9771c8cebd94e120f7aa7f9847f9d5c80cb7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 10:36:55 -0400 Subject: [PATCH 037/166] moar tests --- src/xxhash32.rs | 210 ++++++++++++++++++++++++++++++++++++++++++++++++ src/xxhash64.rs | 34 ++++---- 2 files changed, 227 insertions(+), 17 deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 19147d4c2..1b90dd3f8 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -408,4 +408,214 @@ mod test { 
.all(|h| h == expected); assert!(the_same); } + + // This test validates wraparound/truncation behavior for very + // large inputs of a 32-bit hash, but runs very slowly in the + // normal "cargo test" build config since it hashes 4.3GB of + // data. It runs reasonably quick under "cargo test --release". + #[ignore] + #[test] + fn length_overflows_32bit() { + // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. + let bytes200: [u8; 200] = array::from_fn(|i| i as _); + + let mut hasher = XxHash32::with_seed(0); + for _ in 0..(4_300_000_000 / bytes200.len()) { + hasher.write(&bytes200); + } + + // assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00); + // assert_eq!(hasher.total_len(), 0x004c_cb00); + + // compared against the C implementation + assert_eq!(hasher.finish(), 0x1522_4ca7); + } +} + +#[cfg(feature = "std")] +mod std_impl { + use core::hash::BuildHasher; + + use super::*; + + pub struct RandomXxHash32Builder(u32); + + impl Default for RandomXxHash32Builder { + fn default() -> Self { + Self::new() + } + } + + impl RandomXxHash32Builder { + fn new() -> Self { + Self(rand::random()) + } + } + + impl BuildHasher for RandomXxHash32Builder { + type Hasher = XxHash32; + + fn build_hasher(&self) -> Self::Hasher { + XxHash32::with_seed(self.0) + } + } + + #[cfg(test)] + mod test { + use core::hash::BuildHasherDefault; + use std::collections::HashMap; + + use super::*; + + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_random_seed() { + let mut hash: HashMap<_, _, RandomXxHash32Builder> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + } +} + +#[cfg(feature = "std")] +pub use std_impl::*; + + +#[cfg(feature = "serialize")] +mod serialize_impl { + use 
serde::{Deserialize, Serialize}; + + use super::*; + + impl<'de> Deserialize<'de> for XxHash32 { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let shim = Deserialize::deserialize(deserializer)?; + + let Shim { + total_len, + seed, + core, + buffer, + buffer_usage, + } = shim; + let Core { v1, v2, v3, v4 } = core; + + let mut buffer_data = BufferData::new(); + buffer_data.bytes_mut().copy_from_slice(&buffer); + + Ok(XxHash32 { + seed, + accumulators: Accumulators([v1, v2, v3, v4]), + buffer: Buffer { + offset: buffer_usage, + data: buffer_data, + }, + length: total_len, + }) + } + } + + impl Serialize for XxHash32 { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let XxHash32 { + seed, + ref accumulators, + ref buffer, + length, + } = *self; + let [v1, v2, v3, v4] = accumulators.0; + let Buffer { offset, ref data } = *buffer; + let buffer = *data.bytes(); + + let shim = Shim { + total_len: length, + seed, + core: Core { v1, v2, v3, v4 }, + buffer, + buffer_usage: offset, + }; + + shim.serialize(serializer) + } + } + + #[derive(Serialize, Deserialize)] + struct Shim { + total_len: u64, + seed: u32, + core: Core, + buffer: [u8; 16], + buffer_usage: usize, + } + + #[derive(Serialize, Deserialize)] + struct Core { + v1: u32, + v2: u32, + v3: u32, + v4: u32, + } + + #[cfg(test)] + mod test { + use super::*; + + type Result = core::result::Result; + + #[test] + fn test_serialization_cycle() -> Result { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = serde_json::to_string(&hasher)?; + let unserialized: XxHash32 = serde_json::from_str(&serialized)?; + assert_eq!(hasher, unserialized); + Ok(()) + } + + #[test] + fn test_serialization_stability() -> Result { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let expected_serialized = r#"{ + "total_len": 14, + "seed": 0, + 
"core": { + "v1": 606290984, + "v2": 2246822519, + "v3": 0, + "v4": 1640531535 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; + + let unserialized: XxHash32 = serde_json::from_str(expected_serialized)?; + assert_eq!(hasher, unserialized); + + let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; + let actual_value = serde_json::to_value(&hasher)?; + assert_eq!(expected_value, actual_value); + + Ok(()) + } + } } diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 251e63985..fa2f63681 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -490,7 +490,7 @@ mod std_impl { } #[cfg(feature = "std")] -pub use std_impl::RandomXxHash64Builder; +pub use std_impl::*; #[cfg(feature = "serialize")] mod serialize_impl { @@ -598,22 +598,22 @@ mod serialize_impl { hasher.finish(); let expected_serialized = r#"{ - "total_len": 14, - "seed": 0, - "core": { - "v1": 6983438078262162902, - "v2": 14029467366897019727, - "v3": 0, - "v4": 7046029288634856825 - }, - "buffer": [ - 72, 101, 108, 108, 111, 44, 32, 119, - 111, 114, 108, 100, 33, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - ], - "buffer_usage": 14 - }"#; + "total_len": 14, + "seed": 0, + "core": { + "v1": 6983438078262162902, + "v2": 14029467366897019727, + "v3": 0, + "v4": 7046029288634856825 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; let unserialized: XxHash64 = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); From 63b17995baf143ddb3089319722c86c989e4405f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 11:03:38 -0400 Subject: [PATCH 038/166] dox --- src/lib.rs | 41 ++++++++++++++++++++++++++++++++++++++++- src/xxhash32.rs | 34 +++++++++++++++++++++++++++++++--- src/xxhash64.rs | 14 ++++++++++++++ 3 files changed, 85 
insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 64a4d3f3f..9e382cc68 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,46 @@ +//! A Rust implementation of the [XXHash][] algorithm. +//! +//! [XXHash]: https://github.com/Cyan4973/xxHash +//! +//! ## Hashing arbitrary data +//! +//! ```rust +//! use xx_renu::XxHash64; +//! +//! let seed = 1234; +//! let hash = XxHash64::oneshot(seed, b"some bytes"); +//! assert_eq!(0xeab5_5659_a496_d78b, hash); +//! ``` +//! +//! ## In a [`HashMap`](std::collections::HashMap) +//! +//! ### With a fixed seed +//! +//! ```rust +//! use std::{collections::HashMap, hash::BuildHasherDefault}; +//! use xx_renu::XxHash64; +//! +//! let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` +//! +//! ### With a random seed +//! +//! ```rust +//! use std::collections::HashMap; +//! use xx_renu::RandomXxHash64Builder; +//! +//! let mut hash: HashMap<_, _, RandomXxHash64Builder> = Default::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` + #![no_std] #![deny(rust_2018_idioms)] +#![deny(missing_docs)] -#[cfg(test)] +#[cfg(any(doc, test))] extern crate std; #[cfg(feature = "xxhash32")] diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 1b90dd3f8..d6ec93879 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -186,6 +186,12 @@ impl fmt::Debug for Accumulators { } } +/// Calculates the 32-bit hash. Care should be taken when using this +/// hash. +/// +/// Although this struct implements `Hasher`, it only calculates a +/// 32-bit number, leaving the upper bits as 0. This means it is +/// unlikely to be correct to use this in places like a `HashMap`. 
#[derive(Debug, PartialEq)] pub struct XxHash32 { seed: u32, @@ -222,6 +228,7 @@ impl XxHash32 { Self::finish_with(seed, len.into_u64(), &accumulators, data) } + /// Constructs the hasher with an initial seed. #[must_use] pub const fn with_seed(seed: u32) -> Self { // Step 1. Initialize internal accumulators @@ -233,6 +240,26 @@ impl XxHash32 { } } + /// The seed this hasher was created with. + pub const fn seed(&self) -> u32 { + self.seed + } + + /// The total number of bytes hashed. + pub const fn total_len(&self) -> u64 { + self.length + } + + /// The total number of bytes hashed, truncated to 32 bits. + /// + /// For the full 64-bit byte count, use [`total_len`](Self::total_len). + pub const fn total_len_32(&self) -> u64 { + self.length + } + + /// Returns the hash value for the values written so far. Unlike + /// [`Hasher::finish`][], this method returns the actual 32-bit + /// value calculated, not a 64-bit value. #[must_use] // RATIONALE: See RATIONALE[inline] #[inline] @@ -424,8 +451,8 @@ mod test { hasher.write(&bytes200); } - // assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00); - // assert_eq!(hasher.total_len(), 0x004c_cb00); + assert_eq!(hasher.total_len(), 0x0000_0001_004c_cb00); + assert_eq!(hasher.total_len_32(), 0x004c_cb00); // compared against the C implementation assert_eq!(hasher.finish(), 0x1522_4ca7); @@ -438,6 +465,8 @@ mod std_impl { use super::*; + /// Constructs a randomized seed and reuses it for multiple hasher + /// instances. See the usage warning on [`XxHash32`][]. pub struct RandomXxHash32Builder(u32); impl Default for RandomXxHash32Builder { @@ -486,7 +515,6 @@ mod std_impl { #[cfg(feature = "std")] pub use std_impl::*; - #[cfg(feature = "serialize")] mod serialize_impl { use serde::{Deserialize, Serialize}; diff --git a/src/xxhash64.rs b/src/xxhash64.rs index fa2f63681..7c988a098 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -203,6 +203,7 @@ impl fmt::Debug for Accumulators { } } +/// Calculates the 64-bit hash. 
#[derive(Debug, PartialEq)] pub struct XxHash64 { seed: u64, @@ -251,6 +252,7 @@ impl XxHash64 { Self::finish_with(seed, len.into_u64(), &accumulators, data) } + /// Constructs the hasher with an initial seed. #[must_use] pub const fn with_seed(seed: u64) -> Self { // Step 1. Initialize internal accumulators @@ -262,6 +264,16 @@ impl XxHash64 { } } + /// The seed this hasher was created with. + pub const fn seed(&self) -> u64 { + self.seed + } + + /// The total number of bytes hashed. + pub const fn total_len(&self) -> u64 { + self.length + } + #[must_use] // RATIONALE: See RATIONALE[inline] #[inline] @@ -444,6 +456,8 @@ mod std_impl { use super::*; + /// Constructs a randomized seed and reuses it for multiple hasher + /// instances. pub struct RandomXxHash64Builder(u64); impl Default for RandomXxHash64Builder { From 3dd65268a006cf448a4f3c3fda0ae3a2a45aa5ce Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 28 Jun 2024 12:35:07 -0400 Subject: [PATCH 039/166] dox --- src/lib.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 9e382cc68..ae8755496 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,8 @@ //! //! ## Hashing arbitrary data //! +//! ### When all the data is available at once +//! //! ```rust //! use xx_renu::XxHash64; //! @@ -12,6 +14,21 @@ //! assert_eq!(0xeab5_5659_a496_d78b, hash); //! ``` //! +//! ### When the data is streaming +//! +//! ```rust +//! use std::hash::Hasher; +//! use xx_renu::XxHash64; +//! +//! let seed = 1234; +//! let mut hasher = XxHash64::with_seed(seed); +//! hasher.write(b"some"); +//! hasher.write(b" "); +//! hasher.write(b"bytes"); +//! let hash = hasher.finish(); +//! assert_eq!(0xeab5_5659_a496_d78b, hash); +//! ``` +//! //! ## In a [`HashMap`](std::collections::HashMap) //! //! 
### With a fixed seed From 424f847f693cbe4b657ebcf2fbd277a65c42ae77 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 29 Jun 2024 12:50:24 -0400 Subject: [PATCH 040/166] rename --- README.md | 4 ++ compare/src/lib.rs | 4 +- renu-sum/src/main.rs | 1 + src/lib.rs | 29 +++++++---- src/xxhash32.rs | 114 ++++++++++++++++++++++++++----------------- src/xxhash64.rs | 86 +++++++++++++++++++------------- 6 files changed, 148 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index f2f88dd86..e3e37f9c9 100644 --- a/README.md +++ b/README.md @@ -8,3 +8,7 @@ no-features all-features features for 32 / 64 / xx3 + + +rand feature instead of `std`? +remove digest as we aren't crypto? diff --git a/compare/src/lib.rs b/compare/src/lib.rs index f17554919..5be3f3f73 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -7,7 +7,7 @@ use xx_renu as rust; mod xxhash32 { use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher; + use std::hash::Hasher as _; use super::*; @@ -106,7 +106,7 @@ mod xxhash32 { mod xxhash64 { use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher; + use std::hash::Hasher as _; use super::*; diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 409ee35a7..172b164c3 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -2,6 +2,7 @@ use std::{ env, fs::File, io::Read, + hash::Hasher as _, path::{Path, PathBuf}, sync::mpsc::{self, SendError}, thread, diff --git a/src/lib.rs b/src/lib.rs index ae8755496..3d6713a41 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,7 @@ //! ### When the data is streaming //! //! ```rust -//! use std::hash::Hasher; +//! use std::hash::Hasher as _; //! use xx_renu::XxHash64; //! //! let seed = 1234; @@ -31,13 +31,13 @@ //! //! ## In a [`HashMap`](std::collections::HashMap) //! -//! ### With a fixed seed +//! ### With a default seed //! //! ```rust //! use std::{collections::HashMap, hash::BuildHasherDefault}; //! 
use xx_renu::XxHash64; //! -//! let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); +//! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` @@ -46,9 +46,20 @@ //! //! ```rust //! use std::collections::HashMap; -//! use xx_renu::RandomXxHash64Builder; +//! use xx_renu::xxhash64; +//! +//! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` +//! +//! ### With a fixed seed +//! +//! ```rust +//! use std::collections::HashMap; +//! use xx_renu::xxhash64; //! -//! let mut hash: HashMap<_, _, RandomXxHash64Builder> = Default::default(); +//! let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` @@ -61,16 +72,16 @@ extern crate std; #[cfg(feature = "xxhash32")] -mod xxhash32; +pub mod xxhash32; #[cfg(feature = "xxhash32")] -pub use xxhash32::*; +pub use xxhash32::Hasher as XxHash32; #[cfg(feature = "xxhash64")] -mod xxhash64; +pub mod xxhash64; #[cfg(feature = "xxhash64")] -pub use xxhash64::*; +pub use xxhash64::Hasher as XxHash64; trait IntoU32 { fn into_u32(self) -> u32; diff --git a/src/xxhash32.rs b/src/xxhash32.rs index d6ec93879..ad2b4e988 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -1,4 +1,6 @@ -use core::{fmt, hash::Hasher, mem}; +//! The implementation of XXH32. + +use core::{fmt, hash, mem}; use crate::{IntoU32, IntoU64}; @@ -61,7 +63,7 @@ impl Buffer { fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // - // 1. this method is called one time per `XxHash64::write` call + // 1. this method is called one time per `Hasher::write` call // 2. 
this method early exits if we don't have anything in the buffer // // Because of this, removing the panics via `unsafe` doesn't @@ -186,33 +188,34 @@ impl fmt::Debug for Accumulators { } } -/// Calculates the 32-bit hash. Care should be taken when using this -/// hash. +/// Calculates the 32-bit hash. +/// +/// ### Caution /// -/// Although this struct implements `Hasher`, it only calculates a +/// Although this struct implements [`hash::Hasher`][], it only calculates a /// 32-bit number, leaving the upper bits as 0. This means it is -/// unlikely to be correct to use this in places like a `HashMap`. +/// unlikely to be correct to use this in places like a [`HashMap`][std::collections::HashMap]. #[derive(Debug, PartialEq)] -pub struct XxHash32 { +pub struct Hasher { seed: u32, accumulators: Accumulators, buffer: Buffer, length: u64, } -impl Default for XxHash32 { +impl Default for Hasher { fn default() -> Self { Self::with_seed(0) } } -impl XxHash32 { +impl Hasher { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] - // RATIONALE[inline]: Keeping parallel to the XxHash64 - // implementation, even though the performance gains for XxHash32 - // haven't been tested. + // RATIONALE[inline]: Keeping parallel to the 64-bit + // implementation, even though the performance gains for the + // 32-bit version haven't been tested. #[inline] pub fn oneshot(seed: u32, data: &[u8]) -> u32 { let len = data.len(); @@ -253,12 +256,12 @@ impl XxHash32 { /// The total number of bytes hashed, truncated to 32 bits. /// /// For the full 64-bit byte count, use [`total_len`](Self::total_len). - pub const fn total_len_32(&self) -> u64 { - self.length + pub const fn total_len_32(&self) -> u32 { + self.length as u32 } /// Returns the hash value for the values written so far. 
Unlike - /// [`Hasher::finish`][], this method returns the actual 32-bit + /// [`hash::Hasher::finish`][], this method returns the actual 32-bit /// value calculated, not a 64-bit value. #[must_use] // RATIONALE: See RATIONALE[inline] @@ -318,7 +321,7 @@ impl XxHash32 { } } -impl Hasher for XxHash32 { +impl hash::Hasher for Hasher { // RATIONALE: See RATIONALE[inline] #[inline] fn write(&mut self, data: &[u8]) { @@ -341,7 +344,7 @@ impl Hasher for XxHash32 { // RATIONALE: See RATIONALE[inline] #[inline] fn finish(&self) -> u64 { - XxHash32::finish_32(self).into() + Hasher::finish_32(self).into() } } @@ -355,7 +358,7 @@ const fn round(mut acc: u32, lane: u32) -> u32 { #[cfg(test)] mod test { - use core::array; + use core::{array, hash::Hasher as _}; use super::*; @@ -363,13 +366,13 @@ mod test { fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0; 32]; - let mut byte_by_byte = XxHash32::with_seed(0); + let mut byte_by_byte = Hasher::with_seed(0); for byte in bytes.chunks(1) { byte_by_byte.write(byte); } let byte_by_byte = byte_by_byte.finish(); - let mut one_chunk = XxHash32::with_seed(0); + let mut one_chunk = Hasher::with_seed(0); one_chunk.write(&bytes); let one_chunk = one_chunk.finish(); @@ -378,21 +381,21 @@ mod test { #[test] fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&[]); assert_eq!(hasher.finish(), 0x02cc_5d05); } #[test] fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&[42]); assert_eq!(hasher.finish(), 0xe0fe_705f); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); assert_eq!(hasher.finish(), 0x9e5e_7e93); } @@ -400,14 +403,14 @@ mod test { #[test] fn hash_of_multiple_chunks_matches_c_implementation() 
{ let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x7f89_ba44); } #[test] fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); + let mut hasher = Hasher::with_seed(0x42c9_1977); hasher.write(&[]); assert_eq!(hasher.finish(), 0xd6bf_8459); } @@ -415,7 +418,7 @@ mod test { #[test] fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash32::with_seed(0x42c9_1977); + let mut hasher = Hasher::with_seed(0x42c9_1977); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x6d2f_6c17); } @@ -423,12 +426,12 @@ mod test { #[test] fn hashes_with_different_offsets_are_the_same() { let bytes = [0x7c; 4096]; - let expected = XxHash32::oneshot(0, &[0x7c; 64]); + let expected = Hasher::oneshot(0, &[0x7c; 64]); let the_same = bytes .windows(64) .map(|w| { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(w); hasher.finish_32() }) @@ -446,7 +449,7 @@ mod test { // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. let bytes200: [u8; 200] = array::from_fn(|i| i as _); - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); for _ in 0..(4_300_000_000 / bytes200.len()) { hasher.write(&bytes200); } @@ -465,27 +468,46 @@ mod std_impl { use super::*; + /// Constructs [`Hasher`][] for multiple hasher instances. See + /// the [usage warning][Hasher#caution]. + pub struct State(u32); + + impl State { + /// Constructs the hasher with an initial seed. 
+ pub fn with_seed(seed: u32) -> Self { + Self(seed) + } + } + + impl BuildHasher for State { + type Hasher = Hasher; + + fn build_hasher(&self) -> Self::Hasher { + Hasher::with_seed(self.0) + } + } + /// Constructs a randomized seed and reuses it for multiple hasher - /// instances. See the usage warning on [`XxHash32`][]. - pub struct RandomXxHash32Builder(u32); + /// instances. See the [usage warning][Hasher#caution]. + pub struct RandomState(u32); - impl Default for RandomXxHash32Builder { + impl Default for RandomState { fn default() -> Self { Self::new() } } - impl RandomXxHash32Builder { + impl RandomState { fn new() -> Self { Self(rand::random()) } } - impl BuildHasher for RandomXxHash32Builder { - type Hasher = XxHash32; + impl BuildHasher for RandomState { + type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { - XxHash32::with_seed(self.0) + Hasher::with_seed(self.0) } } @@ -498,14 +520,14 @@ mod std_impl { #[test] fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHash32Builder> = Default::default(); + let mut hash: HashMap<_, _, RandomState> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } @@ -521,7 +543,7 @@ mod serialize_impl { use super::*; - impl<'de> Deserialize<'de> for XxHash32 { + impl<'de> Deserialize<'de> for Hasher { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, @@ -540,7 +562,7 @@ mod serialize_impl { let mut buffer_data = BufferData::new(); buffer_data.bytes_mut().copy_from_slice(&buffer); - Ok(XxHash32 { + Ok(Hasher { seed, accumulators: Accumulators([v1, v2, v3, v4]), buffer: Buffer { @@ -552,12 +574,12 @@ mod serialize_impl { } 
} - impl Serialize for XxHash32 { + impl Serialize for Hasher { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - let XxHash32 { + let Hasher { seed, ref accumulators, ref buffer, @@ -604,19 +626,19 @@ mod serialize_impl { #[test] fn test_serialization_cycle() -> Result { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash32 = serde_json::from_str(&serialized)?; + let unserialized: Hasher = serde_json::from_str(&serialized)?; assert_eq!(hasher, unserialized); Ok(()) } #[test] fn test_serialization_stability() -> Result { - let mut hasher = XxHash32::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); @@ -636,7 +658,7 @@ mod serialize_impl { "buffer_usage": 14 }"#; - let unserialized: XxHash32 = serde_json::from_str(expected_serialized)?; + let unserialized: Hasher = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 7c988a098..2deb0d997 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -1,4 +1,6 @@ -use core::{fmt, hash::Hasher, mem}; +//! The implementation of XXH64. + +use core::{fmt, hash, mem}; use crate::IntoU64; @@ -61,7 +63,7 @@ impl Buffer { fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // - // 1. this method is called one time per `XxHash64::write` call + // 1. this method is called one time per `Hasher::write` call // 2. this method early exits if we don't have anything in the buffer // // Because of this, removing the panics via `unsafe` doesn't @@ -205,20 +207,20 @@ impl fmt::Debug for Accumulators { /// Calculates the 64-bit hash. 
#[derive(Debug, PartialEq)] -pub struct XxHash64 { +pub struct Hasher { seed: u64, accumulators: Accumulators, buffer: Buffer, length: u64, } -impl Default for XxHash64 { +impl Default for Hasher { fn default() -> Self { Self::with_seed(0) } } -impl XxHash64 { +impl Hasher { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] @@ -326,7 +328,7 @@ impl XxHash64 { } } -impl Hasher for XxHash64 { +impl hash::Hasher for Hasher { // RATIONALE: See RATIONALE[inline] #[inline] fn write(&mut self, data: &[u8]) { @@ -368,7 +370,7 @@ const fn round(mut acc: u64, lane: u64) -> u64 { #[cfg(test)] mod test { - use core::array; + use core::{array, hash::Hasher as _}; use super::*; @@ -376,13 +378,13 @@ mod test { fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0x9c; 32]; - let mut byte_by_byte = XxHash64::with_seed(0); + let mut byte_by_byte = Hasher::with_seed(0); for byte in bytes.chunks(1) { byte_by_byte.write(byte); } let byte_by_byte = byte_by_byte.finish(); - let mut one_chunk = XxHash64::with_seed(0); + let mut one_chunk = Hasher::with_seed(0); one_chunk.write(&bytes); let one_chunk = one_chunk.finish(); @@ -391,21 +393,21 @@ mod test { #[test] fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&[]); assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); } #[test] fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&[42]); assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); } @@ -413,14 +415,14 @@ mod test { #[test] fn 
hash_of_multiple_chunks_matches_c_implementation() { let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); } #[test] fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91); hasher.write(&[]); assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); } @@ -428,7 +430,7 @@ mod test { #[test] fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { let bytes: [u8; 100] = array::from_fn(|i| i as u8); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); } @@ -436,12 +438,12 @@ mod test { #[test] fn hashes_with_different_offsets_are_the_same() { let bytes = [0x7c; 4096]; - let expected = XxHash64::oneshot(0, &[0x7c; 64]); + let expected = Hasher::oneshot(0, &[0x7c; 64]); let the_same = bytes .windows(64) .map(|w| { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(w); hasher.finish() }) @@ -456,27 +458,45 @@ mod std_impl { use super::*; + /// Constructs [`Hasher`][] for multiple hasher instances. + pub struct State(u64); + + impl State { + /// Constructs the hasher with an initial seed. + pub fn with_seed(seed: u64) -> Self { + Self(seed) + } + } + + impl BuildHasher for State { + type Hasher = Hasher; + + fn build_hasher(&self) -> Self::Hasher { + Hasher::with_seed(self.0) + } + } + /// Constructs a randomized seed and reuses it for multiple hasher /// instances. 
- pub struct RandomXxHash64Builder(u64); + pub struct RandomState(u64); - impl Default for RandomXxHash64Builder { + impl Default for RandomState { fn default() -> Self { Self::new() } } - impl RandomXxHash64Builder { + impl RandomState { fn new() -> Self { Self(rand::random()) } } - impl BuildHasher for RandomXxHash64Builder { - type Hasher = XxHash64; + impl BuildHasher for RandomState { + type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { - XxHash64::with_seed(self.0) + Hasher::with_seed(self.0) } } @@ -489,14 +509,14 @@ mod std_impl { #[test] fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHash64Builder> = Default::default(); + let mut hash: HashMap<_, _, RandomState> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } @@ -512,7 +532,7 @@ mod serialize_impl { use super::*; - impl<'de> Deserialize<'de> for XxHash64 { + impl<'de> Deserialize<'de> for Hasher { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, @@ -531,7 +551,7 @@ mod serialize_impl { let mut buffer_data = BufferData::new(); buffer_data.bytes_mut().copy_from_slice(&buffer); - Ok(XxHash64 { + Ok(Hasher { seed, accumulators: Accumulators([v1, v2, v3, v4]), buffer: Buffer { @@ -543,12 +563,12 @@ mod serialize_impl { } } - impl Serialize for XxHash64 { + impl Serialize for Hasher { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - let XxHash64 { + let Hasher { seed, ref accumulators, ref buffer, @@ -595,19 +615,19 @@ mod serialize_impl { #[test] fn test_serialization_cycle() -> Result { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); 
hasher.write(b"Hello, world!\0"); hasher.finish(); let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash64 = serde_json::from_str(&serialized)?; + let unserialized: Hasher = serde_json::from_str(&serialized)?; assert_eq!(hasher, unserialized); Ok(()) } #[test] fn test_serialization_stability() -> Result { - let mut hasher = XxHash64::with_seed(0); + let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); hasher.finish(); @@ -629,7 +649,7 @@ mod serialize_impl { "buffer_usage": 14 }"#; - let unserialized: XxHash64 = serde_json::from_str(expected_serialized)?; + let unserialized: Hasher = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; From 7c0f281a4a4e324bd713926e58b6f76f05ee566e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 4 Jul 2024 08:31:43 -0400 Subject: [PATCH 041/166] error check --- xx_hash-sys/src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 0d691790f..6f915cbb7 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -24,7 +24,7 @@ extern "C" { length: libc::size_t, ) -> XXH_errorcode; fn XXH32_digest(state: *mut XXH32_state_t) -> XXH32_hash_t; - fn XXH32_freeState(state: *mut XXH32_state_t); + fn XXH32_freeState(state: *mut XXH32_state_t) -> XXH_errorcode; } pub struct XxHash32(*mut XXH32_state_t); @@ -56,7 +56,8 @@ impl XxHash32 { impl Drop for XxHash32 { fn drop(&mut self) { - unsafe { XXH32_freeState(self.0) } + let retval = unsafe { XXH32_freeState(self.0) }; + assert_eq!(retval, XXH_OK); } } @@ -81,7 +82,7 @@ extern "C" { length: libc::size_t, ) -> XXH_errorcode; fn XXH64_digest(state: *mut XXH64_state_t) -> XXH64_hash_t; - fn XXH64_freeState(state: *mut XXH64_state_t); + fn XXH64_freeState(state: *mut XXH64_state_t) -> XXH_errorcode; } pub struct XxHash64(*mut XXH64_state_t); @@ -113,6 
+114,7 @@ impl XxHash64 { impl Drop for XxHash64 { fn drop(&mut self) { - unsafe { XXH64_freeState(self.0) } + let retval = unsafe { XXH64_freeState(self.0) }; + assert_eq!(retval, XXH_OK); } } From b784c6181ce192fc94f27428022796b53beb61fa Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 4 Jul 2024 08:40:21 -0400 Subject: [PATCH 042/166] rename random --- Cargo.toml | 4 +-- src/xxhash32.rs | 77 +++++++++++++++++++++++++------------------------ src/xxhash64.rs | 72 ++++++++++++++++++++++----------------------- 3 files changed, 76 insertions(+), 77 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2c44331ff..99b104214 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,9 +11,9 @@ members = [ ] [features] -default = ["std", "xxhash32", "xxhash64"] +default = ["random", "xxhash32", "xxhash64"] -std = ["dep:rand"] +random = ["dep:rand"] serialize = ["dep:serde"] diff --git a/src/xxhash32.rs b/src/xxhash32.rs index ad2b4e988..725976b44 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -1,6 +1,6 @@ //! The implementation of XXH32. -use core::{fmt, hash, mem}; +use core::{fmt, hash::{self, BuildHasher}, mem}; use crate::{IntoU32, IntoU64}; @@ -356,9 +356,32 @@ const fn round(mut acc: u32, lane: u32) -> u32 { acc.wrapping_mul(PRIME32_1) } + +/// Constructs [`Hasher`][] for multiple hasher instances. See +/// the [usage warning][Hasher#caution]. +pub struct State(u32); + +impl State { + /// Constructs the hasher with an initial seed. 
+ pub fn with_seed(seed: u32) -> Self { + Self(seed) + } +} + +impl BuildHasher for State { + type Hasher = Hasher; + + fn build_hasher(&self) -> Self::Hasher { + Hasher::with_seed(self.0) + } +} + + + #[cfg(test)] mod test { - use core::{array, hash::Hasher as _}; + use core::{array, hash::{BuildHasherDefault, Hasher as _}}; + use std::collections::HashMap; use super::*; @@ -460,36 +483,22 @@ mod test { // compared against the C implementation assert_eq!(hasher.finish(), 0x1522_4ca7); } -} - -#[cfg(feature = "std")] -mod std_impl { - use core::hash::BuildHasher; - - use super::*; - /// Constructs [`Hasher`][] for multiple hasher instances. See - /// the [usage warning][Hasher#caution]. - pub struct State(u32); - - impl State { - /// Constructs the hasher with an initial seed. - pub fn with_seed(seed: u32) -> Self { - Self(seed) - } + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); } +} - impl BuildHasher for State { - type Hasher = Hasher; - - fn build_hasher(&self) -> Self::Hasher { - Hasher::with_seed(self.0) - } - } +#[cfg(feature = "random")] +mod random_impl { + use super::*; /// Constructs a randomized seed and reuses it for multiple hasher /// instances. See the [usage warning][Hasher#caution]. 
- pub struct RandomState(u32); + pub struct RandomState(State); impl Default for RandomState { fn default() -> Self { @@ -499,7 +508,7 @@ mod std_impl { impl RandomState { fn new() -> Self { - Self(rand::random()) + Self(State::with_seed(rand::random())) } } @@ -507,24 +516,16 @@ mod std_impl { type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { - Hasher::with_seed(self.0) + self.0.build_hasher() } } #[cfg(test)] mod test { - use core::hash::BuildHasherDefault; use std::collections::HashMap; use super::*; - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); @@ -534,8 +535,8 @@ mod std_impl { } } -#[cfg(feature = "std")] -pub use std_impl::*; +#[cfg(feature = "random")] +pub use random_impl::*; #[cfg(feature = "serialize")] mod serialize_impl { diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 2deb0d997..086a21d4e 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -1,6 +1,6 @@ //! The implementation of XXH64. -use core::{fmt, hash, mem}; +use core::{fmt, hash::{self, BuildHasher}, mem}; use crate::IntoU64; @@ -368,9 +368,28 @@ const fn round(mut acc: u64, lane: u64) -> u64 { acc.wrapping_mul(PRIME64_1) } +/// Constructs [`Hasher`][] for multiple hasher instances. +pub struct State(u64); + +impl State { + /// Constructs the hasher with an initial seed. 
+ pub fn with_seed(seed: u64) -> Self { + Self(seed) + } +} + +impl BuildHasher for State { + type Hasher = Hasher; + + fn build_hasher(&self) -> Self::Hasher { + Hasher::with_seed(self.0) + } +} + #[cfg(test)] mod test { - use core::{array, hash::Hasher as _}; + use core::{array, hash::{BuildHasherDefault, Hasher as _}}; + use std::collections::HashMap; use super::*; @@ -450,35 +469,22 @@ mod test { .all(|h| h == expected); assert!(the_same); } -} - -#[cfg(feature = "std")] -mod std_impl { - use core::hash::BuildHasher; - - use super::*; - - /// Constructs [`Hasher`][] for multiple hasher instances. - pub struct State(u64); - impl State { - /// Constructs the hasher with an initial seed. - pub fn with_seed(seed: u64) -> Self { - Self(seed) - } + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); } +} - impl BuildHasher for State { - type Hasher = Hasher; - - fn build_hasher(&self) -> Self::Hasher { - Hasher::with_seed(self.0) - } - } +#[cfg(feature = "random")] +mod random_impl { + use super::*; /// Constructs a randomized seed and reuses it for multiple hasher /// instances. 
- pub struct RandomState(u64); + pub struct RandomState(State); impl Default for RandomState { fn default() -> Self { @@ -488,7 +494,7 @@ mod std_impl { impl RandomState { fn new() -> Self { - Self(rand::random()) + Self(State::with_seed(rand::random())) } } @@ -496,24 +502,16 @@ mod std_impl { type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { - Hasher::with_seed(self.0) + self.0.build_hasher() } } #[cfg(test)] mod test { - use core::hash::BuildHasherDefault; use std::collections::HashMap; use super::*; - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); @@ -523,8 +521,8 @@ mod std_impl { } } -#[cfg(feature = "std")] -pub use std_impl::*; +#[cfg(feature = "random")] +pub use random_impl::*; #[cfg(feature = "serialize")] mod serialize_impl { From 16a5f73ea3eef9379e9a28fe61d042b3c301806f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 4 Jul 2024 11:50:46 -0400 Subject: [PATCH 043/166] junk --- renu-sum/src/main.rs | 2 +- src/xxhash32.rs | 14 +++++++++----- src/xxhash64.rs | 11 +++++++++-- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 172b164c3..7a1055778 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -1,8 +1,8 @@ use std::{ env, fs::File, - io::Read, hash::Hasher as _, + io::Read, path::{Path, PathBuf}, sync::mpsc::{self, SendError}, thread, diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 725976b44..d5f8a272e 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -1,6 +1,10 @@ //! The implementation of XXH32. 
-use core::{fmt, hash::{self, BuildHasher}, mem}; +use core::{ + fmt, + hash::{self, BuildHasher}, + mem, +}; use crate::{IntoU32, IntoU64}; @@ -356,7 +360,6 @@ const fn round(mut acc: u32, lane: u32) -> u32 { acc.wrapping_mul(PRIME32_1) } - /// Constructs [`Hasher`][] for multiple hasher instances. See /// the [usage warning][Hasher#caution]. pub struct State(u32); @@ -376,11 +379,12 @@ impl BuildHasher for State { } } - - #[cfg(test)] mod test { - use core::{array, hash::{BuildHasherDefault, Hasher as _}}; + use core::{ + array, + hash::{BuildHasherDefault, Hasher as _}, + }; use std::collections::HashMap; use super::*; diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 086a21d4e..fcfa55642 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -1,6 +1,10 @@ //! The implementation of XXH64. -use core::{fmt, hash::{self, BuildHasher}, mem}; +use core::{ + fmt, + hash::{self, BuildHasher}, + mem, +}; use crate::IntoU64; @@ -388,7 +392,10 @@ impl BuildHasher for State { #[cfg(test)] mod test { - use core::{array, hash::{BuildHasherDefault, Hasher as _}}; + use core::{ + array, + hash::{BuildHasherDefault, Hasher as _}, + }; use std::collections::HashMap; use super::*; From 7cf082695ce69c44690eb96d0f2ee19fe90d1c26 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 4 Jul 2024 11:50:59 -0400 Subject: [PATCH 044/166] xxh3 --- src/lib.rs | 12 ++ src/xxhash3_64.rs | 383 +++++++++++++++++++++++++++++++++++++++++ xx_hash-sys/src/lib.rs | 58 +++++++ 3 files changed, 453 insertions(+) create mode 100644 src/xxhash3_64.rs diff --git a/src/lib.rs b/src/lib.rs index 3d6713a41..130976999 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,6 +83,8 @@ pub mod xxhash64; #[cfg(feature = "xxhash64")] pub use xxhash64::Hasher as XxHash64; +pub mod xxhash3_64; + trait IntoU32 { fn into_u32(self) -> u32; } @@ -115,3 +117,13 @@ impl IntoU64 for usize { self as u64 } } + +trait IntoU128 { + fn into_u128(self) -> u128; +} + +impl IntoU128 for u64 { + fn into_u128(self) -> u128 { + 
u128::from(self) + } +} diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs new file mode 100644 index 000000000..26244fedd --- /dev/null +++ b/src/xxhash3_64.rs @@ -0,0 +1,383 @@ +#![allow(missing_docs, dead_code, non_snake_case)] + +use core::{mem, slice}; + +use crate::{IntoU128, IntoU32, IntoU64}; + +const PRIME32_1: u64 = 0x9E3779B1; +const PRIME32_2: u64 = 0x85EBCA77; +const PRIME32_3: u64 = 0xC2B2AE3D; +const PRIME64_1: u64 = 0x9E3779B185EBCA87; +const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; +const PRIME64_3: u64 = 0x165667B19E3779F9; +const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; +const PRIME64_5: u64 = 0x27D4EB2F165667C5; +const PRIME_MX1: u64 = 0x165667919E3779F9; +const PRIME_MX2: u64 = 0x9FB21C651E98DF25; + +const DEFAULT_SECRET: [u8; 192] = [ + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +]; + +pub struct XxHash3_64; + +impl XxHash3_64 { + #[inline] + 
pub fn oneshot(input: &[u8]) -> u64 { + let seed = 0; + let secret = DEFAULT_SECRET; + + match input.len() { + 0 => { + let secret_words = + unsafe { secret.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() }; + avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) + } + + 1..=3 => { + let input_length = input.len() as u8; // OK as we checked that the length fits + + let combined = input[input.len() - 1].into_u32() + | input_length.into_u32() << 8 + | input[0].into_u32() << 16 + | input[input.len() >> 1].into_u32() << 24; + + let secret_words = unsafe { secret.as_ptr().cast::<[u32; 2]>().read_unaligned() }; + let value = + ((secret_words[0] ^ secret_words[1]).into_u64() + seed) ^ combined.into_u64(); + + // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result." + avalanche_xxh64(value) + } + + 4..=8 => { + let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); + + let secret_words = + unsafe { secret.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() }; + let combined = input_last.into_u64() | (input_first.into_u64() << 32); + + let mut value = ((secret_words[0] ^ secret_words[1]) - modified_seed) ^ combined; + value ^= value.rotate_left(49) ^ value.rotate_left(24); + value = value.wrapping_mul(PRIME_MX2); + value ^= (value >> 35).wrapping_add(input.len().into_u64()); + value = value.wrapping_mul(PRIME_MX2); + value ^= value >> 28; + value + } + + 9..=16 => { + let inputFirst: u64 = unsafe { input.as_ptr().cast::().read_unaligned() }; + let inputLast: u64 = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + + let secretWords = + unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; + let low: u64 = ((secretWords[0] ^ 
secretWords[1]).wrapping_add(seed)) ^ inputFirst; + let high: u64 = ((secretWords[2] ^ secretWords[3]).wrapping_sub(seed)) ^ inputLast; + let mulResult: u128 = low.into_u128().wrapping_mul(high.into_u128()); + let value: u64 = input + .len() + .into_u64() + .wrapping_add(low.swap_bytes()) + .wrapping_add(high) + .wrapping_add(mulResult.lower_half() ^ mulResult.upper_half()); + + avalanche(value) + } + + 17..=128 => { + let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); + + let numRounds = ((input.len() - 1) >> 5) + 1; + + let mut ff = input; + let mut rr = input; + + for i in (0..numRounds).rev() { + let (ffc, ffn) = ff.split_first_chunk().unwrap(); + let (rrn, rrc) = rr.split_last_chunk().unwrap(); + + acc = acc.wrapping_add(mix_step(ffc, &secret, i * 32, seed)); + acc = acc.wrapping_add(mix_step(rrc, &secret, i * 32 + 16, seed)); + + ff = ffn; + rr = rrn; + } + + avalanche(acc) + } + + 129..=240 => { + let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); + + let (head, _tail) = input.bp_as_chunks(); + let mut head = head.into_iter(); + + for (i, chunk) in head.by_ref().take(8).enumerate() { + acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); + } + + acc = avalanche(acc); + + for (i, chunk) in head.enumerate() { + acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16 + 3, seed)); + } + + acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), &secret, 119, seed)); + + avalanche(acc) + } + + _ => todo!(), + } + } +} + +fn avalanche(mut x: u64) -> u64 { + x ^= x >> 37; + x = x.wrapping_mul(PRIME_MX1); + x ^= x >> 32; + x +} + +fn avalanche_xxh64(mut x: u64) -> u64 { + x ^= x >> 33; + x = x.wrapping_mul(PRIME64_2); + x ^= x >> 29; + x = x.wrapping_mul(PRIME64_3); + x ^= x >> 32; + x +} + +fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> u64 { + // TODO: Should these casts / reads happen outside this function? 
+ let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; + let secret_words = unsafe { + secret + .as_ptr() + .add(secret_offset) + .cast::<[u64; 2]>() + .read_unaligned() + }; + + let mul_result: u128 = { + let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); + let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); + + a.wrapping_mul(b) + }; + + mul_result.lower_half() ^ mul_result.upper_half() +} + +fn mixTwoChunks( + acc: &mut [u64; 2], + data1: &[u8; 16], + data2: &[u8; 16], + secret: &[u8], + secretOffset: usize, + seed: u64, +) { + // TODO: Should these casts / reads happen outside this function? + let dataWords1 = unsafe { data1.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion + let dataWords2 = unsafe { data2.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion + + acc[0] = acc[0] + mix_step(data1, secret, secretOffset, seed); + acc[1] = acc[1] + mix_step(data2, secret, secretOffset + 16, seed); + acc[0] = acc[0] ^ dataWords2[0].wrapping_add(dataWords2[1]); + acc[1] = acc[1] ^ dataWords1[0].wrapping_add(dataWords1[1]); +} + +trait Halves { + type Output; + + fn upper_half(self) -> Self::Output; + fn lower_half(self) -> Self::Output; +} + +impl Halves for u64 { + type Output = u32; + + #[inline] + fn upper_half(self) -> Self::Output { + (self >> 32) as _ + } + + #[inline] + fn lower_half(self) -> Self::Output { + self as _ + } +} + +impl Halves for u128 { + type Output = u64; + + #[inline] + fn upper_half(self) -> Self::Output { + (self >> 64) as _ + } + + #[inline] + fn lower_half(self) -> Self::Output { + self as _ + } +} + +trait SliceBackport { + fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); +} + +impl SliceBackport for [T] { + fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { + assert_ne!(N, 0); + let len = self.len() / N; + let (head, tail) = unsafe { self.split_at_unchecked(len) }; + let 
head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; + (head, tail) + } + + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { + assert_ne!(N, 0); + let len = self.len() / N; + let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; + (head, tail) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn hash_64bit_empty_matches_c_implementation() { + let hash = XxHash3_64::oneshot(&[]); + assert_eq!(hash, 0x2d06_8005_38d3_94c2); + } + + #[test] + fn hash_64bit_1_to_3_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[&[0; 1], &[0; 2], &[0; 3]]; + let expected = [ + 0xc44b_dff4_074e_ecdb, + 0x3325_230e_1f28_5505, + 0xeb5d_658b_b22f_286b, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } + + #[test] + fn hash_64bit_4_to_8_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[&[0; 4], &[0; 5], &[0; 6], &[0; 7], &[0; 8]]; + + let expected = [ + 0x48b2_c926_16fc_193d, + 0xe864_e589_3a27_3242, + 0x06df_7381_3892_fde7, + 0xa691_8fec_1ae6_5b70, + 0xc77b_3abb_6f87_acd9, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } + + #[test] + fn hash_64bit_9_to_16_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[ + &[0; 9], &[0; 10], &[0; 11], &[0; 12], &[0; 13], &[0; 14], &[0; 15], &[0; 16], + ]; + + let expected = [ + 0x3449_9569_f039_1857, + 0x4a9f_fcfb_2837_fbcc, + 0xae43_2800_a160_9968, + 0xc499_8f91_69c2_a4f0, + 0xdaef_f723_917d_5279, + 0xf146_5eb4_188c_41e7, + 0xba50_02d3_c3ed_6bc7, + 0xd0a6_6a65_c752_8968, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } + + #[test] + fn 
hash_64bit_17_to_128_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[ + &[0; 17], &[0; 18], &[0; 19], &[0; 126], &[0; 127], &[0; 128], + ]; + + let expected = [ + 0xc291_5ca0_df7a_d4c1, + 0xff78_21dd_f836_d020, + 0x8711_2824_6eb4_52b8, + 0x3133_805e_2401_c842, + 0x759e_ea08_c3b7_7cae, + 0x093c_29f2_7ecf_cf21, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } + + #[test] + fn hash_64bit_129_to_240_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[ + &[0; 129], &[0; 130], &[0; 131], &[0; 238], &[0; 239], &[0; 240], + ]; + + let expected = [ + 0x37f7_943e_b2f5_1359, + 0x9cc8_599a_c6e3_f7c5, + 0x9a3c_cf6f_257e_b24d, + 0xb980_bcaf_ae82_6b6a, + 0xf01b_b3be_cb26_4837, + 0x053f_0744_4f70_da08, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } +} diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 6f915cbb7..28b2298b8 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -118,3 +118,61 @@ impl Drop for XxHash64 { assert_eq!(retval, XXH_OK); } } + +// ---------- + +// type XXH_hash_t = u64; + +#[repr(C)] +pub struct XXH3_state_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +extern "C" { + fn XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + + fn XXH3_createState() -> *mut XXH3_state_t; + fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; +} + +pub struct XxHash3_64(*mut XXH3_state_t); + +impl XxHash3_64 { + pub fn oneshot(data: &[u8]) 
-> u64 { + unsafe { XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = XXH3_createState(); + XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { XXH3_64bits_digest(self.0) } + } +} + +impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } +} From 67eb967bfd9083db7fc68897de2c0e139de58045 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 5 Jul 2024 11:57:20 -0400 Subject: [PATCH 045/166] xxh3 --- src/xxhash3_64.rs | 191 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 157 insertions(+), 34 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 26244fedd..ccadcd12f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -87,8 +87,8 @@ impl XxHash3_64 { } 9..=16 => { - let inputFirst: u64 = unsafe { input.as_ptr().cast::().read_unaligned() }; - let inputLast: u64 = unsafe { + let input_first: u64 = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last: u64 = unsafe { input .as_ptr() .add(input.len()) @@ -97,17 +97,19 @@ impl XxHash3_64 { .read_unaligned() }; - let secretWords = + let secret_words = unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; - let low: u64 = ((secretWords[0] ^ secretWords[1]).wrapping_add(seed)) ^ inputFirst; - let high: u64 = ((secretWords[2] ^ secretWords[3]).wrapping_sub(seed)) ^ inputLast; - let mulResult: u128 = low.into_u128().wrapping_mul(high.into_u128()); + let low: u64 = + ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; + let high: u64 = + ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; + let mul_result: u128 = 
low.into_u128().wrapping_mul(high.into_u128()); let value: u64 = input .len() .into_u64() .wrapping_add(low.swap_bytes()) .wrapping_add(high) - .wrapping_add(mulResult.lower_half() ^ mulResult.upper_half()); + .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); avalanche(value) } @@ -115,12 +117,13 @@ impl XxHash3_64 { 17..=128 => { let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); - let numRounds = ((input.len() - 1) >> 5) + 1; + let num_rounds = ((input.len() - 1) >> 5) + 1; + // TODO: use some chunks let mut ff = input; let mut rr = input; - for i in (0..numRounds).rev() { + for i in (0..num_rounds).rev() { let (ffc, ffn) = ff.split_first_chunk().unwrap(); let (rrn, rrc) = rr.split_last_chunk().unwrap(); @@ -135,7 +138,7 @@ impl XxHash3_64 { } 129..=240 => { - let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); + let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _tail) = input.bp_as_chunks(); let mut head = head.into_iter(); @@ -155,7 +158,42 @@ impl XxHash3_64 { avalanche(acc) } - _ => todo!(), + _ => { + #[rustfmt::skip] + let mut acc = [ + PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, + ]; + + let secret_length = secret.len(); + let stripes_per_block = (secret_length - 64) / 8; + let block_size = 64 * stripes_per_block; + + let mut cc = input.chunks(block_size).fuse(); + + let last_block = cc.next_back().unwrap(); + + for block in cc { + round(&mut acc, block, &secret); + } + + let last_stripe = unsafe { + &*input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::<[u64; 8]>()) + .cast::<[u64; 8]>() + }; + + last_round(&mut acc, last_block, last_stripe, &secret); + + final_merge( + &mut acc, + input.len().into_u64().wrapping_mul(PRIME64_1), + &secret, + 11, + ) + } } } } @@ -187,7 +225,7 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> .read_unaligned() }; - let mul_result: u128 = { + let mul_result = { let a = 
(data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); @@ -197,22 +235,90 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> mul_result.lower_half() ^ mul_result.upper_half() } -fn mixTwoChunks( - acc: &mut [u64; 2], - data1: &[u8; 16], - data2: &[u8; 16], - secret: &[u8], - secretOffset: usize, - seed: u64, -) { +// fn mix_two_chunks( +// acc: &mut [u64; 2], +// data1: &[u8; 16], +// data2: &[u8; 16], +// secret: &[u8], +// secret_offset: usize, +// seed: u64, +// ) { +// // TODO: Should these casts / reads happen outside this function? +// let data_words1 = unsafe { data1.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion +// let data_words2 = unsafe { data2.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion + +// acc[0] = acc[0] + mix_step(data1, secret, secret_offset, seed); +// acc[1] = acc[1] + mix_step(data2, secret, secret_offset + 16, seed); +// acc[0] = acc[0] ^ data_words2[0].wrapping_add(data_words2[1]); +// acc[1] = acc[1] ^ data_words1[0].wrapping_add(data_words1[1]); +// } + +// Step 2-1. Process stripes in the block +fn accumulate(acc: &mut [u64; 8], stripe: &[u64; 8], secret: &[u8], secret_offset: usize) { // TODO: Should these casts / reads happen outside this function? 
- let dataWords1 = unsafe { data1.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion - let dataWords2 = unsafe { data2.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion + let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; + + for i in 0..8 { + let value = stripe[i] ^ secret_words[i]; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); + acc[i] = acc[i].wrapping_add( + value + .lower_half() + .into_u64() + .wrapping_mul(value.upper_half().into_u64()), + ); + } +} - acc[0] = acc[0] + mix_step(data1, secret, secretOffset, seed); - acc[1] = acc[1] + mix_step(data2, secret, secretOffset + 16, seed); - acc[0] = acc[0] ^ dataWords2[0].wrapping_add(dataWords2[1]); - acc[1] = acc[1] ^ dataWords1[0].wrapping_add(dataWords1[1]); +fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { + let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::<[u64; 8]>() }>(); + for (n, stripe) in stripes.iter().enumerate() { + let stripe = unsafe { &*stripe.as_ptr().cast() }; + accumulate(acc, stripe, secret, n * 8); + } +} + +fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + let secret_words = unsafe { + secret + .as_ptr() + .add(secret.len()) + .sub(mem::size_of::<[u64; 8]>()) + .cast::<[u64; 8]>() + .read_unaligned() + }; + + for i in 0..8 { + acc[i] ^= acc[i] >> 47; + acc[i] ^= secret_words[i]; + acc[i] = acc[i] * PRIME32_1; + } +} + +fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { + round_accumulate(acc, block, secret); + round_scramble(acc, secret); +} + +fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u64; 8], secret: &[u8]) { + let n_full_stripes: usize = (block.len() - 1) / 64; + for n in 0..n_full_stripes { + let stripe = unsafe { &*block.as_ptr().add(n * 64).cast::<[u64; 8]>() }; + accumulate(acc, stripe, secret, n * 8); + } + accumulate(acc, last_stripe, secret, secret.len() - 71); +} + +fn final_merge(acc: &mut [u64; 8], init_value: 
u64, secret: &[u8], secret_offset: usize) -> u64 { + let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; + let mut result: u64 = init_value; + for i in 0..4 { + // 64-bit by 64-bit multiplication to 128-bit full result + let mul_result: u128 = (acc[i * 2] ^ secret_words[i * 2]).into_u128() + * (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); + } + avalanche(result) } trait Halves { @@ -252,7 +358,7 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); + // fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } impl SliceBackport for [T] { @@ -264,13 +370,13 @@ impl SliceBackport for [T] { (head, tail) } - fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { - assert_ne!(N, 0); - let len = self.len() / N; - let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; - let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; - (head, tail) - } + // fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { + // assert_ne!(N, 0); + // let len = self.len() / N; + // let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + // let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; + // (head, tail) + // } } #[cfg(test)] @@ -380,4 +486,21 @@ mod test { assert_eq!(hash, expected, "input was {input:?}"); } } + + #[test] + fn hash_64bit_240_plus_bytes_matches_c_implementation() { + let inputs: &[&[u8]] = &[&[0; 241], &[0; 242], &[0; 243], &[0; 244]]; + + let expected = [ + 0x5c5b_5d5d_40c5_9ce3, + 0xd619_7ac3_0eb7_e67b, + 0x6a04_3c8a_cf2e_dfe5, + 0x83cf_eefc_38e1_35af, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot(input); + assert_eq!(hash, expected, "input was {input:?}"); + } + } } From 56a91e71545349b82afea9c93de6d1592c066cc9 Mon Sep 17 00:00:00 2001 From: Jake 
Goulding Date: Fri, 5 Jul 2024 13:30:19 -0400 Subject: [PATCH 046/166] xxh3 --- compare/src/lib.rs | 99 +++++++++++++++++++++++++++++++ src/lib.rs | 2 + src/xxhash3_64.rs | 143 +++++++++++++++++++++++++++------------------ 3 files changed, 187 insertions(+), 57 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 5be3f3f73..597a7254e 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -203,6 +203,105 @@ mod xxhash64 { } } +mod xxhash3_64 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher as _; + + use super::*; + + proptest! { + // #[test] + // fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + // oneshot_same_as_one_chunk_impl(seed, &data)?; + // } + + // #[test] + // fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + // oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + // } + + // #[test] + // fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + // oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + // } + + #[test] + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + // #[test] + // fn streaming_one_chunk(seed: u64, data: Vec) { + // streaming_one_chunk_impl(seed, &data)?; + // } + + // #[test] + // fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + // streaming_one_chunk_impl(seed, &data[offset..])?; + // } + } + + // fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + // let oneshot = rust::XxHash64::oneshot(seed, data); + // let one_chunk = { + // let mut hasher = rust::XxHash64::with_seed(seed); + // hasher.write(data); + // hasher.finish() + // }; + + // prop_assert_eq!(oneshot, one_chunk); + // Ok(()) + // } + + // fn oneshot_same_as_many_chunks_impl( + // seed: u64, + // data: &[u8], + // 
chunks: &[Vec], + // ) -> TestCaseResult { + // let oneshot = rust::XxHash64::oneshot(seed, data); + // let many_chunks = { + // let mut hasher = rust::XxHash64::with_seed(seed); + // for chunk in chunks { + // hasher.write(chunk); + // } + // hasher.finish() + // }; + + // prop_assert_eq!(oneshot, many_chunks); + // Ok(()) + // } + + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot(data); + let rust = rust::XxHash3_64::oneshot(data); + + prop_assert_eq!(native, rust); + Ok(()) + } + + // fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + // let native = { + // let mut hasher = c::XxHash64::with_seed(seed); + // hasher.write(data); + // hasher.finish() + // }; + + // let rust = { + // let mut hasher = rust::XxHash64::with_seed(seed); + // hasher.write(data); + // hasher.finish() + // }; + + // prop_assert_eq!(native, rust); + // Ok(()) + // } +} + fn vec_and_index() -> impl Strategy, usize)> { prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| { let len = vec.len(); diff --git a/src/lib.rs b/src/lib.rs index 130976999..f060aeb8d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -85,6 +85,8 @@ pub use xxhash64::Hasher as XxHash64; pub mod xxhash3_64; +pub use xxhash3_64::XxHash3_64; + trait IntoU32 { fn into_u32(self) -> u32; } diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ccadcd12f..e59b799b7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -123,7 +123,7 @@ impl XxHash3_64 { let mut ff = input; let mut rr = input; - for i in (0..num_rounds).rev() { + for i in 0..num_rounds { let (ffc, ffn) = ff.split_first_chunk().unwrap(); let (rrn, rrc) = rr.split_last_chunk().unwrap(); @@ -141,7 +141,7 @@ impl XxHash3_64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _tail) = input.bp_as_chunks(); - let mut head = head.into_iter(); + let mut head = head.iter(); for (i, chunk) in head.by_ref().take(8).enumerate() { acc = 
acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); @@ -291,7 +291,7 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { for i in 0..8 { acc[i] ^= acc[i] >> 47; acc[i] ^= secret_words[i]; - acc[i] = acc[i] * PRIME32_1; + acc[i] = acc[i].wrapping_mul(PRIME32_1); } } @@ -314,8 +314,11 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset let mut result: u64 = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result - let mul_result: u128 = (acc[i * 2] ^ secret_words[i * 2]).into_u128() - * (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + let mul_result: u128 = { + let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); + let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + a.wrapping_mul(b) + }; result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); } avalanche(result) @@ -381,21 +384,35 @@ impl SliceBackport for [T] { #[cfg(test)] mod test { + use std::array; + use super::*; + macro_rules! bytes { + ($($n: literal),* $(,)?) => { + &[$(&gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] + }; + } + + fn gen_bytes() -> [u8; N] { + // Picking 251 as it's a prime number, which will hopefully + // help avoid incidental power-of-two alignment. 
+ array::from_fn(|i| (i % 251) as u8) + } + #[test] - fn hash_64bit_empty_matches_c_implementation() { + fn hash_empty() { let hash = XxHash3_64::oneshot(&[]); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } #[test] - fn hash_64bit_1_to_3_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[&[0; 1], &[0; 2], &[0; 3]]; + fn hash_1_to_3_bytes() { + let inputs = bytes![1, 2, 3]; let expected = [ 0xc44b_dff4_074e_ecdb, - 0x3325_230e_1f28_5505, - 0xeb5d_658b_b22f_286b, + 0xd664_5fc3_051a_9457, + 0x5f42_99fc_161c_9cbb, ]; for (input, expected) in inputs.iter().zip(expected) { @@ -405,15 +422,15 @@ mod test { } #[test] - fn hash_64bit_4_to_8_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[&[0; 4], &[0; 5], &[0; 6], &[0; 7], &[0; 8]]; + fn hash_4_to_8_bytes() { + let inputs = bytes![4, 5, 6, 7, 8]; let expected = [ - 0x48b2_c926_16fc_193d, - 0xe864_e589_3a27_3242, - 0x06df_7381_3892_fde7, - 0xa691_8fec_1ae6_5b70, - 0xc77b_3abb_6f87_acd9, + 0x60da_b036_a582_11f2, + 0xb075_753a_84ca_0fbe, + 0xa658_4d1d_9a6a_e704, + 0x0cd2_084a_6240_6b69, + 0x3a1c_2d7c_85af_88f8, ]; for (input, expected) in inputs.iter().zip(expected) { @@ -423,20 +440,18 @@ mod test { } #[test] - fn hash_64bit_9_to_16_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[ - &[0; 9], &[0; 10], &[0; 11], &[0; 12], &[0; 13], &[0; 14], &[0; 15], &[0; 16], - ]; + fn hash_9_to_16_bytes() { + let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16]; let expected = [ - 0x3449_9569_f039_1857, - 0x4a9f_fcfb_2837_fbcc, - 0xae43_2800_a160_9968, - 0xc499_8f91_69c2_a4f0, - 0xdaef_f723_917d_5279, - 0xf146_5eb4_188c_41e7, - 0xba50_02d3_c3ed_6bc7, - 0xd0a6_6a65_c752_8968, + 0xe961_2598_145b_b9dc, + 0xab69_a08e_f83d_8f77, + 0x1cf3_96aa_4de6_198d, + 0x5ace_6a51_1c10_894b, + 0xb7a5_d8a8_309a_2cb9, + 0x4cf4_5c94_4a9a_2237, + 0x55ec_edc2_b87b_b042, + 0x8355_e3a6_f617_70db, ]; for (input, expected) in inputs.iter().zip(expected) { @@ -446,56 +461,70 @@ mod test { } #[test] - fn 
hash_64bit_17_to_128_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[ - &[0; 17], &[0; 18], &[0; 19], &[0; 126], &[0; 127], &[0; 128], - ]; + fn hash_17_to_128_bytes() { + let lower_boundary = bytes![17, 18, 19]; + let chunk_boundary = bytes![31, 32, 33]; + let upper_boundary = bytes![126, 127, 128]; + + let inputs = lower_boundary + .iter() + .chain(chunk_boundary) + .chain(upper_boundary); let expected = [ - 0xc291_5ca0_df7a_d4c1, - 0xff78_21dd_f836_d020, - 0x8711_2824_6eb4_52b8, - 0x3133_805e_2401_c842, - 0x759e_ea08_c3b7_7cae, - 0x093c_29f2_7ecf_cf21, + // lower_boundary + 0x9ef3_41a9_9de3_7328, + 0xf691_2490_d4c0_eed5, + 0x60e7_2614_3cf5_0312, + // chunk_boundary + 0x4f36_db8e_4df3_78fd, + 0x3523_581f_e96e_4c05, + 0xe68c_56ba_8899_1e58, + // upper_boundary + 0x6c2a_9eb7_459c_dc61, + 0x120b_9787_f842_5f2f, + 0x85c6_174c_7ff4_c46b, ]; - for (input, expected) in inputs.iter().zip(expected) { + for (input, expected) in inputs.zip(expected) { let hash = XxHash3_64::oneshot(input); assert_eq!(hash, expected, "input was {input:?}"); } } #[test] - fn hash_64bit_129_to_240_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[ - &[0; 129], &[0; 130], &[0; 131], &[0; 238], &[0; 239], &[0; 240], - ]; + fn hash_129_to_240_bytes() { + let lower_boundary = bytes![129, 130, 131]; + let upper_boundary = bytes![238, 239, 240]; + + let inputs = lower_boundary.iter().chain(upper_boundary); let expected = [ - 0x37f7_943e_b2f5_1359, - 0x9cc8_599a_c6e3_f7c5, - 0x9a3c_cf6f_257e_b24d, - 0xb980_bcaf_ae82_6b6a, - 0xf01b_b3be_cb26_4837, - 0x053f_0744_4f70_da08, + // lower_boundary + 0xec76_42b4_31ba_3e5a, + 0x4d32_24b1_0090_8a87, + 0xe57f_7ea6_741f_e3a0, + // upper_boundary + 0x3044_9a0b_4899_dee9, + 0x972b_14e3_c46f_214b, + 0x375a_384d_957f_e865, ]; - for (input, expected) in inputs.iter().zip(expected) { + for (input, expected) in inputs.zip(expected) { let hash = XxHash3_64::oneshot(input); assert_eq!(hash, expected, "input was {input:?}"); } } #[test] - fn 
hash_64bit_240_plus_bytes_matches_c_implementation() { - let inputs: &[&[u8]] = &[&[0; 241], &[0; 242], &[0; 243], &[0; 244]]; + fn hash_240_plus_bytes() { + let inputs = bytes![241, 242, 243, 244]; let expected = [ - 0x5c5b_5d5d_40c5_9ce3, - 0xd619_7ac3_0eb7_e67b, - 0x6a04_3c8a_cf2e_dfe5, - 0x83cf_eefc_38e1_35af, + 0x02e8_cd95_421c_6d02, + 0xddcb_33c4_9405_1832, + 0x8835_f952_9193_e3dc, + 0xbc17_c91e_c3cf_8d7f, ]; for (input, expected) in inputs.iter().zip(expected) { From 4c8b99e5e5dd33d020f99da5a275854c1be82bb2 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 5 Jul 2024 13:33:01 -0400 Subject: [PATCH 047/166] xxh3 --- src/xxhash3_64.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index e59b799b7..52be0f04c 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -87,8 +87,8 @@ impl XxHash3_64 { } 9..=16 => { - let input_first: u64 = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last: u64 = unsafe { + let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last = unsafe { input .as_ptr() .add(input.len()) @@ -99,12 +99,10 @@ impl XxHash3_64 { let secret_words = unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; - let low: u64 = - ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; - let high: u64 = - ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; - let mul_result: u128 = low.into_u128().wrapping_mul(high.into_u128()); - let value: u64 = input + let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; + let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; + let mul_result = low.into_u128().wrapping_mul(high.into_u128()); + let value = input .len() .into_u64() .wrapping_add(low.swap_bytes()) @@ -115,7 +113,7 @@ impl XxHash3_64 { } 17..=128 => { - let mut acc: u64 = input.len().into_u64().wrapping_mul(PRIME64_1); 
+ let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let num_rounds = ((input.len() - 1) >> 5) + 1; @@ -301,7 +299,7 @@ fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { } fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u64; 8], secret: &[u8]) { - let n_full_stripes: usize = (block.len() - 1) / 64; + let n_full_stripes = (block.len() - 1) / 64; for n in 0..n_full_stripes { let stripe = unsafe { &*block.as_ptr().add(n * 64).cast::<[u64; 8]>() }; accumulate(acc, stripe, secret, n * 8); @@ -311,10 +309,10 @@ fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u64; 8], secret: fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; - let mut result: u64 = init_value; + let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result - let mul_result: u128 = { + let mul_result = { let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); a.wrapping_mul(b) From 48683cacbd057b7d2179c22222edc95c4dc6684d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 5 Jul 2024 16:34:57 -0400 Subject: [PATCH 048/166] chunks --- src/xxhash3_64.rs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 52be0f04c..642140911 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -366,7 +366,7 @@ impl SliceBackport for [T] { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { assert_ne!(N, 0); let len = self.len() / N; - let (head, tail) = unsafe { self.split_at_unchecked(len) }; + let (head, tail) = unsafe { self.split_at_unchecked(len * N) }; let head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; (head, tail) } @@ -530,4 +530,33 @@ mod test { assert_eq!(hash, expected, "input was {input:?}"); } } + + #[test] + fn 
backported_as_chunks() { + let x = [1, 2, 3, 4, 5]; + + let (a, b) = x.bp_as_chunks::<1>(); + assert_eq!(a, &[[1], [2], [3], [4], [5]]); + assert_eq!(b, &[]); + + let (a, b) = x.bp_as_chunks::<2>(); + assert_eq!(a, &[[1, 2], [3, 4]]); + assert_eq!(b, &[5]); + + let (a, b) = x.bp_as_chunks::<3>(); + assert_eq!(a, &[[1, 2, 3]]); + assert_eq!(b, &[4, 5]); + + let (a, b) = x.bp_as_chunks::<4>(); + assert_eq!(a, &[[1, 2, 3, 4]]); + assert_eq!(b, &[5]); + + let (a, b) = x.bp_as_chunks::<5>(); + assert_eq!(a, &[[1, 2, 3, 4, 5]]); + assert_eq!(b, &[]); + + let (a, b) = x.bp_as_chunks::<6>(); + assert_eq!(a, &[] as &[[i32; 6]]); + assert_eq!(b, &[1, 2, 3, 4, 5]); + } } From 2934e73d6ca9c9ddec0c9876df249804e3962a9f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 13:32:58 -0400 Subject: [PATCH 049/166] moar --- src/xxhash3_64.rs | 107 ++++++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 36 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 642140911..9564bd4c6 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -32,8 +32,10 @@ const DEFAULT_SECRET: [u8; 192] = [ pub struct XxHash3_64; +type Stripe = [u64; 8]; + impl XxHash3_64 { - #[inline] + #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { let seed = 0; let secret = DEFAULT_SECRET; @@ -117,19 +119,15 @@ impl XxHash3_64 { let num_rounds = ((input.len() - 1) >> 5) + 1; - // TODO: use some chunks - let mut ff = input; - let mut rr = input; - - for i in 0..num_rounds { - let (ffc, ffn) = ff.split_first_chunk().unwrap(); - let (rrn, rrc) = rr.split_last_chunk().unwrap(); + let (fwd, _) = input.bp_as_chunks(); + let (_, bwd) = input.bp_as_rchunks(); - acc = acc.wrapping_add(mix_step(ffc, &secret, i * 32, seed)); - acc = acc.wrapping_add(mix_step(rrc, &secret, i * 32 + 16, seed)); + let fwd = fwd.iter(); + let bwd = bwd.iter().rev(); - ff = ffn; - rr = rrn; + for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { + acc = 
acc.wrapping_add(mix_step(fwd_chunk, &secret, i * 32, seed)); + acc = acc.wrapping_add(mix_step(bwd_chunk, &secret, i * 32 + 16, seed)); } avalanche(acc) @@ -163,24 +161,23 @@ impl XxHash3_64 { PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, ]; - let secret_length = secret.len(); - let stripes_per_block = (secret_length - 64) / 8; + let stripes_per_block = (secret.len() - 64) / 8; let block_size = 64 * stripes_per_block; - let mut cc = input.chunks(block_size).fuse(); + let mut blocks = input.chunks(block_size).fuse(); + let last_block = blocks.next_back().unwrap(); - let last_block = cc.next_back().unwrap(); - - for block in cc { + for block in blocks { round(&mut acc, block, &secret); } let last_stripe = unsafe { - &*input + input .as_ptr() .add(input.len()) - .sub(mem::size_of::<[u64; 8]>()) - .cast::<[u64; 8]>() + .sub(mem::size_of::()) + .cast::() + .read_unaligned() }; last_round(&mut acc, last_block, last_stripe, &secret); @@ -212,6 +209,7 @@ fn avalanche_xxh64(mut x: u64) -> u64 { x } +#[inline] fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> u64 { // TODO: Should these casts / reads happen outside this function? let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; @@ -252,9 +250,11 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> // } // Step 2-1. Process stripes in the block -fn accumulate(acc: &mut [u64; 8], stripe: &[u64; 8], secret: &[u8], secret_offset: usize) { +#[inline] +fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: usize) { // TODO: Should these casts / reads happen outside this function? 
- let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; + let secret = &secret[secret_offset..]; + let secret_words = unsafe { secret.as_ptr().cast::<[u64; 8]>().read_unaligned() }; for i in 0..8 { let value = stripe[i] ^ secret_words[i]; @@ -268,14 +268,16 @@ fn accumulate(acc: &mut [u64; 8], stripe: &[u64; 8], secret: &[u8], secret_offse } } +#[inline] fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { - let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::<[u64; 8]>() }>(); + let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::() }>(); for (n, stripe) in stripes.iter().enumerate() { - let stripe = unsafe { &*stripe.as_ptr().cast() }; + let stripe = unsafe { stripe.as_ptr().cast::().read_unaligned() }; accumulate(acc, stripe, secret, n * 8); } } +#[inline] fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { let secret_words = unsafe { secret @@ -293,22 +295,25 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { } } +#[inline] fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { round_accumulate(acc, block, secret); round_scramble(acc, secret); } -fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u64; 8], secret: &[u8]) { +#[inline] +fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u8]) { let n_full_stripes = (block.len() - 1) / 64; for n in 0..n_full_stripes { - let stripe = unsafe { &*block.as_ptr().add(n * 64).cast::<[u64; 8]>() }; + let stripe = unsafe { block.as_ptr().add(n * 64).cast::().read_unaligned() }; accumulate(acc, stripe, secret, n * 8); } accumulate(acc, last_stripe, secret, secret.len() - 71); } +#[inline] fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { - let secret_words = unsafe { &*secret.as_ptr().add(secret_offset).cast::<[u64; 8]>() }; + let secret_words = unsafe { secret.as_ptr().add(secret_offset).cast::<[u64; 8]>().read_unaligned() }; let mut result = init_value; for i in 
0..4 { // 64-bit by 64-bit multiplication to 128-bit full result @@ -359,7 +364,7 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - // fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } impl SliceBackport for [T] { @@ -371,13 +376,13 @@ impl SliceBackport for [T] { (head, tail) } - // fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { - // assert_ne!(N, 0); - // let len = self.len() / N; - // let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; - // let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; - // (head, tail) - // } + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { + assert_ne!(N, 0); + let len = self.len() / N; + let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; + (head, tail) + } } #[cfg(test)] @@ -407,6 +412,7 @@ mod test { #[test] fn hash_1_to_3_bytes() { let inputs = bytes![1, 2, 3]; + let expected = [ 0xc44b_dff4_074e_ecdb, 0xd664_5fc3_051a_9457, @@ -559,4 +565,33 @@ mod test { assert_eq!(a, &[] as &[[i32; 6]]); assert_eq!(b, &[1, 2, 3, 4, 5]); } + + #[test] + fn backported_as_rchunks() { + let x = [1, 2, 3, 4, 5]; + + let (a, b) = x.bp_as_rchunks::<1>(); + assert_eq!(a, &[]); + assert_eq!(b, &[[1], [2], [3], [4], [5]]); + + let (a, b) = x.bp_as_rchunks::<2>(); + assert_eq!(a, &[1]); + assert_eq!(b, &[[2, 3], [4, 5]]); + + let (a, b) = x.bp_as_rchunks::<3>(); + assert_eq!(a, &[1, 2]); + assert_eq!(b, &[[3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<4>(); + assert_eq!(a, &[1]); + assert_eq!(b, &[[2, 3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<5>(); + assert_eq!(a, &[]); + assert_eq!(b, &[[1, 2, 3, 4, 5]]); + + let (a, b) = x.bp_as_rchunks::<6>(); + assert_eq!(a, &[1, 2, 3, 4, 5]); + assert_eq!(b, &[] as &[[i32; 6]]); + } } From 7b233ea7694b774448cb1b0d579655bfdde18d23 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: 
Tue, 9 Jul 2024 14:24:56 -0400 Subject: [PATCH 050/166] fmt --- src/xxhash3_64.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 9564bd4c6..0c77f7f11 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -259,12 +259,11 @@ fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: for i in 0..8 { let value = stripe[i] ^ secret_words[i]; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); - acc[i] = acc[i].wrapping_add( - value - .lower_half() - .into_u64() - .wrapping_mul(value.upper_half().into_u64()), - ); + acc[i] = acc[i].wrapping_add({ + let a = value.lower_half().into_u64(); + let b = value.upper_half().into_u64(); + a.wrapping_mul(b) + }); } } From 4116f6b901a207ac6b05c8f356977e5544692371 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 14:55:34 -0400 Subject: [PATCH 051/166] recover --- src/xxhash3_64.rs | 55 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 0c77f7f11..340f6bb76 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -268,11 +268,34 @@ fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: } #[inline] -fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { +fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? 
+ let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = acc[i].wrapping_add({ + let a = value.lower_half().into_u64(); + let b = value.upper_half().into_u64(); + a.wrapping_mul(b) + }); + } +} + +#[inline] +fn round_accumulate(acc: &mut [u64; 8], block: &[u8], mut secret: &[u8]) { let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::() }>(); - for (n, stripe) in stripes.iter().enumerate() { - let stripe = unsafe { stripe.as_ptr().cast::().read_unaligned() }; - accumulate(acc, stripe, secret, n * 8); + + let secrets = iter::from_fn(|| { + let (c, _) = secret.split_first_chunk()?; + secret = &secret[8..]; + Some(c) + }); + + for (stripe, secret) in stripes.iter().zip(secrets) { + accumulate_hot(acc, stripe, secret); } } @@ -312,7 +335,13 @@ fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u #[inline] fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { - let secret_words = unsafe { secret.as_ptr().add(secret_offset).cast::<[u64; 8]>().read_unaligned() }; + let secret_words = unsafe { + secret + .as_ptr() + .add(secret_offset) + .cast::<[u64; 8]>() + .read_unaligned() + }; let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result @@ -420,7 +449,7 @@ mod test { for (input, expected) in inputs.iter().zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } @@ -438,7 +467,7 @@ mod test { for (input, expected) in inputs.iter().zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } @@ -459,7 +488,7 @@ mod test { for (input, 
expected) in inputs.iter().zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } @@ -491,7 +520,7 @@ mod test { for (input, expected) in inputs.zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } @@ -515,24 +544,26 @@ mod test { for (input, expected) in inputs.zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] fn hash_240_plus_bytes() { - let inputs = bytes![241, 242, 243, 244]; + let inputs = bytes![241, 242, 243, 244, 1024, 10240]; let expected = [ 0x02e8_cd95_421c_6d02, 0xddcb_33c4_9405_1832, 0x8835_f952_9193_e3dc, 0xbc17_c91e_c3cf_8d7f, + 0xe5d7_8baf_a45b_2aa5, + 0xbcd6_3266_df6e_2244, ]; for (input, expected) in inputs.iter().zip(expected) { let hash = XxHash3_64::oneshot(input); - assert_eq!(hash, expected, "input was {input:?}"); + assert_eq!(hash, expected, "input was {} bytes", input.len()); } } From 03683aaafcf44f7eb80864601fef6fba3b95ac9a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 15:07:53 -0400 Subject: [PATCH 052/166] little helper --- Cargo.toml | 1 + asmasm/Cargo.toml | 7 +++++++ asmasm/src/main.rs | 12 ++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 asmasm/Cargo.toml create mode 100644 asmasm/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 99b104214..899e3b220 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [workspace] members = [ + "asmasm", "compare", "renu-sum", "xx_hash-sys", diff --git a/asmasm/Cargo.toml b/asmasm/Cargo.toml new file mode 100644 index 000000000..7134d8f83 --- /dev/null +++ b/asmasm/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "asmasm" +version = "0.1.0" +edition = "2021" + 
+[dependencies] +xx-renu = { path = ".." } diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs new file mode 100644 index 000000000..7e130d879 --- /dev/null +++ b/asmasm/src/main.rs @@ -0,0 +1,12 @@ +use std::{hint::black_box, time::Instant}; +use xx_renu::xxhash3_64::XxHash3_64; + +fn main() { + let filename = std::env::args().nth(1).expect("filename"); + let file = std::fs::read(filename).expect("read"); + let start = Instant::now(); + let hash = XxHash3_64::oneshot(&file); + let elapsed = start.elapsed(); + black_box(hash); + eprintln!("{elapsed:?}"); +} From a2455467bec1c89d28b0e8cec0c2139948044fc7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 15:08:11 -0400 Subject: [PATCH 053/166] bencha --- compare/benches/benchmark.rs | 38 +++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 2bce74e6c..bf0c0b821 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -166,5 +166,41 @@ fn half_sizes(max: usize) -> impl Iterator { iter::successors(Some(max), |&v| if v == 1 { None } else { Some(v / 2) }) } +mod xxhash3_64 { + use super::*; + + fn oneshot(c: &mut Criterion) { + let (seed, data) = gen_data(BIG_DATA_SIZE); + let mut g = c.benchmark_group("xxhash3_64/oneshot"); + + for size in [data.len()] { + //half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE)} { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("impl-c/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = c::XxHash3_64::oneshot(data); + black_box(hash); + }) + }); + + let id = format!("impl-rust/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = rust::XxHash3_64::oneshot(data); + black_box(hash); + }) + }); + } + + g.finish(); + } + + criterion_group!(benches, oneshot); +} + criterion_group!(benches, tiny_data, oneshot, streaming); 
-criterion_main!(benches); + +criterion_main!(benches, xxhash3_64::benches); From 460904a3992a14412dc86f6d6bdfa8ba9a612884 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 15:08:54 -0400 Subject: [PATCH 054/166] moar --- src/xxhash3_64.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 340f6bb76..c616ff174 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ -#![allow(missing_docs, dead_code, non_snake_case)] +#![allow(missing_docs)] -use core::{mem, slice}; +use core::{mem, slice, iter}; use crate::{IntoU128, IntoU32, IntoU64}; From 59d71b4e97119bcd325145f543a31b5b9fb1147e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 9 Jul 2024 16:07:48 -0400 Subject: [PATCH 055/166] moresafe --- src/xxhash3_64.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c616ff174..ffc7a6aee 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -301,19 +301,14 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], mut secret: &[u8]) { #[inline] fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let secret_words = unsafe { - secret - .as_ptr() - .add(secret.len()) - .sub(mem::size_of::<[u64; 8]>()) - .cast::<[u64; 8]>() - .read_unaligned() - }; - - for i in 0..8 { - acc[i] ^= acc[i] >> 47; - acc[i] ^= secret_words[i]; - acc[i] = acc[i].wrapping_mul(PRIME32_1); + let last = secret.last_chunk::<{mem::size_of::<[u8; 64]>()}>().unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); } } From 73ad587210f719ab6647851540f19f32368f5422 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 11:10:39 -0400 Subject: [PATCH 056/166] reorg --- src/xxhash3_64.rs | 430 
++++++++++++++++++++++++---------------------- 1 file changed, 225 insertions(+), 205 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ffc7a6aee..b0a13d9bd 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -use core::{mem, slice, iter}; +use core::{mem, slice}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -39,174 +39,141 @@ impl XxHash3_64 { pub fn oneshot(input: &[u8]) -> u64 { let seed = 0; let secret = DEFAULT_SECRET; + let secret = &secret[..]; match input.len() { - 0 => { - let secret_words = - unsafe { secret.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() }; - avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) - } - - 1..=3 => { - let input_length = input.len() as u8; // OK as we checked that the length fits - - let combined = input[input.len() - 1].into_u32() - | input_length.into_u32() << 8 - | input[0].into_u32() << 16 - | input[input.len() >> 1].into_u32() << 24; - - let secret_words = unsafe { secret.as_ptr().cast::<[u32; 2]>().read_unaligned() }; - let value = - ((secret_words[0] ^ secret_words[1]).into_u64() + seed) ^ combined.into_u64(); - - // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result." 
- avalanche_xxh64(value) - } - - 4..=8 => { - let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; - let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); - - let secret_words = - unsafe { secret.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() }; - let combined = input_last.into_u64() | (input_first.into_u64() << 32); - - let mut value = ((secret_words[0] ^ secret_words[1]) - modified_seed) ^ combined; - value ^= value.rotate_left(49) ^ value.rotate_left(24); - value = value.wrapping_mul(PRIME_MX2); - value ^= (value >> 35).wrapping_add(input.len().into_u64()); - value = value.wrapping_mul(PRIME_MX2); - value ^= value >> 28; - value - } - - 9..=16 => { - let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; - - let secret_words = - unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; - let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; - let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; - let mul_result = low.into_u128().wrapping_mul(high.into_u128()); - let value = input - .len() - .into_u64() - .wrapping_add(low.swap_bytes()) - .wrapping_add(high) - .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); - - avalanche(value) - } - - 17..=128 => { - let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - - let num_rounds = ((input.len() - 1) >> 5) + 1; - - let (fwd, _) = input.bp_as_chunks(); - let (_, bwd) = input.bp_as_rchunks(); - - let fwd = fwd.iter(); - let bwd = bwd.iter().rev(); - - for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { - acc = acc.wrapping_add(mix_step(fwd_chunk, &secret, i * 32, seed)); - acc = 
acc.wrapping_add(mix_step(bwd_chunk, &secret, i * 32 + 16, seed)); - } - - avalanche(acc) - } - - 129..=240 => { - let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - - let (head, _tail) = input.bp_as_chunks(); - let mut head = head.iter(); - - for (i, chunk) in head.by_ref().take(8).enumerate() { - acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); - } - - acc = avalanche(acc); - - for (i, chunk) in head.enumerate() { - acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16 + 3, seed)); - } - - acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), &secret, 119, seed)); - - avalanche(acc) - } - - _ => { - #[rustfmt::skip] - let mut acc = [ - PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, - PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, - ]; - - let stripes_per_block = (secret.len() - 64) / 8; - let block_size = 64 * stripes_per_block; - - let mut blocks = input.chunks(block_size).fuse(); - let last_block = blocks.next_back().unwrap(); - - for block in blocks { - round(&mut acc, block, &secret); - } - - let last_stripe = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; - - last_round(&mut acc, last_block, last_stripe, &secret); - - final_merge( - &mut acc, - input.len().into_u64().wrapping_mul(PRIME64_1), - &secret, - 11, - ) - } + 0 => impl_0_bytes(secret, seed), + + 1..=3 => impl_1_to_3_bytes(secret, seed, input), + + 4..=8 => impl_4_to_8_bytes(secret, seed, input), + + 9..=16 => impl_9_to_16_bytes(secret, seed, input), + + 17..=128 => impl_17_to_128_bytes(secret, seed, input), + + 129..=240 => impl_129_to_240_bytes(secret, seed, input), + + _ => impl_241_plus_bytes(secret, input), } } } -fn avalanche(mut x: u64) -> u64 { - x ^= x >> 37; - x = x.wrapping_mul(PRIME_MX1); - x ^= x >> 32; - x +#[inline] +fn impl_0_bytes(secret: &[u8], seed: u64) -> u64 { + let secret_words = unsafe { secret.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() }; + avalanche_xxh64(seed ^ 
secret_words[0] ^ secret_words[1]) } -fn avalanche_xxh64(mut x: u64) -> u64 { - x ^= x >> 33; - x = x.wrapping_mul(PRIME64_2); - x ^= x >> 29; - x = x.wrapping_mul(PRIME64_3); - x ^= x >> 32; - x +#[inline] +fn impl_1_to_3_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let input_length = input.len() as u8; // OK as we checked that the length fits + + let combined = input[input.len() - 1].into_u32() + | input_length.into_u32() << 8 + | input[0].into_u32() << 16 + | input[input.len() >> 1].into_u32() << 24; + + let secret_words = unsafe { secret.as_ptr().cast::<[u32; 2]>().read_unaligned() }; + + let value = ((secret_words[0] ^ secret_words[1]).into_u64() + seed) ^ combined.into_u64(); + + // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result." + avalanche_xxh64(value) +} + +#[inline] +fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + + let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); + let secret_words = unsafe { secret.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() }; + + let combined = input_last.into_u64() | (input_first.into_u64() << 32); + + let mut value = ((secret_words[0] ^ secret_words[1]) - modified_seed) ^ combined; + value ^= value.rotate_left(49) ^ value.rotate_left(24); + value = value.wrapping_mul(PRIME_MX2); + value ^= (value >> 35).wrapping_add(input.len().into_u64()); + value = value.wrapping_mul(PRIME_MX2); + value ^= value >> 28; + value +} + +#[inline] +fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; + let input_last = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + + let secret_words = unsafe 
{ secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; + let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; + let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; + let mul_result = low.into_u128().wrapping_mul(high.into_u128()); + let value = input + .len() + .into_u64() + .wrapping_add(low.swap_bytes()) + .wrapping_add(high) + .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); + + avalanche(value) +} + +#[inline] +fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); + + let num_rounds = ((input.len() - 1) >> 5) + 1; + + let (fwd, _) = input.bp_as_chunks(); + let (_, bwd) = input.bp_as_rchunks(); + + let fwd = fwd.iter(); + let bwd = bwd.iter().rev(); + + for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { + acc = acc.wrapping_add(mix_step(fwd_chunk, &secret, i * 32, seed)); + acc = acc.wrapping_add(mix_step(bwd_chunk, &secret, i * 32 + 16, seed)); + } + + avalanche(acc) +} + +#[inline] +fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { + let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); + + let (head, _tail) = input.bp_as_chunks(); + let mut head = head.iter(); + + for (i, chunk) in head.by_ref().take(8).enumerate() { + acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); + } + + acc = avalanche(acc); + + for (i, chunk) in head.enumerate() { + acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16 + 3, seed)); + } + + acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), &secret, 119, seed)); + + avalanche(acc) } #[inline] @@ -249,50 +216,55 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> // acc[1] = acc[1] ^ data_words1[0].wrapping_add(data_words1[1]); // } -// Step 2-1. 
Process stripes in the block -#[inline] -fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: usize) { - // TODO: Should these casts / reads happen outside this function? - let secret = &secret[secret_offset..]; - let secret_words = unsafe { secret.as_ptr().cast::<[u64; 8]>().read_unaligned() }; +#[rustfmt::skip] +const INITIAL_ACCUMULATORS: [u64; 8] = [ + PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, +]; - for i in 0..8 { - let value = stripe[i] ^ secret_words[i]; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); - acc[i] = acc[i].wrapping_add({ - let a = value.lower_half().into_u64(); - let b = value.upper_half().into_u64(); - a.wrapping_mul(b) - }); +#[inline(never)] +fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { + let mut acc = INITIAL_ACCUMULATORS; + + let stripes_per_block = (secret.len() - 64) / 8; + let block_size = 64 * stripes_per_block; + + let mut blocks = input.chunks(block_size).fuse(); + let last_block = blocks.next_back().unwrap(); + let last_stripe = unsafe { + input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::()) + .cast::() + .read_unaligned() + }; + + for block in blocks { + round(&mut acc, block, secret); } + + last_round(&mut acc, last_block, last_stripe, secret); + + final_merge( + &mut acc, + input.len().into_u64().wrapping_mul(PRIME64_1), + secret, + 11, + ) } #[inline] -fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? 
- let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = acc[i].wrapping_add({ - let a = value.lower_half().into_u64(); - let b = value.upper_half().into_u64(); - a.wrapping_mul(b) - }); - } +fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { + round_accumulate(acc, block, secret); + round_scramble(acc, secret); } #[inline] -fn round_accumulate(acc: &mut [u64; 8], block: &[u8], mut secret: &[u8]) { +fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::() }>(); - - let secrets = iter::from_fn(|| { - let (c, _) = secret.split_first_chunk()?; - secret = &secret[8..]; - Some(c) - }); + let secrets = + (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); for (stripe, secret) in stripes.iter().zip(secrets) { accumulate_hot(acc, stripe, secret); @@ -301,7 +273,9 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], mut secret: &[u8]) { #[inline] fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let last = secret.last_chunk::<{mem::size_of::<[u8; 64]>()}>().unwrap(); + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); let (last, _) = last.bp_as_chunks(); let last = last.iter().copied().map(u64::from_ne_bytes); @@ -312,13 +286,7 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { } } -#[inline] -fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { - round_accumulate(acc, block, secret); - round_scramble(acc, secret); -} - -#[inline] +#[inline(never)] fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u8]) { let n_full_stripes = (block.len() - 1) / 64; for n in 0..n_full_stripes { @@ -328,7 +296,7 @@ fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u accumulate(acc, 
last_stripe, secret, secret.len() - 71); } -#[inline] +#[inline(never)] fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { let secret_words = unsafe { secret @@ -350,6 +318,58 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset avalanche(result) } +#[inline(never)] +fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: usize) { + // TODO: Should these casts / reads happen outside this function? + let secret = &secret[secret_offset..]; + let secret_words = unsafe { secret.as_ptr().cast::<[u64; 8]>().read_unaligned() }; + + for i in 0..8 { + let value = stripe[i] ^ secret_words[i]; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); + acc[i] = acc[i].wrapping_add({ + let a = value.lower_half().into_u64(); + let b = value.upper_half().into_u64(); + a.wrapping_mul(b) + }); + } +} + +#[inline] +fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? 
+ let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = acc[i].wrapping_add({ + let a = value.lower_half().into_u64(); + let b = value.upper_half().into_u64(); + a.wrapping_mul(b) + }); + } +} + +#[inline] +fn avalanche(mut x: u64) -> u64 { + x ^= x >> 37; + x = x.wrapping_mul(PRIME_MX1); + x ^= x >> 32; + x +} + +#[inline] +fn avalanche_xxh64(mut x: u64) -> u64 { + x ^= x >> 33; + x = x.wrapping_mul(PRIME64_2); + x ^= x >> 29; + x = x.wrapping_mul(PRIME64_3); + x ^= x >> 32; + x +} + trait Halves { type Output; From b26aebc353f20cc5402e7d885ebd8fb7dc7f81fc Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 11:43:21 -0400 Subject: [PATCH 057/166] reorg --- src/xxhash3_64.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index b0a13d9bd..19c38fbb9 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -38,8 +38,7 @@ impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { let seed = 0; - let secret = DEFAULT_SECRET; - let secret = &secret[..]; + let secret = &DEFAULT_SECRET; match input.len() { 0 => impl_0_bytes(secret, seed), From 485428382e85b27777a66637f845c3f977e883f3 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 13:42:56 -0400 Subject: [PATCH 058/166] faster --- src/xxhash3_64.rs | 96 ++++++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 19c38fbb9..1a1b9f592 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -221,7 +221,7 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, ]; -#[inline(never)] +#[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; @@ -230,13 +230,12 @@ 
fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { let mut blocks = input.chunks(block_size).fuse(); let last_block = blocks.next_back().unwrap(); - let last_stripe = unsafe { - input + let last_stripe: &[u8; 64] = unsafe { + &*input .as_ptr() .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() + .sub(mem::size_of::<[u8; 64]>()) + .cast() }; for block in blocks { @@ -255,6 +254,8 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { #[inline] fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { +// unsafe { core::arch::aarch64::_prefetch(block.as_ptr().cast(), _PREFETCH_READ, _PREFETCH_LOCALITY3) }; + round_accumulate(acc, block, secret); round_scramble(acc, secret); } @@ -266,7 +267,9 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); for (stripe, secret) in stripes.iter().zip(secrets) { - accumulate_hot(acc, stripe, secret); + // todo cast to bigger to specify how much to fetch? 
+ unsafe { core::arch::aarch64::_prefetch(stripe.as_ptr().cast(), _PREFETCH_READ, _PREFETCH_LOCALITY3) }; + accumulate(acc, stripe, secret); } } @@ -285,17 +288,27 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { } } -#[inline(never)] -fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: Stripe, secret: &[u8]) { - let n_full_stripes = (block.len() - 1) / 64; - for n in 0..n_full_stripes { - let stripe = unsafe { block.as_ptr().add(n * 64).cast::().read_unaligned() }; - accumulate(acc, stripe, secret, n * 8); +#[inline] +fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { + // Accumulation steps are run for the stripes in the last block, + // except for the last stripe (whether it is full or not) + let stripes = match block.bp_as_chunks() { + ([stripes @ .., _last], []) => stripes, + (stripes, _last) => stripes, + }; + let secrets = + (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); + + for (stripe, secret) in stripes.iter().zip(secrets) { + accumulate(acc, stripe, secret); } - accumulate(acc, last_stripe, secret, secret.len() - 71); + + let q = &secret[secret.len() - 71..]; + let q: &[u8; 64] = unsafe { &*q.as_ptr().cast() }; + accumulate(acc, last_stripe, q); } -#[inline(never)] +#[inline] fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { let secret_words = unsafe { secret @@ -317,25 +330,8 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset avalanche(result) } -#[inline(never)] -fn accumulate(acc: &mut [u64; 8], stripe: Stripe, secret: &[u8], secret_offset: usize) { - // TODO: Should these casts / reads happen outside this function? 
- let secret = &secret[secret_offset..]; - let secret_words = unsafe { secret.as_ptr().cast::<[u64; 8]>().read_unaligned() }; - - for i in 0..8 { - let value = stripe[i] ^ secret_words[i]; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe[i]); - acc[i] = acc[i].wrapping_add({ - let a = value.lower_half().into_u64(); - let b = value.upper_half().into_u64(); - a.wrapping_mul(b) - }); - } -} - #[inline] -fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { +fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { for i in 0..8 { // TODO: Should these casts / reads happen outside this function? let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; @@ -343,14 +339,38 @@ fn accumulate_hot(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = acc[i].wrapping_add({ - let a = value.lower_half().into_u64(); - let b = value.upper_half().into_u64(); - a.wrapping_mul(b) - }); + + acc[i] = multiply_and_add(value, value >> 32, acc[i]); } } +#[inline] +#[cfg(not(target_arch = "aarch64"))] +fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + acc.wrapping_add({ + let a = (lhs as u32).into_u64(); + let b = (rhs as u32).into_u64(); + a.wrapping_mul(b) + }) +} + +#[inline] +// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 +#[cfg(target_arch = "aarch64")] +fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + let res; + + unsafe { asm!( + "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", + lhs = in(reg) lhs, + rhs = in(reg) rhs, + acc = in(reg) acc, + res = out(reg) res, + ) } + + res +} + #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; From 273e81fa3e7ad60f9c2dc0f8ce1ec8075f7a40da Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 13:47:39 -0400 Subject: [PATCH 059/166] faster --- src/xxhash3_64.rs | 29 +++++++++++++---------------- 1 file 
changed, 13 insertions(+), 16 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 1a1b9f592..024981860 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -use core::{mem, slice}; +use core::{mem, slice, arch::asm}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -254,8 +254,6 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { #[inline] fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { -// unsafe { core::arch::aarch64::_prefetch(block.as_ptr().cast(), _PREFETCH_READ, _PREFETCH_LOCALITY3) }; - round_accumulate(acc, block, secret); round_scramble(acc, secret); } @@ -267,8 +265,6 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); for (stripe, secret) in stripes.iter().zip(secrets) { - // todo cast to bigger to specify how much to fetch? - unsafe { core::arch::aarch64::_prefetch(stripe.as_ptr().cast(), _PREFETCH_READ, _PREFETCH_LOCALITY3) }; accumulate(acc, stripe, secret); } } @@ -339,14 +335,13 @@ fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - - acc[i] = multiply_and_add(value, value >> 32, acc[i]); + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); } } #[inline] #[cfg(not(target_arch = "aarch64"))] -fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { +fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { acc.wrapping_add({ let a = (lhs as u32).into_u64(); let b = (rhs as u32).into_u64(); @@ -357,16 +352,18 @@ fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 #[cfg(target_arch = "aarch64")] -fn multiply_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { +fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> 
u64 { let res; - unsafe { asm!( - "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", - lhs = in(reg) lhs, - rhs = in(reg) rhs, - acc = in(reg) acc, - res = out(reg) res, - ) } + unsafe { + asm!( + "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", + lhs = in(reg) lhs, + rhs = in(reg) rhs, + acc = in(reg) acc, + res = out(reg) res, + ) + } res } From 010cc9667d1d50a08a823f25d6923699f026f3d5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 14:30:14 -0400 Subject: [PATCH 060/166] asm compare for C --- asmasm/Cargo.toml | 1 + asmasm/src/main.rs | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/asmasm/Cargo.toml b/asmasm/Cargo.toml index 7134d8f83..511b782de 100644 --- a/asmasm/Cargo.toml +++ b/asmasm/Cargo.toml @@ -5,3 +5,4 @@ edition = "2021" [dependencies] xx-renu = { path = ".." } +xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index 7e130d879..e515bcdfc 100644 --- a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,12 +1,25 @@ use std::{hint::black_box, time::Instant}; +use xx_hash_sys::XxHash3_64 as C; use xx_renu::xxhash3_64::XxHash3_64; fn main() { let filename = std::env::args().nth(1).expect("filename"); + let use_c = std::env::args() + .nth(2) + .map_or(false, |a| a.eq_ignore_ascii_case("C")); let file = std::fs::read(filename).expect("read"); - let start = Instant::now(); - let hash = XxHash3_64::oneshot(&file); - let elapsed = start.elapsed(); - black_box(hash); - eprintln!("{elapsed:?}"); + + if use_c { + let start = Instant::now(); + let hash = C::oneshot(&file); + let elapsed = start.elapsed(); + black_box(hash); + eprintln!("C {elapsed:?}"); + } else { + let start = Instant::now(); + let hash = XxHash3_64::oneshot(&file); + let elapsed = start.elapsed(); + black_box(hash); + eprintln!("Rust {elapsed:?}"); + } } From baa97991d527f1c3a369faf9dc7788f55d122a4e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 10 Jul 2024 14:43:02 -0400 Subject: [PATCH 061/166] 
clean --- src/xxhash3_64.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 024981860..a39043574 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -use core::{mem, slice, arch::asm}; +use core::{mem, slice}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -146,8 +146,8 @@ fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let bwd = bwd.iter().rev(); for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { - acc = acc.wrapping_add(mix_step(fwd_chunk, &secret, i * 32, seed)); - acc = acc.wrapping_add(mix_step(bwd_chunk, &secret, i * 32 + 16, seed)); + acc = acc.wrapping_add(mix_step(fwd_chunk, secret, i * 32, seed)); + acc = acc.wrapping_add(mix_step(bwd_chunk, secret, i * 32 + 16, seed)); } avalanche(acc) @@ -161,16 +161,16 @@ fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let mut head = head.iter(); for (i, chunk) in head.by_ref().take(8).enumerate() { - acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16, seed)); + acc = acc.wrapping_add(mix_step(chunk, secret, i * 16, seed)); } acc = avalanche(acc); for (i, chunk) in head.enumerate() { - acc = acc.wrapping_add(mix_step(chunk, &secret, i * 16 + 3, seed)); + acc = acc.wrapping_add(mix_step(chunk, secret, i * 16 + 3, seed)); } - acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), &secret, 119, seed)); + acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), secret, 119, seed)); avalanche(acc) } @@ -342,17 +342,19 @@ fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { #[inline] #[cfg(not(target_arch = "aarch64"))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - acc.wrapping_add({ - let a = (lhs as u32).into_u64(); - let b = (rhs as u32).into_u64(); - a.wrapping_mul(b) - }) + let lhs = (lhs as u32).into_u64(); + let rhs = (rhs as u32).into_u64(); + + let 
product = lhs.wrapping_mul(rhs); + acc.wrapping_add(product) } #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + use core::arch::asm; + let res; unsafe { From d680cd95440cd5d405c8c380033d93b37fa11923 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 12:19:55 -0400 Subject: [PATCH 062/166] doc cfgs --- Cargo.toml | 3 +++ src/lib.rs | 9 +++++++++ src/xxhash32.rs | 3 +++ src/xxhash64.rs | 3 +++ 4 files changed, 18 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 899e3b220..57d98e056 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,3 +27,6 @@ serde = { version = "1.0.0", optional = true, default-features = false, features [dev-dependencies] serde_json = "1.0.117" + +[package.metadata.docs.rs] +all-features = true diff --git a/src/lib.rs b/src/lib.rs index f060aeb8d..597fb5d48 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,24 +67,33 @@ #![no_std] #![deny(rust_2018_idioms)] #![deny(missing_docs)] +#![cfg_attr(docsrs, feature(doc_cfg))] #[cfg(any(doc, test))] extern crate std; #[cfg(feature = "xxhash32")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] pub mod xxhash32; #[cfg(feature = "xxhash32")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] pub use xxhash32::Hasher as XxHash32; #[cfg(feature = "xxhash64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] pub mod xxhash64; #[cfg(feature = "xxhash64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] pub use xxhash64::Hasher as XxHash64; +#[cfg(feature = "xxhash3_64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub mod xxhash3_64; +#[cfg(feature = "xxhash3_64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub use xxhash3_64::XxHash3_64; trait IntoU32 { diff --git a/src/xxhash32.rs b/src/xxhash32.rs index d5f8a272e..097a4c129 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ 
-497,6 +497,7 @@ mod test { } #[cfg(feature = "random")] +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] mod random_impl { use super::*; @@ -540,9 +541,11 @@ mod random_impl { } #[cfg(feature = "random")] +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] pub use random_impl::*; #[cfg(feature = "serialize")] +#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))] mod serialize_impl { use serde::{Deserialize, Serialize}; diff --git a/src/xxhash64.rs b/src/xxhash64.rs index fcfa55642..f488e429f 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -486,6 +486,7 @@ mod test { } #[cfg(feature = "random")] +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] mod random_impl { use super::*; @@ -529,9 +530,11 @@ mod random_impl { } #[cfg(feature = "random")] +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] pub use random_impl::*; #[cfg(feature = "serialize")] +#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))] mod serialize_impl { use serde::{Deserialize, Serialize}; From fbe55dd268b4f5656d33800f1db17bea695de288 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 12:20:14 -0400 Subject: [PATCH 063/166] flag it --- Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 57d98e056..1a515e981 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64"] random = ["dep:rand"] @@ -20,6 +20,7 @@ serialize = ["dep:serde"] xxhash32 = [] xxhash64 = [] +xxhash3_64 = [] [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } From 8833e84ca278a3d0f8f4e6876a73bb2e52f5dc56 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 13:36:33 -0400 Subject: [PATCH 064/166] seed interface --- compare/src/lib.rs | 4 +- src/xxhash3_64.rs | 83 ++++++++++++++++++++++++++++++++++++------ xx_hash-sys/src/lib.rs | 9 +++++ 3 files 
changed, 82 insertions(+), 14 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 597a7254e..9b0e77d6d 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -277,8 +277,8 @@ mod xxhash3_64 { // } fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = c::XxHash3_64::oneshot(data); - let rust = rust::XxHash3_64::oneshot(data); + let native = c::XxHash3_64::oneshot_with_seed(seed, data); + let rust = rust::XxHash3_64::oneshot_with_seed(seed, data); prop_assert_eq!(native, rust); Ok(()) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index a39043574..79bb012a7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -37,24 +37,58 @@ type Stripe = [u64; 8]; impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { - let seed = 0; - let secret = &DEFAULT_SECRET; + impl_oneshot(&DEFAULT_SECRET, 0, input) + } - match input.len() { - 0 => impl_0_bytes(secret, seed), + #[inline(never)] + pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { + let secret = if seed != 0 && input.len() > 240 { + &derive_secret(seed) + } else { + &DEFAULT_SECRET + }; - 1..=3 => impl_1_to_3_bytes(secret, seed, input), + impl_oneshot(secret, seed, input) + } +} - 4..=8 => impl_4_to_8_bytes(secret, seed, input), +fn derive_secret(seed: u64) -> [u8; 192] { + let mut derived_secret = DEFAULT_SECRET; + let base = derived_secret.as_mut_ptr().cast::(); - 9..=16 => impl_9_to_16_bytes(secret, seed, input), + for i in 0..12 { + let a_p = unsafe { base.add(i * 2) }; + let b_p = unsafe { base.add(i * 2 + 1) }; - 17..=128 => impl_17_to_128_bytes(secret, seed, input), + let mut a = unsafe { a_p.read_unaligned() }; + let mut b = unsafe { b_p.read_unaligned() }; - 129..=240 => impl_129_to_240_bytes(secret, seed, input), + a = a.wrapping_add(seed); + b = b.wrapping_sub(seed); - _ => impl_241_plus_bytes(secret, input), - } + unsafe { a_p.write_unaligned(a) }; + unsafe { b_p.write_unaligned(b) }; + } + + derived_secret +} + +#[inline] +fn 
impl_oneshot(secret: &[u8; 192], seed: u64, input: &[u8]) -> u64 { + match input.len() { + 0 => impl_0_bytes(secret, seed), + + 1..=3 => impl_1_to_3_bytes(secret, seed, input), + + 4..=8 => impl_4_to_8_bytes(secret, seed, input), + + 9..=16 => impl_9_to_16_bytes(secret, seed, input), + + 17..=128 => impl_17_to_128_bytes(secret, seed, input), + + 129..=240 => impl_129_to_240_bytes(secret, seed, input), + + _ => impl_241_plus_bytes(secret, input), } } @@ -98,7 +132,11 @@ fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let combined = input_last.into_u64() | (input_first.into_u64() << 32); - let mut value = ((secret_words[0] ^ secret_words[1]) - modified_seed) ^ combined; + let mut value = { + let a = secret_words[0] ^ secret_words[1]; + let b = a.wrapping_sub(modified_seed); + b ^ combined + }; value ^= value.rotate_left(49) ^ value.rotate_left(24); value = value.wrapping_mul(PRIME_MX2); value ^= (value >> 35).wrapping_add(input.len().into_u64()); @@ -600,6 +638,27 @@ mod test { } } + #[test] + fn hash_with_seed() { + let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024]; + + let expected = [ + 0x4aed_e683_89c0_e311, + 0x78fc_079a_75aa_f3c0, + 0x1b73_06b8_9f25_4507, + 0x7df7_627f_d1f9_39b6, + 0x49ca_0fff_0950_1622, + 0x2bfd_caec_30ff_3000, + 0xf984_56bc_25be_0901, + 0x2483_9f0f_cdf4_d078, + ]; + + for (input, expected) in inputs.iter().zip(expected) { + let hash = XxHash3_64::oneshot_with_seed(0xdead_cafe, input); + assert_eq!(hash, expected, "input was {} bytes", input.len()); + } + } + #[test] fn backported_as_chunks() { let x = [1, 2, 3, 4, 5]; diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 28b2298b8..25ffc7ee1 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -131,6 +131,11 @@ pub struct XXH3_state_t { extern "C" { fn XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> 
XXH64_hash_t; fn XXH3_createState() -> *mut XXH3_state_t; fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; @@ -150,6 +155,10 @@ impl XxHash3_64 { unsafe { XXH3_64bits(data.as_ptr().cast(), data.len()) } } + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + pub fn with_seed() -> Self { let state = unsafe { let state = XXH3_createState(); From 5ebb61e326f1d8454860bdefcd52315174bc1288 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 13:53:38 -0400 Subject: [PATCH 065/166] secret interface --- compare/src/lib.rs | 14 ++++++++++++++ src/xxhash3_64.rs | 15 +++++++++++++-- xx_hash-sys/src/lib.rs | 10 ++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 9b0e77d6d..c369a9181 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -205,6 +205,7 @@ mod xxhash64 { mod xxhash3_64 { use proptest::{prelude::*, test_runner::TestCaseResult}; + use xx_renu::xxhash3_64::SECRET_MINIMUM_LENGTH; use std::hash::Hasher as _; use super::*; @@ -235,6 +236,11 @@ mod xxhash3_64 { oneshot_impl(seed, &data[offset..])?; } + #[test] + fn oneshot_with_a_secret(secret in prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024), data: Vec) { + oneshot_with_secret_impl(&secret, &data)?; + } + // #[test] // fn streaming_one_chunk(seed: u64, data: Vec) { // streaming_one_chunk_impl(seed, &data)?; @@ -284,6 +290,14 @@ mod xxhash3_64 { Ok(()) } + fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot_with_secret(secret, data); + let rust = rust::XxHash3_64::oneshot_with_secret(secret, data); + + prop_assert_eq!(native, rust); + Ok(()) + } + // fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { // let native = { // let mut hasher = c::XxHash64::with_seed(seed); diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 
79bb012a7..be34bf042 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -15,6 +15,8 @@ const PRIME64_5: u64 = 0x27D4EB2F165667C5; const PRIME_MX1: u64 = 0x165667919E3779F9; const PRIME_MX2: u64 = 0x9FB21C651E98DF25; +const DEFAULT_SEED: u64 = 0; + const DEFAULT_SECRET: [u8; 192] = [ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, @@ -30,6 +32,8 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; +pub const SECRET_MINIMUM_LENGTH: usize = 136; + pub struct XxHash3_64; type Stripe = [u64; 8]; @@ -37,7 +41,7 @@ type Stripe = [u64; 8]; impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { - impl_oneshot(&DEFAULT_SECRET, 0, input) + impl_oneshot(&DEFAULT_SECRET, DEFAULT_SEED, input) } #[inline(never)] @@ -50,8 +54,15 @@ impl XxHash3_64 { impl_oneshot(secret, seed, input) } + + #[inline(never)] + pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> u64 { + assert!(secret.len() >= SECRET_MINIMUM_LENGTH); // TODO: ERROR + impl_oneshot(secret, DEFAULT_SEED, input) + } } +#[inline] fn derive_secret(seed: u64) -> [u8; 192] { let mut derived_secret = DEFAULT_SECRET; let base = derived_secret.as_mut_ptr().cast::(); @@ -74,7 +85,7 @@ fn derive_secret(seed: u64) -> [u8; 192] { } #[inline] -fn impl_oneshot(secret: &[u8; 192], seed: u64, input: &[u8]) -> u64 { +fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { match input.len() { 0 => impl_0_bytes(secret, seed), diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 25ffc7ee1..40fcc6256 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -136,6 +136,12 @@ extern "C" { length: libc::size_t, seed: XXH64_hash_t, ) -> XXH64_hash_t; + fn XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const 
libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; fn XXH3_createState() -> *mut XXH3_state_t; fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; @@ -159,6 +165,10 @@ impl XxHash3_64 { unsafe { XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } } + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { XXH3_64bits_withSecret(data.as_ptr().cast(), data.len(), secret.as_ptr().cast(), secret.len()) } + } + pub fn with_seed() -> Self { let state = unsafe { let state = XXH3_createState(); From 215d2c381dcae28e2c271ed9ca5d1e700710abd6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 11 Jul 2024 14:53:21 -0400 Subject: [PATCH 066/166] build scalar and optimized and compare head-to-head --- compare/benches/benchmark.rs | 18 ++++---- compare/src/lib.rs | 2 +- xx_hash-sys/build.rs | 55 +++++++++++++++++++++-- xx_hash-sys/src/lib.rs | 87 ++++++++++++++++++++++++++++++++++-- 4 files changed, 145 insertions(+), 17 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index bf0c0b821..18a9415c6 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -173,25 +173,23 @@ mod xxhash3_64 { let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("xxhash3_64/oneshot"); - for size in [data.len()] { - //half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE)} { + for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); let id = format!("impl-c/size-{size:07}"); g.bench_function(id, |b| { - b.iter(|| { - let hash = c::XxHash3_64::oneshot(data); - black_box(hash); - }) + b.iter(|| c::XxHash3_64::oneshot_with_seed(seed, data)) + }); + + let id = format!("impl-c-scalar/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::ScalarXxHash3_64::oneshot_with_seed(seed, data)) }); let id = format!("impl-rust/size-{size:07}"); g.bench_function(id, |b| 
{ - b.iter(|| { - let hash = rust::XxHash3_64::oneshot(data); - black_box(hash); - }) + b.iter(|| rust::XxHash3_64::oneshot_with_seed(seed, data)) }); } diff --git a/compare/src/lib.rs b/compare/src/lib.rs index c369a9181..b85b1c167 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -205,8 +205,8 @@ mod xxhash64 { mod xxhash3_64 { use proptest::{prelude::*, test_runner::TestCaseResult}; - use xx_renu::xxhash3_64::SECRET_MINIMUM_LENGTH; use std::hash::Hasher as _; + use xx_renu::xxhash3_64::SECRET_MINIMUM_LENGTH; use super::*; diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index cdc31eb97..fb0ef2515 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -1,10 +1,59 @@ -use std::{env, path::PathBuf}; +use std::{env, fs, path::PathBuf, process::Command}; fn main() { let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); let base: PathBuf = base.into(); let xxhash = base.join("xxHash"); - println!("cargo::rustc-link-lib=static=xxhash"); - println!("cargo::rustc-link-search={}", xxhash.display()); + let out = env::var("OUT_DIR").expect("no OUT_DIR"); + let mut out = PathBuf::from(out); + out.push("xxhash"); + fs::create_dir_all(&out).expect("make it"); + + let make_cmd = || { + let mut c = Command::new("make"); + c.current_dir(&xxhash); + c + }; + + let s = make_cmd() + .arg("clean") + .status() + .expect("Could not run clean for scalar build"); + assert!(s.success(), "Scalar clean failed"); + + let s = make_cmd() + .arg("libxxhash.a") + .env( + "CFLAGS", + "-O3 -DXXH_VECTOR=XXH_SCALAR -DXXH_NAMESPACE=scalar_", + ) + .status() + .expect("Could not run scalar build"); + assert!(s.success(), "Scalar build failed"); + + let name = xxhash.join("libxxhash.a"); + let new = out.join("libxxhash_scalar.a"); + fs::copy(name, new).expect("Copy scalar"); + + let s = make_cmd() + .arg("clean") + .status() + .expect("Could not run clean for optimized build"); + assert!(s.success(), "Optimized clean failed"); + + let s = make_cmd() + .arg("libxxhash.a") + 
.status() + .expect("Could not run optimized build"); + assert!(s.success(), "Optimized build failed"); + + let name = xxhash.join("libxxhash.a"); + let new = out.join("libxxhash_optimized.a"); + fs::copy(name, new).expect("Copy scalar"); + + + println!("cargo::rustc-link-lib=static=xxhash_scalar"); + println!("cargo::rustc-link-lib=static=xxhash_optimized"); + println!("cargo::rustc-link-search={}", out.display()); } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 40fcc6256..a8ae42811 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -121,8 +121,6 @@ impl Drop for XxHash64 { // ---------- -// type XXH_hash_t = u64; - #[repr(C)] pub struct XXH3_state_t { _data: [u8; 0], @@ -166,7 +164,14 @@ impl XxHash3_64 { } pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { XXH3_64bits_withSecret(data.as_ptr().cast(), data.len(), secret.as_ptr().cast(), secret.len()) } + unsafe { + XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } } pub fn with_seed() -> Self { @@ -195,3 +200,79 @@ impl Drop for XxHash3_64 { assert_eq!(retval, XXH_OK); } } + +// ---------- + +extern "C" { + fn scalar_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn scalar_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn scalar_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn scalar_XXH3_createState() -> *mut XXH3_state_t; + fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn scalar_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn scalar_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn scalar_XXH3_freeState(state: *mut XXH3_state_t) -> 
XXH_errorcode; +} + +pub struct ScalarXxHash3_64(*mut XXH3_state_t); + +impl ScalarXxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { scalar_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { scalar_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + scalar_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = scalar_XXH3_createState(); + scalar_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { scalar_XXH3_64bits_digest(self.0) } + } +} + +impl Drop for ScalarXxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { scalar_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } +} From c6c9a129b34f143609c8d2a2b7978ad6ea95e0e8 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 14 Jul 2024 12:10:45 -0400 Subject: [PATCH 067/166] simd some --- src/xxhash3_64.rs | 94 +++++++++++++++++++++++++++++++++++++++----- xx_hash-sys/build.rs | 5 +-- 2 files changed, 87 insertions(+), 12 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index be34bf042..6905bef46 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -377,15 +377,82 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset #[inline] fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? 
- let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + use core::arch::aarch64::*; + + // unsafe { + // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(stripe.as_ptr().cast()); + // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(secret.as_ptr().cast()); + // } + + // eprintln!("{acc:x?}"); + // for i in 0..8 { + // // TODO: Should these casts / reads happen outside this function? + // let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + // let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + // eprintln!("{:x?}, {:x?}", stripe, secret); + + // let value = stripe ^ secret; + // acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + // acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + // } + + // We process 4x u64 at a time as that allows us to completely + // fill a `uint64x2_t` with useful values when performing the + // `vmull_{high_}u32`. 
+ let (acc2, _) = acc.bp_as_chunks_mut::<4>(); + for (i, acc) in acc2.into_iter().enumerate() { + unsafe { + let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); + let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); + let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); + let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); + let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); + let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + + let value_0 = veorq_u64(stripe_0, secret_0); + let value_1 = veorq_u64(stripe_1, secret_1); + + let parts_0 = vreinterpretq_u32_u64(value_0); + let parts_1 = vreinterpretq_u32_u64(value_1); + + let hi = vuzp1q_u32(parts_0, parts_1); + let lo = vuzp2q_u32(parts_0, parts_1); + + let product_0 = vmull_u32(vget_low_u32(hi), vget_low_u32(lo)); + let product_1 = vmull_high_u32(hi, lo); + + accv_0 = vaddq_u64(accv_0, product_0); + accv_1 = vaddq_u64(accv_1, product_1); + + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + accv_0 = vaddq_u64(accv_0, stripe_rot_0); + accv_1 = vaddq_u64(accv_1, stripe_rot_1); + + vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); + vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); + }; } + + + // Pseudo-SIMD + + // for (acc, (str, sec)) in acc.iter_mut().zip(stripe_x.into_iter().zip(secret_x)) { + // let value = str ^ sec; + // *acc = multiply_64_as_32_and_add(value, value >> 32, *acc); + // } + + // let mut stripe_x = stripe_x; + + // stripe_x.swap(0, 1); + // stripe_x.swap(2, 3); + // stripe_x.swap(4, 5); + // stripe_x.swap(6, 7); + + // for (acc, str) in acc.iter_mut().zip(stripe_x) { + // *acc = acc.wrapping_add(str); + // } } #[inline] @@ -474,6 +541,7 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); + fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } @@ -486,6 +554,14 @@ impl SliceBackport 
for [T] { (head, tail) } + fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { + assert_ne!(N, 0); + let len = self.len() / N; + let (head, tail) = unsafe { self.split_at_mut_unchecked(len * N) }; + let head = unsafe { slice::from_raw_parts_mut(head.as_mut_ptr().cast(), len) }; + (head, tail) + } + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { assert_ne!(N, 0); let len = self.len() / N; @@ -631,7 +707,7 @@ mod test { } #[test] - fn hash_240_plus_bytes() { + fn hash_241_plus_bytes() { let inputs = bytes![241, 242, 243, 244, 1024, 10240]; let expected = [ diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index fb0ef2515..417dedd70 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -33,7 +33,7 @@ fn main() { assert!(s.success(), "Scalar build failed"); let name = xxhash.join("libxxhash.a"); - let new = out.join("libxxhash_scalar.a"); + let new = out.join("libxxhash_scalar.a"); fs::copy(name, new).expect("Copy scalar"); let s = make_cmd() @@ -49,10 +49,9 @@ fn main() { assert!(s.success(), "Optimized build failed"); let name = xxhash.join("libxxhash.a"); - let new = out.join("libxxhash_optimized.a"); + let new = out.join("libxxhash_optimized.a"); fs::copy(name, new).expect("Copy scalar"); - println!("cargo::rustc-link-lib=static=xxhash_scalar"); println!("cargo::rustc-link-lib=static=xxhash_optimized"); println!("cargo::rustc-link-search={}", out.display()); From 843e7621c3129bdb2a3f26e019f0f1f8443ee94a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 14 Jul 2024 18:03:01 -0400 Subject: [PATCH 068/166] checkpoint simd scramble --- src/xxhash3_64.rs | 98 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 10 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6905bef46..61e3e05ed 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -320,17 +320,95 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { #[inline] fn round_scramble(acc: &mut [u64; 8], secret: 
&[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - let (last, _) = last.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); - } + + // let last = secret + // .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + // .unwrap(); + // let (last, _) = last.bp_as_chunks(); + // let last = last.iter().copied().map(u64::from_ne_bytes); + + // for (acc, secret) in acc.iter_mut().zip(last) { + // *acc ^= *acc >> 47; + // *acc ^= secret; + // *acc = acc.wrapping_mul(PRIME32_1); + // } + + unsafe { + use core::arch::aarch64::*; + + let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); + let (acc, _) = acc.bp_as_chunks_mut::<2>(); + for (i, acc) in acc.iter_mut().enumerate() { + let mut accv = vld1q_u64(acc.as_ptr()); + let secret = vld1q_u64(secret_base.add(i * 2)); + + let shifted = vshrq_n_u64::<47>(accv); + accv = veorq_u64(accv, shifted); + accv = veorq_u64(accv, secret); + + // let acc0 = vgetq_lane_u64::<0>(accv); + // let acc1 = vgetq_lane_u64::<1>(accv); + // let r0 = acc0.wrapping_mul(PRIME32_1); + // let r1 = acc1.wrapping_mul(PRIME32_1); + // eprintln!("expected = {r0:x}, {r1:x}"); + + let prime = vdupq_n_u32(PRIME32_1 as _); // opt: always 0 in high bits cause 32bit + + // eprintln!("acc = {accv:x?}"); + + let accv = vreinterpretq_u32_u64(accv); + + // eprintln!("acc = {accv:x?}"); + + // eprintln!("lo(acc) = {:x?} lo(pp) = {:x?}", vget_low_u32(accv), vget_low_u32(pp)); + + let lo = vmull_u32(vget_low_u32(accv), vget_low_u32(prime)); + let hi = vmull_high_u32(accv, prime); + + // eprintln!("lo = {lo:x?} hi = {hi:x?}"); + + let a = vuzp1q_u64(lo, hi); + let b = vuzp2q_u64(lo, hi); + + // eprintln!("a = {a:x?} b = {b:x?}"); + + let b = vshlq_n_u64::<32>(b); + + let s = vaddq_u64(a, b); + + // eprintln!("s = {s:x?}"); + // let accv = vreinterpretq_u64_u32(accv); + 
// eprintln!("acc = {accv:x?}"); + + let accv = s; + +// panic!(); + + vst1q_u64(acc.as_mut_ptr(), accv); + } + } + + // eprintln!("{acc:x?}"); + // eprintln!("----------"); + + + // scalar + // acc + // + // [23e3a8c41a04e6b, e0ab8aff41b10f66, + // fd0385440ae4def7, ac00b2db47f23b90, + // 60a911d92c86ff3b, ad3b37a550927c9c, + // 211896d1cfc9b1b9, 66ceedfabb78caeb] + // + // simd + // acc + // + // [b66e3311c60fb961, c8a474f8ebf44757, + // 6810423fba6d7ed0, d41b8185aab06f3, + // 9f012bd957bae2ba, b3ce30e7c301b27e, + // c2090074dc5d2070, dadf4f22e5e02bdd] + } #[inline] From 39ec48a758985aca92fc097dac59868170a553ad Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 15 Jul 2024 09:06:04 -0400 Subject: [PATCH 069/166] checkpoint simd scramble --- src/xxhash3_64.rs | 115 ++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 54 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 61e3e05ed..cc70bc29b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -347,68 +347,75 @@ fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { accv = veorq_u64(accv, shifted); accv = veorq_u64(accv, secret); - // let acc0 = vgetq_lane_u64::<0>(accv); - // let acc1 = vgetq_lane_u64::<1>(accv); - // let r0 = acc0.wrapping_mul(PRIME32_1); - // let r1 = acc1.wrapping_mul(PRIME32_1); - // eprintln!("expected = {r0:x}, {r1:x}"); + accv = neon::xx_vmulq_u32_u64(accv, PRIME32_1 as u32); - let prime = vdupq_n_u32(PRIME32_1 as _); // opt: always 0 in high bits cause 32bit - - // eprintln!("acc = {accv:x?}"); - - let accv = vreinterpretq_u32_u64(accv); - - // eprintln!("acc = {accv:x?}"); - - // eprintln!("lo(acc) = {:x?} lo(pp) = {:x?}", vget_low_u32(accv), vget_low_u32(pp)); - - let lo = vmull_u32(vget_low_u32(accv), vget_low_u32(prime)); - let hi = vmull_high_u32(accv, prime); - - // eprintln!("lo = {lo:x?} hi = {hi:x?}"); - - let a = vuzp1q_u64(lo, hi); - let b = vuzp2q_u64(lo, hi); - - // eprintln!("a = {a:x?} b = {b:x?}"); - - let b = 
vshlq_n_u64::<32>(b); - - let s = vaddq_u64(a, b); - - // eprintln!("s = {s:x?}"); - // let accv = vreinterpretq_u64_u32(accv); - // eprintln!("acc = {accv:x?}"); - - let accv = s; - -// panic!(); - - vst1q_u64(acc.as_mut_ptr(), accv); + vst1q_u64(acc.as_mut_ptr(), accv); } - } - - // eprintln!("{acc:x?}"); - // eprintln!("----------"); + } +} +mod neon { + use core::arch::aarch64::*; - // scalar - // acc + // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the + // lower 64 bits of the result) operation, so we have to make our + // own out of 32-bit operations . We can simplify by realizing + // that we are always multiplying by a 32-bit number. + // + // The basic algorithm is traditional long multiplication. `[]` + // denotes groups of 32 bits. + // + // [AAAA][BBBB] + // x [CCCC] + // -------------------- + // [BCBC][BCBC] + // + [ACAC][ACAC] + // -------------------- + // [ACBC][BCBC] // 64-bit truncation occurs + // + // This can be written in NEON as a vectorwise wrapping + // multiplication of the high-order chunk of the input (`A`) + // against the constant and then a multiply-widen-and-accumulate + // of the low-order chunk of the input and the constant: + // + // 1. High-order, vectorwise // - // [23e3a8c41a04e6b, e0ab8aff41b10f66, - // fd0385440ae4def7, ac00b2db47f23b90, - // 60a911d92c86ff3b, ad3b37a550927c9c, - // 211896d1cfc9b1b9, 66ceedfabb78caeb] + // [AAAA][BBBB] + // x [CCCC][0000] + // -------------------- + // [ACAC][0000] // - // simd - // acc + // 2. Low-order, widening // - // [b66e3311c60fb961, c8a474f8ebf44757, - // 6810423fba6d7ed0, d41b8185aab06f3, - // 9f012bd957bae2ba, b3ce30e7c301b27e, - // c2090074dc5d2070, dadf4f22e5e02bdd] + // [BBBB] + // x [CCCC] // widening + // -------------------- + // [BCBC][BCBC] + // + // 3. Accumulation + // + // [ACAC][0000] + // + [BCBC][BCBC] // vectorwise + // -------------------- + // [ACBC][BCBC] + // + // Thankfully, NEON has a single multiply-widen-and-accumulate + // operation. 
+ #[inline] + pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { + unsafe { + let input_as_u32 = vreinterpretq_u32_u64(input); + let factor = vmov_n_u32(og_factor); + let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); + let factor_striped = vreinterpretq_u32_u64(factor_striped); + + let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); + let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + let input_lo = vmovn_u64(input); + vmlal_u32(high_shifted, input_lo, factor) + } + } } #[inline] From 4ca95734d7d1d22953d73dc4acff74b99fea1880 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 15 Jul 2024 09:52:40 -0400 Subject: [PATCH 070/166] organize --- src/xxhash3_64.rs | 193 ++++++++++++++++++++++------------------------ 1 file changed, 94 insertions(+), 99 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index cc70bc29b..202658e5b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -319,43 +319,99 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { } #[inline] +#[cfg(not(target_arch = "aarch64"))] fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } +} - // let last = secret - // .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - // .unwrap(); - // let (last, _) = last.bp_as_chunks(); - // let last = last.iter().copied().map(u64::from_ne_bytes); +#[cfg(target_arch = "aarch64")] +use neon::{accumulate, round_scramble}; - // for (acc, secret) in acc.iter_mut().zip(last) { - // *acc ^= *acc >> 47; - // *acc ^= secret; - // *acc = acc.wrapping_mul(PRIME32_1); - // } +mod neon { + use core::arch::aarch64::*; - unsafe { - use core::arch::aarch64::*; + use 
super::{SliceBackport as _, PRIME32_1}; + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + unsafe { + let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); + let (acc, _) = acc.bp_as_chunks_mut::<2>(); + for (i, acc) in acc.iter_mut().enumerate() { + let mut accv = vld1q_u64(acc.as_ptr()); + let secret = vld1q_u64(secret_base.add(i * 2)); - let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); - let (acc, _) = acc.bp_as_chunks_mut::<2>(); - for (i, acc) in acc.iter_mut().enumerate() { - let mut accv = vld1q_u64(acc.as_ptr()); - let secret = vld1q_u64(secret_base.add(i * 2)); + // tmp[i] = acc[i] >> 47 + let shifted = vshrq_n_u64::<47>(accv); - let shifted = vshrq_n_u64::<47>(accv); - accv = veorq_u64(accv, shifted); - accv = veorq_u64(accv, secret); + // acc[i] ^= tmp[i] + accv = veorq_u64(accv, shifted); - accv = neon::xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + // acc[i] ^= secret[i] + accv = veorq_u64(accv, secret); - vst1q_u64(acc.as_mut_ptr(), accv); + // acc[i] *= PRIME32_1 + accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + + vst1q_u64(acc.as_mut_ptr(), accv); + } } } -} -mod neon { - use core::arch::aarch64::*; + // We process 4x u64 at a time as that allows us to completely + // fill a `uint64x2_t` with useful values when performing the + // `vmull_{high_}u32`. 
+ #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (acc2, _) = acc.bp_as_chunks_mut::<4>(); + for (i, acc) in acc2.into_iter().enumerate() { + unsafe { + let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); + let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); + let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); + let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); + let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); + let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + + // value[i] = stripe[i] ^ secret[i]; + let value_0 = veorq_u64(stripe_0, secret_0); + let value_1 = veorq_u64(stripe_1, secret_1); + + // tmp[i] = value[i] * (value[i] >> 32) + let parts_0 = vreinterpretq_u32_u64(value_0); + let parts_1 = vreinterpretq_u32_u64(value_1); + + let hi = vuzp1q_u32(parts_0, parts_1); + let lo = vuzp2q_u32(parts_0, parts_1); + + let product_0 = vmull_u32(vget_low_u32(hi), vget_low_u32(lo)); + let product_1 = vmull_high_u32(hi, lo); + + // acc[i] += tmp[i] + accv_0 = vaddq_u64(accv_0, product_0); + accv_1 = vaddq_u64(accv_1, product_1); + + // acc[i ^ 1] = acc[i ^ 1] + stripe[i]; + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + accv_0 = vaddq_u64(accv_0, stripe_rot_0); + accv_1 = vaddq_u64(accv_1, stripe_rot_1); + + vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); + vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); + }; + } + } // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the // lower 64 bits of the result) operation, so we have to make our @@ -416,6 +472,11 @@ mod neon { vmlal_u32(high_shifted, input_lo, factor) } } + + // unsafe { + // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(stripe.as_ptr().cast()); + // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(secret.as_ptr().cast()); + // } } #[inline] @@ -461,83 +522,17 @@ fn final_merge(acc: &mut [u64; 8], init_value: u64, 
secret: &[u8], secret_offset } #[inline] +#[cfg(not(target_arch = "aarch64"))] fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - use core::arch::aarch64::*; + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? + let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - // unsafe { - // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(stripe.as_ptr().cast()); - // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(secret.as_ptr().cast()); - // } - - // eprintln!("{acc:x?}"); - // for i in 0..8 { - // // TODO: Should these casts / reads happen outside this function? - // let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - // let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - // eprintln!("{:x?}, {:x?}", stripe, secret); - - // let value = stripe ^ secret; - // acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - // acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); - // } - - // We process 4x u64 at a time as that allows us to completely - // fill a `uint64x2_t` with useful values when performing the - // `vmull_{high_}u32`. 
- let (acc2, _) = acc.bp_as_chunks_mut::<4>(); - for (i, acc) in acc2.into_iter().enumerate() { - unsafe { - let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); - let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); - let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); - let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); - let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); - let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); - - let value_0 = veorq_u64(stripe_0, secret_0); - let value_1 = veorq_u64(stripe_1, secret_1); - - let parts_0 = vreinterpretq_u32_u64(value_0); - let parts_1 = vreinterpretq_u32_u64(value_1); - - let hi = vuzp1q_u32(parts_0, parts_1); - let lo = vuzp2q_u32(parts_0, parts_1); - - let product_0 = vmull_u32(vget_low_u32(hi), vget_low_u32(lo)); - let product_1 = vmull_high_u32(hi, lo); - - accv_0 = vaddq_u64(accv_0, product_0); - accv_1 = vaddq_u64(accv_1, product_1); - - let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); - let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - accv_0 = vaddq_u64(accv_0, stripe_rot_0); - accv_1 = vaddq_u64(accv_1, stripe_rot_1); - - vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); - vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); - }; + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); } - - - // Pseudo-SIMD - - // for (acc, (str, sec)) in acc.iter_mut().zip(stripe_x.into_iter().zip(secret_x)) { - // let value = str ^ sec; - // *acc = multiply_64_as_32_and_add(value, value >> 32, *acc); - // } - - // let mut stripe_x = stripe_x; - - // stripe_x.swap(0, 1); - // stripe_x.swap(2, 3); - // stripe_x.swap(4, 5); - // stripe_x.swap(6, 7); - - // for (acc, str) in acc.iter_mut().zip(stripe_x) { - // *acc = acc.wrapping_add(str); - // } } #[inline] From feb485b01a1929793691240465174476f7013e92 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 21 Jul 2024 
08:51:44 -0400 Subject: [PATCH 071/166] more link --- src/xxhash3_64.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 202658e5b..67db79bf4 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -547,6 +547,7 @@ fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 +// https://github.com/llvm/llvm-project/issues/98481 #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use core::arch::asm; From 5835fdf700b8033847ae490c306806a9590957ea Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 21 Jul 2024 08:49:30 -0400 Subject: [PATCH 072/166] NEON performance parity --- src/xxhash3_64.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 67db79bf4..c6420f8d9 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -370,7 +370,7 @@ mod neon { // We process 4x u64 at a time as that allows us to completely // fill a `uint64x2_t` with useful values when performing the - // `vmull_{high_}u32`. + // multiplication. 
#[inline] pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let (acc2, _) = acc.bp_as_chunks_mut::<4>(); @@ -383,29 +383,35 @@ mod neon { let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + // stripe_rot[i ^ 1] = stripe[i]; + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + // value[i] = stripe[i] ^ secret[i]; let value_0 = veorq_u64(stripe_0, secret_0); let value_1 = veorq_u64(stripe_1, secret_1); - // tmp[i] = value[i] * (value[i] >> 32) + // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] + // + // Each vector has 64-bit values, but we treat them as + // 32-bit and then unzip them. This naturally splits + // the upper and lower 32 bits. let parts_0 = vreinterpretq_u32_u64(value_0); let parts_1 = vreinterpretq_u32_u64(value_1); let hi = vuzp1q_u32(parts_0, parts_1); let lo = vuzp2q_u32(parts_0, parts_1); - let product_0 = vmull_u32(vget_low_u32(hi), vget_low_u32(lo)); - let product_1 = vmull_high_u32(hi, lo); + let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); + let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - // acc[i] += tmp[i] - accv_0 = vaddq_u64(accv_0, product_0); - accv_1 = vaddq_u64(accv_1, product_1); + // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 + core::arch::asm!("/* {x:v} */", x = in(vreg) sum_0); + core::arch::asm!("/* {x:v} */", x = in(vreg) sum_1); - // acc[i ^ 1] = acc[i ^ 1] + stripe[i]; - let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); - let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - accv_0 = vaddq_u64(accv_0, stripe_rot_0); - accv_1 = vaddq_u64(accv_1, stripe_rot_1); + // acc[i] += sum[i] + accv_0 = vaddq_u64(accv_0, sum_0); + accv_1 = vaddq_u64(accv_1, sum_1); vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); 
From 938d94eb3b0217cb07a5496962e7451b7ae09299 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 21 Jul 2024 18:59:14 -0400 Subject: [PATCH 073/166] organize simd --- Cargo.toml | 4 +- src/xxhash3_64.rs | 137 +++++++++++++++++++++++++--------------------- 2 files changed, 79 insertions(+), 62 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1a515e981..6c660495d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "simd"] random = ["dep:rand"] @@ -22,6 +22,8 @@ xxhash32 = [] xxhash64 = [] xxhash3_64 = [] +simd = [] + [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c6420f8d9..02c6db296 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -318,25 +318,77 @@ fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { } } -#[inline] -#[cfg(not(target_arch = "aarch64"))] -fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - let (last, _) = last.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); +#[cfg(any(not(all(feature = "simd", target_arch = "aarch64"))))] +mod scalar { + use core::mem; + + use super::{SliceBackport as _, PRIME32_1}; + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + 
*acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } + } + + #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? + let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + } + } + + #[inline] + #[cfg(not(target_arch = "aarch64"))] + fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + let lhs = (lhs as u32).into_u64(); + let rhs = (rhs as u32).into_u64(); + + let product = lhs.wrapping_mul(rhs); + acc.wrapping_add(product) + } + + #[inline] + // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 + // https://github.com/llvm/llvm-project/issues/98481 + #[cfg(target_arch = "aarch64")] + fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + use core::arch::asm; + + let res; + + unsafe { + asm!( + "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", + lhs = in(reg) lhs, + rhs = in(reg) rhs, + acc = in(reg) acc, + res = out(reg) res, + ) + } + + res } } -#[cfg(target_arch = "aarch64")] -use neon::{accumulate, round_scramble}; +#[cfg(any(not(all(feature = "simd", target_arch = "aarch64"))))] +use scalar as vector_impl; +#[cfg(all(target_arch = "aarch64", feature = "simd"))] mod neon { use core::arch::aarch64::*; @@ -485,6 +537,11 @@ mod neon { // } } +#[cfg(all(target_arch = "aarch64", feature = "simd"))] +use neon as vector_impl; + +use vector_impl::{accumulate, round_scramble}; + #[inline] fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, @@ -527,52 +584,6 @@ fn final_merge(acc: &mut [u64; 8], 
init_value: u64, secret: &[u8], secret_offset avalanche(result) } -#[inline] -#[cfg(not(target_arch = "aarch64"))] -fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? - let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); - } -} - -#[inline] -#[cfg(not(target_arch = "aarch64"))] -fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - let lhs = (lhs as u32).into_u64(); - let rhs = (rhs as u32).into_u64(); - - let product = lhs.wrapping_mul(rhs); - acc.wrapping_add(product) -} - -#[inline] -// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 -// https://github.com/llvm/llvm-project/issues/98481 -#[cfg(target_arch = "aarch64")] -fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - use core::arch::asm; - - let res; - - unsafe { - asm!( - "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", - lhs = in(reg) lhs, - rhs = in(reg) rhs, - acc = in(reg) acc, - res = out(reg) res, - ) - } - - res -} - #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; @@ -628,7 +639,10 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); + + #[cfg(all(target_arch = "aarch64", feature = "simd"))] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); + fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } @@ -641,6 +655,7 @@ impl SliceBackport for [T] { (head, tail) } + #[cfg(all(target_arch = "aarch64", feature = "simd"))] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; From de5b5d7da07d46eb6f3132b762bb388169316beb Mon Sep 17 00:00:00 2001 From: Jake 
Goulding Date: Mon, 22 Jul 2024 11:12:09 -0400 Subject: [PATCH 074/166] bench simd on off --- compare/Cargo.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/compare/Cargo.toml b/compare/Cargo.toml index 44ef771ee..084a5b2b3 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -3,6 +3,11 @@ name = "compare" version = "0.1.0" edition = "2021" +[features] +default = ["simd"] + +simd = ["xx-renu/simd"] + [[bench]] name = "benchmark" harness = false @@ -12,5 +17,5 @@ criterion = "0.5.1" proptest = "1.5.0" rand = "0.8.5" twox-hash = "1.6.3" -xx-renu = { path = ".." } +xx-renu = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64"] } xx_hash-sys = { path = "../xx_hash-sys" } From 50da6239b7a56113d67c446b42688a62b707ab0d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 22 Jul 2024 11:16:17 -0400 Subject: [PATCH 075/166] Simplify the control flow --- src/xxhash3_64.rs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 02c6db296..e955f8c5f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -36,8 +36,6 @@ pub const SECRET_MINIMUM_LENGTH: usize = 136; pub struct XxHash3_64; -type Stripe = [u64; 8]; - impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { @@ -274,11 +272,20 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; + assert!(secret.len() >= SECRET_MINIMUM_LENGTH); + assert!(input.len() >= 241); + let stripes_per_block = (secret.len() - 64) / 8; let block_size = 64 * stripes_per_block; - let mut blocks = input.chunks(block_size).fuse(); - let last_block = blocks.next_back().unwrap(); + let mut blocks = input.chunks_exact(block_size); + let last_block = + if blocks.remainder().is_empty() { + unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; + let last_stripe: 
&[u8; 64] = unsafe { &*input .as_ptr() @@ -288,7 +295,9 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { }; for block in blocks { - round(&mut acc, block, secret); + let (stripes, _) = block.bp_as_chunks(); + + round(&mut acc, stripes, secret); } last_round(&mut acc, last_block, last_stripe, secret); @@ -302,14 +311,13 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { } #[inline] -fn round(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { - round_accumulate(acc, block, secret); +fn round(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + round_accumulate(acc, stripes, secret); round_scramble(acc, secret); } #[inline] -fn round_accumulate(acc: &mut [u64; 8], block: &[u8], secret: &[u8]) { - let (stripes, _) = block.bp_as_chunks::<{ mem::size_of::() }>(); +fn round_accumulate(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { let secrets = (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); From 3408fa7694b94c93f0ff04ffdcd75b02b2e8b953 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 22 Jul 2024 11:33:33 -0400 Subject: [PATCH 076/166] cleanup --- src/xxhash3_64.rs | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index e955f8c5f..96e59ee3e 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -244,24 +244,6 @@ fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> mul_result.lower_half() ^ mul_result.upper_half() } -// fn mix_two_chunks( -// acc: &mut [u64; 2], -// data1: &[u8; 16], -// data2: &[u8; 16], -// secret: &[u8], -// secret_offset: usize, -// seed: u64, -// ) { -// // TODO: Should these casts / reads happen outside this function? 
-// let data_words1 = unsafe { data1.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion -// let data_words2 = unsafe { data2.as_ptr().cast::<[u64; 2]>().read_unaligned() }; // TODO:little-endian conversion - -// acc[0] = acc[0] + mix_step(data1, secret, secret_offset, seed); -// acc[1] = acc[1] + mix_step(data2, secret, secret_offset + 16, seed); -// acc[0] = acc[0] ^ data_words2[0].wrapping_add(data_words2[1]); -// acc[1] = acc[1] ^ data_words1[0].wrapping_add(data_words1[1]); -// } - #[rustfmt::skip] const INITIAL_ACCUMULATORS: [u64; 8] = [ PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, @@ -281,6 +263,10 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { let mut blocks = input.chunks_exact(block_size); let last_block = if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. 
unsafe { blocks.next_back().unwrap_unchecked() } } else { blocks.remainder() @@ -465,9 +451,8 @@ mod neon { let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 - core::arch::asm!("/* {x:v} */", x = in(vreg) sum_0); - core::arch::asm!("/* {x:v} */", x = in(vreg) sum_1); + reordering_barrier(sum_0); + reordering_barrier(sum_1); // acc[i] += sum[i] accv_0 = vaddq_u64(accv_0, sum_0); @@ -539,10 +524,11 @@ mod neon { } } - // unsafe { - // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(stripe.as_ptr().cast()); - // _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(secret.as_ptr().cast()); - // } + // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 + #[inline] + fn reordering_barrier(r: uint64x2_t) { + unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } + } } #[cfg(all(target_arch = "aarch64", feature = "simd"))] From 087edbf6f86ace2fe06709a20b858c0f5e4612d1 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 22 Jul 2024 16:43:27 -0400 Subject: [PATCH 077/166] stub out x64 simd --- src/xxhash3_64.rs | 64 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 96e59ee3e..d3c6067ab 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -312,11 +312,14 @@ fn round_accumulate(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { } } -#[cfg(any(not(all(feature = "simd", target_arch = "aarch64"))))] +#[cfg(all( + not(all(target_feature = "neon", feature = "simd")), + not(all(target_feature = "avx2", feature = "simd")), +))] mod scalar { use core::mem; - use super::{SliceBackport as _, PRIME32_1}; + use super::{SliceBackport as _, PRIME32_1, IntoU64}; #[inline] pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { 
@@ -379,10 +382,13 @@ mod scalar { } } -#[cfg(any(not(all(feature = "simd", target_arch = "aarch64"))))] +#[cfg(all( + not(all(target_feature = "neon", feature = "simd")), + not(all(target_feature = "avx2", feature = "simd")), +))] use scalar as vector_impl; -#[cfg(all(target_arch = "aarch64", feature = "simd"))] +#[cfg(all(target_feature = "neon", feature = "simd"))] mod neon { use core::arch::aarch64::*; @@ -531,9 +537,57 @@ mod neon { } } -#[cfg(all(target_arch = "aarch64", feature = "simd"))] +#[cfg(all(target_feature = "neon", feature = "simd"))] use neon as vector_impl; +#[cfg(all(target_feature = "avx2", feature = "simd"))] +mod avx2 { + use core::mem; + + use super::{SliceBackport as _, PRIME32_1, IntoU64}; + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } + } + + #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + // TODO: Should these casts / reads happen outside this function? 
+ let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + } + } + + #[inline] + fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + let lhs = (lhs as u32).into_u64(); + let rhs = (rhs as u32).into_u64(); + + let product = lhs.wrapping_mul(rhs); + acc.wrapping_add(product) + } +} + +#[cfg(all(target_feature = "avx2", feature = "simd"))] +use avx2 as vector_impl; + use vector_impl::{accumulate, round_scramble}; #[inline] From 51ded36f0b0d1edd9c7fb20a00e1bcb97e47a1b1 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:10:33 -0400 Subject: [PATCH 078/166] hack in one simd --- src/xxhash3_64.rs | 48 +++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index d3c6067ab..5e2b319d0 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -542,7 +542,7 @@ use neon as vector_impl; #[cfg(all(target_feature = "avx2", feature = "simd"))] mod avx2 { - use core::mem; + use core::{mem, arch::x86_64::*}; use super::{SliceBackport as _, PRIME32_1, IntoU64}; @@ -551,6 +551,7 @@ mod avx2 { let last = secret .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() .unwrap(); + let (last, _) = last.bp_as_chunks(); let last = last.iter().copied().map(u64::from_ne_bytes); @@ -563,25 +564,40 @@ mod avx2 { #[inline] pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? 
- let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + for i in 0..2 { + unsafe { + // todo: align the accumulator and avoid the unaligned load and store + let mut acc_0 = _mm256_loadu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast()); + let stripe_0 = _mm256_loadu_si256(stripe.as_ptr().cast::().add(4 * i).cast()); + let secret_0 = _mm256_loadu_si256(secret.as_ptr().cast::().add(4 * i).cast()); - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm256_xor_si256(stripe_0, secret_0); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); - } - } + // TODO: "rotate" is not quite correct + // stripe_rot[i] = stripe[i ^ 1] + let stripe_rot_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); - #[inline] - fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - let lhs = (lhs as u32).into_u64(); - let rhs = (rhs as u32).into_u64(); + // acc[i] += stripe_rot[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_rot_0); - let product = lhs.wrapping_mul(rhs); - acc.wrapping_add(product) + // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) + let value_swap_0 = _mm256_shuffle_epi32::<0b10_11_00_01>(value_0); + + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) + let product_0 = _mm256_mul_epu32(value_0, value_swap_0); + + // eprintln!(); + // eprintln!("{value_0:016x?}"); + // eprintln!("{value_swap_0:016x?}"); + // eprintln!("{product_0:016x?}"); + + // acc[i] += product[i] + acc_0 = _mm256_add_epi64(acc_0, product_0); + + _mm256_storeu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast(), acc_0); + } + } } } From 2c7b465af345d046312c7c62ba78ec266d1f6d75 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:10:33 -0400 Subject: [PATCH 079/166] simd cleanup --- compare/benches/benchmark.rs | 5 +++++ src/xxhash3_64.rs | 35 
++++++----------------------------- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 18a9415c6..ba2318b68 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -187,6 +187,11 @@ mod xxhash3_64 { b.iter(|| c::ScalarXxHash3_64::oneshot_with_seed(seed, data)) }); + let id = format!("impl-c-avx2/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::Avx2XxHash3_64::oneshot_with_seed(seed, data)) + }); + let id = format!("impl-rust/size-{size:07}"); g.bench_function(id, |b| { b.iter(|| rust::XxHash3_64::oneshot_with_seed(seed, data)) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 5e2b319d0..6820e4777 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -224,7 +224,6 @@ fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { #[inline] fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> u64 { - // TODO: Should these casts / reads happen outside this function? let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; let secret_words = unsafe { secret @@ -312,10 +311,8 @@ fn round_accumulate(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { } } -#[cfg(all( - not(all(target_feature = "neon", feature = "simd")), - not(all(target_feature = "avx2", feature = "simd")), -))] +// This module is not `cfg`-gated because it is used by some of the +// SIMD implementations. mod scalar { use core::mem; @@ -337,9 +334,9 @@ mod scalar { } #[inline] + #[allow(dead_code)] pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { for i in 0..8 { - // TODO: Should these casts / reads happen outside this function? 
let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; @@ -542,25 +539,10 @@ use neon as vector_impl; #[cfg(all(target_feature = "avx2", feature = "simd"))] mod avx2 { - use core::{mem, arch::x86_64::*}; + use core::arch::x86_64::*; - use super::{SliceBackport as _, PRIME32_1, IntoU64}; - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - - let (last, _) = last.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); - } - } + // The scalar implementation is autovectorized nicely enough + pub use super::scalar::round_scramble; #[inline] pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -587,11 +569,6 @@ mod avx2 { // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) let product_0 = _mm256_mul_epu32(value_0, value_swap_0); - // eprintln!(); - // eprintln!("{value_0:016x?}"); - // eprintln!("{value_swap_0:016x?}"); - // eprintln!("{product_0:016x?}"); - // acc[i] += product[i] acc_0 = _mm256_add_epi64(acc_0, product_0); From be7325c8d47b1a6bd6c70c7ab9b7a6d24c72aa44 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:10:33 -0400 Subject: [PATCH 080/166] use cc for builds and a forced avx2 variant --- xx_hash-sys/Cargo.toml | 3 ++ xx_hash-sys/build.rs | 78 ++++++++++++++---------------------------- xx_hash-sys/src/lib.rs | 76 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 52 deletions(-) diff --git a/xx_hash-sys/Cargo.toml b/xx_hash-sys/Cargo.toml index d385daf66..dd96308c0 100644 --- a/xx_hash-sys/Cargo.toml +++ b/xx_hash-sys/Cargo.toml @@ -6,3 +6,6 @@ publish = false [dependencies] libc = { version = "0.2.155", default-features = 
false } + +[build-dependencies] +cc = { version = "1.1.6", default-features = false } diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index 417dedd70..2edfce466 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -1,58 +1,32 @@ -use std::{env, fs, path::PathBuf, process::Command}; +use std::{env, path::PathBuf}; fn main() { - let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); - let base: PathBuf = base.into(); - let xxhash = base.join("xxHash"); - - let out = env::var("OUT_DIR").expect("no OUT_DIR"); - let mut out = PathBuf::from(out); - out.push("xxhash"); - fs::create_dir_all(&out).expect("make it"); + // TODO: CARGO_CFG_TARGET_FEATURE has `Some(adx,aes,avx,avx2,...` - let make_cmd = || { - let mut c = Command::new("make"); - c.current_dir(&xxhash); - c + let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); + let mut base: PathBuf = base.into(); + base.push("xxHash"); + base.push("xxhash.c"); + + let build = { + let mut build = cc::Build::new(); + build.file(base); + build }; - let s = make_cmd() - .arg("clean") - .status() - .expect("Could not run clean for scalar build"); - assert!(s.success(), "Scalar clean failed"); - - let s = make_cmd() - .arg("libxxhash.a") - .env( - "CFLAGS", - "-O3 -DXXH_VECTOR=XXH_SCALAR -DXXH_NAMESPACE=scalar_", - ) - .status() - .expect("Could not run scalar build"); - assert!(s.success(), "Scalar build failed"); - - let name = xxhash.join("libxxhash.a"); - let new = out.join("libxxhash_scalar.a"); - fs::copy(name, new).expect("Copy scalar"); - - let s = make_cmd() - .arg("clean") - .status() - .expect("Could not run clean for optimized build"); - assert!(s.success(), "Optimized clean failed"); - - let s = make_cmd() - .arg("libxxhash.a") - .status() - .expect("Could not run optimized build"); - assert!(s.success(), "Optimized build failed"); - - let name = xxhash.join("libxxhash.a"); - let new = out.join("libxxhash_optimized.a"); - fs::copy(name, new).expect("Copy scalar"); - - 
println!("cargo::rustc-link-lib=static=xxhash_scalar"); - println!("cargo::rustc-link-lib=static=xxhash_optimized"); - println!("cargo::rustc-link-search={}", out.display()); + let mut scalar_build = build.clone(); + scalar_build + .define("XXH_VECTOR", "XXH_SCALAR") + .define("XXH_NAMESPACE", "scalar_") + .compile("xxhash_scalar"); + + let mut avx2_build = build.clone(); + avx2_build + .flag("-march=x86-64-v3") + .define("XXH_VECTOR", "XXH_AVX2") + .define("XXH_NAMESPACE", "avx2_") + .compile("xxhash_avx2"); + + let native_build = build; + native_build.compile("xxhash_native"); } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index a8ae42811..8f10e9f6a 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -276,3 +276,79 @@ impl Drop for ScalarXxHash3_64 { assert_eq!(retval, XXH_OK); } } + +// ---------- + +extern "C" { + fn avx2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn avx2_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn avx2_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn avx2_XXH3_createState() -> *mut XXH3_state_t; + fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn avx2_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn avx2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn avx2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; +} + +pub struct Avx2XxHash3_64(*mut XXH3_state_t); + +impl Avx2XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { avx2_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { avx2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn 
oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + avx2_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = avx2_XXH3_createState(); + avx2_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { avx2_XXH3_64bits_digest(self.0) } + } +} + +impl Drop for Avx2XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { avx2_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } +} From f7ec3bc5f92c7620cabe8927417041f6d38b0dd4 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:10:33 -0400 Subject: [PATCH 081/166] better choosin --- src/xxhash3_64.rs | 631 +++++++++++++++++++++++++--------------------- 1 file changed, 343 insertions(+), 288 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6820e4777..992ca7d40 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -251,98 +251,158 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { - let mut acc = INITIAL_ACCUMULATORS; - - assert!(secret.len() >= SECRET_MINIMUM_LENGTH); - assert!(input.len() >= 241); - - let stripes_per_block = (secret.len() - 64) / 8; - let block_size = 64 * stripes_per_block; - - let mut blocks = input.chunks_exact(block_size); - let last_block = - if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. 
- unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() + unsafe { avx2::oneshot_unchecked(secret, input) } +} + +struct Algorithm(V); + +impl Algorithm { + fn do_it(&self, secret: &[u8], input: &[u8]) -> u64 { + let mut acc = INITIAL_ACCUMULATORS; + + assert!(secret.len() >= SECRET_MINIMUM_LENGTH); + assert!(input.len() >= 241); + + let stripes_per_block = (secret.len() - 64) / 8; + let block_size = 64 * stripes_per_block; + + let mut blocks = input.chunks_exact(block_size); + let last_block = + if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. + unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; + + let last_stripe: &[u8; 64] = unsafe { + &*input + .as_ptr() + .add(input.len()) + .sub(mem::size_of::<[u8; 64]>()) + .cast() }; - let last_stripe: &[u8; 64] = unsafe { - &*input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::<[u8; 64]>()) - .cast() - }; + for block in blocks { + let (stripes, _) = block.bp_as_chunks(); - for block in blocks { - let (stripes, _) = block.bp_as_chunks(); + self.round(&mut acc, stripes, secret); + } + + self.last_round(&mut acc, last_block, last_stripe, secret); - round(&mut acc, stripes, secret); + self.final_merge( + &mut acc, + input.len().into_u64().wrapping_mul(PRIME64_1), + secret, + 11, + ) } - last_round(&mut acc, last_block, last_stripe, secret); + #[inline] + fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + self.round_accumulate(acc, stripes, secret); + self.0.round_scramble(acc, secret); + } - final_merge( - &mut acc, - input.len().into_u64().wrapping_mul(PRIME64_1), - secret, - 11, - ) -} + #[inline] + fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + let secrets = + (0..stripes.len()).map(|i| unsafe 
{ &*secret.get_unchecked(i * 8..).as_ptr().cast() }); -#[inline] -fn round(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { - round_accumulate(acc, stripes, secret); - round_scramble(acc, secret); -} + for (stripe, secret) in stripes.iter().zip(secrets) { + self.0.accumulate(acc, stripe, secret); + } + } -#[inline] -fn round_accumulate(acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { - let secrets = - (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); - for (stripe, secret) in stripes.iter().zip(secrets) { - accumulate(acc, stripe, secret); + #[inline] + fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { + // Accumulation steps are run for the stripes in the last block, + // except for the last stripe (whether it is full or not) + let stripes = match block.bp_as_chunks() { + ([stripes @ .., _last], []) => stripes, + (stripes, _last) => stripes, + }; + let secrets = + (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); + + for (stripe, secret) in stripes.iter().zip(secrets) { + self.0.accumulate(acc, stripe, secret); + } + + let q = &secret[secret.len() - 71..]; + let q: &[u8; 64] = unsafe { &*q.as_ptr().cast() }; + self.0.accumulate(acc, last_stripe, q); + } + + #[inline] + fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { + let secret_words = unsafe { + secret + .as_ptr() + .add(secret_offset) + .cast::<[u64; 8]>() + .read_unaligned() + }; + let mut result = init_value; + for i in 0..4 { + // 64-bit by 64-bit multiplication to 128-bit full result + let mul_result = { + let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); + let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + a.wrapping_mul(b) + }; + result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); + } + avalanche(result) } } +trait Vector { + fn round_scramble(&self, acc: &mut 
[u64; 8], secret: &[u8]); + + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); +} + // This module is not `cfg`-gated because it is used by some of the // SIMD implementations. mod scalar { use core::mem; - use super::{SliceBackport as _, PRIME32_1, IntoU64}; + use super::{IntoU64, SliceBackport as _, Vector, PRIME32_1}; - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - let (last, _) = last.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); + pub struct Impl; + + impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + let last = secret + .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() + .unwrap(); + let (last, _) = last.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } } - } - #[inline] - #[allow(dead_code)] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - for i in 0..8 { - let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..8 { + let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; + let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = 
multiply_64_as_32_and_add(value, value >> 32, acc[i]); + } } } @@ -379,251 +439,246 @@ mod scalar { } } -#[cfg(all( - not(all(target_feature = "neon", feature = "simd")), - not(all(target_feature = "avx2", feature = "simd")), -))] -use scalar as vector_impl; - -#[cfg(all(target_feature = "neon", feature = "simd"))] -mod neon { - use core::arch::aarch64::*; - - use super::{SliceBackport as _, PRIME32_1}; - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { - unsafe { - let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); - let (acc, _) = acc.bp_as_chunks_mut::<2>(); - for (i, acc) in acc.iter_mut().enumerate() { - let mut accv = vld1q_u64(acc.as_ptr()); - let secret = vld1q_u64(secret_base.add(i * 2)); +// #[cfg(all( +// not(all(target_feature = "neon", feature = "simd")), +// not(all(target_feature = "avx2", feature = "simd")), +// ))] +// use scalar as vector_impl; + +// #[cfg(all(target_feature = "neon", feature = "simd"))] +// mod neon { +// use core::arch::aarch64::*; + +// use super::{SliceBackport as _, PRIME32_1}; + +// #[inline] +// pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { +// unsafe { +// let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); +// let (acc, _) = acc.bp_as_chunks_mut::<2>(); +// for (i, acc) in acc.iter_mut().enumerate() { +// let mut accv = vld1q_u64(acc.as_ptr()); +// let secret = vld1q_u64(secret_base.add(i * 2)); + +// // tmp[i] = acc[i] >> 47 +// let shifted = vshrq_n_u64::<47>(accv); + +// // acc[i] ^= tmp[i] +// accv = veorq_u64(accv, shifted); + +// // acc[i] ^= secret[i] +// accv = veorq_u64(accv, secret); + +// // acc[i] *= PRIME32_1 +// accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + +// vst1q_u64(acc.as_mut_ptr(), accv); +// } +// } +// } + +// // We process 4x u64 at a time as that allows us to completely +// // fill a `uint64x2_t` with useful values when performing the +// // multiplication. 
+// #[inline] +// pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { +// let (acc2, _) = acc.bp_as_chunks_mut::<4>(); +// for (i, acc) in acc2.into_iter().enumerate() { +// unsafe { +// let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); +// let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); +// let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); +// let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); +// let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); +// let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + +// // stripe_rot[i ^ 1] = stripe[i]; +// let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); +// let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + +// // value[i] = stripe[i] ^ secret[i]; +// let value_0 = veorq_u64(stripe_0, secret_0); +// let value_1 = veorq_u64(stripe_1, secret_1); + +// // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] +// // +// // Each vector has 64-bit values, but we treat them as +// // 32-bit and then unzip them. This naturally splits +// // the upper and lower 32 bits. +// let parts_0 = vreinterpretq_u32_u64(value_0); +// let parts_1 = vreinterpretq_u32_u64(value_1); + +// let hi = vuzp1q_u32(parts_0, parts_1); +// let lo = vuzp2q_u32(parts_0, parts_1); + +// let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); +// let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); + +// reordering_barrier(sum_0); +// reordering_barrier(sum_1); + +// // acc[i] += sum[i] +// accv_0 = vaddq_u64(accv_0, sum_0); +// accv_1 = vaddq_u64(accv_1, sum_1); + +// vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); +// vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); +// }; +// } +// } + +// // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the +// // lower 64 bits of the result) operation, so we have to make our +// // own out of 32-bit operations . 
We can simplify by realizing +// // that we are always multiplying by a 32-bit number. +// // +// // The basic algorithm is traditional long multiplication. `[]` +// // denotes groups of 32 bits. +// // +// // [AAAA][BBBB] +// // x [CCCC] +// // -------------------- +// // [BCBC][BCBC] +// // + [ACAC][ACAC] +// // -------------------- +// // [ACBC][BCBC] // 64-bit truncation occurs +// // +// // This can be written in NEON as a vectorwise wrapping +// // multiplication of the high-order chunk of the input (`A`) +// // against the constant and then a multiply-widen-and-accumulate +// // of the low-order chunk of the input and the constant: +// // +// // 1. High-order, vectorwise +// // +// // [AAAA][BBBB] +// // x [CCCC][0000] +// // -------------------- +// // [ACAC][0000] +// // +// // 2. Low-order, widening +// // +// // [BBBB] +// // x [CCCC] // widening +// // -------------------- +// // [BCBC][BCBC] +// // +// // 3. Accumulation +// // +// // [ACAC][0000] +// // + [BCBC][BCBC] // vectorwise +// // -------------------- +// // [ACBC][BCBC] +// // +// // Thankfully, NEON has a single multiply-widen-and-accumulate +// // operation. 
+// #[inline] +// pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { +// unsafe { +// let input_as_u32 = vreinterpretq_u32_u64(input); +// let factor = vmov_n_u32(og_factor); +// let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); +// let factor_striped = vreinterpretq_u32_u64(factor_striped); + +// let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); +// let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + +// let input_lo = vmovn_u64(input); +// vmlal_u32(high_shifted, input_lo, factor) +// } +// } + +// // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 +// #[inline] +// fn reordering_barrier(r: uint64x2_t) { +// unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } +// } +// } + +// #[cfg(all(target_feature = "neon", feature = "simd"))] +// use neon as vector_impl; + +#[cfg(all(target_arch = "x86_64", feature = "simd"))] +mod avx2 { + use core::arch::x86_64::*; - // tmp[i] = acc[i] >> 47 - let shifted = vshrq_n_u64::<47>(accv); + use super::Vector; - // acc[i] ^= tmp[i] - accv = veorq_u64(accv, shifted); + #[cfg(target_feature = "avx2")] + pub unsafe fn oneshot(secret: &[u8], input: &[u8]) -> u64 { + unsafe { oneshot_unchecked(secret, input) } + } - // acc[i] ^= secret[i] - accv = veorq_u64(accv, secret); + #[target_feature(enable = "avx2")] + pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { + unsafe { super::Algorithm(Impl::new_unchecked()) }.do_it(secret, input) + } - // acc[i] *= PRIME32_1 - accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + pub struct Impl(super::scalar::Impl); - vst1q_u64(acc.as_mut_ptr(), accv); - } + impl Impl { + #[cfg(target_feature = "avx2")] + pub fn new() -> Self { + unsafe { Self::new_unchecked() } } - } - // We process 4x u64 at a time as that allows us to completely - // fill a `uint64x2_t` with useful values when performing the - // multiplication. 
- #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let (acc2, _) = acc.bp_as_chunks_mut::<4>(); - for (i, acc) in acc2.into_iter().enumerate() { - unsafe { - let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); - let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); - let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); - let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); - let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); - let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); - - // stripe_rot[i ^ 1] = stripe[i]; - let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); - let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - - // value[i] = stripe[i] ^ secret[i]; - let value_0 = veorq_u64(stripe_0, secret_0); - let value_1 = veorq_u64(stripe_1, secret_1); - - // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] - // - // Each vector has 64-bit values, but we treat them as - // 32-bit and then unzip them. This naturally splits - // the upper and lower 32 bits. - let parts_0 = vreinterpretq_u32_u64(value_0); - let parts_1 = vreinterpretq_u32_u64(value_1); - - let hi = vuzp1q_u32(parts_0, parts_1); - let lo = vuzp2q_u32(parts_0, parts_1); - - let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); - let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - - reordering_barrier(sum_0); - reordering_barrier(sum_1); - - // acc[i] += sum[i] - accv_0 = vaddq_u64(accv_0, sum_0); - accv_1 = vaddq_u64(accv_1, sum_1); - - vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); - vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); - }; + /// # Safety + /// You must ensure that the CPU has the AVX2 feature + pub unsafe fn new_unchecked() -> Impl { + Impl(super::scalar::Impl) } } - // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the - // lower 64 bits of the result) operation, so we have to make our - // own out of 32-bit operations . 
We can simplify by realizing - // that we are always multiplying by a 32-bit number. - // - // The basic algorithm is traditional long multiplication. `[]` - // denotes groups of 32 bits. - // - // [AAAA][BBBB] - // x [CCCC] - // -------------------- - // [BCBC][BCBC] - // + [ACAC][ACAC] - // -------------------- - // [ACBC][BCBC] // 64-bit truncation occurs - // - // This can be written in NEON as a vectorwise wrapping - // multiplication of the high-order chunk of the input (`A`) - // against the constant and then a multiply-widen-and-accumulate - // of the low-order chunk of the input and the constant: - // - // 1. High-order, vectorwise - // - // [AAAA][BBBB] - // x [CCCC][0000] - // -------------------- - // [ACAC][0000] - // - // 2. Low-order, widening - // - // [BBBB] - // x [CCCC] // widening - // -------------------- - // [BCBC][BCBC] - // - // 3. Accumulation - // - // [ACAC][0000] - // + [BCBC][BCBC] // vectorwise - // -------------------- - // [ACBC][BCBC] - // - // Thankfully, NEON has a single multiply-widen-and-accumulate - // operation. 
- #[inline] - pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { - unsafe { - let input_as_u32 = vreinterpretq_u32_u64(input); - let factor = vmov_n_u32(og_factor); - let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); - let factor_striped = vreinterpretq_u32_u64(factor_striped); - - let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); - let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + // The scalar implementation is autovectorized nicely enough + self.0.round_scramble(acc, secret) + } - let input_lo = vmovn_u64(input); - vmlal_u32(high_shifted, input_lo, factor) + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // SAFETY: Type can only be constructed when AVX2 feature is present + unsafe { accumulate_avx2(acc, stripe, secret) } } } - // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 #[inline] - fn reordering_barrier(r: uint64x2_t) { - unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } - } -} - -#[cfg(all(target_feature = "neon", feature = "simd"))] -use neon as vector_impl; - -#[cfg(all(target_feature = "avx2", feature = "simd"))] -mod avx2 { - use core::arch::x86_64::*; - - // The scalar implementation is autovectorized nicely enough - pub use super::scalar::round_scramble; - - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + #[target_feature(enable = "avx2")] + unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { for i in 0..2 { - unsafe { - // todo: align the accumulator and avoid the unaligned load and store - let mut acc_0 = _mm256_loadu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast()); - let stripe_0 = _mm256_loadu_si256(stripe.as_ptr().cast::().add(4 * i).cast()); - let secret_0 = 
_mm256_loadu_si256(secret.as_ptr().cast::().add(4 * i).cast()); + // todo: align the accumulator and avoid the unaligned load and store + let mut acc_0 = _mm256_loadu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast()); + let stripe_0 = _mm256_loadu_si256(stripe.as_ptr().cast::().add(4 * i).cast()); + let secret_0 = _mm256_loadu_si256(secret.as_ptr().cast::().add(4 * i).cast()); - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm256_xor_si256(stripe_0, secret_0); + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm256_xor_si256(stripe_0, secret_0); - // TODO: "rotate" is not quite correct - // stripe_rot[i] = stripe[i ^ 1] - let stripe_rot_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); + // TODO: "rotate" is not quite correct + // stripe_rot[i] = stripe[i ^ 1] + let stripe_rot_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); - // acc[i] += stripe_rot[i] - acc_0 = _mm256_add_epi64(acc_0, stripe_rot_0); + // acc[i] += stripe_rot[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_rot_0); - // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) - let value_swap_0 = _mm256_shuffle_epi32::<0b10_11_00_01>(value_0); + // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) + let value_swap_0 = _mm256_shuffle_epi32::<0b10_11_00_01>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) - let product_0 = _mm256_mul_epu32(value_0, value_swap_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) + let product_0 = _mm256_mul_epu32(value_0, value_swap_0); - // acc[i] += product[i] - acc_0 = _mm256_add_epi64(acc_0, product_0); + // acc[i] += product[i] + acc_0 = _mm256_add_epi64(acc_0, product_0); - _mm256_storeu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast(), acc_0); - } + _mm256_storeu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast(), acc_0); } } } -#[cfg(all(target_feature = "avx2", feature = "simd"))] -use avx2 as vector_impl; - -use vector_impl::{accumulate, 
round_scramble}; - -#[inline] -fn last_round(acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { - // Accumulation steps are run for the stripes in the last block, - // except for the last stripe (whether it is full or not) - let stripes = match block.bp_as_chunks() { - ([stripes @ .., _last], []) => stripes, - (stripes, _last) => stripes, - }; - let secrets = - (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); - - for (stripe, secret) in stripes.iter().zip(secrets) { - accumulate(acc, stripe, secret); - } - - let q = &secret[secret.len() - 71..]; - let q: &[u8; 64] = unsafe { &*q.as_ptr().cast() }; - accumulate(acc, last_stripe, q); -} +// #[cfg(all(target_feature = "avx2", feature = "simd"))] +// use avx2 as vector_impl; -#[inline] -fn final_merge(acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { - let secret_words = unsafe { - secret - .as_ptr() - .add(secret_offset) - .cast::<[u64; 8]>() - .read_unaligned() - }; - let mut result = init_value; - for i in 0..4 { - // 64-bit by 64-bit multiplication to 128-bit full result - let mul_result = { - let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); - let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); - a.wrapping_mul(b) - }; - result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); - } - avalanche(result) -} +// use vector_impl::{accumulate, round_scramble}; #[inline] fn avalanche(mut x: u64) -> u64 { From ffb2e32db8c9349018e96fe9d078cfa66d128e5a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 15:42:09 -0400 Subject: [PATCH 082/166] Add detect --- Cargo.toml | 4 ++- compare/Cargo.toml | 2 +- src/lib.rs | 2 +- src/xxhash3_64.rs | 67 +++++++++++++++++++++++++++------------------- 4 files changed, 44 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6c660495d..784008721 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] 
[features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "simd"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "simd", "std"] random = ["dep:rand"] @@ -24,6 +24,8 @@ xxhash3_64 = [] simd = [] +std = [] + [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } diff --git a/compare/Cargo.toml b/compare/Cargo.toml index 084a5b2b3..db575eb92 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -17,5 +17,5 @@ criterion = "0.5.1" proptest = "1.5.0" rand = "0.8.5" twox-hash = "1.6.3" -xx-renu = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64"] } +xx-renu = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/src/lib.rs b/src/lib.rs index 597fb5d48..2a6b24eb7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,9 +64,9 @@ //! assert_eq!(hash.get(&42), Some(&"the answer")); //! 
``` -#![no_std] #![deny(rust_2018_idioms)] #![deny(missing_docs)] +#![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] #[cfg(any(doc, test))] diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 992ca7d40..01180a4df 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -251,13 +251,14 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { - unsafe { avx2::oneshot_unchecked(secret, input) } +// unsafe { avx2::oneshot_unchecked(secret, input) } + x86_64_detect::oneshot(secret, input) } struct Algorithm(V); impl Algorithm { - fn do_it(&self, secret: &[u8], input: &[u8]) -> u64 { + fn oneshot(&self, secret: &[u8], input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; assert!(secret.len() >= SECRET_MINIMUM_LENGTH); @@ -267,16 +268,15 @@ impl Algorithm { let block_size = 64 * stripes_per_block; let mut blocks = input.chunks_exact(block_size); - let last_block = - if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. - unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() - }; + let last_block = if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. 
+ unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; let last_stripe: &[u8; 64] = unsafe { &*input @@ -318,7 +318,6 @@ impl Algorithm { } } - #[inline] fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, @@ -340,7 +339,13 @@ impl Algorithm { } #[inline] - fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &[u8], secret_offset: usize) -> u64 { + fn final_merge( + &self, + acc: &mut [u64; 8], + init_value: u64, + secret: &[u8], + secret_offset: usize, + ) -> u64 { let secret_words = unsafe { secret .as_ptr() @@ -373,6 +378,11 @@ trait Vector { mod scalar { use core::mem; + #[inline] + pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { + super::Algorithm(Impl).oneshot(secret, input) + } + use super::{IntoU64, SliceBackport as _, Vector, PRIME32_1}; pub struct Impl; @@ -439,12 +449,6 @@ mod scalar { } } -// #[cfg(all( -// not(all(target_feature = "neon", feature = "simd")), -// not(all(target_feature = "avx2", feature = "simd")), -// ))] -// use scalar as vector_impl; - // #[cfg(all(target_feature = "neon", feature = "simd"))] // mod neon { // use core::arch::aarch64::*; @@ -594,23 +598,22 @@ mod scalar { // } // } -// #[cfg(all(target_feature = "neon", feature = "simd"))] -// use neon as vector_impl; - #[cfg(all(target_arch = "x86_64", feature = "simd"))] mod avx2 { use core::arch::x86_64::*; use super::Vector; + #[inline] #[cfg(target_feature = "avx2")] - pub unsafe fn oneshot(secret: &[u8], input: &[u8]) -> u64 { + pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { unsafe { oneshot_unchecked(secret, input) } } + #[inline] #[target_feature(enable = "avx2")] pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - unsafe { super::Algorithm(Impl::new_unchecked()) }.do_it(secret, input) + unsafe { super::Algorithm(Impl::new_unchecked()) }.oneshot(secret, input) } pub struct 
Impl(super::scalar::Impl); @@ -675,10 +678,18 @@ mod avx2 { } } -// #[cfg(all(target_feature = "avx2", feature = "simd"))] -// use avx2 as vector_impl; +#[cfg(all(target_arch = "x86_64", feature = "std"))] +mod x86_64_detect { + pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { -// use vector_impl::{accumulate, round_scramble}; + #[cfg(feature = "simd")] + if is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::oneshot_unchecked(secret, input) } + } + + super::scalar::oneshot(secret, input) + } +} #[inline] fn avalanche(mut x: u64) -> u64 { From c00c286b2e55d43801ff05efc58704939f3bcdbb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 23 Jul 2024 18:10:29 -0400 Subject: [PATCH 083/166] Move neon to trait impl --- compare/benches/benchmark.rs | 21 +- src/xxhash3_64.rs | 380 ++++++++++++++++++++--------------- xx_hash-sys/build.rs | 48 ++++- xx_hash-sys/src/lib.rs | 324 ++++++++++++++++++----------- 4 files changed, 478 insertions(+), 295 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index ba2318b68..eddd860c4 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -184,13 +184,24 @@ mod xxhash3_64 { let id = format!("impl-c-scalar/size-{size:07}"); g.bench_function(id, |b| { - b.iter(|| c::ScalarXxHash3_64::oneshot_with_seed(seed, data)) + b.iter(|| c::scalar::XxHash3_64::oneshot_with_seed(seed, data)) }); - let id = format!("impl-c-avx2/size-{size:07}"); - g.bench_function(id, |b| { - b.iter(|| c::Avx2XxHash3_64::oneshot_with_seed(seed, data)) - }); + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::neon::XxHash3_64::oneshot_with_seed(seed, data)) + }); + } + + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::avx2::XxHash3_64::oneshot_with_seed(seed, data)) + }); + } let id = format!("impl-rust/size-{size:07}"); 
g.bench_function(id, |b| { diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 01180a4df..6b395fb6c 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -251,8 +251,7 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { -// unsafe { avx2::oneshot_unchecked(secret, input) } - x86_64_detect::oneshot(secret, input) + detect::oneshot(secret, input) } struct Algorithm(V); @@ -383,7 +382,7 @@ mod scalar { super::Algorithm(Impl).oneshot(secret, input) } - use super::{IntoU64, SliceBackport as _, Vector, PRIME32_1}; + use super::{SliceBackport as _, Vector, PRIME32_1}; pub struct Impl; @@ -419,6 +418,8 @@ mod scalar { #[inline] #[cfg(not(target_arch = "aarch64"))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + use super::IntoU64; + let lhs = (lhs as u32).into_u64(); let rhs = (rhs as u32).into_u64(); @@ -449,181 +450,216 @@ mod scalar { } } -// #[cfg(all(target_feature = "neon", feature = "simd"))] -// mod neon { -// use core::arch::aarch64::*; - -// use super::{SliceBackport as _, PRIME32_1}; - -// #[inline] -// pub fn round_scramble(acc: &mut [u64; 8], secret: &[u8]) { -// unsafe { -// let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); -// let (acc, _) = acc.bp_as_chunks_mut::<2>(); -// for (i, acc) in acc.iter_mut().enumerate() { -// let mut accv = vld1q_u64(acc.as_ptr()); -// let secret = vld1q_u64(secret_base.add(i * 2)); - -// // tmp[i] = acc[i] >> 47 -// let shifted = vshrq_n_u64::<47>(accv); - -// // acc[i] ^= tmp[i] -// accv = veorq_u64(accv, shifted); - -// // acc[i] ^= secret[i] -// accv = veorq_u64(accv, secret); - -// // acc[i] *= PRIME32_1 -// accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); - -// vst1q_u64(acc.as_mut_ptr(), accv); -// } -// } -// } - -// // We process 4x u64 at a time as that allows us to completely -// // fill a `uint64x2_t` with useful values when performing the -// // multiplication. 
-// #[inline] -// pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { -// let (acc2, _) = acc.bp_as_chunks_mut::<4>(); -// for (i, acc) in acc2.into_iter().enumerate() { -// unsafe { -// let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); -// let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); -// let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); -// let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); -// let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); -// let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); - -// // stripe_rot[i ^ 1] = stripe[i]; -// let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); -// let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - -// // value[i] = stripe[i] ^ secret[i]; -// let value_0 = veorq_u64(stripe_0, secret_0); -// let value_1 = veorq_u64(stripe_1, secret_1); - -// // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] -// // -// // Each vector has 64-bit values, but we treat them as -// // 32-bit and then unzip them. This naturally splits -// // the upper and lower 32 bits. -// let parts_0 = vreinterpretq_u32_u64(value_0); -// let parts_1 = vreinterpretq_u32_u64(value_1); - -// let hi = vuzp1q_u32(parts_0, parts_1); -// let lo = vuzp2q_u32(parts_0, parts_1); - -// let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); -// let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - -// reordering_barrier(sum_0); -// reordering_barrier(sum_1); - -// // acc[i] += sum[i] -// accv_0 = vaddq_u64(accv_0, sum_0); -// accv_1 = vaddq_u64(accv_1, sum_1); - -// vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); -// vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); -// }; -// } -// } - -// // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the -// // lower 64 bits of the result) operation, so we have to make our -// // own out of 32-bit operations . 
We can simplify by realizing -// // that we are always multiplying by a 32-bit number. -// // -// // The basic algorithm is traditional long multiplication. `[]` -// // denotes groups of 32 bits. -// // -// // [AAAA][BBBB] -// // x [CCCC] -// // -------------------- -// // [BCBC][BCBC] -// // + [ACAC][ACAC] -// // -------------------- -// // [ACBC][BCBC] // 64-bit truncation occurs -// // -// // This can be written in NEON as a vectorwise wrapping -// // multiplication of the high-order chunk of the input (`A`) -// // against the constant and then a multiply-widen-and-accumulate -// // of the low-order chunk of the input and the constant: -// // -// // 1. High-order, vectorwise -// // -// // [AAAA][BBBB] -// // x [CCCC][0000] -// // -------------------- -// // [ACAC][0000] -// // -// // 2. Low-order, widening -// // -// // [BBBB] -// // x [CCCC] // widening -// // -------------------- -// // [BCBC][BCBC] -// // -// // 3. Accumulation -// // -// // [ACAC][0000] -// // + [BCBC][BCBC] // vectorwise -// // -------------------- -// // [ACBC][BCBC] -// // -// // Thankfully, NEON has a single multiply-widen-and-accumulate -// // operation. 
-// #[inline] -// pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { -// unsafe { -// let input_as_u32 = vreinterpretq_u32_u64(input); -// let factor = vmov_n_u32(og_factor); -// let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); -// let factor_striped = vreinterpretq_u32_u64(factor_striped); - -// let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); -// let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); - -// let input_lo = vmovn_u64(input); -// vmlal_u32(high_shifted, input_lo, factor) -// } -// } - -// // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 -// #[inline] -// fn reordering_barrier(r: uint64x2_t) { -// unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } -// } -// } +#[cfg(all(target_arch = "aarch64", feature = "simd"))] +mod neon { + use core::arch::aarch64::*; -#[cfg(all(target_arch = "x86_64", feature = "simd"))] -mod avx2 { - use core::arch::x86_64::*; + use super::{SliceBackport as _, Vector, PRIME32_1}; - use super::Vector; + /// # Safety + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { + super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) + } + + struct Impl(()); + + impl Impl { + /// # Safety + /// You must ensure that the CPU has the NEON feature + unsafe fn new_unchecked() -> Self { + Self(()) + } + } + + impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + unsafe { round_scramble_neon(acc, secret) } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + unsafe { accumulate_neon(acc, stripe, secret) } + } + } + + #[inline] + #[target_feature(enable = "neon")] + unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret: &[u8]) { + unsafe { + let secret_base = 
secret.as_ptr().add(secret.len()).sub(64).cast::(); + let (acc, _) = acc.bp_as_chunks_mut::<2>(); + for (i, acc) in acc.iter_mut().enumerate() { + let mut accv = vld1q_u64(acc.as_ptr()); + let secret = vld1q_u64(secret_base.add(i * 2)); + // tmp[i] = acc[i] >> 47 + let shifted = vshrq_n_u64::<47>(accv); + + // acc[i] ^= tmp[i] + accv = veorq_u64(accv, shifted); + + // acc[i] ^= secret[i] + accv = veorq_u64(accv, secret); + + // acc[i] *= PRIME32_1 + accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + + vst1q_u64(acc.as_mut_ptr(), accv); + } + } + } + + // We process 4x u64 at a time as that allows us to completely + // fill a `uint64x2_t` with useful values when performing the + // multiplication. + #[target_feature(enable = "neon")] #[inline] - #[cfg(target_feature = "avx2")] + unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (acc2, _) = acc.bp_as_chunks_mut::<4>(); + for (i, acc) in acc2.into_iter().enumerate() { + unsafe { + let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); + let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); + let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); + let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); + let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); + let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + + // stripe_rot[i ^ 1] = stripe[i]; + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + + // value[i] = stripe[i] ^ secret[i]; + let value_0 = veorq_u64(stripe_0, secret_0); + let value_1 = veorq_u64(stripe_1, secret_1); + + // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] + // + // Each vector has 64-bit values, but we treat them as + // 32-bit and then unzip them. This naturally splits + // the upper and lower 32 bits. 
+ let parts_0 = vreinterpretq_u32_u64(value_0); + let parts_1 = vreinterpretq_u32_u64(value_1); + + let hi = vuzp1q_u32(parts_0, parts_1); + let lo = vuzp2q_u32(parts_0, parts_1); + + let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); + let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); + + reordering_barrier(sum_0); + reordering_barrier(sum_1); + + // acc[i] += sum[i] + accv_0 = vaddq_u64(accv_0, sum_0); + accv_1 = vaddq_u64(accv_1, sum_1); + + vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); + vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); + }; + } + } + + // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the + // lower 64 bits of the result) operation, so we have to make our + // own out of 32-bit operations . We can simplify by realizing + // that we are always multiplying by a 32-bit number. + // + // The basic algorithm is traditional long multiplication. `[]` + // denotes groups of 32 bits. + // + // [AAAA][BBBB] + // x [CCCC] + // -------------------- + // [BCBC][BCBC] + // + [ACAC][ACAC] + // -------------------- + // [ACBC][BCBC] // 64-bit truncation occurs + // + // This can be written in NEON as a vectorwise wrapping + // multiplication of the high-order chunk of the input (`A`) + // against the constant and then a multiply-widen-and-accumulate + // of the low-order chunk of the input and the constant: + // + // 1. High-order, vectorwise + // + // [AAAA][BBBB] + // x [CCCC][0000] + // -------------------- + // [ACAC][0000] + // + // 2. Low-order, widening + // + // [BBBB] + // x [CCCC] // widening + // -------------------- + // [BCBC][BCBC] + // + // 3. Accumulation + // + // [ACAC][0000] + // + [BCBC][BCBC] // vectorwise + // -------------------- + // [ACBC][BCBC] + // + // Thankfully, NEON has a single multiply-widen-and-accumulate + // operation. 
+ #[inline] + pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { + unsafe { + let input_as_u32 = vreinterpretq_u32_u64(input); + let factor = vmov_n_u32(og_factor); + let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); + let factor_striped = vreinterpretq_u32_u64(factor_striped); + + let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); + let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + + let input_lo = vmovn_u64(input); + vmlal_u32(high_shifted, input_lo, factor) + } + } + + // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 + #[inline] + fn reordering_barrier(r: uint64x2_t) { + unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } + } +} + +#[cfg(all(target_arch = "aarch64", feature = "std"))] +mod aarch64_detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - unsafe { oneshot_unchecked(secret, input) } + #[cfg(feature = "simd")] + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { super::neon::oneshot_unchecked(secret, input) }; + } + + super::scalar::oneshot(secret, input) } +} + +#[cfg(all(target_arch = "x86_64", feature = "simd"))] +mod avx2 { + use core::arch::x86_64::*; + use super::Vector; + + /// # Safety + /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - unsafe { super::Algorithm(Impl::new_unchecked()) }.oneshot(secret, input) + super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } pub struct Impl(super::scalar::Impl); impl Impl { - #[cfg(target_feature = "avx2")] - pub fn new() -> Self { - unsafe { Self::new_unchecked() } - } - /// # Safety /// You must ensure that the CPU has the AVX2 feature pub unsafe fn new_unchecked() -> Impl { @@ -681,16 +717,28 @@ mod avx2 { #[cfg(all(target_arch = "x86_64", feature = "std"))] mod x86_64_detect { pub fn oneshot(secret: &[u8], 
input: &[u8]) -> u64 { - #[cfg(feature = "simd")] - if is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::oneshot_unchecked(secret, input) } + if std::arch::is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::oneshot_unchecked(secret, input) }; } super::scalar::oneshot(secret, input) } } +mod detect { + pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { + #[cfg(all(target_arch = "aarch64", feature = "std"))] + return super::aarch64_detect::oneshot(secret, input); + + #[cfg(all(target_arch = "x86_64", feature = "std"))] + return super::x86_64_detect::oneshot(secret, input); + + #[allow(unreachable_code)] + super::scalar::oneshot(secret, input) + } +} + #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index 2edfce466..8aaf4aef5 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -1,7 +1,8 @@ -use std::{env, path::PathBuf}; +use std::{env, path::PathBuf, str::FromStr}; fn main() { - // TODO: CARGO_CFG_TARGET_FEATURE has `Some(adx,aes,avx,avx2,...` + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").expect("Need to know target architecture"); + let target_arch = target_arch.parse::().ok(); let base = env::var_os("CARGO_MANIFEST_DIR").unwrap(); let mut base: PathBuf = base.into(); @@ -20,13 +21,44 @@ fn main() { .define("XXH_NAMESPACE", "scalar_") .compile("xxhash_scalar"); - let mut avx2_build = build.clone(); - avx2_build - .flag("-march=x86-64-v3") - .define("XXH_VECTOR", "XXH_AVX2") - .define("XXH_NAMESPACE", "avx2_") - .compile("xxhash_avx2"); + match target_arch { + Some(Arch::Aarch64) => { + let mut neon_build = build.clone(); + neon_build + .define("XXH_VECTOR", "XXH_NEON") + .define("XXH_NAMESPACE", "neon_") + .compile("xxhash_neon"); + } + + Some(Arch::X86_64) => { + let mut avx2_build = build.clone(); + avx2_build + .flag("-march=x86-64-v3") + .define("XXH_VECTOR", "XXH_AVX2") + .define("XXH_NAMESPACE", "avx2_") + .compile("xxhash_avx2"); + } + + 
None => {} + } let native_build = build; native_build.compile("xxhash_native"); } + +enum Arch { + Aarch64, + X86_64, +} + +impl FromStr for Arch { + type Err = (); + + fn from_str(s: &str) -> Result { + Ok(match s { + "aarch64" => Self::Aarch64, + "x86_64" => Self::X86_64, + _ => return Err(()), + }) + } +} diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 8f10e9f6a..8ca1333a6 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -203,152 +203,244 @@ impl Drop for XxHash3_64 { // ---------- -extern "C" { - fn scalar_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn scalar_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn scalar_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn scalar_XXH3_createState() -> *mut XXH3_state_t; - fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn scalar_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn scalar_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn scalar_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; -} +pub mod scalar { + use super::*; + + extern "C" { + fn scalar_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn scalar_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn scalar_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn scalar_XXH3_createState() -> *mut XXH3_state_t; + fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn scalar_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: 
*const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn scalar_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn scalar_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; + } -pub struct ScalarXxHash3_64(*mut XXH3_state_t); + pub struct XxHash3_64(*mut XXH3_state_t); -impl ScalarXxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { scalar_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { scalar_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { scalar_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { scalar_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - scalar_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + scalar_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } } - } - pub fn with_seed() -> Self { - let state = unsafe { - let state = scalar_XXH3_createState(); - scalar_XXH3_64bits_reset(state); - state - }; + pub fn with_seed() -> Self { + let state = unsafe { + let state = scalar_XXH3_createState(); + scalar_XXH3_64bits_reset(state); + state + }; - Self(state) - } + Self(state) + } - pub fn write(&mut self, data: &[u8]) { - let retval = unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } - pub fn finish(&mut self) -> u64 { - unsafe { 
scalar_XXH3_64bits_digest(self.0) } + pub fn finish(&mut self) -> u64 { + unsafe { scalar_XXH3_64bits_digest(self.0) } + } } -} -impl Drop for ScalarXxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { scalar_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { scalar_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } } } // ---------- -extern "C" { - fn avx2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn avx2_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn avx2_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; +pub mod neon { + use super::*; + + extern "C" { + fn neon_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn neon_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn neon_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn neon_XXH3_createState() -> *mut XXH3_state_t; + fn neon_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn neon_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn neon_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn neon_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; + } - fn avx2_XXH3_createState() -> *mut XXH3_state_t; - fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn avx2_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn avx2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> 
XXH64_hash_t; - fn avx2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; -} + pub struct XxHash3_64(*mut XXH3_state_t); -pub struct Avx2XxHash3_64(*mut XXH3_state_t); + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { neon_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } -impl Avx2XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { avx2_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { neon_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { avx2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + neon_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = neon_XXH3_createState(); + neon_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { neon_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { neon_XXH3_64bits_digest(self.0) } + } } - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - avx2_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { neon_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); } } +} - pub fn with_seed() -> Self { - let state = unsafe { - let state = avx2_XXH3_createState(); - avx2_XXH3_64bits_reset(state); - state - }; +// ---------- - Self(state) +#[cfg(target_arch = "x86_64")] +pub mod avx2 { + use super::*; + + extern "C" { + fn avx2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> 
XXH64_hash_t; + fn avx2_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn avx2_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn avx2_XXH3_createState() -> *mut XXH3_state_t; + fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn avx2_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn avx2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn avx2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; } - pub fn write(&mut self, data: &[u8]) { - let retval = unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } + pub struct XxHash3_64(*mut XXH3_state_t); - pub fn finish(&mut self) -> u64 { - unsafe { avx2_XXH3_64bits_digest(self.0) } + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { avx2_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { avx2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + avx2_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = avx2_XXH3_createState(); + avx2_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { avx2_XXH3_64bits_digest(self.0) } + } } -} -impl Drop for Avx2XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { 
avx2_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { avx2_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } } } From a37289a5e5980b31210803f0ad2f653410537dbf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 08:57:22 -0400 Subject: [PATCH 084/166] avx cleanup --- src/xxhash3_64.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6b395fb6c..733122464 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -693,18 +693,17 @@ mod avx2 { // let value[i] = stripe[i] ^ secret[i]; let value_0 = _mm256_xor_si256(stripe_0, secret_0); - // TODO: "rotate" is not quite correct - // stripe_rot[i] = stripe[i ^ 1] - let stripe_rot_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); - // acc[i] += stripe_rot[i] - acc_0 = _mm256_add_epi64(acc_0, stripe_rot_0); + // acc[i] += stripe_swap[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); - // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) - let value_swap_0 = _mm256_shuffle_epi32::<0b10_11_00_01>(value_0); + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm256_srli_epi64::<32>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) - let product_0 = _mm256_mul_epu32(value_0, value_swap_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm256_mul_epu32(value_0, value_shift_0); // acc[i] += product[i] acc_0 = _mm256_add_epi64(acc_0, product_0); From 8e2a3593700db8b0645185d2982f42b14d56cf18 Mon Sep 17 00:00:00 2001 From: Dennis Duda Date: Wed, 24 Jul 2024 09:05:43 -0400 Subject: [PATCH 085/166] add sse2 implementation --- src/xxhash3_64.rs | 79 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 2 
deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 733122464..6568cfa14 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -713,12 +713,87 @@ mod avx2 { } } +mod sse2 { + use core::arch::x86_64::*; + + use super::Vector; + + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { + super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) + } + + pub struct Impl(super::scalar::Impl); + + impl Impl { + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + pub unsafe fn new_unchecked() -> Impl { + Impl(super::scalar::Impl) + } + } + + impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + // The scalar implementation is autovectorized nicely enough + self.0.round_scramble(acc, secret) + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // SAFETY: Type can only be constructed when SSE2 feature is present + unsafe { accumulate_sse2(acc, stripe, secret) } + } + } + + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + for i in 0..4 { + // todo: align the accumulator and avoid the unaligned load and store + let mut acc_0 = _mm_loadu_si128(acc.as_mut_ptr().cast::().add(2 * i).cast()); + let stripe_0 = _mm_loadu_si128(stripe.as_ptr().cast::().add(2 * i).cast()); + let secret_0 = _mm_loadu_si128(secret.as_ptr().cast::().add(2 * i).cast()); + + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm_xor_si128(stripe_0, secret_0); + + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); + + // acc[i] += stripe_swap[i] + acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); + + // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) + let value_swap_0 = 
_mm_shuffle_epi32::<0b10_11_00_01>(value_0); + + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) + let product_0 = _mm_mul_epu32(value_0, value_swap_0); + + // acc[i] += product[i] + acc_0 = _mm_add_epi64(acc_0, product_0); + + _mm_storeu_si128(acc.as_mut_ptr().cast::().add(2 * i).cast(), acc_0); + } + } +} + #[cfg(all(target_arch = "x86_64", feature = "std"))] mod x86_64_detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { #[cfg(feature = "simd")] - if std::arch::is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::oneshot_unchecked(secret, input) }; + { + if std::arch::is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::oneshot_unchecked(secret, input) }; + } + + if std::arch::is_x86_feature_detected!("sse2") { + return unsafe { super::sse2::oneshot_unchecked(secret, input) }; + } } super::scalar::oneshot(secret, input) From 816e8ce853d7622f3a529f1e2225f469cd2e6abf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 10:52:42 -0400 Subject: [PATCH 086/166] Add SSE2 C code variant --- compare/benches/benchmark.rs | 5 +++ xx_hash-sys/build.rs | 6 +++ xx_hash-sys/src/lib.rs | 80 ++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index eddd860c4..f9b765d9b 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -201,6 +201,11 @@ mod xxhash3_64 { g.bench_function(id, |b| { b.iter(|| c::avx2::XxHash3_64::oneshot_with_seed(seed, data)) }); + + let id = format!("impl-c-sse2/size-{size:07}"); + g.bench_function(id, |b| { + b.iter(|| c::sse2::XxHash3_64::oneshot_with_seed(seed, data)) + }); } let id = format!("impl-rust/size-{size:07}"); diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index 8aaf4aef5..73056b693 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -37,6 +37,12 @@ fn main() { .define("XXH_VECTOR", "XXH_AVX2") .define("XXH_NAMESPACE", "avx2_") 
.compile("xxhash_avx2"); + + let mut sse2_build = build.clone(); + sse2_build + .define("XXH_VECTOR", "XXH_SSE2") + .define("XXH_NAMESPACE", "sse2_") + .compile("xxhash_sse2"); } None => {} diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 8ca1333a6..45bfb08e5 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -444,3 +444,83 @@ pub mod avx2 { } } } + +#[cfg(target_arch = "x86_64")] +pub mod sse2 { + use super::*; + + extern "C" { + fn sse2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; + fn sse2_XXH3_64bits_withSeed( + input: *const libc::c_void, + length: libc::size_t, + seed: XXH64_hash_t, + ) -> XXH64_hash_t; + fn sse2_XXH3_64bits_withSecret( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> XXH64_hash_t; + + fn sse2_XXH3_createState() -> *mut XXH3_state_t; + fn sse2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn sse2_XXH3_64bits_update( + state: *mut XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> XXH_errorcode; + fn sse2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; + fn sse2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; + } + + pub struct XxHash3_64(*mut XXH3_state_t); + + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { sse2_XXH3_64bits(data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { sse2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } + } + + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + sse2_XXH3_64bits_withSecret( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn with_seed() -> Self { + let state = unsafe { + let state = sse2_XXH3_createState(); + sse2_XXH3_64bits_reset(state); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let 
retval = + unsafe { sse2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { sse2_XXH3_64bits_digest(self.0) } + } + } + + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { sse2_XXH3_freeState(self.0) }; + assert_eq!(retval, XXH_OK); + } + } +} From f0b3ad4cb65c6ce4d662d891d1fbedcae5dedd05 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 16:37:51 -0400 Subject: [PATCH 087/166] A few more inlines for good measure --- src/xxhash3_64.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6568cfa14..1aff25fa2 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -257,6 +257,7 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { struct Algorithm(V); impl Algorithm { + #[inline] fn oneshot(&self, secret: &[u8], input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; @@ -469,6 +470,7 @@ mod neon { impl Impl { /// # Safety /// You must ensure that the CPU has the NEON feature + #[inline] unsafe fn new_unchecked() -> Self { Self(()) } @@ -633,6 +635,7 @@ mod neon { #[cfg(all(target_arch = "aarch64", feature = "std"))] mod aarch64_detect { + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { #[cfg(feature = "simd")] if std::arch::is_aarch64_feature_detected!("neon") { @@ -662,6 +665,7 @@ mod avx2 { impl Impl { /// # Safety /// You must ensure that the CPU has the AVX2 feature + #[inline] pub unsafe fn new_unchecked() -> Impl { Impl(super::scalar::Impl) } @@ -731,6 +735,7 @@ mod sse2 { impl Impl { /// # Safety /// You must ensure that the CPU has the SSE2 feature + #[inline] pub unsafe fn new_unchecked() -> Impl { Impl(super::scalar::Impl) } @@ -784,6 +789,7 @@ mod sse2 { #[cfg(all(target_arch = "x86_64", feature = "std"))] mod x86_64_detect { + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { #[cfg(feature = "simd")] { @@ -801,6 +807,7 @@ mod 
x86_64_detect { } mod detect { + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { #[cfg(all(target_arch = "aarch64", feature = "std"))] return super::aarch64_detect::oneshot(secret, input); From 8b5b56d498e0b601769119a20abfdae98beb2b01 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 16:37:55 -0400 Subject: [PATCH 088/166] Simplify and cross-pollinate the AVX2 and SSE2 implementations --- src/xxhash3_64.rs | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 1aff25fa2..f3642a6a7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -688,17 +688,21 @@ mod avx2 { #[inline] #[target_feature(enable = "avx2")] unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let acc = acc.as_mut_ptr().cast::<__m256i>(); + let stripe = stripe.as_ptr().cast::<__m256i>(); + let secret = secret.as_ptr().cast::<__m256i>(); + for i in 0..2 { // todo: align the accumulator and avoid the unaligned load and store - let mut acc_0 = _mm256_loadu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast()); - let stripe_0 = _mm256_loadu_si256(stripe.as_ptr().cast::().add(4 * i).cast()); - let secret_0 = _mm256_loadu_si256(secret.as_ptr().cast::().add(4 * i).cast()); + let mut acc_0 = _mm256_loadu_si256(acc.add(i)); + let stripe_0 = _mm256_loadu_si256(stripe.add(i)); + let secret_0 = _mm256_loadu_si256(secret.add(i)); // let value[i] = stripe[i] ^ secret[i]; let value_0 = _mm256_xor_si256(stripe_0, secret_0); // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm256_permute4x64_epi64::<0b10_11_00_01>(stripe_0); + let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); // acc[i] += stripe_swap[i] acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); @@ -712,7 +716,7 @@ mod avx2 { // acc[i] += product[i] acc_0 = _mm256_add_epi64(acc_0, product_0); - _mm256_storeu_si256(acc.as_mut_ptr().cast::().add(4 * i).cast(), acc_0); + 
_mm256_storeu_si256(acc.add(i), acc_0); } } } @@ -758,11 +762,15 @@ mod sse2 { #[inline] #[target_feature(enable = "sse2")] unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let acc = acc.as_mut_ptr().cast::<__m128i>(); + let stripe = stripe.as_ptr().cast::<__m128i>(); + let secret = secret.as_ptr().cast::<__m128i>(); + for i in 0..4 { // todo: align the accumulator and avoid the unaligned load and store - let mut acc_0 = _mm_loadu_si128(acc.as_mut_ptr().cast::().add(2 * i).cast()); - let stripe_0 = _mm_loadu_si128(stripe.as_ptr().cast::().add(2 * i).cast()); - let secret_0 = _mm_loadu_si128(secret.as_ptr().cast::().add(2 * i).cast()); + let mut acc_0 = _mm_loadu_si128(acc.add(i)); + let stripe_0 = _mm_loadu_si128(stripe.add(i)); + let secret_0 = _mm_loadu_si128(secret.add(i)); // let value[i] = stripe[i] ^ secret[i]; let value_0 = _mm_xor_si128(stripe_0, secret_0); @@ -773,16 +781,16 @@ mod sse2 { // acc[i] += stripe_swap[i] acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); - // value_swap[i] = swap_32_bit_pieces_in_64_bit_elements(value[i]) - let value_swap_0 = _mm_shuffle_epi32::<0b10_11_00_01>(value_0); + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm_srli_epi64::<32>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_swap[i]) - let product_0 = _mm_mul_epu32(value_0, value_swap_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm_mul_epu32(value_0, value_shift_0); // acc[i] += product[i] acc_0 = _mm_add_epi64(acc_0, product_0); - _mm_storeu_si128(acc.as_mut_ptr().cast::().add(2 * i).cast(), acc_0); + _mm_storeu_si128(acc.add(i), acc_0); } } } From 3ff0716fe6f77acbac403419f8cda6b656d465fa Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 16:49:44 -0400 Subject: [PATCH 089/166] Oops this is aarch64 only --- xx_hash-sys/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 
45bfb08e5..14d132aa5 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -284,6 +284,7 @@ pub mod scalar { // ---------- +#[cfg(target_arch = "aarch64")] pub mod neon { use super::*; From e5f17792f65736b41afc296142999442483fdcf5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 17:15:09 -0400 Subject: [PATCH 090/166] flag no go on msvc --- xx_hash-sys/build.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/xx_hash-sys/build.rs b/xx_hash-sys/build.rs index 73056b693..d00bdc132 100644 --- a/xx_hash-sys/build.rs +++ b/xx_hash-sys/build.rs @@ -32,8 +32,16 @@ fn main() { Some(Arch::X86_64) => { let mut avx2_build = build.clone(); + + // TODO: check for msvc, not "windows" + if cfg!(target_os = "windows") { + // This seems to make the code slower + // avx2_build.flag("/arch:AVX2"); + } else { + avx2_build.flag("-march=x86-64-v3"); + } + avx2_build - .flag("-march=x86-64-v3") .define("XXH_VECTOR", "XXH_AVX2") .define("XXH_NAMESPACE", "avx2_") .compile("xxhash_avx2"); From 3b812a5b47df5814bb815096df20dfe761f0a38c Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 24 Jul 2024 20:20:16 -0400 Subject: [PATCH 091/166] Add cfg flags to select the implementation --- Cargo.toml | 13 ++++++++++--- README.md | 2 ++ compare/Cargo.toml | 5 ----- src/xxhash3_64.rs | 36 +++++++++++++++++++++++------------- 4 files changed, 35 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 784008721..53b8ee514 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "simd", "std"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] random = ["dep:rand"] @@ -22,10 +22,17 @@ xxhash32 = [] xxhash64 = [] xxhash3_64 = [] -simd = [] - std = [] +[lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = [ + 'cfg(_internal_xxhash3_force_scalar)', + 'cfg(_internal_xxhash3_force_neon)', + 
'cfg(_internal_xxhash3_force_sse2)', + 'cfg(_internal_xxhash3_force_avx2)', +] + [dependencies] rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } diff --git a/README.md b/README.md index e3e37f9c9..ab987c651 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ cargo test -p comparison # proptests cargo miri test # unsafe cargo miri test --target s390x-unknown-linux-gnu # big-endian +cargo -Z profile-rustflags --config 'profile.test.package.xx-renu.rustflags=["--cfg=_internal_xxhash3_force_scalar"]' test + minimal versions no-features all-features diff --git a/compare/Cargo.toml b/compare/Cargo.toml index db575eb92..bbb8d0a59 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -3,11 +3,6 @@ name = "compare" version = "0.1.0" edition = "2021" -[features] -default = ["simd"] - -simd = ["xx-renu/simd"] - [[bench]] name = "benchmark" harness = false diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index f3642a6a7..f52c5d082 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -251,6 +251,19 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { + #[cfg(_internal_xxhash3_force_scalar)] + return scalar::oneshot(secret, input); + + #[cfg(_internal_xxhash3_force_neon)] + unsafe { return neon::oneshot_unchecked(secret, input) }; + + #[cfg(_internal_xxhash3_force_sse2)] + unsafe { return sse2::oneshot_unchecked(secret, input) }; + + #[cfg(_internal_xxhash3_force_avx2)] + unsafe { return avx2::oneshot_unchecked(secret, input) }; + + #[allow(unreachable_code)] detect::oneshot(secret, input) } @@ -451,7 +464,7 @@ mod scalar { } } -#[cfg(all(target_arch = "aarch64", feature = "simd"))] +#[cfg(target_arch = "aarch64")] mod neon { use core::arch::aarch64::*; @@ -637,7 +650,6 @@ mod neon { mod aarch64_detect { #[inline] pub fn oneshot(secret: &[u8], input: 
&[u8]) -> u64 { - #[cfg(feature = "simd")] if std::arch::is_aarch64_feature_detected!("neon") { return unsafe { super::neon::oneshot_unchecked(secret, input) }; } @@ -646,7 +658,7 @@ mod aarch64_detect { } } -#[cfg(all(target_arch = "x86_64", feature = "simd"))] +#[cfg(target_arch = "x86_64")] mod avx2 { use core::arch::x86_64::*; @@ -721,6 +733,7 @@ mod avx2 { } } +#[cfg(target_arch = "x86_64")] mod sse2 { use core::arch::x86_64::*; @@ -799,15 +812,12 @@ mod sse2 { mod x86_64_detect { #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - #[cfg(feature = "simd")] - { - if std::arch::is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::oneshot_unchecked(secret, input) }; - } + if std::arch::is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::oneshot_unchecked(secret, input) }; + } - if std::arch::is_x86_feature_detected!("sse2") { - return unsafe { super::sse2::oneshot_unchecked(secret, input) }; - } + if std::arch::is_x86_feature_detected!("sse2") { + return unsafe { super::sse2::oneshot_unchecked(secret, input) }; } super::scalar::oneshot(secret, input) @@ -884,7 +894,7 @@ impl Halves for u128 { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - #[cfg(all(target_arch = "aarch64", feature = "simd"))] + #[cfg(target_arch = "aarch64")] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); @@ -899,7 +909,7 @@ impl SliceBackport for [T] { (head, tail) } - #[cfg(all(target_arch = "aarch64", feature = "simd"))] + #[cfg(target_arch = "aarch64")] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; From 113e848dcb5d5d76f474a1c0d24f51f83140a713 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 25 Jul 2024 08:34:01 -0400 Subject: [PATCH 092/166] Add benchmark for small data --- compare/benches/benchmark.rs | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git 
a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index f9b765d9b..b85f35547 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -169,6 +169,34 @@ fn half_sizes(max: usize) -> impl Iterator { mod xxhash3_64 { use super::*; + fn tiny_data(c: &mut Criterion) { + let (seed, data) = gen_data(TINY_DATA_SIZE); + let mut g = c.benchmark_group("xxhash3_64/tiny_data"); + + for size in 0..=data.len() { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("impl-c/fn-oneshot/size-{size:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = c::XxHash3_64::oneshot_with_seed(seed, data); + black_box(hash); + }) + }); + + let id = format!("impl-rust/fn-oneshot/size-{size:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let hash = rust::XxHash3_64::oneshot_with_seed(seed, data); + black_box(hash); + }) + }); + } + + g.finish(); + } + fn oneshot(c: &mut Criterion) { let (seed, data) = gen_data(BIG_DATA_SIZE); let mut g = c.benchmark_group("xxhash3_64/oneshot"); @@ -217,7 +245,7 @@ mod xxhash3_64 { g.finish(); } - criterion_group!(benches, oneshot); + criterion_group!(benches, tiny_data, oneshot); } criterion_group!(benches, tiny_data, oneshot, streaming); From cc1fc5a5466cd8ed440c5adcb6e9d3ab085a3485 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 25 Jul 2024 09:51:18 -0400 Subject: [PATCH 093/166] Force inling of the xxhash3_64 implementation This allows the code to be specialized for hard-coded secrets --- src/xxhash3_64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index f52c5d082..936c746c9 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -82,7 +82,7 @@ fn derive_secret(seed: u64) -> [u8; 192] { derived_secret } -#[inline] +#[inline(always)] fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { match input.len() { 0 => impl_0_bytes(secret, seed), From 3ed151602cb6628f4368a1b371a18b020322bb77 
Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 25 Jul 2024 11:20:59 -0400 Subject: [PATCH 094/166] format --- src/xxhash3_64.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 936c746c9..8e42030b9 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -255,13 +255,19 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { return scalar::oneshot(secret, input); #[cfg(_internal_xxhash3_force_neon)] - unsafe { return neon::oneshot_unchecked(secret, input) }; + unsafe { + return neon::oneshot_unchecked(secret, input); + }; #[cfg(_internal_xxhash3_force_sse2)] - unsafe { return sse2::oneshot_unchecked(secret, input) }; + unsafe { + return sse2::oneshot_unchecked(secret, input); + }; #[cfg(_internal_xxhash3_force_avx2)] - unsafe { return avx2::oneshot_unchecked(secret, input) }; + unsafe { + return avx2::oneshot_unchecked(secret, input); + }; #[allow(unreachable_code)] detect::oneshot(secret, input) From f0b2cc27b1f65faad4eff4805e1722c8696155de Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 25 Jul 2024 14:04:05 -0400 Subject: [PATCH 095/166] manual unroll --- src/xxhash3_64.rs | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 8e42030b9..8509043cd 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -184,22 +184,49 @@ fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let num_rounds = ((input.len() - 1) >> 5) + 1; - + let (secret, _) = secret.bp_as_chunks(); + let (secret, _) = secret.bp_as_chunks::<2>(); let (fwd, _) = input.bp_as_chunks(); let (_, bwd) = input.bp_as_rchunks(); - let fwd = fwd.iter(); - let bwd = bwd.iter().rev(); + let q = bwd.len(); + + if input.len() > 32 { + if input.len() > 64 { + if 
input.len() > 96 { + acc = acc.wrapping_add(mix_step_ff(&fwd[3], &secret[3][0], seed)); + acc = acc.wrapping_add(mix_step_ff(&bwd[q - 4], &secret[3][1], seed)); + } + + acc = acc.wrapping_add(mix_step_ff(&fwd[2], &secret[2][0], seed)); + acc = acc.wrapping_add(mix_step_ff(&bwd[q - 3], &secret[2][1], seed)); + } - for (i, (fwd_chunk, bwd_chunk)) in fwd.zip(bwd).enumerate().take(num_rounds) { - acc = acc.wrapping_add(mix_step(fwd_chunk, secret, i * 32, seed)); - acc = acc.wrapping_add(mix_step(bwd_chunk, secret, i * 32 + 16, seed)); + acc = acc.wrapping_add(mix_step_ff(&fwd[1], &secret[1][0], seed)); + acc = acc.wrapping_add(mix_step_ff(&bwd[q - 2], &secret[1][1], seed)); } + acc = acc.wrapping_add(mix_step_ff(&fwd[0], &secret[0][0], seed)); + acc = acc.wrapping_add(mix_step_ff(&bwd[q - 1], &secret[0][1], seed)); + avalanche(acc) } +#[inline] +fn mix_step_ff(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { + let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; + let secret_words = unsafe { secret.as_ptr().cast::<[u64; 2]>().read_unaligned() }; + + let mul_result = { + let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); + let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); + + a.wrapping_mul(b) + }; + + mul_result.lower_half() ^ mul_result.upper_half() +} + #[inline] fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); From 1a73b5943634faafe685e5816fac8992ce8738a2 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 26 Jul 2024 07:58:14 -0400 Subject: [PATCH 096/166] retarget benches --- compare/benches/benchmark.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index b85f35547..26587bb3e 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -170,14 +170,20 @@ mod xxhash3_64 { use 
super::*; fn tiny_data(c: &mut Criterion) { - let (seed, data) = gen_data(TINY_DATA_SIZE); + let (seed, data) = gen_data(240); let mut g = c.benchmark_group("xxhash3_64/tiny_data"); - for size in 0..=data.len() { + // let categories = 0..=data.len(); + + // Visual inspection of all the data points showed these as + // examples of thier nearby neighbors. + let categories = [0, 2, 9, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230]; + + for size in categories { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("impl-c/fn-oneshot/size-{size:02}"); + let id = format!("impl-c/fn-oneshot/size-{size:03}"); g.bench_function(id, |b| { b.iter(|| { let hash = c::XxHash3_64::oneshot_with_seed(seed, data); @@ -185,7 +191,7 @@ mod xxhash3_64 { }) }); - let id = format!("impl-rust/fn-oneshot/size-{size:02}"); + let id = format!("impl-rust/fn-oneshot/size-{size:03}"); g.bench_function(id, |b| { b.iter(|| { let hash = rust::XxHash3_64::oneshot_with_seed(seed, data); From 6c8e6de199637fc666c0b15cdc28ef4323241d8e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 26 Jul 2024 07:58:36 -0400 Subject: [PATCH 097/166] extra --- src/xxhash3_64.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 8509043cd..8ec726d34 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -477,6 +477,7 @@ mod scalar { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 // https://github.com/llvm/llvm-project/issues/98481 + // TODO: this is probably if NEON, yeah? #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use core::arch::asm; From 5f4a1d88a9dab8dea6a81291e5553fb9d232fed5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 26 Jul 2024 11:53:32 -0400 Subject: [PATCH 098/166] reorder match Since more data takes longer to hash, prioritize it to even out the speeds. 
--- src/xxhash3_64.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 8ec726d34..129c2cf50 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -85,19 +85,19 @@ fn derive_secret(seed: u64) -> [u8; 192] { #[inline(always)] fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { match input.len() { - 0 => impl_0_bytes(secret, seed), + 241.. => impl_241_plus_bytes(secret, input), - 1..=3 => impl_1_to_3_bytes(secret, seed, input), + 129..=240 => impl_129_to_240_bytes(secret, seed, input), - 4..=8 => impl_4_to_8_bytes(secret, seed, input), + 17..=128 => impl_17_to_128_bytes(secret, seed, input), 9..=16 => impl_9_to_16_bytes(secret, seed, input), - 17..=128 => impl_17_to_128_bytes(secret, seed, input), + 4..=8 => impl_4_to_8_bytes(secret, seed, input), - 129..=240 => impl_129_to_240_bytes(secret, seed, input), + 1..=3 => impl_1_to_3_bytes(secret, seed, input), - _ => impl_241_plus_bytes(secret, input), + 0 => impl_0_bytes(secret, seed), } } From 90060cc264663a17c425592ca9eac9436c256cb6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 26 Jul 2024 11:54:26 -0400 Subject: [PATCH 099/166] use array mix_step everywhere --- src/xxhash3_64.rs | 60 ++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 129c2cf50..3095b4627 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -194,71 +194,57 @@ fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { if input.len() > 32 { if input.len() > 64 { if input.len() > 96 { - acc = acc.wrapping_add(mix_step_ff(&fwd[3], &secret[3][0], seed)); - acc = acc.wrapping_add(mix_step_ff(&bwd[q - 4], &secret[3][1], seed)); + acc = acc.wrapping_add(mix_step(&fwd[3], &secret[3][0], seed)); + acc = acc.wrapping_add(mix_step(&bwd[q - 4], &secret[3][1], seed)); } - acc = acc.wrapping_add(mix_step_ff(&fwd[2], &secret[2][0], seed)); - acc = 
acc.wrapping_add(mix_step_ff(&bwd[q - 3], &secret[2][1], seed)); + acc = acc.wrapping_add(mix_step(&fwd[2], &secret[2][0], seed)); + acc = acc.wrapping_add(mix_step(&bwd[q - 3], &secret[2][1], seed)); } - acc = acc.wrapping_add(mix_step_ff(&fwd[1], &secret[1][0], seed)); - acc = acc.wrapping_add(mix_step_ff(&bwd[q - 2], &secret[1][1], seed)); + acc = acc.wrapping_add(mix_step(&fwd[1], &secret[1][0], seed)); + acc = acc.wrapping_add(mix_step(&bwd[q - 2], &secret[1][1], seed)); } - acc = acc.wrapping_add(mix_step_ff(&fwd[0], &secret[0][0], seed)); - acc = acc.wrapping_add(mix_step_ff(&bwd[q - 1], &secret[0][1], seed)); + acc = acc.wrapping_add(mix_step(&fwd[0], &secret[0][0], seed)); + acc = acc.wrapping_add(mix_step(&bwd[q - 1], &secret[0][1], seed)); avalanche(acc) } -#[inline] -fn mix_step_ff(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { - let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; - let secret_words = unsafe { secret.as_ptr().cast::<[u64; 2]>().read_unaligned() }; - - let mul_result = { - let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); - let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); - - a.wrapping_mul(b) - }; - - mul_result.lower_half() ^ mul_result.upper_half() -} - #[inline] fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let (head, _tail) = input.bp_as_chunks(); + let (head, _) = input.bp_as_chunks(); + let last_chunk = input.last_chunk().unwrap(); let mut head = head.iter(); - for (i, chunk) in head.by_ref().take(8).enumerate() { - acc = acc.wrapping_add(mix_step(chunk, secret, i * 16, seed)); + let (ss, _) = secret.bp_as_chunks(); + let (ss2, _) = secret[3..].bp_as_chunks(); + + let qq = head.by_ref().zip(ss); + + for (chunk, s) in qq.take(8) { + acc = acc.wrapping_add(mix_step(chunk, s, seed)); } acc = avalanche(acc); - for (i, chunk) in head.enumerate() { - acc = 
acc.wrapping_add(mix_step(chunk, secret, i * 16 + 3, seed)); + for (chunk, s) in head.zip(ss2) { + acc = acc.wrapping_add(mix_step(chunk, s, seed)); } - acc = acc.wrapping_add(mix_step(input.last_chunk().unwrap(), secret, 119, seed)); + let ss3 = &secret[119..].first_chunk().unwrap(); + acc = acc.wrapping_add(mix_step(last_chunk, ss3, seed)); avalanche(acc) } #[inline] -fn mix_step(data: &[u8; 16], secret: &[u8], secret_offset: usize, seed: u64) -> u64 { +fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; - let secret_words = unsafe { - secret - .as_ptr() - .add(secret_offset) - .cast::<[u64; 2]>() - .read_unaligned() - }; + let secret_words = unsafe { secret.as_ptr().cast::<[u64; 2]>().read_unaligned() }; let mul_result = { let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); From 88a738e86e4d2b7df17c64c48b35a6fe83444248 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 12 Aug 2024 13:18:47 -0400 Subject: [PATCH 100/166] Add a streaming implementation for XxHash3_64 --- Cargo.toml | 5 +- compare/benches/benchmark.rs | 66 +++- compare/src/lib.rs | 130 +++---- src/lib.rs | 5 +- src/xxhash3_64.rs | 727 ++++++++++++++++++++++++++++++++--- xx_hash-sys/src/lib.rs | 49 ++- 6 files changed, 846 insertions(+), 136 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 53b8ee514..5183cadfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "alloc", "std"] random = ["dep:rand"] @@ -22,7 +22,8 @@ xxhash32 = [] xxhash64 = [] xxhash3_64 = [] -std = [] +alloc = [] +std = ["alloc"] [lints.rust.unexpected_cfgs] level = "warn" diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 26587bb3e..b009f0e72 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ 
-251,7 +251,71 @@ mod xxhash3_64 { g.finish(); } - criterion_group!(benches, tiny_data, oneshot); + fn streaming(c: &mut Criterion) { + let mut g = c.benchmark_group("xxhash3_64/streaming_many_chunks"); + + for size in half_sizes(BIG_DATA_SIZE).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { + for n_chunks in half_sizes(MAX_CHUNKS) { + let (seed, chunks) = gen_chunked_data(size, n_chunks); + g.throughput(Throughput::Bytes(size as _)); + + let id = format!("impl-c/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + + let id = format!("impl-c-scalar/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::scalar::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::neon::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + } + + let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = rust::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + } + } + + g.finish(); + } + + criterion_group!(benches, tiny_data, oneshot, streaming); } criterion_group!(benches, tiny_data, oneshot, streaming); diff --git a/compare/src/lib.rs b/compare/src/lib.rs index b85b1c167..ddda2fea2 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -211,20 +211,20 @@ mod xxhash3_64 { use super::*; proptest! 
{ - // #[test] - // fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { - // oneshot_same_as_one_chunk_impl(seed, &data)?; - // } + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } - // #[test] - // fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - // oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - // } + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } - // #[test] - // fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { - // oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; - // } + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } #[test] fn oneshot(seed: u64, data: Vec) { @@ -241,46 +241,46 @@ mod xxhash3_64 { oneshot_with_secret_impl(&secret, &data)?; } - // #[test] - // fn streaming_one_chunk(seed: u64, data: Vec) { - // streaming_one_chunk_impl(seed, &data)?; - // } + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } - // #[test] - // fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - // streaming_one_chunk_impl(seed, &data[offset..])?; - // } + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } } - // fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - // let oneshot = rust::XxHash64::oneshot(seed, data); - // let one_chunk = { - // let mut hasher = rust::XxHash64::with_seed(seed); - // hasher.write(data); - // hasher.finish() - // }; - - // prop_assert_eq!(oneshot, one_chunk); - // Ok(()) - // } - - // fn oneshot_same_as_many_chunks_impl( - // seed: u64, - // data: &[u8], 
- // chunks: &[Vec], - // ) -> TestCaseResult { - // let oneshot = rust::XxHash64::oneshot(seed, data); - // let many_chunks = { - // let mut hasher = rust::XxHash64::with_seed(seed); - // for chunk in chunks { - // hasher.write(chunk); - // } - // hasher.finish() - // }; - - // prop_assert_eq!(oneshot, many_chunks); - // Ok(()) - // } + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(oneshot, one_chunk); + Ok(()) + } + + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) + } fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = c::XxHash3_64::oneshot_with_seed(seed, data); @@ -298,22 +298,22 @@ mod xxhash3_64 { Ok(()) } - // fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - // let native = { - // let mut hasher = c::XxHash64::with_seed(seed); - // hasher.write(data); - // hasher.finish() - // }; - - // let rust = { - // let mut hasher = rust::XxHash64::with_seed(seed); - // hasher.write(data); - // hasher.finish() - // }; - - // prop_assert_eq!(native, rust); - // Ok(()) - // } + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } } fn vec_and_index() -> impl Strategy, usize)> { diff 
--git a/src/lib.rs b/src/lib.rs index 2a6b24eb7..ad243166d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,7 +69,10 @@ #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] -#[cfg(any(doc, test))] +#[cfg(feature = "alloc")] +extern crate alloc; + +#[cfg(any(feature = "std", doc, test))] extern crate std; #[cfg(feature = "xxhash32")] diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 3095b4627..c0e45499a 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -use core::{mem, slice}; +use core::{hash, mem, slice}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -32,9 +32,15 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; +const DEFAULT_BUFFER_LEN: usize = 1024; + pub const SECRET_MINIMUM_LENGTH: usize = 136; -pub struct XxHash3_64; +pub struct XxHash3_64 { + #[cfg(feature = "alloc")] + inner: with_alloc::AllocRawHasher, + _private: (), +} impl XxHash3_64 { #[inline(never)] @@ -44,13 +50,15 @@ impl XxHash3_64 { #[inline(never)] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { - let secret = if seed != 0 && input.len() > 240 { - &derive_secret(seed) - } else { - &DEFAULT_SECRET - }; + let mut secret = DEFAULT_SECRET; - impl_oneshot(secret, seed, input) + // We know that the secret will only be used if we have more + // than 240 bytes, so don't waste time computing it otherwise. + if input.len() > 240 { + derive_secret(seed, &mut secret); + } + + impl_oneshot(&secret, seed, input) } #[inline(never)] @@ -60,10 +68,354 @@ impl XxHash3_64 { } } +/// Holds secret and temporary buffers that are ensured to be +/// appropriately sized. 
+pub struct SecretBuffer { + seed: u64, + secret: S, + buffer: B, +} + +impl SecretBuffer +where + S: AsRef<[u8]>, + B: AsRef<[u8]> + AsMut<[u8]>, +{ + /// Takes the seed, secret, and buffer and performs no + /// modifications to them, only validating that the sizes are + /// appropriate. + pub fn new(seed: u64, secret: S, buffer: B) -> Result { + let this = Self { + seed, + secret, + buffer, + }; + + if this.is_valid() { + Ok(this) + } else { + Err(this.decompose()) + } + } + + fn is_valid(&self) -> bool { + let secret = self.secret.as_ref(); + + assert!(secret.len() >= SECRET_MINIMUM_LENGTH); // TODO: return result + + let required_buffer_len = block_size(secret); + let buffer_len = self.buffer.as_ref().len(); + + required_buffer_len == buffer_len + } + + /// Returns the secret and buffer values. + pub fn decompose(self) -> (S, B) { + (self.secret, self.buffer) + } +} + +impl SecretBuffer<&'static [u8; 192], [u8; 1024]> { + /// Use the default seed and secret values while allocating nothing. + /// + /// Note that this type may take up a surprising amount of stack space. 
+ #[inline] + pub const fn default() -> Self { + SecretBuffer { + seed: DEFAULT_SEED, + secret: &DEFAULT_SECRET, + buffer: [0; DEFAULT_BUFFER_LEN], + } + } +} + +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +mod with_alloc { + use ::alloc::{boxed::Box, vec}; + + use super::*; + + impl XxHash3_64 { + pub fn new() -> Self { + Self { + inner: RawHasher::allocate_default(), + _private: (), + } + } + + pub fn with_seed(seed: u64) -> Self { + Self { + inner: RawHasher::allocate_with_seed(seed), + _private: (), + } + } + + pub fn with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { + Self { + inner: RawHasher::allocate_with_seed_and_secret(seed, secret), + _private: (), + } + } + } + + impl Default for XxHash3_64 { + fn default() -> Self { + Self::new() + } + } + + impl hash::Hasher for XxHash3_64 { + #[inline] + fn write(&mut self, input: &[u8]) { + self.inner.write(input) + } + + #[inline] + fn finish(&self) -> u64 { + self.inner.finish() + } + } + + type AllocSecretBuffer = SecretBuffer, Box<[u8]>>; + + impl AllocSecretBuffer { + /// Allocates the secret and temporary buffers and fills them + /// with the default seed and secret values. + pub fn allocate_default() -> Self { + Self { + seed: DEFAULT_SEED, + secret: DEFAULT_SECRET.to_vec().into(), + buffer: vec![0; DEFAULT_BUFFER_LEN].into(), + } + } + + /// Allocates the secret and temporary buffers and uses the + /// provided seed to construct the secret value. + pub fn allocate_with_seed(seed: u64) -> Self { + let mut secret = DEFAULT_SECRET; + derive_secret(seed, &mut secret); + + Self { + seed, + secret: secret.to_vec().into(), + buffer: vec![0; DEFAULT_BUFFER_LEN].into(), + } + } + + /// Allocates the temporary buffer and uses the provided seed + /// and secret buffer. 
+ pub fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { + let secret = secret.into(); + assert!(secret.len() > SECRET_MINIMUM_LENGTH); // todo result + let block_size = block_size(&secret); + + Self { + seed, + secret, + buffer: vec![0; block_size].into(), + } + } + } + + pub type AllocRawHasher = RawHasher, Box<[u8]>>; + + impl AllocRawHasher { + fn allocate_default() -> Self { + Self::new(SecretBuffer::allocate_default()) + } + + fn allocate_with_seed(seed: u64) -> Self { + Self::new(SecretBuffer::allocate_with_seed(seed)) + } + + fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { + Self::new(SecretBuffer::allocate_with_seed_and_secret(seed, secret)) + } + } +} + +impl SecretBuffer +where + S: AsRef<[u8]> + AsMut<[u8]>, + B: AsRef<[u8]> + AsMut<[u8]>, +{ + /// Fills the secret buffer with a secret derived from the seed + /// and the default secret. + pub fn with_seed(seed: u64, mut secret: S, buffer: B) -> Result { + let secret_slice: &mut [u8; 192] = match secret.as_mut().try_into() { + Ok(s) => s, + Err(_) => return Err((secret, buffer)), + }; + + *secret_slice = DEFAULT_SECRET; + derive_secret(seed, secret_slice); + + Self::new(seed, secret, buffer) + } +} + +/// A lower-level interface for computing a hash from streaming data. +/// +/// The algorithm requires two reasonably large pieces of data: the +/// secret and a temporary buffer. [`XxHash3_64`][] makes one concrete +/// implementation decision that uses dynamic memory allocation, but +/// specialized usages may desire more flexibility. This type, +/// combined with [`SecretBuffer`][], offer that flexibility at the +/// cost of a generic type. 
+pub struct RawHasher { + secret_buffer: SecretBuffer, + buffer_len: usize, + accumulator: [u64; 8], + total_bytes: usize, +} + +impl RawHasher { + pub fn new(secret_buffer: SecretBuffer) -> Self { + Self { + secret_buffer, + buffer_len: 0, + accumulator: INITIAL_ACCUMULATORS, + total_bytes: 0, + } + } +} + +impl hash::Hasher for RawHasher +where + S: AsRef<[u8]>, + B: AsRef<[u8]> + AsMut<[u8]>, +{ + #[inline] + fn write(&mut self, mut input: &[u8]) { + if input.is_empty() { + return; + } + + let Self { + secret_buffer, + buffer_len, + accumulator, + total_bytes, + } = self; + let SecretBuffer { + seed: _, + secret, + buffer, + } = secret_buffer; + let secret = secret.as_ref(); + let buffer = buffer.as_mut(); + let input_len = input.len(); + + // Short-circuit if the buffer is empty and we have one or + // more full buffers-worth on the input. + if buffer.is_empty() { + let (blocks, remainder) = unsafe { chunks_and_last(input, buffer.len()) }; + detect::rounds(accumulator, blocks, secret); + input = remainder; + } + + while !input.is_empty() { + let remaining = &mut buffer[*buffer_len..]; + let n_to_copy = usize::min(remaining.len(), input.len()); + + let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); + let (input_head, input_tail) = input.split_at(n_to_copy); + + remaining_head.copy_from_slice(input_head); + *buffer_len += n_to_copy; + + // We have not filled the whole buffer, no need to + // process it now + if !remaining_tail.is_empty() { + break; + } + + // We filled the buffer, but we don't know we have + // more data so we have to leave it in case it is the + // last full block. + if input_tail.is_empty() { + break; + } + + // We have a full buffer *and* we know there's more + // data after the buffer, so we can process this as a + // full block. 
+ detect::rounds(accumulator, [&*buffer], secret); + *buffer_len = 0; + + input = input_tail; + } + + *total_bytes += input_len; + } + + #[inline] + fn finish(&self) -> u64 { + let Self { + ref secret_buffer, + buffer_len, + accumulator, + total_bytes, + } = *self; + let SecretBuffer { + seed, + ref secret, + ref buffer, + } = *secret_buffer; + + let secret = secret.as_ref(); + let buffer = buffer.as_ref(); + + let input = &buffer[..buffer_len]; + + match total_bytes { + 241.. => { + let mut temp = [0; 64]; + + let last_stripe = match input.last_chunk() { + Some(chunk) => chunk, + None => { + let n_to_reuse = 64 - input.len(); + let to_reuse = buffer.len() - n_to_reuse; + + let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); + temp_head.copy_from_slice(&buffer[to_reuse..]); + temp_tail.copy_from_slice(input); + + &temp + } + }; + + detect::finalize(accumulator, input, last_stripe, secret, total_bytes) + } + + 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), + + 17..=128 => impl_17_to_128_bytes(&DEFAULT_SECRET, seed, input), + + 9..=16 => impl_9_to_16_bytes(&DEFAULT_SECRET, seed, input), + + 4..=8 => impl_4_to_8_bytes(&DEFAULT_SECRET, seed, input), + + 1..=3 => impl_1_to_3_bytes(&DEFAULT_SECRET, seed, input), + + 0 => impl_0_bytes(&DEFAULT_SECRET, seed), + } + } +} + +/// # Correctness +/// +/// This function assumes that the incoming buffer has been populated +/// with the default secret. 
#[inline] -fn derive_secret(seed: u64) -> [u8; 192] { - let mut derived_secret = DEFAULT_SECRET; - let base = derived_secret.as_mut_ptr().cast::(); +fn derive_secret(seed: u64, secret: &mut [u8; 192]) { + if seed == DEFAULT_SEED { + return; + } + + let base = secret.as_mut_ptr().cast::(); for i in 0..12 { let a_p = unsafe { base.add(i * 2) }; @@ -78,8 +430,6 @@ fn derive_secret(seed: u64) -> [u8; 192] { unsafe { a_p.write_unaligned(a) }; unsafe { b_p.write_unaligned(b) }; } - - derived_secret } #[inline(always)] @@ -286,6 +636,11 @@ fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { detect::oneshot(secret, input) } +fn block_size(secret: &[u8]) -> usize { + let stripes_per_block = (secret.len() - 64) / 8; + 64 * stripes_per_block +} + struct Algorithm(V); impl Algorithm { @@ -296,42 +651,37 @@ impl Algorithm { assert!(secret.len() >= SECRET_MINIMUM_LENGTH); assert!(input.len() >= 241); - let stripes_per_block = (secret.len() - 64) / 8; - let block_size = 64 * stripes_per_block; + let block_size = block_size(secret); - let mut blocks = input.chunks_exact(block_size); - let last_block = if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. 
- unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() - }; + let (blocks, last_block) = unsafe { chunks_and_last(input, block_size) }; + + self.rounds(&mut acc, blocks, secret); + + let len = input.len(); let last_stripe: &[u8; 64] = unsafe { &*input .as_ptr() - .add(input.len()) + .add(len) .sub(mem::size_of::<[u8; 64]>()) .cast() }; + self.finalize(acc, last_block, last_stripe, secret, len) + } + + #[inline] + fn rounds<'a>( + &self, + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { for block in blocks { let (stripes, _) = block.bp_as_chunks(); - self.round(&mut acc, stripes, secret); + self.round(acc, stripes, secret); } - - self.last_round(&mut acc, last_block, last_stripe, secret); - - self.final_merge( - &mut acc, - input.len().into_u64().wrapping_mul(PRIME64_1), - secret, - 11, - ) } #[inline] @@ -350,6 +700,20 @@ impl Algorithm { } } + #[inline] + fn finalize( + &self, + mut acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + self.last_round(&mut acc, last_block, last_stripe, secret); + + self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret, 11) + } + #[inline] fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, @@ -399,6 +763,26 @@ impl Algorithm { } } +/// # Safety +/// `input` must be non-empty. +unsafe fn chunks_and_last(input: &[u8], block_size: usize) -> (slice::ChunksExact<'_, u8>, &[u8]) { + debug_assert!(!input.is_empty()); + + let mut blocks = input.chunks_exact(block_size); + + let last_block = if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. 
+ unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; + + (blocks, last_block) +} + trait Vector { fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]); @@ -415,6 +799,26 @@ mod scalar { super::Algorithm(Impl).oneshot(secret, input) } + #[inline] + pub fn rounds<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + super::Algorithm(Impl).rounds(acc, blocks, secret) + } + + #[inline] + pub fn finalize( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + super::Algorithm(Impl).finalize(acc, last_block, last_stripe, secret, len) + } + use super::{SliceBackport as _, Vector, PRIME32_1}; pub struct Impl; @@ -498,6 +902,32 @@ mod neon { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } + /// # Safety + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + pub unsafe fn rounds_unchecked<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + } + + /// # Safety + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + pub unsafe fn finalize_unchecked( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) + } + struct Impl(()); impl Impl { @@ -668,13 +1098,40 @@ mod neon { #[cfg(all(target_arch = "aarch64", feature = "std"))] mod aarch64_detect { + macro_rules! 
pick { + ($f:ident, $s:ident, $($t:tt)+) => { + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { super::neon::$f $($t)+ }; + } + + super::scalar::$s $($t)+ + + }; + } + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - if std::arch::is_aarch64_feature_detected!("neon") { - return unsafe { super::neon::oneshot_unchecked(secret, input) }; - } + pick! { oneshot_unchecked, oneshot, (secret, input) } + } - super::scalar::oneshot(secret, input) + #[inline] + pub fn rounds<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + pick! { rounds_unchecked, rounds, (acc, blocks, secret) } + } + + #[inline] + pub fn finalize( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + pick! { finalize_unchecked, finalize, (acc, last_block, last_stripe, secret, len) } } } @@ -845,16 +1302,49 @@ mod x86_64_detect { } mod detect { + macro_rules! pick { + ($e:expr) => { + #[cfg(all(target_arch = "aarch64", feature = "std"))] + { + use super::aarch64_detect::*; + return $e; + } + + #[cfg(all(target_arch = "x86_64", feature = "std"))] + { + use super::x86_64_detect::*; + return $e; + } + + use super::scalar::*; + #[allow(unreachable_code)] + $e + }; + } + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - #[cfg(all(target_arch = "aarch64", feature = "std"))] - return super::aarch64_detect::oneshot(secret, input); + pick! { oneshot(secret, input) } + } - #[cfg(all(target_arch = "x86_64", feature = "std"))] - return super::x86_64_detect::oneshot(secret, input); + #[inline] + pub fn rounds<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + pick! { rounds(acc, blocks, secret) } + } - #[allow(unreachable_code)] - super::scalar::oneshot(secret, input) + #[inline] + pub fn finalize( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + pick! 
{ finalize(acc, last_block, last_stripe, secret, len) } } } @@ -949,10 +1439,31 @@ impl SliceBackport for [T] { #[cfg(test)] mod test { - use std::array; + use std::{array, hash::Hasher}; use super::*; + #[test] + fn secret_buffer_default_is_valid() { + assert!(SecretBuffer::default().is_valid()); + } + + #[test] + fn secret_buffer_allocate_default_is_valid() { + assert!(SecretBuffer::allocate_default().is_valid()) + } + + #[test] + fn secret_buffer_allocate_with_seed_is_valid() { + assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid()) + } + + #[test] + fn secret_buffer_allocate_with_seed_and_secret_is_valid() { + let secret = [42; 1024]; + assert!(SecretBuffer::allocate_with_seed_and_secret(0xdead_beef, secret).is_valid()) + } + macro_rules! bytes { ($($n: literal),* $(,)?) => { &[$(&gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] @@ -965,14 +1476,46 @@ mod test { array::from_fn(|i| (i % 251) as u8) } + fn hash_byte_by_byte(input: &[u8]) -> u64 { + let mut hasher = XxHash3_64::new(); + for byte in input.chunks(1) { + hasher.write(byte) + } + hasher.finish() + } + + fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 { + let mut hasher = XxHash3_64::with_seed(seed); + for byte in input.chunks(1) { + hasher.write(byte) + } + hasher.finish() + } + #[test] - fn hash_empty() { + fn oneshot_empty() { let hash = XxHash3_64::oneshot(&[]); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } #[test] - fn hash_1_to_3_bytes() { + fn streaming_empty() { + let hash = hash_byte_by_byte(&[]); + assert_eq!(hash, 0x2d06_8005_38d3_94c2); + } + + #[test] + fn oneshot_1_to_3_bytes() { + test_1_to_3_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_1_to_3_bytes() { + test_1_to_3_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![1, 2, 3]; let expected = [ @@ -982,13 +1525,23 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot(input); + let 
hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_4_to_8_bytes() { + fn oneshot_4_to_8_bytes() { + test_4_to_8_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_4_to_8_bytes() { + test_4_to_8_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![4, 5, 6, 7, 8]; let expected = [ @@ -1000,13 +1553,23 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_9_to_16_bytes() { + fn oneshot_9_to_16_bytes() { + test_9_to_16_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_9_to_16_bytes() { + test_9_to_16_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16]; let expected = [ @@ -1021,13 +1584,23 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_17_to_128_bytes() { + fn oneshot_17_to_128_bytes() { + test_17_to_128_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_17_to_128_bytes() { + test_17_to_128_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64) { let lower_boundary = bytes![17, 18, 19]; let chunk_boundary = bytes![31, 32, 33]; let upper_boundary = bytes![126, 127, 128]; @@ -1053,13 +1626,23 @@ mod test { ]; for (input, expected) in inputs.zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_129_to_240_bytes() { + fn oneshot_129_to_240_bytes() { + test_129_to_240_bytes(XxHash3_64::oneshot) + } + + #[test] + fn 
streaming_129_to_240_bytes() { + test_129_to_240_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64) { let lower_boundary = bytes![129, 130, 131]; let upper_boundary = bytes![238, 239, 240]; @@ -1077,13 +1660,23 @@ mod test { ]; for (input, expected) in inputs.zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_241_plus_bytes() { + fn oneshot_241_plus_bytes() { + test_241_plus_bytes(XxHash3_64::oneshot) + } + + #[test] + fn streaming_241_plus_bytes() { + test_241_plus_bytes(hash_byte_by_byte) + } + + #[track_caller] + fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![241, 242, 243, 244, 1024, 10240]; let expected = [ @@ -1096,13 +1689,23 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot(input); + let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] - fn hash_with_seed() { + fn oneshot_with_seed() { + test_with_seed(XxHash3_64::oneshot_with_seed) + } + + #[test] + fn streaming_with_seed() { + test_with_seed(hash_byte_by_byte_with_seed) + } + + #[track_caller] + fn test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64) { let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024]; let expected = [ @@ -1117,7 +1720,7 @@ mod test { ]; for (input, expected) in inputs.iter().zip(expected) { - let hash = XxHash3_64::oneshot_with_seed(0xdead_cafe, input); + let hash = f(0xdead_cafe, input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 14d132aa5..18b4365fb 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -143,6 +143,7 @@ extern "C" { fn XXH3_createState() -> *mut XXH3_state_t; fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn XXH3_64bits_reset_withSeed(state: *mut 
XXH3_state_t, seed: XXH64_hash_t) -> XXH_errorcode; fn XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -174,7 +175,7 @@ impl XxHash3_64 { } } - pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = XXH3_createState(); XXH3_64bits_reset(state); @@ -184,6 +185,16 @@ impl XxHash3_64 { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = XXH3_createState(); + XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; assert_eq!(retval, XXH_OK); @@ -222,6 +233,10 @@ pub mod scalar { fn scalar_XXH3_createState() -> *mut XXH3_state_t; fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn scalar_XXH3_64bits_reset_withSeed( + state: *mut XXH3_state_t, + seed: XXH64_hash_t, + ) -> XXH_errorcode; fn scalar_XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -253,7 +268,7 @@ pub mod scalar { } } - pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = scalar_XXH3_createState(); scalar_XXH3_64bits_reset(state); @@ -263,6 +278,16 @@ pub mod scalar { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = scalar_XXH3_createState(); + scalar_XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; @@ -304,6 +329,10 @@ pub mod neon { fn neon_XXH3_createState() -> *mut XXH3_state_t; fn neon_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn neon_XXH3_64bits_reset_withSeed( + state: *mut XXH3_state_t, + seed: XXH64_hash_t, + ) -> XXH_errorcode; fn neon_XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -335,7 +364,7 @@ pub mod neon { } } - 
pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = neon_XXH3_createState(); neon_XXH3_64bits_reset(state); @@ -345,6 +374,16 @@ pub mod neon { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = neon_XXH3_createState(); + neon_XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { neon_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; @@ -417,7 +456,7 @@ pub mod avx2 { } } - pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = avx2_XXH3_createState(); avx2_XXH3_64bits_reset(state); @@ -497,7 +536,7 @@ pub mod sse2 { } } - pub fn with_seed() -> Self { + pub fn new() -> Self { let state = unsafe { let state = sse2_XXH3_createState(); sse2_XXH3_64bits_reset(state); From 2485882a08de8ca8b97585dbbc0ab43d67cc2927 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 13 Aug 2024 15:34:26 -0400 Subject: [PATCH 101/166] Testing on x86 --- asmasm/src/main.rs | 21 ++++++-- compare/benches/benchmark.rs | 27 ++++++++++ src/xxhash3_64.rs | 97 ++++++++++++++++++++++++++++++++---- xx_hash-sys/src/lib.rs | 28 +++++++++++ 4 files changed, 161 insertions(+), 12 deletions(-) diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index e515bcdfc..706702566 100644 --- a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,4 +1,4 @@ -use std::{hint::black_box, time::Instant}; +use std::{hash::Hasher, hint::black_box, time::Instant}; use xx_hash_sys::XxHash3_64 as C; use xx_renu::xxhash3_64::XxHash3_64; @@ -8,18 +8,33 @@ fn main() { .nth(2) .map_or(false, |a| a.eq_ignore_ascii_case("C")); let file = std::fs::read(filename).expect("read"); + let seed = 0xdead_beef; if use_c { let start = Instant::now(); - let hash = C::oneshot(&file); + let hash = do_c(seed, &file); let elapsed = start.elapsed(); black_box(hash); eprintln!("C {elapsed:?}"); } else { let start = Instant::now(); - let hash = 
XxHash3_64::oneshot(&file); + let hash = do_rust(seed, &file); let elapsed = start.elapsed(); black_box(hash); eprintln!("Rust {elapsed:?}"); } } + +#[inline(never)] +fn do_c(seed: u64, file: &[u8]) -> u64 { + let mut hasher = C::with_seed(seed); + hasher.write(file); + hasher.finish() +} + +#[inline(never)] +fn do_rust(seed: u64, file: &[u8]) -> u64 { + let mut hasher = XxHash3_64::with_seed(seed); + hasher.write(&file); + hasher.finish() +} diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index b009f0e72..52000e427 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -298,6 +298,33 @@ mod xxhash3_64 { }); } + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::avx2::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + + let id = format!("impl-c-sse2/size-{size:07}/chunks-{n_chunks:02}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::sse2::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + let hash = hasher.finish(); + black_box(hash); + }) + }); + } + let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); g.bench_function(id, |b| { b.iter(|| { diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c0e45499a..d47c62ca5 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -286,7 +286,7 @@ where S: AsRef<[u8]>, B: AsRef<[u8]> + AsMut<[u8]>, { - #[inline] + #[inline(never)] fn write(&mut self, mut input: &[u8]) { if input.is_empty() { return; @@ -350,7 +350,7 @@ where *total_bytes += input_len; } - #[inline] + #[inline(never)] fn finish(&self) -> u64 { let Self { ref secret_buffer, @@ -765,6 +765,7 @@ impl Algorithm { /// # Safety /// `input` must be non-empty. 
+#[inline] unsafe fn chunks_and_last(input: &[u8], block_size: usize) -> (slice::ChunksExact<'_, u8>, &[u8]) { debug_assert!(!input.is_empty()); @@ -1149,6 +1150,32 @@ mod avx2 { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } + /// # Safety + /// You must ensure that the CPU has the AVX2 feature + #[inline] + #[target_feature(enable = "avx2")] + pub unsafe fn rounds_unchecked<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + } + + /// # Safety + /// You must ensure that the CPU has the AVX2 feature + #[inline] + #[target_feature(enable = "avx2")] + pub unsafe fn finalize_unchecked( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) + } + pub struct Impl(super::scalar::Impl); impl Impl { @@ -1224,6 +1251,32 @@ mod sse2 { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + pub unsafe fn rounds_unchecked<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + } + + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + pub unsafe fn finalize_unchecked( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) + } + pub struct Impl(super::scalar::Impl); impl Impl { @@ -1287,17 +1340,43 @@ mod sse2 { #[cfg(all(target_arch = "x86_64", feature = "std"))] mod x86_64_detect { + macro_rules! 
pick { + ($f:ident, $s:ident, $($t:tt)+) => { + if std::arch::is_x86_feature_detected!("avx2") { + return unsafe { super::avx2::$f $($t)+ }; + } + + if std::arch::is_x86_feature_detected!("sse2") { + return unsafe { super::sse2::$f $($t)+ }; + } + + super::scalar::$s $($t)+ + }; + } + #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - if std::arch::is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::oneshot_unchecked(secret, input) }; - } + pick! { oneshot_unchecked, oneshot, (secret, input) } + } - if std::arch::is_x86_feature_detected!("sse2") { - return unsafe { super::sse2::oneshot_unchecked(secret, input) }; - } + #[inline] + pub fn rounds<'a>( + acc: &mut [u64; 8], + blocks: impl IntoIterator, + secret: &[u8], + ) { + pick! { rounds_unchecked, rounds, (acc, blocks, secret) } + } - super::scalar::oneshot(secret, input) + #[inline] + pub fn finalize( + acc: [u64; 8], + last_block: &[u8], + last_stripe: &[u8; 64], + secret: &[u8], + len: usize, + ) -> u64 { + pick! 
{ finalize_unchecked, finalize, (acc, last_block, last_stripe, secret, len) } } } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 18b4365fb..d2aae50a8 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -425,6 +425,10 @@ pub mod avx2 { fn avx2_XXH3_createState() -> *mut XXH3_state_t; fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn avx2_XXH3_64bits_reset_withSeed( + state: *mut XXH3_state_t, + seed: XXH64_hash_t, + ) -> XXH_errorcode; fn avx2_XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -466,6 +470,16 @@ pub mod avx2 { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = avx2_XXH3_createState(); + avx2_XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; @@ -505,6 +519,10 @@ pub mod sse2 { fn sse2_XXH3_createState() -> *mut XXH3_state_t; fn sse2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; + fn sse2_XXH3_64bits_reset_withSeed( + state: *mut XXH3_state_t, + seed: XXH64_hash_t, + ) -> XXH_errorcode; fn sse2_XXH3_64bits_update( state: *mut XXH3_state_t, buffer: *const libc::c_void, @@ -546,6 +564,16 @@ pub mod sse2 { Self(state) } + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = sse2_XXH3_createState(); + sse2_XXH3_64bits_reset_withSeed(state, seed); + state + }; + + Self(state) + } + pub fn write(&mut self, data: &[u8]) { let retval = unsafe { sse2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; From 79d3d996d062f052bda805ab97826178e2ff2aa5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 13 Aug 2024 19:02:08 -0400 Subject: [PATCH 102/166] Use paste to reduce duplication of C wrappers --- xx_hash-sys/Cargo.toml | 1 + xx_hash-sys/src/lib.rs | 548 ++++++++--------------------------------- 2 files changed, 104 
insertions(+), 445 deletions(-) diff --git a/xx_hash-sys/Cargo.toml b/xx_hash-sys/Cargo.toml index dd96308c0..4218caa0b 100644 --- a/xx_hash-sys/Cargo.toml +++ b/xx_hash-sys/Cargo.toml @@ -6,6 +6,7 @@ publish = false [dependencies] libc = { version = "0.2.155", default-features = false } +paste = { version = "1.0.15", default-features = false } [build-dependencies] cc = { version = "1.1.6", default-features = false } diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index d2aae50a8..0de80c556 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -127,468 +127,126 @@ pub struct XXH3_state_t { _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } -extern "C" { - fn XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn XXH3_createState() -> *mut XXH3_state_t; - fn XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn XXH3_64bits_reset_withSeed(state: *mut XXH3_state_t, seed: XXH64_hash_t) -> XXH_errorcode; - fn XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; -} - -pub struct XxHash3_64(*mut XXH3_state_t); - -impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { XXH3_64bits(data.as_ptr().cast(), data.len()) } - } +/// Constructs a wrapper around the XXH3_64bit familiy of functions as +/// we compile the library in multiple modes to performance test +/// against. +macro_rules! 
xxh3_64b_template { + () => { crate::xxh3_64b_template!(@ XXH3); }; + + ($prefix: ident) => { ::paste::paste! { crate::xxh3_64b_template!(@ [< $prefix _XXH3 >]); } }; + + (@ $prefix: ident) => { + ::paste::paste! { + extern "C" { + fn [<$prefix _64bits>](input: *const libc::c_void, length: libc::size_t) -> crate::XXH64_hash_t; + fn [<$prefix _64bits_withSeed>]( + input: *const libc::c_void, + length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH64_hash_t; + fn [<$prefix _64bits_withSecret>]( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + ) -> crate::XXH64_hash_t; + + fn [<$prefix _createState>]() -> *mut crate::XXH3_state_t; + fn [<$prefix _64bits_reset>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; + fn [<$prefix _64bits_reset_withSeed>]( + state: *mut crate::XXH3_state_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH_errorcode; + fn [<$prefix _64bits_update>]( + state: *mut crate::XXH3_state_t, + buffer: *const libc::c_void, + length: libc::size_t, + ) -> crate::XXH_errorcode; + fn [<$prefix _64bits_digest>](state: *mut crate::XXH3_state_t) -> crate::XXH64_hash_t; + fn [<$prefix _freeState>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; + } - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } + pub struct XxHash3_64(*mut crate::XXH3_state_t); + + impl XxHash3_64 { + pub fn oneshot(data: &[u8]) -> u64 { + unsafe { [<$prefix _64bits>](data.as_ptr().cast(), data.len()) } + } + + pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { + unsafe { [<$prefix _64bits_withSeed>](data.as_ptr().cast(), data.len(), seed) } + } + + pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { + unsafe { + [<$prefix _64bits_withSecret>]( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + ) + } + } + + pub fn new() -> Self { + let state = unsafe { + 
let state = [<$prefix _createState>](); + [<$prefix _64bits_reset>](state); + state + }; + + Self(state) + } + + pub fn with_seed(seed: u64) -> Self { + let state = unsafe { + let state = [<$prefix _createState>](); + [<$prefix _64bits_reset_withSeed>](state, seed); + state + }; + + Self(state) + } + + pub fn write(&mut self, data: &[u8]) { + let retval = + unsafe { [<$prefix _64bits_update>](self.0, data.as_ptr().cast(), data.len()) }; + assert_eq!(retval, crate::XXH_OK); + } + + pub fn finish(&mut self) -> u64 { + unsafe { [<$prefix _64bits_digest>](self.0) } + } + } - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) + impl Drop for XxHash3_64 { + fn drop(&mut self) { + let retval = unsafe { [<$prefix _freeState>](self.0) }; + assert_eq!(retval, crate::XXH_OK); + } + } } - } - - pub fn new() -> Self { - let state = unsafe { - let state = XXH3_createState(); - XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = XXH3_createState(); - XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = unsafe { XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { XXH3_64bits_digest(self.0) } - } + }; } +pub(crate) use xxh3_64b_template; -impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } -} - -// ---------- +xxh3_64b_template!(); pub mod scalar { - use super::*; - - extern "C" { - fn scalar_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn scalar_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn 
scalar_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn scalar_XXH3_createState() -> *mut XXH3_state_t; - fn scalar_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn scalar_XXH3_64bits_reset_withSeed( - state: *mut XXH3_state_t, - seed: XXH64_hash_t, - ) -> XXH_errorcode; - fn scalar_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn scalar_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn scalar_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; - } - - pub struct XxHash3_64(*mut XXH3_state_t); - - impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { scalar_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } - - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { scalar_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } - - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - scalar_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) - } - } - - pub fn new() -> Self { - let state = unsafe { - let state = scalar_XXH3_createState(); - scalar_XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = scalar_XXH3_createState(); - scalar_XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = - unsafe { scalar_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { scalar_XXH3_64bits_digest(self.0) } - } - } - - impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { scalar_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } - } + 
crate::xxh3_64b_template!(scalar); } -// ---------- - #[cfg(target_arch = "aarch64")] pub mod neon { - use super::*; - - extern "C" { - fn neon_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn neon_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn neon_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn neon_XXH3_createState() -> *mut XXH3_state_t; - fn neon_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn neon_XXH3_64bits_reset_withSeed( - state: *mut XXH3_state_t, - seed: XXH64_hash_t, - ) -> XXH_errorcode; - fn neon_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn neon_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn neon_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; - } - - pub struct XxHash3_64(*mut XXH3_state_t); - - impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { neon_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } - - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { neon_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } - - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - neon_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) - } - } - - pub fn new() -> Self { - let state = unsafe { - let state = neon_XXH3_createState(); - neon_XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = neon_XXH3_createState(); - neon_XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = - unsafe { 
neon_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { neon_XXH3_64bits_digest(self.0) } - } - } - - impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { neon_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } - } + crate::xxh3_64b_template!(neon); } -// ---------- - #[cfg(target_arch = "x86_64")] pub mod avx2 { - use super::*; - - extern "C" { - fn avx2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn avx2_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn avx2_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn avx2_XXH3_createState() -> *mut XXH3_state_t; - fn avx2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn avx2_XXH3_64bits_reset_withSeed( - state: *mut XXH3_state_t, - seed: XXH64_hash_t, - ) -> XXH_errorcode; - fn avx2_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn avx2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn avx2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; - } - - pub struct XxHash3_64(*mut XXH3_state_t); - - impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { avx2_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } - - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { avx2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } - - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - avx2_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) - } - } - - pub fn new() -> Self { - let state = unsafe { - let state = avx2_XXH3_createState(); - 
avx2_XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = avx2_XXH3_createState(); - avx2_XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = - unsafe { avx2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { avx2_XXH3_64bits_digest(self.0) } - } - } - - impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { avx2_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } - } + crate::xxh3_64b_template!(avx2); } #[cfg(target_arch = "x86_64")] pub mod sse2 { - use super::*; - - extern "C" { - fn sse2_XXH3_64bits(input: *const libc::c_void, length: libc::size_t) -> XXH64_hash_t; - fn sse2_XXH3_64bits_withSeed( - input: *const libc::c_void, - length: libc::size_t, - seed: XXH64_hash_t, - ) -> XXH64_hash_t; - fn sse2_XXH3_64bits_withSecret( - input: *const libc::c_void, - length: libc::size_t, - secret: *const libc::c_void, - secret_length: libc::size_t, - ) -> XXH64_hash_t; - - fn sse2_XXH3_createState() -> *mut XXH3_state_t; - fn sse2_XXH3_64bits_reset(state: *mut XXH3_state_t) -> XXH_errorcode; - fn sse2_XXH3_64bits_reset_withSeed( - state: *mut XXH3_state_t, - seed: XXH64_hash_t, - ) -> XXH_errorcode; - fn sse2_XXH3_64bits_update( - state: *mut XXH3_state_t, - buffer: *const libc::c_void, - length: libc::size_t, - ) -> XXH_errorcode; - fn sse2_XXH3_64bits_digest(state: *mut XXH3_state_t) -> XXH64_hash_t; - fn sse2_XXH3_freeState(state: *mut XXH3_state_t) -> XXH_errorcode; - } - - pub struct XxHash3_64(*mut XXH3_state_t); - - impl XxHash3_64 { - pub fn oneshot(data: &[u8]) -> u64 { - unsafe { sse2_XXH3_64bits(data.as_ptr().cast(), data.len()) } - } - - pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { - unsafe { sse2_XXH3_64bits_withSeed(data.as_ptr().cast(), data.len(), seed) } - } 
- - pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { - unsafe { - sse2_XXH3_64bits_withSecret( - data.as_ptr().cast(), - data.len(), - secret.as_ptr().cast(), - secret.len(), - ) - } - } - - pub fn new() -> Self { - let state = unsafe { - let state = sse2_XXH3_createState(); - sse2_XXH3_64bits_reset(state); - state - }; - - Self(state) - } - - pub fn with_seed(seed: u64) -> Self { - let state = unsafe { - let state = sse2_XXH3_createState(); - sse2_XXH3_64bits_reset_withSeed(state, seed); - state - }; - - Self(state) - } - - pub fn write(&mut self, data: &[u8]) { - let retval = - unsafe { sse2_XXH3_64bits_update(self.0, data.as_ptr().cast(), data.len()) }; - assert_eq!(retval, XXH_OK); - } - - pub fn finish(&mut self) -> u64 { - unsafe { sse2_XXH3_64bits_digest(self.0) } - } - } - - impl Drop for XxHash3_64 { - fn drop(&mut self) { - let retval = unsafe { sse2_XXH3_freeState(self.0) }; - assert_eq!(retval, XXH_OK); - } - } + crate::xxh3_64b_template!(sse2); } From 694b9460c6d222a7dc09f629a353c8442344ffcf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 13 Aug 2024 20:15:04 -0400 Subject: [PATCH 103/166] Use forcing cfgs for streaming functions too --- src/xxhash3_64.rs | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index d47c62ca5..0718a2b7f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -614,25 +614,6 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { - #[cfg(_internal_xxhash3_force_scalar)] - return scalar::oneshot(secret, input); - - #[cfg(_internal_xxhash3_force_neon)] - unsafe { - return neon::oneshot_unchecked(secret, input); - }; - - #[cfg(_internal_xxhash3_force_sse2)] - unsafe { - return sse2::oneshot_unchecked(secret, input); - }; - - #[cfg(_internal_xxhash3_force_avx2)] - unsafe { - return avx2::oneshot_unchecked(secret, input); - }; - - 
#[allow(unreachable_code)] detect::oneshot(secret, input) } @@ -1101,12 +1082,14 @@ mod neon { mod aarch64_detect { macro_rules! pick { ($f:ident, $s:ident, $($t:tt)+) => { + #[cfg(_internal_xxhash3_force_neon)] + return unsafe { super::neon::$f $($t)+ }; + if std::arch::is_aarch64_feature_detected!("neon") { return unsafe { super::neon::$f $($t)+ }; } super::scalar::$s $($t)+ - }; } @@ -1342,6 +1325,12 @@ mod sse2 { mod x86_64_detect { macro_rules! pick { ($f:ident, $s:ident, $($t:tt)+) => { + #[cfg(_internal_xxhash3_force_avx2)] + return unsafe { super::avx2::$f $($t)+ }; + + #[cfg(_internal_xxhash3_force_sse2)] + return unsafe { super::sse2::$f $($t)+ }; + if std::arch::is_x86_feature_detected!("avx2") { return unsafe { super::avx2::$f $($t)+ }; } @@ -1383,6 +1372,12 @@ mod x86_64_detect { mod detect { macro_rules! pick { ($e:expr) => { + #[cfg(_internal_xxhash3_force_scalar)] + { + use super::scalar::*; + return $e; + } + #[cfg(all(target_arch = "aarch64", feature = "std"))] { use super::aarch64_detect::*; @@ -1395,9 +1390,11 @@ mod detect { return $e; } - use super::scalar::*; #[allow(unreachable_code)] - $e + { + use super::scalar::*; + $e + } }; } From 9ef64a717634c8922905dd7371580575ce837d4e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 13 Aug 2024 21:10:42 -0400 Subject: [PATCH 104/166] Lift computing the secret end up a function call --- src/xxhash3_64.rs | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 0718a2b7f..e8335f45b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -667,8 +667,10 @@ impl Algorithm { #[inline] fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + let secret_end = secret.last_chunk().unwrap(); + self.round_accumulate(acc, stripes, secret); - self.0.round_scramble(acc, secret); + self.0.round_scramble(acc, secret_end); } #[inline] @@ -766,7 +768,7 @@ unsafe fn chunks_and_last(input: &[u8], 
block_size: usize) -> (slice::ChunksExac } trait Vector { - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]); + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); } @@ -774,8 +776,6 @@ trait Vector { // This module is not `cfg`-gated because it is used by some of the // SIMD implementations. mod scalar { - use core::mem; - #[inline] pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { super::Algorithm(Impl).oneshot(secret, input) @@ -807,11 +807,8 @@ mod scalar { impl Vector for Impl { #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { - let last = secret - .last_chunk::<{ mem::size_of::<[u8; 64]>() }>() - .unwrap(); - let (last, _) = last.bp_as_chunks(); + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + let (last, _) = secret_end.bp_as_chunks(); let last = last.iter().copied().map(u64::from_ne_bytes); for (acc, secret) in acc.iter_mut().zip(last) { @@ -923,8 +920,8 @@ mod neon { impl Vector for Impl { #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { - unsafe { round_scramble_neon(acc, secret) } + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + unsafe { round_scramble_neon(acc, secret_end) } } #[inline] @@ -935,9 +932,9 @@ mod neon { #[inline] #[target_feature(enable = "neon")] - unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret: &[u8]) { + unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret_end: &[u8; 64]) { unsafe { - let secret_base = secret.as_ptr().add(secret.len()).sub(64).cast::(); + let secret_base = secret_end.as_ptr().cast::(); let (acc, _) = acc.bp_as_chunks_mut::<2>(); for (i, acc) in acc.iter_mut().enumerate() { let mut accv = vld1q_u64(acc.as_ptr()); @@ -1172,9 +1169,9 @@ mod avx2 { impl Vector for Impl { #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: 
&[u8; 64]) { // The scalar implementation is autovectorized nicely enough - self.0.round_scramble(acc, secret) + self.0.round_scramble(acc, secret_end) } #[inline] @@ -1273,9 +1270,9 @@ mod sse2 { impl Vector for Impl { #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret: &[u8]) { + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { // The scalar implementation is autovectorized nicely enough - self.0.round_scramble(acc, secret) + self.0.round_scramble(acc, secret_end) } #[inline] From 6e2d4408e70e872fcccdf712e7c8b9964e73ed38 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 14 Aug 2024 11:56:11 -0400 Subject: [PATCH 105/166] checkpoint rewrite smaller buffer --- src/xxhash3_64.rs | 337 +++++++++++++++++++++++++++++----------------- 1 file changed, 215 insertions(+), 122 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index e8335f45b..6756b3078 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -32,8 +32,6 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; -const DEFAULT_BUFFER_LEN: usize = 1024; - pub const SECRET_MINIMUM_LENGTH: usize = 136; pub struct XxHash3_64 { @@ -68,27 +66,33 @@ impl XxHash3_64 { } } +const STRIPE_BYTES: usize = 64; +const BUFFERED_STRIPES: usize = 4; +const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; + +// Ensure that a full buffer always implies we are in the 241+ byte case. +const _: () = assert!(BUFFERED_BYTES > 240); + /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. -pub struct SecretBuffer { +pub struct SecretBuffer { seed: u64, secret: S, - buffer: B, + buffer: [u8; BUFFERED_BYTES], } -impl SecretBuffer +impl SecretBuffer where S: AsRef<[u8]>, - B: AsRef<[u8]> + AsMut<[u8]>, { /// Takes the seed, secret, and buffer and performs no /// modifications to them, only validating that the sizes are /// appropriate. 
- pub fn new(seed: u64, secret: S, buffer: B) -> Result { + pub fn new(seed: u64, secret: S) -> Result { let this = Self { seed, secret, - buffer, + buffer: [0; BUFFERED_BYTES], }; if this.is_valid() { @@ -101,21 +105,24 @@ where fn is_valid(&self) -> bool { let secret = self.secret.as_ref(); - assert!(secret.len() >= SECRET_MINIMUM_LENGTH); // TODO: return result + secret.len() >= SECRET_MINIMUM_LENGTH + } - let required_buffer_len = block_size(secret); - let buffer_len = self.buffer.as_ref().len(); + #[inline] + fn n_stripes(&self) -> usize { + let secret = self.secret.as_ref(); - required_buffer_len == buffer_len + // stripes_per_block + (secret.len() - 64) / 8 } /// Returns the secret and buffer values. - pub fn decompose(self) -> (S, B) { - (self.secret, self.buffer) + pub fn decompose(self) -> S { + self.secret } } -impl SecretBuffer<&'static [u8; 192], [u8; 1024]> { +impl SecretBuffer<&'static [u8; 192]> { /// Use the default seed and secret values while allocating nothing. /// /// Note that this type may take up a surprising amount of stack space. 
@@ -124,7 +131,7 @@ impl SecretBuffer<&'static [u8; 192], [u8; 1024]> { SecretBuffer { seed: DEFAULT_SEED, secret: &DEFAULT_SECRET, - buffer: [0; DEFAULT_BUFFER_LEN], + buffer: [0; BUFFERED_BYTES], } } } @@ -132,7 +139,7 @@ impl SecretBuffer<&'static [u8; 192], [u8; 1024]> { #[cfg(feature = "alloc")] #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] mod with_alloc { - use ::alloc::{boxed::Box, vec}; + use ::alloc::boxed::Box; use super::*; @@ -177,7 +184,7 @@ mod with_alloc { } } - type AllocSecretBuffer = SecretBuffer, Box<[u8]>>; + type AllocSecretBuffer = SecretBuffer>; impl AllocSecretBuffer { /// Allocates the secret and temporary buffers and fills them @@ -186,7 +193,7 @@ mod with_alloc { Self { seed: DEFAULT_SEED, secret: DEFAULT_SECRET.to_vec().into(), - buffer: vec![0; DEFAULT_BUFFER_LEN].into(), + buffer: [0; BUFFERED_BYTES], } } @@ -199,7 +206,7 @@ mod with_alloc { Self { seed, secret: secret.to_vec().into(), - buffer: vec![0; DEFAULT_BUFFER_LEN].into(), + buffer: [0; BUFFERED_BYTES], } } @@ -208,17 +215,16 @@ mod with_alloc { pub fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { let secret = secret.into(); assert!(secret.len() > SECRET_MINIMUM_LENGTH); // todo result - let block_size = block_size(&secret); Self { seed, secret, - buffer: vec![0; block_size].into(), + buffer: [0; BUFFERED_BYTES], } } } - pub type AllocRawHasher = RawHasher, Box<[u8]>>; + pub type AllocRawHasher = RawHasher>; impl AllocRawHasher { fn allocate_default() -> Self { @@ -235,56 +241,92 @@ mod with_alloc { } } -impl SecretBuffer +impl SecretBuffer where S: AsRef<[u8]> + AsMut<[u8]>, - B: AsRef<[u8]> + AsMut<[u8]>, { /// Fills the secret buffer with a secret derived from the seed /// and the default secret. 
- pub fn with_seed(seed: u64, mut secret: S, buffer: B) -> Result { + pub fn with_seed(seed: u64, mut secret: S) -> Result { let secret_slice: &mut [u8; 192] = match secret.as_mut().try_into() { Ok(s) => s, - Err(_) => return Err((secret, buffer)), + Err(_) => return Err(secret), }; *secret_slice = DEFAULT_SECRET; derive_secret(seed, secret_slice); - Self::new(seed, secret, buffer) + Self::new(seed, secret) + } +} + +#[derive(Copy, Clone)] +struct Grug { + // TODO FIXME + accumulator: [u64; 8], + current_stripe: usize, +} + +impl Grug { + fn new() -> Self { + Self { + accumulator: INITIAL_ACCUMULATORS, + current_stripe: 0, + } + } + + fn process_stripe(&mut self, stripe: &[u8; 64], n_stripes: usize, secret: &[u8]) { + let Self { + accumulator, + current_stripe, + .. + } = self; + + let secret_end = secret.last_chunk().unwrap(); + + // each stripe + let secret = unsafe { &*secret.get_unchecked(*current_stripe * 8..).as_ptr().cast() }; + detect::accumulate(accumulator, stripe, secret); + + *current_stripe += 1; + + if *current_stripe == n_stripes { + // after block's worth + detect::round_scramble(accumulator, secret_end); + *current_stripe = 0; + } } } /// A lower-level interface for computing a hash from streaming data. /// -/// The algorithm requires two reasonably large pieces of data: the -/// secret and a temporary buffer. [`XxHash3_64`][] makes one concrete -/// implementation decision that uses dynamic memory allocation, but -/// specialized usages may desire more flexibility. This type, -/// combined with [`SecretBuffer`][], offer that flexibility at the -/// cost of a generic type. -pub struct RawHasher { - secret_buffer: SecretBuffer, +/// The algorithm requires a secret which can be a reasonably large +/// piece of data. [`XxHash3_64`][] makes one concrete implementation +/// decision that uses dynamic memory allocation, but specialized +/// usages may desire more flexibility. 
This type, combined with +/// [`SecretBuffer`][], offer that flexibility at the cost of a +/// generic type. +pub struct RawHasher { + secret_buffer: SecretBuffer, buffer_len: usize, - accumulator: [u64; 8], + grug: Grug, total_bytes: usize, } -impl RawHasher { - pub fn new(secret_buffer: SecretBuffer) -> Self { +impl RawHasher { + pub fn new(secret_buffer: SecretBuffer) -> Self { Self { secret_buffer, buffer_len: 0, - accumulator: INITIAL_ACCUMULATORS, + grug: Grug::new(), total_bytes: 0, } } } -impl hash::Hasher for RawHasher +impl hash::Hasher for RawHasher where S: AsRef<[u8]>, - B: AsRef<[u8]> + AsMut<[u8]>, { #[inline(never)] fn write(&mut self, mut input: &[u8]) { @@ -295,27 +337,20 @@ where let Self { secret_buffer, buffer_len, - accumulator, + grug, total_bytes, + .. } = self; - let SecretBuffer { - seed: _, - secret, - buffer, - } = secret_buffer; + + let n_stripes = secret_buffer.n_stripes(); + + let SecretBuffer { secret, buffer, .. } = secret_buffer; let secret = secret.as_ref(); - let buffer = buffer.as_mut(); - let input_len = input.len(); - - // Short-circuit if the buffer is empty and we have one or - // more full buffers-worth on the input. 
- if buffer.is_empty() { - let (blocks, remainder) = unsafe { chunks_and_last(input, buffer.len()) }; - detect::rounds(accumulator, blocks, secret); - input = remainder; - } - while !input.is_empty() { + *total_bytes += input.len(); + + // We have some previous data saved; try to fill it up and process it first + if !buffer.is_empty() { let remaining = &mut buffer[*buffer_len..]; let n_to_copy = usize::min(remaining.len(), input.len()); @@ -325,29 +360,50 @@ where remaining_head.copy_from_slice(input_head); *buffer_len += n_to_copy; - // We have not filled the whole buffer, no need to - // process it now + input = input_tail; + + // We did not fill up the buffer if !remaining_tail.is_empty() { - break; + return; } - // We filled the buffer, but we don't know we have - // more data so we have to leave it in case it is the - // last full block. - if input_tail.is_empty() { - break; + // We don't know this isn't the last of the data + if input.is_empty() { + return; } - // We have a full buffer *and* we know there's more - // data after the buffer, so we can process this as a - // full block. - detect::rounds(accumulator, [&*buffer], secret); + let (stripes, _) = buffer.bp_as_chunks(); + for stripe in stripes { + grug.process_stripe(stripe, n_stripes, secret); + } *buffer_len = 0; + } - input = input_tail; + debug_assert!(*buffer_len == 0); + + // Process as much of the input data in-place as possible, + // while leaving at least one full stripe for the + // finalization. + if let Some(dd) = input.len().checked_sub(STRIPE_BYTES) { + let nn = dd / STRIPE_BYTES; + let nn = nn * STRIPE_BYTES; + let (aa, remainder) = input.split_at(nn); + let (stripes, _) = aa.bp_as_chunks(); + + for stripe in stripes { + grug.process_stripe(stripe, n_stripes, secret) + } + input = remainder; } - *total_bytes += input_len; + // Any remaining data has to be less than the buffer, and the + // buffer is empty so just fill up the buffer. 
+ debug_assert!(*buffer_len == 0); + debug_assert!(!input.is_empty()); + debug_assert!(input.len() < buffer.len()); + + buffer[..input.len()].copy_from_slice(input); + *buffer_len = input.len(); } #[inline(never)] @@ -355,15 +411,15 @@ where let Self { ref secret_buffer, buffer_len, - accumulator, + mut grug, total_bytes, } = *self; + let n_stripes = secret_buffer.n_stripes(); let SecretBuffer { seed, ref secret, ref buffer, } = *secret_buffer; - let secret = secret.as_ref(); let buffer = buffer.as_ref(); @@ -371,6 +427,12 @@ where match total_bytes { 241.. => { + // Ingest final stripes + let (stripes, remainder) = fun_name(input); + for stripe in stripes { + grug.process_stripe(stripe, n_stripes, secret); + } + let mut temp = [0; 64]; let last_stripe = match input.last_chunk() { @@ -387,7 +449,13 @@ where } }; - detect::finalize(accumulator, input, last_stripe, secret, total_bytes) + detect::finalize( + grug.accumulator, + remainder, + last_stripe, + secret, + total_bytes, + ) } 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), @@ -675,6 +743,7 @@ impl Algorithm { #[inline] fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + // TODO: [unify] let secrets = (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); @@ -692,6 +761,7 @@ impl Algorithm { secret: &[u8], len: usize, ) -> u64 { + debug_assert!(!last_block.is_empty()); self.last_round(&mut acc, last_block, last_stripe, secret); self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret, 11) @@ -701,10 +771,9 @@ impl Algorithm { fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, // except for the last stripe (whether it is full or not) - let stripes = match block.bp_as_chunks() { - ([stripes @ .., _last], []) => stripes, - (stripes, _last) => stripes, - }; + let (stripes, _) = fun_name(block); + + // 
TODO: [unify] let secrets = (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); @@ -746,6 +815,14 @@ impl Algorithm { } } +#[inline] +fn fun_name(block: &[u8]) -> (&[[u8; 64]], &[u8]) { + match block.bp_as_chunks() { + ([stripes @ .., last], []) => (stripes, last), + (stripes, last) => (stripes, last), + } +} + /// # Safety /// `input` must be non-empty. #[inline] @@ -782,12 +859,13 @@ mod scalar { } #[inline] - pub fn rounds<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - super::Algorithm(Impl).rounds(acc, blocks, secret) + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + Impl.accumulate(acc, stripe, secret) + } + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + Impl.round_scramble(acc, secret_end); } #[inline] @@ -885,12 +963,16 @@ mod neon { /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] - pub unsafe fn rounds_unchecked<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + Impl::new_unchecked().round_scramble(acc, secret_end) + } + + /// # Safety + /// You must ensure that the CPU has the NEON feature + #[inline] + #[target_feature(enable = "neon")] + pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + Impl::new_unchecked().accumulate(acc, stripe, secret) } /// # Safety @@ -1096,12 +1178,13 @@ mod aarch64_detect { } #[inline] - pub fn rounds<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - pick! { rounds_unchecked, rounds, (acc, blocks, secret) } + pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + pick! 
{ round_scramble_unchecked, round_scramble, (acc, secret_end) } + } + + #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + pick! { accumulate_unchecked, accumulate, (acc, stripe, secret) } } #[inline] @@ -1134,12 +1217,16 @@ mod avx2 { /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] - pub unsafe fn rounds_unchecked<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + Impl::new_unchecked().round_scramble(acc, secret_end) + } + + /// # Safety + /// You must ensure that the CPU has the AVX2 feature + #[inline] + #[target_feature(enable = "avx2")] + pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + Impl::new_unchecked().accumulate(acc, stripe, secret) } /// # Safety @@ -1235,12 +1322,16 @@ mod sse2 { /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] - pub unsafe fn rounds_unchecked<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - super::Algorithm(Impl::new_unchecked()).rounds(acc, blocks, secret) + pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + Impl::new_unchecked().round_scramble(acc, secret_end) + } + + /// # Safety + /// You must ensure that the CPU has the SSE2 feature + #[inline] + #[target_feature(enable = "sse2")] + pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + Impl::new_unchecked().accumulate(acc, stripe, secret) } /// # Safety @@ -1346,12 +1437,13 @@ mod x86_64_detect { } #[inline] - pub fn rounds<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - pick! 
{ rounds_unchecked, rounds, (acc, blocks, secret) } + pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + pick! { round_scramble_unchecked, round_scramble, (acc, secret_end) } + } + + #[inline] + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + pick! { accumulate_unchecked, accumulate, (acc, stripe, secret) } } #[inline] @@ -1401,12 +1493,13 @@ mod detect { } #[inline] - pub fn rounds<'a>( - acc: &mut [u64; 8], - blocks: impl IntoIterator, - secret: &[u8], - ) { - pick! { rounds(acc, blocks, secret) } + pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + pick! { accumulate(acc, stripe, secret) } + } + + #[inline] + pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + pick! { round_scramble(acc, secret_end) } } #[inline] From 0f1980ff3495ca9d4ecbfbf662ce1566f71683ba Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 14 Aug 2024 15:57:30 -0400 Subject: [PATCH 106/166] speeeeds --- src/xxhash3_64.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6756b3078..1d190b36b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -275,6 +275,8 @@ impl Grug { } } + // TODO: NEXT: inline this? pass in secret_end? + #[inline] fn process_stripe(&mut self, stripe: &[u8; 64], n_stripes: usize, secret: &[u8]) { let Self { accumulator, @@ -282,7 +284,7 @@ impl Grug { .. 
} = self; - let secret_end = secret.last_chunk().unwrap(); + let secret_end = unsafe { secret.last_chunk().unwrap_unchecked() }; // each stripe let secret = unsafe { &*secret.get_unchecked(*current_stripe * 8..).as_ptr().cast() }; From 7c3a8ed6ccc49b059b518f6dd79329212a0389a3 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 15 Aug 2024 20:09:20 -0400 Subject: [PATCH 107/166] this is actually neon oops --- src/xxhash3_64.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 1d190b36b..84176a375 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1154,7 +1154,8 @@ mod neon { // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 #[inline] - fn reordering_barrier(r: uint64x2_t) { + #[target_feature(enable = "neon")] + unsafe fn reordering_barrier(r: uint64x2_t) { unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } } } From 3a4132681f767b6830cda53d3a18b5b816dec2f4 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 15 Aug 2024 20:21:04 -0400 Subject: [PATCH 108/166] push it --- src/xxhash3_64.rs | 541 ++++++++++++++++++++-------------------------- 1 file changed, 239 insertions(+), 302 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 84176a375..7bb2bd4ff 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -277,7 +277,13 @@ impl Grug { // TODO: NEXT: inline this? pass in secret_end? 
#[inline] - fn process_stripe(&mut self, stripe: &[u8; 64], n_stripes: usize, secret: &[u8]) { + fn process_stripe( + &mut self, + vector: V, + stripe: &[u8; 64], + n_stripes: usize, + secret: &[u8], + ) { let Self { accumulator, current_stripe, @@ -288,13 +294,13 @@ impl Grug { // each stripe let secret = unsafe { &*secret.get_unchecked(*current_stripe * 8..).as_ptr().cast() }; - detect::accumulate(accumulator, stripe, secret); + vector.accumulate(accumulator, stripe, secret); *current_stripe += 1; if *current_stripe == n_stripes { // after block's worth - detect::round_scramble(accumulator, secret_end); + vector.round_scramble(accumulator, secret_end); *current_stripe = 0; } } @@ -326,152 +332,239 @@ impl RawHasher { } } +macro_rules! dispatch { + ( + fn $fn_name:ident<$($gen:ident),*>($($arg_name:ident : $arg_ty:ty),*) $(-> $ret_ty:ty)? + [$($wheres:tt)*] + ) => { + #[inline] + fn do_scalar<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + $fn_name(scalar::Impl, $($arg_name),*) + } + + #[inline] + #[target_feature(enable = "neon")] + #[cfg(target_arch = "aarch64")] + unsafe fn do_neon<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) + } + + #[inline] + #[target_feature(enable = "avx2")] + #[cfg(target_arch = "x86_64")] + unsafe fn do_avx2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? + where + $($wheres)* + { + $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) + } + + #[inline] + #[target_feature(enable = "sse2")] + #[cfg(target_arch = "x86_64")] + unsafe fn do_sse2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? 
+ where + $($wheres)* + { + $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) + } + + #[cfg(target_arch = "aarch64")] + { + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { do_neon($($arg_name),*) }; + } + } + + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx2") { + return unsafe { do_avx2($($arg_name),*) }; + } else if is_x86_feature_detected!("sse2") { + return unsafe { do_sse2($($arg_name),*) }; + } + } + + do_scalar($($arg_name),*) + }; +} + impl hash::Hasher for RawHasher where S: AsRef<[u8]>, { #[inline(never)] - fn write(&mut self, mut input: &[u8]) { - if input.is_empty() { - return; + fn write(&mut self, input: &[u8]) { + let this = self; + dispatch! { + fn write_impl(this: &mut RawHasher, input: &[u8]) + [S: AsRef<[u8]>] + } + } + + #[inline(never)] + fn finish(&self) -> u64 { + let this = self; + dispatch! { + fn finish_impl(this: &RawHasher) -> u64 + [S: AsRef<[u8]>] } + } +} - let Self { - secret_buffer, - buffer_len, - grug, - total_bytes, - .. - } = self; +#[inline(always)] +fn write_impl(vector: impl Vector, this: &mut RawHasher, mut input: &[u8]) +where + S: AsRef<[u8]>, +{ + if input.is_empty() { + return; + } - let n_stripes = secret_buffer.n_stripes(); + let RawHasher { + secret_buffer, + buffer_len, + grug, + total_bytes, + .. + } = this; - let SecretBuffer { secret, buffer, .. } = secret_buffer; - let secret = secret.as_ref(); + let n_stripes = secret_buffer.n_stripes(); - *total_bytes += input.len(); + let SecretBuffer { secret, buffer, .. 
} = secret_buffer; + let secret = secret.as_ref(); - // We have some previous data saved; try to fill it up and process it first - if !buffer.is_empty() { - let remaining = &mut buffer[*buffer_len..]; - let n_to_copy = usize::min(remaining.len(), input.len()); + *total_bytes += input.len(); - let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); - let (input_head, input_tail) = input.split_at(n_to_copy); + // We have some previous data saved; try to fill it up and process it first + if !buffer.is_empty() { + let remaining = &mut buffer[*buffer_len..]; + let n_to_copy = usize::min(remaining.len(), input.len()); - remaining_head.copy_from_slice(input_head); - *buffer_len += n_to_copy; + let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); + let (input_head, input_tail) = input.split_at(n_to_copy); - input = input_tail; + remaining_head.copy_from_slice(input_head); + *buffer_len += n_to_copy; - // We did not fill up the buffer - if !remaining_tail.is_empty() { - return; - } + input = input_tail; - // We don't know this isn't the last of the data - if input.is_empty() { - return; - } + // We did not fill up the buffer + if !remaining_tail.is_empty() { + return; + } - let (stripes, _) = buffer.bp_as_chunks(); - for stripe in stripes { - grug.process_stripe(stripe, n_stripes, secret); - } - *buffer_len = 0; + // We don't know this isn't the last of the data + if input.is_empty() { + return; } - debug_assert!(*buffer_len == 0); + let (stripes, _) = buffer.bp_as_chunks(); + for stripe in stripes { + grug.process_stripe(vector, stripe, n_stripes, secret); + } + *buffer_len = 0; + } + + debug_assert!(*buffer_len == 0); - // Process as much of the input data in-place as possible, - // while leaving at least one full stripe for the - // finalization. 
- if let Some(dd) = input.len().checked_sub(STRIPE_BYTES) { - let nn = dd / STRIPE_BYTES; - let nn = nn * STRIPE_BYTES; - let (aa, remainder) = input.split_at(nn); - let (stripes, _) = aa.bp_as_chunks(); + // Process as much of the input data in-place as possible, + // while leaving at least one full stripe for the + // finalization. + if let Some(dd) = input.len().checked_sub(STRIPE_BYTES) { + let nn = dd / STRIPE_BYTES; + let nn = nn * STRIPE_BYTES; + let (aa, remainder) = input.split_at(nn); + let (stripes, _) = aa.bp_as_chunks(); + for stripe in stripes { + grug.process_stripe(vector, stripe, n_stripes, secret) + } + input = remainder; + } + + // Any remaining data has to be less than the buffer, and the + // buffer is empty so just fill up the buffer. + debug_assert!(*buffer_len == 0); + debug_assert!(!input.is_empty()); + debug_assert!(input.len() < buffer.len()); + + buffer[..input.len()].copy_from_slice(input); + *buffer_len = input.len(); +} + +#[inline(always)] +fn finish_impl(vector: impl Vector, this: &RawHasher) -> u64 +where + S: AsRef<[u8]>, +{ + let RawHasher { + ref secret_buffer, + buffer_len, + mut grug, + total_bytes, + } = *this; + let n_stripes = secret_buffer.n_stripes(); + let SecretBuffer { + seed, + ref secret, + ref buffer, + } = *secret_buffer; + let secret = secret.as_ref(); + let buffer = buffer.as_ref(); + + let input = &buffer[..buffer_len]; + + match total_bytes { + 241.. => { + // Ingest final stripes + let (stripes, remainder) = fun_name(input); for stripe in stripes { - grug.process_stripe(stripe, n_stripes, secret) + grug.process_stripe(vector, stripe, n_stripes, secret); } - input = remainder; - } - // Any remaining data has to be less than the buffer, and the - // buffer is empty so just fill up the buffer. 
- debug_assert!(*buffer_len == 0); - debug_assert!(!input.is_empty()); - debug_assert!(input.len() < buffer.len()); + let mut temp = [0; 64]; - buffer[..input.len()].copy_from_slice(input); - *buffer_len = input.len(); - } + let last_stripe = match input.last_chunk() { + Some(chunk) => chunk, + None => { + let n_to_reuse = 64 - input.len(); + let to_reuse = buffer.len() - n_to_reuse; - #[inline(never)] - fn finish(&self) -> u64 { - let Self { - ref secret_buffer, - buffer_len, - mut grug, - total_bytes, - } = *self; - let n_stripes = secret_buffer.n_stripes(); - let SecretBuffer { - seed, - ref secret, - ref buffer, - } = *secret_buffer; - let secret = secret.as_ref(); - let buffer = buffer.as_ref(); - - let input = &buffer[..buffer_len]; - - match total_bytes { - 241.. => { - // Ingest final stripes - let (stripes, remainder) = fun_name(input); - for stripe in stripes { - grug.process_stripe(stripe, n_stripes, secret); + let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); + temp_head.copy_from_slice(&buffer[to_reuse..]); + temp_tail.copy_from_slice(input); + + &temp } + }; - let mut temp = [0; 64]; - - let last_stripe = match input.last_chunk() { - Some(chunk) => chunk, - None => { - let n_to_reuse = 64 - input.len(); - let to_reuse = buffer.len() - n_to_reuse; - - let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); - temp_head.copy_from_slice(&buffer[to_reuse..]); - temp_tail.copy_from_slice(input); - - &temp - } - }; - - detect::finalize( - grug.accumulator, - remainder, - last_stripe, - secret, - total_bytes, - ) - } + Algorithm(vector).finalize( + grug.accumulator, + remainder, + last_stripe, + secret, + total_bytes, + ) + } - 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), + 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), - 17..=128 => impl_17_to_128_bytes(&DEFAULT_SECRET, seed, input), + 17..=128 => impl_17_to_128_bytes(&DEFAULT_SECRET, seed, input), - 9..=16 => impl_9_to_16_bytes(&DEFAULT_SECRET, seed, 
input), + 9..=16 => impl_9_to_16_bytes(&DEFAULT_SECRET, seed, input), - 4..=8 => impl_4_to_8_bytes(&DEFAULT_SECRET, seed, input), + 4..=8 => impl_4_to_8_bytes(&DEFAULT_SECRET, seed, input), - 1..=3 => impl_1_to_3_bytes(&DEFAULT_SECRET, seed, input), + 1..=3 => impl_1_to_3_bytes(&DEFAULT_SECRET, seed, input), - 0 => impl_0_bytes(&DEFAULT_SECRET, seed), - } + 0 => impl_0_bytes(&DEFAULT_SECRET, seed), } } @@ -846,7 +939,7 @@ unsafe fn chunks_and_last(input: &[u8], block_size: usize) -> (slice::ChunksExac (blocks, last_block) } -trait Vector { +trait Vector: Copy { fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); @@ -860,29 +953,9 @@ mod scalar { super::Algorithm(Impl).oneshot(secret, input) } - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - Impl.accumulate(acc, stripe, secret) - } - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - Impl.round_scramble(acc, secret_end); - } - - #[inline] - pub fn finalize( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - super::Algorithm(Impl).finalize(acc, last_block, last_stripe, secret, len) - } - use super::{SliceBackport as _, Vector, PRIME32_1}; + #[derive(Copy, Clone)] pub struct Impl; impl Vector for Impl { @@ -961,43 +1034,14 @@ mod neon { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } - /// # Safety - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - Impl::new_unchecked().round_scramble(acc, secret_end) - } - - /// # Safety - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - 
Impl::new_unchecked().accumulate(acc, stripe, secret) - } - - /// # Safety - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - pub unsafe fn finalize_unchecked( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) - } - - struct Impl(()); + #[derive(Copy, Clone)] + pub struct Impl(()); impl Impl { /// # Safety /// You must ensure that the CPU has the NEON feature #[inline] - unsafe fn new_unchecked() -> Self { + pub unsafe fn new_unchecked() -> Self { Self(()) } } @@ -1179,34 +1223,13 @@ mod aarch64_detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { pick! { oneshot_unchecked, oneshot, (secret, input) } } - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - pick! { round_scramble_unchecked, round_scramble, (acc, secret_end) } - } - - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - pick! { accumulate_unchecked, accumulate, (acc, stripe, secret) } - } - - #[inline] - pub fn finalize( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - pick! 
{ finalize_unchecked, finalize, (acc, last_block, last_stripe, secret, len) } - } } #[cfg(target_arch = "x86_64")] mod avx2 { use core::arch::x86_64::*; - use super::Vector; + use super::{scalar, Vector}; /// # Safety /// You must ensure that the CPU has the AVX2 feature @@ -1216,52 +1239,23 @@ mod avx2 { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } - /// # Safety - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - Impl::new_unchecked().round_scramble(acc, secret_end) - } - - /// # Safety - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - Impl::new_unchecked().accumulate(acc, stripe, secret) - } - - /// # Safety - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - pub unsafe fn finalize_unchecked( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) - } - - pub struct Impl(super::scalar::Impl); + #[derive(Copy, Clone)] + pub struct Impl(()); impl Impl { /// # Safety /// You must ensure that the CPU has the AVX2 feature #[inline] pub unsafe fn new_unchecked() -> Impl { - Impl(super::scalar::Impl) + Impl(()) } } impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // The scalar implementation is autovectorized nicely enough - self.0.round_scramble(acc, secret_end) + // SAFETY: Type can only be constructed when AVX2 feature is present + unsafe { round_scramble_avx2(acc, secret_end) } } #[inline] @@ -1271,6 +1265,13 @@ mod avx2 { } } + #[inline] + #[target_feature(enable = "avx2")] + unsafe fn 
round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // The scalar implementation is autovectorized nicely enough + scalar::Impl.round_scramble(acc, secret_end) + } + #[inline] #[target_feature(enable = "avx2")] unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -1311,7 +1312,7 @@ mod avx2 { mod sse2 { use core::arch::x86_64::*; - use super::Vector; + use super::{scalar, Vector}; /// # Safety /// You must ensure that the CPU has the SSE2 feature @@ -1321,52 +1322,23 @@ mod sse2 { super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) } - /// # Safety - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - pub unsafe fn round_scramble_unchecked(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - Impl::new_unchecked().round_scramble(acc, secret_end) - } - - /// # Safety - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - pub unsafe fn accumulate_unchecked(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - Impl::new_unchecked().accumulate(acc, stripe, secret) - } - - /// # Safety - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - pub unsafe fn finalize_unchecked( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - super::Algorithm(Impl::new_unchecked()).finalize(acc, last_block, last_stripe, secret, len) - } - - pub struct Impl(super::scalar::Impl); + #[derive(Copy, Clone)] + pub struct Impl(()); impl Impl { /// # Safety /// You must ensure that the CPU has the SSE2 feature #[inline] pub unsafe fn new_unchecked() -> Impl { - Impl(super::scalar::Impl) + Impl(()) } } impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // The scalar implementation is autovectorized nicely enough - self.0.round_scramble(acc, secret_end) + // SAFETY: Type 
can only be constructed when SSE2 feature is present + unsafe { round_scramble_sse2(acc, secret_end) } } #[inline] @@ -1376,6 +1348,13 @@ mod sse2 { } } + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // The scalar implementation is autovectorized nicely enough + scalar::Impl.round_scramble(acc, secret_end) + } + #[inline] #[target_feature(enable = "sse2")] unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -1438,27 +1417,6 @@ mod x86_64_detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { pick! { oneshot_unchecked, oneshot, (secret, input) } } - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - pick! { round_scramble_unchecked, round_scramble, (acc, secret_end) } - } - - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - pick! { accumulate_unchecked, accumulate, (acc, stripe, secret) } - } - - #[inline] - pub fn finalize( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - pick! { finalize_unchecked, finalize, (acc, last_block, last_stripe, secret, len) } - } } mod detect { @@ -1494,27 +1452,6 @@ mod detect { pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { pick! { oneshot(secret, input) } } - - #[inline] - pub fn accumulate(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - pick! { accumulate(acc, stripe, secret) } - } - - #[inline] - pub fn round_scramble(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - pick! { round_scramble(acc, secret_end) } - } - - #[inline] - pub fn finalize( - acc: [u64; 8], - last_block: &[u8], - last_stripe: &[u8; 64], - secret: &[u8], - len: usize, - ) -> u64 { - pick! 
{ finalize(acc, last_block, last_stripe, secret, len) } - } } #[inline] From 9657b00fc82b3888290492c644a796b092d1df47 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 16 Aug 2024 20:55:18 -0400 Subject: [PATCH 109/166] Address some todos --- src/xxhash3_64.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 7bb2bd4ff..662e6e36f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -260,14 +260,15 @@ where } } +/// Tracks which stripe we are currently on to know which part of the +/// secret we should be using. #[derive(Copy, Clone)] -struct Grug { - // TODO FIXME +struct StripeAccumulator { accumulator: [u64; 8], current_stripe: usize, } -impl Grug { +impl StripeAccumulator { fn new() -> Self { Self { accumulator: INITIAL_ACCUMULATORS, @@ -275,7 +276,6 @@ impl Grug { } } - // TODO: NEXT: inline this? pass in secret_end? #[inline] fn process_stripe( &mut self, @@ -317,7 +317,7 @@ impl Grug { pub struct RawHasher { secret_buffer: SecretBuffer, buffer_len: usize, - grug: Grug, + stripe_accumulator: StripeAccumulator, total_bytes: usize, } @@ -326,7 +326,7 @@ impl RawHasher { Self { secret_buffer, buffer_len: 0, - grug: Grug::new(), + stripe_accumulator: StripeAccumulator::new(), total_bytes: 0, } } @@ -430,7 +430,7 @@ where let RawHasher { secret_buffer, buffer_len, - grug, + stripe_accumulator, total_bytes, .. 
} = this; @@ -467,7 +467,7 @@ where let (stripes, _) = buffer.bp_as_chunks(); for stripe in stripes { - grug.process_stripe(vector, stripe, n_stripes, secret); + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } *buffer_len = 0; } @@ -484,7 +484,7 @@ where let (stripes, _) = aa.bp_as_chunks(); for stripe in stripes { - grug.process_stripe(vector, stripe, n_stripes, secret) + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret) } input = remainder; } @@ -507,7 +507,7 @@ where let RawHasher { ref secret_buffer, buffer_len, - mut grug, + mut stripe_accumulator, total_bytes, } = *this; let n_stripes = secret_buffer.n_stripes(); @@ -526,7 +526,7 @@ where // Ingest final stripes let (stripes, remainder) = fun_name(input); for stripe in stripes { - grug.process_stripe(vector, stripe, n_stripes, secret); + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } let mut temp = [0; 64]; @@ -546,7 +546,7 @@ where }; Algorithm(vector).finalize( - grug.accumulator, + stripe_accumulator.accumulator, remainder, last_stripe, secret, @@ -999,7 +999,6 @@ mod scalar { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 // https://github.com/llvm/llvm-project/issues/98481 - // TODO: this is probably if NEON, yeah? #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use core::arch::asm; @@ -1280,7 +1279,9 @@ mod avx2 { let secret = secret.as_ptr().cast::<__m256i>(); for i in 0..2 { - // todo: align the accumulator and avoid the unaligned load and store + // [align-acc]: The C code aligns the accumulator to avoid + // the unaligned load and store here, but that doesn't + // seem to be a big performance loss. 
let mut acc_0 = _mm256_loadu_si256(acc.add(i)); let stripe_0 = _mm256_loadu_si256(stripe.add(i)); let secret_0 = _mm256_loadu_si256(secret.add(i)); @@ -1363,7 +1364,7 @@ mod sse2 { let secret = secret.as_ptr().cast::<__m128i>(); for i in 0..4 { - // todo: align the accumulator and avoid the unaligned load and store + // See [align-acc]. let mut acc_0 = _mm_loadu_si128(acc.add(i)); let stripe_0 = _mm_loadu_si128(stripe.add(i)); let secret_0 = _mm_loadu_si128(secret.add(i)); From 9566f322768a320c45c5a188a7627e750c739cc8 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 09:27:56 -0400 Subject: [PATCH 110/166] Use unified dispatch mechanism --- src/xxhash3_64.rs | 141 ++++++++-------------------------------------- 1 file changed, 25 insertions(+), 116 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 662e6e36f..6be59778b 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -375,14 +375,28 @@ macro_rules! dispatch { $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) } - #[cfg(target_arch = "aarch64")] + // Now we invoke the right function + + #[cfg(_internal_xxhash3_force_neon)] + return unsafe { do_neon($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_avx2)] + return unsafe { do_avx2($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_sse2)] + return unsafe { do_sse2($($arg_name),*) }; + + #[cfg(_internal_xxhash3_force_scalar)] + return do_scalar($($arg_name),*); + + #[cfg(all(target_arch = "aarch64", feature = "std"))] { if std::arch::is_aarch64_feature_detected!("neon") { return unsafe { do_neon($($arg_name),*) }; } } - #[cfg(target_arch = "x86_64")] + #[cfg(all(target_arch = "x86_64", feature = "std"))] { if is_x86_feature_detected!("avx2") { return unsafe { do_avx2($($arg_name),*) }; @@ -777,7 +791,15 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { - detect::oneshot(secret, input) + dispatch! 
{ + fn oneshot_x<>(secret: &[u8], input: &[u8]) -> u64 + [] + } +} + +#[inline(always)] +fn oneshot_x(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { + Algorithm(vector).oneshot(secret, input) } fn block_size(secret: &[u8]) -> usize { @@ -948,11 +970,6 @@ trait Vector: Copy { // This module is not `cfg`-gated because it is used by some of the // SIMD implementations. mod scalar { - #[inline] - pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - super::Algorithm(Impl).oneshot(secret, input) - } - use super::{SliceBackport as _, Vector, PRIME32_1}; #[derive(Copy, Clone)] @@ -1025,14 +1042,6 @@ mod neon { use super::{SliceBackport as _, Vector, PRIME32_1}; - /// # Safety - /// You must ensure that the CPU has the NEON feature - #[inline] - #[target_feature(enable = "neon")] - pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) - } - #[derive(Copy, Clone)] pub struct Impl(()); @@ -1203,41 +1212,12 @@ mod neon { } } -#[cfg(all(target_arch = "aarch64", feature = "std"))] -mod aarch64_detect { - macro_rules! pick { - ($f:ident, $s:ident, $($t:tt)+) => { - #[cfg(_internal_xxhash3_force_neon)] - return unsafe { super::neon::$f $($t)+ }; - - if std::arch::is_aarch64_feature_detected!("neon") { - return unsafe { super::neon::$f $($t)+ }; - } - - super::scalar::$s $($t)+ - }; - } - - #[inline] - pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - pick! 
{ oneshot_unchecked, oneshot, (secret, input) } - } -} - #[cfg(target_arch = "x86_64")] mod avx2 { use core::arch::x86_64::*; use super::{scalar, Vector}; - /// # Safety - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) - } - #[derive(Copy, Clone)] pub struct Impl(()); @@ -1315,14 +1295,6 @@ mod sse2 { use super::{scalar, Vector}; - /// # Safety - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - pub unsafe fn oneshot_unchecked(secret: &[u8], input: &[u8]) -> u64 { - super::Algorithm(Impl::new_unchecked()).oneshot(secret, input) - } - #[derive(Copy, Clone)] pub struct Impl(()); @@ -1392,69 +1364,6 @@ mod sse2 { } } -#[cfg(all(target_arch = "x86_64", feature = "std"))] -mod x86_64_detect { - macro_rules! pick { - ($f:ident, $s:ident, $($t:tt)+) => { - #[cfg(_internal_xxhash3_force_avx2)] - return unsafe { super::avx2::$f $($t)+ }; - - #[cfg(_internal_xxhash3_force_sse2)] - return unsafe { super::sse2::$f $($t)+ }; - - if std::arch::is_x86_feature_detected!("avx2") { - return unsafe { super::avx2::$f $($t)+ }; - } - - if std::arch::is_x86_feature_detected!("sse2") { - return unsafe { super::sse2::$f $($t)+ }; - } - - super::scalar::$s $($t)+ - }; - } - - #[inline] - pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - pick! { oneshot_unchecked, oneshot, (secret, input) } - } -} - -mod detect { - macro_rules! 
pick { - ($e:expr) => { - #[cfg(_internal_xxhash3_force_scalar)] - { - use super::scalar::*; - return $e; - } - - #[cfg(all(target_arch = "aarch64", feature = "std"))] - { - use super::aarch64_detect::*; - return $e; - } - - #[cfg(all(target_arch = "x86_64", feature = "std"))] - { - use super::x86_64_detect::*; - return $e; - } - - #[allow(unreachable_code)] - { - use super::scalar::*; - $e - } - }; - } - - #[inline] - pub fn oneshot(secret: &[u8], input: &[u8]) -> u64 { - pick! { oneshot(secret, input) } - } -} - #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; From e9d17b923f17ff0e1d7287d36fbadbcaa6d3fcad Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 09:29:21 -0400 Subject: [PATCH 111/166] re-inline helpers --- src/xxhash3_64.rs | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 6be59778b..2e2dc768f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -802,11 +802,6 @@ fn oneshot_x(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } -fn block_size(secret: &[u8]) -> usize { - let stripes_per_block = (secret.len() - 64) / 8; - 64 * stripes_per_block -} - struct Algorithm(V); impl Algorithm { @@ -817,9 +812,20 @@ impl Algorithm { assert!(secret.len() >= SECRET_MINIMUM_LENGTH); assert!(input.len() >= 241); - let block_size = block_size(secret); + let stripes_per_block = (secret.len() - 64) / 8; + let block_size = 64 * stripes_per_block; - let (blocks, last_block) = unsafe { chunks_and_last(input, block_size) }; + let mut blocks = input.chunks_exact(block_size); + + let last_block = if blocks.remainder().is_empty() { + // SAFETY: We know that `input` is non-empty, which means + // that either there will be a remainder or one or more + // full blocks. That info isn't flowing to the optimizer, + // so we use `unwrap_unchecked`. 
+ unsafe { blocks.next_back().unwrap_unchecked() } + } else { + blocks.remainder() + }; self.rounds(&mut acc, blocks, secret); @@ -940,27 +946,6 @@ fn fun_name(block: &[u8]) -> (&[[u8; 64]], &[u8]) { } } -/// # Safety -/// `input` must be non-empty. -#[inline] -unsafe fn chunks_and_last(input: &[u8], block_size: usize) -> (slice::ChunksExact<'_, u8>, &[u8]) { - debug_assert!(!input.is_empty()); - - let mut blocks = input.chunks_exact(block_size); - - let last_block = if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means - // that either there will be a remainder or one or more - // full blocks. That info isn't flowing to the optimizer, - // so we use `unwrap_unchecked`. - unsafe { blocks.next_back().unwrap_unchecked() } - } else { - blocks.remainder() - }; - - (blocks, last_block) -} - trait Vector: Copy { fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); From 935e70132dc10a3117d2183b0791e34bd75e96eb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 12:51:22 -0400 Subject: [PATCH 112/166] improve names --- src/xxhash3_64.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 2e2dc768f..d3916ec75 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -538,7 +538,7 @@ where match total_bytes { 241.. => { // Ingest final stripes - let (stripes, remainder) = fun_name(input); + let (stripes, remainder) = stripes_with_tail(input); for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } @@ -792,13 +792,13 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { dispatch! 
{ - fn oneshot_x<>(secret: &[u8], input: &[u8]) -> u64 + fn oneshot_impl<>(secret: &[u8], input: &[u8]) -> u64 [] } } #[inline(always)] -fn oneshot_x(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { +fn oneshot_impl(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } @@ -894,7 +894,7 @@ impl Algorithm { fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { // Accumulation steps are run for the stripes in the last block, // except for the last stripe (whether it is full or not) - let (stripes, _) = fun_name(block); + let (stripes, _) = stripes_with_tail(block); // TODO: [unify] let secrets = @@ -939,7 +939,7 @@ impl Algorithm { } #[inline] -fn fun_name(block: &[u8]) -> (&[[u8; 64]], &[u8]) { +fn stripes_with_tail(block: &[u8]) -> (&[[u8; 64]], &[u8]) { match block.bp_as_chunks() { ([stripes @ .., last], []) => (stripes, last), (stripes, last) => (stripes, last), From e7662a4db9a1d24a3e3a3d881bd3ba62e752b6b0 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 12:54:54 -0400 Subject: [PATCH 113/166] reduce unsafe --- src/xxhash3_64.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index d3916ec75..7e76d7ae7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -831,14 +831,7 @@ impl Algorithm { let len = input.len(); - let last_stripe: &[u8; 64] = unsafe { - &*input - .as_ptr() - .add(len) - .sub(mem::size_of::<[u8; 64]>()) - .cast() - }; - + let last_stripe = input.last_chunk().unwrap(); self.finalize(acc, last_block, last_stripe, secret, len) } From ab749c25764ff653c1ba478aabf79add33790062 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 13:11:50 -0400 Subject: [PATCH 114/166] reduce unsafe --- src/xxhash3_64.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 
7e76d7ae7..ec2cd57b4 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -897,8 +897,7 @@ impl Algorithm { self.0.accumulate(acc, stripe, secret); } - let q = &secret[secret.len() - 71..]; - let q: &[u8; 64] = unsafe { &*q.as_ptr().cast() }; + let q = secret[secret.len() - 71..].first_chunk().unwrap(); self.0.accumulate(acc, last_stripe, q); } @@ -910,19 +909,17 @@ impl Algorithm { secret: &[u8], secret_offset: usize, ) -> u64 { - let secret_words = unsafe { - secret - .as_ptr() - .add(secret_offset) - .cast::<[u64; 8]>() - .read_unaligned() - }; + let secrets = secret[secret_offset..].first_chunk::<64>().unwrap(); + let (secrets, _) = secrets.bp_as_chunks(); let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result let mul_result = { - let a = (acc[i * 2] ^ secret_words[i * 2]).into_u128(); - let b = (acc[i * 2 + 1] ^ secret_words[i * 2 + 1]).into_u128(); + let sa = u64::from_ne_bytes(secrets[i * 2]); + let sb = u64::from_ne_bytes(secrets[i * 2 + 1]); + + let a = (acc[i * 2] ^ sa).into_u128(); + let b = (acc[i * 2 + 1] ^ sb).into_u128(); a.wrapping_mul(b) }; result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); @@ -968,9 +965,12 @@ mod scalar { #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (stripe, _) = stripe.bp_as_chunks(); + let (secret, _) = secret.bp_as_chunks(); + for i in 0..8 { - let stripe = unsafe { stripe.as_ptr().cast::().add(i).read_unaligned() }; - let secret = unsafe { secret.as_ptr().cast::().add(i).read_unaligned() }; + let stripe = u64::from_ne_bytes(stripe[i]); + let secret = u64::from_ne_bytes(secret[i]); let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); From 7c0cc3f9eb8bee3353485b207f1442b20a0c545c Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 13:14:25 -0400 Subject: [PATCH 115/166] unsafe-op-in-fn --- src/xxhash3_64.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 
deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ec2cd57b4..36a58a040 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,4 +1,5 @@ #![allow(missing_docs)] +#![deny(unsafe_op_in_unsafe_fn)] use core::{hash, mem, slice}; @@ -345,6 +346,8 @@ macro_rules! dispatch { $fn_name(scalar::Impl, $($arg_name),*) } + /// # Safety + /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "aarch64")] @@ -352,7 +355,10 @@ macro_rules! dispatch { where $($wheres)* { - $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) + // SAFETY: the caller has ensured we have the NEON feature + unsafe { + $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) + } } #[inline] From dde22b4b5b993cf3c2a5179e533a955916707d77 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 17 Aug 2024 13:38:58 -0400 Subject: [PATCH 116/166] extract secret start --- src/xxhash3_64.rs | 166 +++++++++++++++++++++++++++++++++------------- 1 file changed, 121 insertions(+), 45 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 36a58a040..c1f43ce42 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -33,8 +33,76 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; +const DEFAULT_SECRET2: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET) }; + pub const SECRET_MINIMUM_LENGTH: usize = 136; +#[repr(transparent)] +struct Secret([u8]); + +impl Secret { + #[inline] + fn new(bytes: &[u8]) -> Result<&Self, ()> { + if bytes.len() >= SECRET_MINIMUM_LENGTH { + unsafe { Ok(Self::new_unchecked(bytes)) } + } else { + Err(()) // TODO error + } + } + + #[inline] + const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { + unsafe { mem::transmute(bytes) } + } + + #[inline] + fn words_for_0(&self) -> [u64; 2] { + // unsafe { self.0.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() } + + let (q, _) = 
self.0[56..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_ne_bytes) + } + + #[inline] + fn words_for_1_to_3(&self) -> [u32; 2] { + // unsafe { self.0.as_ptr().cast::<[u32; 2]>().read_unaligned() } + + let (q, _) = self.0.bp_as_chunks(); + [q[0], q[1]].map(u32::from_ne_bytes) + } + + #[inline] + fn words_for_4_to_8(&self) -> [u64; 2] { + //unsafe { self.0.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() } + + let (q, _) = self.0[8..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_ne_bytes) + } + + #[inline] + fn words_for_9_to_16(&self) -> [u64; 4] { + // unsafe { self.0.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() } + + let (q, _) = self.0[24..].bp_as_chunks(); + [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) + } + + #[inline] + fn stripe(&self, i: usize) -> &[u8; 64] { + unsafe { &*self.0.get_unchecked(i * 8..).as_ptr().cast() } + } + + #[inline] + fn end(&self) -> &[u8; 64] { + unsafe { self.0.last_chunk().unwrap_unchecked() } + } + + #[inline] + fn len(&self) -> usize { + self.0.len() + } +} + pub struct XxHash3_64 { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, @@ -44,7 +112,7 @@ pub struct XxHash3_64 { impl XxHash3_64 { #[inline(never)] pub fn oneshot(input: &[u8]) -> u64 { - impl_oneshot(&DEFAULT_SECRET, DEFAULT_SEED, input) + impl_oneshot(DEFAULT_SECRET2, DEFAULT_SEED, input) } #[inline(never)] @@ -57,12 +125,14 @@ impl XxHash3_64 { derive_secret(seed, &mut secret); } - impl_oneshot(&secret, seed, input) + let s = unsafe { Secret::new_unchecked(&secret) }; + + impl_oneshot(s, seed, input) } #[inline(never)] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> u64 { - assert!(secret.len() >= SECRET_MINIMUM_LENGTH); // TODO: ERROR + let secret = Secret::new(secret).unwrap(); // TODO: ERROR impl_oneshot(secret, DEFAULT_SEED, input) } } @@ -283,7 +353,7 @@ impl StripeAccumulator { vector: V, stripe: &[u8; 64], n_stripes: usize, - secret: &[u8], + secret: &Secret, ) { let Self { accumulator, @@ -291,10 +361,10 @@ impl 
StripeAccumulator { .. } = self; - let secret_end = unsafe { secret.last_chunk().unwrap_unchecked() }; + let secret_end = secret.end(); // each stripe - let secret = unsafe { &*secret.get_unchecked(*current_stripe * 8..).as_ptr().cast() }; + let secret = secret.stripe(*current_stripe); vector.accumulate(accumulator, stripe, secret); *current_stripe += 1; @@ -459,6 +529,7 @@ where let SecretBuffer { secret, buffer, .. } = secret_buffer; let secret = secret.as_ref(); + let secret = unsafe { Secret::new_unchecked(secret) }; *total_bytes += input.len(); @@ -537,6 +608,7 @@ where ref buffer, } = *secret_buffer; let secret = secret.as_ref(); + let secret = unsafe { Secret::new_unchecked(secret) }; let buffer = buffer.as_ref(); let input = &buffer[..buffer_len]; @@ -574,17 +646,17 @@ where ) } - 129..=240 => impl_129_to_240_bytes(&DEFAULT_SECRET, seed, input), + 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), - 17..=128 => impl_17_to_128_bytes(&DEFAULT_SECRET, seed, input), + 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), - 9..=16 => impl_9_to_16_bytes(&DEFAULT_SECRET, seed, input), + 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), - 4..=8 => impl_4_to_8_bytes(&DEFAULT_SECRET, seed, input), + 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), - 1..=3 => impl_1_to_3_bytes(&DEFAULT_SECRET, seed, input), + 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), - 0 => impl_0_bytes(&DEFAULT_SECRET, seed), + 0 => impl_0_bytes(DEFAULT_SECRET2, seed), } } @@ -616,7 +688,7 @@ fn derive_secret(seed: u64, secret: &mut [u8; 192]) { } #[inline(always)] -fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { match input.len() { 241.. 
=> impl_241_plus_bytes(secret, input), @@ -635,13 +707,13 @@ fn impl_oneshot(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_0_bytes(secret: &[u8], seed: u64) -> u64 { - let secret_words = unsafe { secret.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() }; +fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { + let secret_words = secret.words_for_0(); avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) } #[inline] -fn impl_1_to_3_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_length = input.len() as u8; // OK as we checked that the length fits let combined = input[input.len() - 1].into_u32() @@ -649,7 +721,7 @@ fn impl_1_to_3_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { | input[0].into_u32() << 16 | input[input.len() >> 1].into_u32() << 24; - let secret_words = unsafe { secret.as_ptr().cast::<[u32; 2]>().read_unaligned() }; + let secret_words = secret.words_for_1_to_3(); let value = ((secret_words[0] ^ secret_words[1]).into_u64() + seed) ^ combined.into_u64(); @@ -658,7 +730,7 @@ fn impl_1_to_3_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { input @@ -670,7 +742,7 @@ fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { }; let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); - let secret_words = unsafe { secret.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() }; + let secret_words = secret.words_for_4_to_8(); let combined = input_last.into_u64() | (input_first.into_u64() << 32); @@ -688,7 +760,7 @@ fn impl_4_to_8_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: 
&[u8]) -> u64 { +fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { input @@ -699,7 +771,7 @@ fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { .read_unaligned() }; - let secret_words = unsafe { secret.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() }; + let secret_words = secret.words_for_9_to_16(); let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; let mul_result = low.into_u128().wrapping_mul(high.into_u128()); @@ -714,10 +786,10 @@ fn impl_9_to_16_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let (secret, _) = secret.bp_as_chunks(); + let (secret, _) = secret.0.bp_as_chunks(); let (secret, _) = secret.bp_as_chunks::<2>(); let (fwd, _) = input.bp_as_chunks(); let (_, bwd) = input.bp_as_rchunks(); @@ -746,15 +818,15 @@ fn impl_17_to_128_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { } #[inline] -fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { +fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _) = input.bp_as_chunks(); let last_chunk = input.last_chunk().unwrap(); let mut head = head.iter(); - let (ss, _) = secret.bp_as_chunks(); - let (ss2, _) = secret[3..].bp_as_chunks(); + let (ss, _) = secret.0.bp_as_chunks(); + let (ss2, _) = secret.0[3..].bp_as_chunks(); let qq = head.by_ref().zip(ss); @@ -768,7 +840,7 @@ fn impl_129_to_240_bytes(secret: &[u8], seed: u64, input: &[u8]) -> u64 { acc = acc.wrapping_add(mix_step(chunk, s, seed)); } - let ss3 = 
&secret[119..].first_chunk().unwrap(); + let ss3 = &secret.0[119..].first_chunk().unwrap(); acc = acc.wrapping_add(mix_step(last_chunk, ss3, seed)); avalanche(acc) @@ -796,15 +868,15 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ ]; #[inline] -fn impl_241_plus_bytes(secret: &[u8], input: &[u8]) -> u64 { +fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { dispatch! { - fn oneshot_impl<>(secret: &[u8], input: &[u8]) -> u64 + fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64 [] } } #[inline(always)] -fn oneshot_impl(vector: impl Vector, secret: &[u8], input: &[u8]) -> u64 { +fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } @@ -812,10 +884,10 @@ struct Algorithm(V); impl Algorithm { #[inline] - fn oneshot(&self, secret: &[u8], input: &[u8]) -> u64 { + fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { let mut acc = INITIAL_ACCUMULATORS; - assert!(secret.len() >= SECRET_MINIMUM_LENGTH); + //assert!(secret.len() >= SECRET_MINIMUM_LENGTH); assert!(input.len() >= 241); let stripes_per_block = (secret.len() - 64) / 8; @@ -846,7 +918,7 @@ impl Algorithm { &self, acc: &mut [u64; 8], blocks: impl IntoIterator, - secret: &[u8], + secret: &Secret, ) { for block in blocks { let (stripes, _) = block.bp_as_chunks(); @@ -856,18 +928,17 @@ impl Algorithm { } #[inline] - fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { - let secret_end = secret.last_chunk().unwrap(); + fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { + let secret_end = secret.0.last_chunk().unwrap(); self.round_accumulate(acc, stripes, secret); self.0.round_scramble(acc, secret_end); } #[inline] - fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &[u8]) { + fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { // TODO: [unify] - let secrets = - (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 
8..).as_ptr().cast() }); + let secrets = (0..stripes.len()).map(|i| secret.stripe(i)); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); @@ -880,7 +951,7 @@ impl Algorithm { mut acc: [u64; 8], last_block: &[u8], last_stripe: &[u8; 64], - secret: &[u8], + secret: &Secret, len: usize, ) -> u64 { debug_assert!(!last_block.is_empty()); @@ -890,20 +961,25 @@ impl Algorithm { } #[inline] - fn last_round(&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &[u8]) { + fn last_round( + &self, + acc: &mut [u64; 8], + block: &[u8], + last_stripe: &[u8; 64], + secret: &Secret, + ) { // Accumulation steps are run for the stripes in the last block, // except for the last stripe (whether it is full or not) let (stripes, _) = stripes_with_tail(block); // TODO: [unify] - let secrets = - (0..stripes.len()).map(|i| unsafe { &*secret.get_unchecked(i * 8..).as_ptr().cast() }); + let secrets = (0..stripes.len()).map(|i| secret.stripe(i)); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); } - let q = secret[secret.len() - 71..].first_chunk().unwrap(); + let q = secret.0[secret.len() - 71..].first_chunk().unwrap(); self.0.accumulate(acc, last_stripe, q); } @@ -912,10 +988,10 @@ impl Algorithm { &self, acc: &mut [u64; 8], init_value: u64, - secret: &[u8], + secret: &Secret, secret_offset: usize, ) -> u64 { - let secrets = secret[secret_offset..].first_chunk::<64>().unwrap(); + let secrets = secret.0[secret_offset..].first_chunk::<64>().unwrap(); let (secrets, _) = secrets.bp_as_chunks(); let mut result = init_value; for i in 0..4 { From 849434ae097b5f553eb176d738b4dbd4826ebb20 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 08:47:02 -0400 Subject: [PATCH 117/166] oneshot asmasm --- asmasm/src/main.rs | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index 706702566..e515bcdfc 100644 --- 
a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,4 +1,4 @@ -use std::{hash::Hasher, hint::black_box, time::Instant}; +use std::{hint::black_box, time::Instant}; use xx_hash_sys::XxHash3_64 as C; use xx_renu::xxhash3_64::XxHash3_64; @@ -8,33 +8,18 @@ fn main() { .nth(2) .map_or(false, |a| a.eq_ignore_ascii_case("C")); let file = std::fs::read(filename).expect("read"); - let seed = 0xdead_beef; if use_c { let start = Instant::now(); - let hash = do_c(seed, &file); + let hash = C::oneshot(&file); let elapsed = start.elapsed(); black_box(hash); eprintln!("C {elapsed:?}"); } else { let start = Instant::now(); - let hash = do_rust(seed, &file); + let hash = XxHash3_64::oneshot(&file); let elapsed = start.elapsed(); black_box(hash); eprintln!("Rust {elapsed:?}"); } } - -#[inline(never)] -fn do_c(seed: u64, file: &[u8]) -> u64 { - let mut hasher = C::with_seed(seed); - hasher.write(file); - hasher.finish() -} - -#[inline(never)] -fn do_rust(seed: u64, file: &[u8]) -> u64 { - let mut hasher = XxHash3_64::with_seed(seed); - hasher.write(&file); - hasher.finish() -} From 4b181d994d907099a5947fe0dd2474fafa45bbce Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 09:49:57 -0400 Subject: [PATCH 118/166] keep order consistent for now --- src/xxhash3_64.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c1f43ce42..9e6270895 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -614,6 +614,18 @@ where let input = &buffer[..buffer_len]; match total_bytes { + 0 => impl_0_bytes(DEFAULT_SECRET2, seed), + + 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), + + 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), + + 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), + + 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), + + 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), + 241.. 
=> { // Ingest final stripes let (stripes, remainder) = stripes_with_tail(input); @@ -645,18 +657,6 @@ where total_bytes, ) } - - 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), - - 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), - - 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), - - 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), - - 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), - - 0 => impl_0_bytes(DEFAULT_SECRET2, seed), } } @@ -690,19 +690,19 @@ fn derive_secret(seed: u64, secret: &mut [u8; 192]) { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { match input.len() { - 241.. => impl_241_plus_bytes(secret, input), + 0 => impl_0_bytes(secret, seed), - 129..=240 => impl_129_to_240_bytes(secret, seed, input), + 1..=3 => impl_1_to_3_bytes(secret, seed, input), - 17..=128 => impl_17_to_128_bytes(secret, seed, input), + 4..=8 => impl_4_to_8_bytes(secret, seed, input), 9..=16 => impl_9_to_16_bytes(secret, seed, input), - 4..=8 => impl_4_to_8_bytes(secret, seed, input), + 17..=128 => impl_17_to_128_bytes(secret, seed, input), - 1..=3 => impl_1_to_3_bytes(secret, seed, input), + 129..=240 => impl_129_to_240_bytes(secret, seed, input), - 0 => impl_0_bytes(secret, seed), + 241.. 
=> impl_241_plus_bytes(secret, input), } } From eee337e70326f8642271625da11466cd266dac1b Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 13:32:47 -0400 Subject: [PATCH 119/166] tweak inlines --- src/xxhash3_64.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 9e6270895..291e9482f 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -706,13 +706,13 @@ fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { } } -#[inline] +#[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { let secret_words = secret.words_for_0(); avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) } -#[inline] +#[inline(always)] fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_length = input.len() as u8; // OK as we checked that the length fits @@ -729,7 +729,7 @@ fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { avalanche_xxh64(value) } -#[inline] +#[inline(always)] fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { @@ -759,7 +759,7 @@ fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { value } -#[inline] +#[inline(always)] fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { @@ -875,7 +875,7 @@ fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { } } -#[inline(always)] +#[inline] fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input) } From 019ef1136896640b1a12c839a2002ad38ce7c32c Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 15:18:31 -0400 Subject: [PATCH 120/166] tweak bench --- compare/benches/benchmark.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 52000e427..fbb2fe10d 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -140,7 +140,7 @@ fn gen_data(length: usize) -> (u64, Vec) { } fn gen_chunked_data(length: usize, n_chunks: usize) -> (u64, Vec>) { - assert!(length > n_chunks); + assert!(length >= n_chunks); let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); @@ -254,8 +254,8 @@ mod xxhash3_64 { fn streaming(c: &mut Criterion) { let mut g = c.benchmark_group("xxhash3_64/streaming_many_chunks"); - for size in half_sizes(BIG_DATA_SIZE).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { - for n_chunks in half_sizes(MAX_CHUNKS) { + for size in [1024 * 1024] { + for n_chunks in half_sizes(size) { let (seed, chunks) = gen_chunked_data(size, n_chunks); g.throughput(Throughput::Bytes(size as _)); From a2c946a9785ce62445a6ceb3a1b70b552d87bb24 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 15:30:47 -0400 Subject: [PATCH 121/166] disable lib bench --- compare/Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compare/Cargo.toml b/compare/Cargo.toml index bbb8d0a59..c405aefa4 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -3,6 +3,9 @@ name = "compare" version = "0.1.0" edition = "2021" +[lib] +bench = false + [[bench]] name = "benchmark" harness = false From 0c0597d6f721e348bc93f4f95815aaeea3e38362 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 15:34:09 -0400 Subject: [PATCH 122/166] Revert "keep order consistent for now" This reverts commit 794d72a41c8752edc98da81cd3aa715324fcc0f7. 
--- src/xxhash3_64.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 291e9482f..80a7e5b93 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -614,18 +614,6 @@ where let input = &buffer[..buffer_len]; match total_bytes { - 0 => impl_0_bytes(DEFAULT_SECRET2, seed), - - 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), - - 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), - - 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), - - 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), - - 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), - 241.. => { // Ingest final stripes let (stripes, remainder) = stripes_with_tail(input); @@ -657,6 +645,18 @@ where total_bytes, ) } + + 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), + + 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), + + 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), + + 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), + + 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), + + 0 => impl_0_bytes(DEFAULT_SECRET2, seed), } } @@ -690,19 +690,19 @@ fn derive_secret(seed: u64, secret: &mut [u8; 192]) { #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { match input.len() { - 0 => impl_0_bytes(secret, seed), + 241.. => impl_241_plus_bytes(secret, input), - 1..=3 => impl_1_to_3_bytes(secret, seed, input), + 129..=240 => impl_129_to_240_bytes(secret, seed, input), - 4..=8 => impl_4_to_8_bytes(secret, seed, input), + 17..=128 => impl_17_to_128_bytes(secret, seed, input), 9..=16 => impl_9_to_16_bytes(secret, seed, input), - 17..=128 => impl_17_to_128_bytes(secret, seed, input), + 4..=8 => impl_4_to_8_bytes(secret, seed, input), - 129..=240 => impl_129_to_240_bytes(secret, seed, input), + 1..=3 => impl_1_to_3_bytes(secret, seed, input), - 241.. 
=> impl_241_plus_bytes(secret, input), + 0 => impl_0_bytes(secret, seed), } } From 3591d7bbc705dafb8aa4608ffd744f97136d73cf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 16:32:10 -0400 Subject: [PATCH 123/166] One category for each range --- compare/benches/benchmark.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index fbb2fe10d..1c40f1d72 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -177,7 +177,7 @@ mod xxhash3_64 { // Visual inspection of all the data points showed these as // examples of thier nearby neighbors. - let categories = [0, 2, 9, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230]; + let categories = [0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230]; for size in categories { let data = &data[..size]; From 5a6f1a40aff6b2dde70317131e680f9bb3828b92 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 16:41:49 -0400 Subject: [PATCH 124/166] asmasm and inline never --- asmasm/src/main.rs | 59 +++++++++++++++++++++++++++++++++------------- src/xxhash3_64.rs | 10 ++++---- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index e515bcdfc..b1244716b 100644 --- a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,25 +1,52 @@ -use std::{hint::black_box, time::Instant}; +use std::{hash::Hasher, time::Instant}; use xx_hash_sys::XxHash3_64 as C; use xx_renu::xxhash3_64::XxHash3_64; fn main() { let filename = std::env::args().nth(1).expect("filename"); - let use_c = std::env::args() - .nth(2) - .map_or(false, |a| a.eq_ignore_ascii_case("C")); + let mode = std::env::args().nth(2); + let mode = mode.as_deref().unwrap_or("rust-oneshot"); let file = std::fs::read(filename).expect("read"); + let chunk_size = file.len() / 100; + let chunk_size = usize::max(chunk_size, 1); - if use_c { - let start = Instant::now(); - let hash = C::oneshot(&file); - let 
elapsed = start.elapsed(); - black_box(hash); - eprintln!("C {elapsed:?}"); - } else { - let start = Instant::now(); - let hash = XxHash3_64::oneshot(&file); - let elapsed = start.elapsed(); - black_box(hash); - eprintln!("Rust {elapsed:?}"); + let start = Instant::now(); + let hash = match mode { + "rust-oneshot" => rust_oneshot(&file), + "c-oneshot" => c_oneshot(&file), + "rust-chunked" => rust_chunked(&file, chunk_size), + "c-chunked" => c_chunked(&file, chunk_size), + other => panic!("Unknown mode {other}"), + }; + let elapsed = start.elapsed(); + + eprintln!("{mode}\t{elapsed:?}\t{hash:016X}"); +} + +#[inline(never)] +fn rust_oneshot(file: &[u8]) -> u64 { + XxHash3_64::oneshot(file) +} + +#[inline(never)] +fn c_oneshot(file: &[u8]) -> u64 { + C::oneshot(file) +} + +#[inline(never)] +fn rust_chunked(file: &[u8], chunk_size: usize) -> u64 { + let mut hasher = XxHash3_64::new(); + for chunk in file.chunks(chunk_size) { + hasher.write(chunk); + } + hasher.finish() +} + +#[inline(never)] +fn c_chunked(file: &[u8], chunk_size: usize) -> u64 { + let mut hasher = C::new(); + for chunk in file.chunks(chunk_size) { + hasher.write(chunk); } + hasher.finish() } diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 80a7e5b93..ec467a959 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -110,12 +110,12 @@ pub struct XxHash3_64 { } impl XxHash3_64 { - #[inline(never)] + #[inline] pub fn oneshot(input: &[u8]) -> u64 { impl_oneshot(DEFAULT_SECRET2, DEFAULT_SEED, input) } - #[inline(never)] + #[inline] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { let mut secret = DEFAULT_SECRET; @@ -130,7 +130,7 @@ impl XxHash3_64 { impl_oneshot(s, seed, input) } - #[inline(never)] + #[inline] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> u64 { let secret = Secret::new(secret).unwrap(); // TODO: ERROR impl_oneshot(secret, DEFAULT_SEED, input) @@ -489,7 +489,7 @@ impl hash::Hasher for RawHasher where S: AsRef<[u8]>, { - #[inline(never)] + #[inline] fn 
write(&mut self, input: &[u8]) { let this = self; dispatch! { @@ -498,7 +498,7 @@ where } } - #[inline(never)] + #[inline] fn finish(&self) -> u64 { let this = self; dispatch! { From e0f44663af7a48c2f673d36dd9c52471d6a09913 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sun, 18 Aug 2024 23:43:12 -0400 Subject: [PATCH 125/166] asserts and unsafe --- src/xxhash3_64.rs | 135 +++++++++++++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 49 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ec467a959..84fcd96e2 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,7 +1,7 @@ #![allow(missing_docs)] #![deny(unsafe_op_in_unsafe_fn)] -use core::{hash, mem, slice}; +use core::{hash, hint::assert_unchecked, mem, slice}; use crate::{IntoU128, IntoU32, IntoU64}; @@ -59,6 +59,7 @@ impl Secret { fn words_for_0(&self) -> [u64; 2] { // unsafe { self.0.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() } + self.reassert_preconditions(); let (q, _) = self.0[56..].bp_as_chunks(); [q[0], q[1]].map(u64::from_ne_bytes) } @@ -67,6 +68,7 @@ impl Secret { fn words_for_1_to_3(&self) -> [u32; 2] { // unsafe { self.0.as_ptr().cast::<[u32; 2]>().read_unaligned() } + self.reassert_preconditions(); let (q, _) = self.0.bp_as_chunks(); [q[0], q[1]].map(u32::from_ne_bytes) } @@ -75,6 +77,7 @@ impl Secret { fn words_for_4_to_8(&self) -> [u64; 2] { //unsafe { self.0.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() } + self.reassert_preconditions(); let (q, _) = self.0[8..].bp_as_chunks(); [q[0], q[1]].map(u64::from_ne_bytes) } @@ -83,24 +86,38 @@ impl Secret { fn words_for_9_to_16(&self) -> [u64; 4] { // unsafe { self.0.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() } + self.reassert_preconditions(); let (q, _) = self.0[24..].bp_as_chunks(); [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) } + #[inline] + fn words_for_17_to_128(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + let (words, _) = self.0.bp_as_chunks(); + words + } + 
#[inline] fn stripe(&self, i: usize) -> &[u8; 64] { unsafe { &*self.0.get_unchecked(i * 8..).as_ptr().cast() } } #[inline] - fn end(&self) -> &[u8; 64] { - unsafe { self.0.last_chunk().unwrap_unchecked() } + fn last_stripe(&self) -> &[u8; 64] { + self.reassert_preconditions(); + self.0.last_chunk().unwrap() } #[inline] fn len(&self) -> usize { self.0.len() } + + #[inline(always)] + fn reassert_preconditions(&self) { + unsafe { assert_unchecked(self.0.len() >= SECRET_MINIMUM_LENGTH) } + } } pub struct XxHash3_64 { @@ -361,7 +378,7 @@ impl StripeAccumulator { .. } = self; - let secret_end = secret.end(); + let secret_end = secret.last_stripe(); // each stripe let secret = secret.stripe(*current_stripe); @@ -533,6 +550,9 @@ where *total_bytes += input.len(); + debug_assert!(*buffer_len <= buffer.len()); + unsafe { assert_unchecked(*buffer_len <= buffer.len()) }; + // We have some previous data saved; try to fill it up and process it first if !buffer.is_empty() { let remaining = &mut buffer[*buffer_len..]; @@ -568,11 +588,15 @@ where // Process as much of the input data in-place as possible, // while leaving at least one full stripe for the // finalization. - if let Some(dd) = input.len().checked_sub(STRIPE_BYTES) { - let nn = dd / STRIPE_BYTES; - let nn = nn * STRIPE_BYTES; - let (aa, remainder) = input.split_at(nn); - let (stripes, _) = aa.bp_as_chunks(); + if let Some(len) = input.len().checked_sub(STRIPE_BYTES) { + let full_block_point = (len / STRIPE_BYTES) * STRIPE_BYTES; + // Safety: We know that `full_block_point` must be less than + // `input.len()` as we subtracted and then integer-divided + // (which rounds down) and then multiplied back. That's not + // evident to the compiler and `split_at` results in a + // potential panic. 
+ let (stripes, remainder) = unsafe { input.split_at_unchecked(full_block_point) }; + let (stripes, _) = stripes.bp_as_chunks(); for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret) @@ -584,9 +608,13 @@ where // buffer is empty so just fill up the buffer. debug_assert!(*buffer_len == 0); debug_assert!(!input.is_empty()); - debug_assert!(input.len() < buffer.len()); + debug_assert!(input.len() < 2 * STRIPE_BYTES); + debug_assert!(2 * STRIPE_BYTES < buffer.len()); - buffer[..input.len()].copy_from_slice(input); + // SAFETY: We have parsed all the full blocks of input except one + // and potentially a full block minus one byte. That amount of + // data must be less than the buffer. + unsafe { buffer.get_unchecked_mut(..input.len()) }.copy_from_slice(input); *buffer_len = input.len(); } @@ -611,52 +639,43 @@ where let secret = unsafe { Secret::new_unchecked(secret) }; let buffer = buffer.as_ref(); - let input = &buffer[..buffer_len]; + unsafe { assert_unchecked(buffer_len <= buffer.len()) }; - match total_bytes { - 241.. 
=> { - // Ingest final stripes - let (stripes, remainder) = stripes_with_tail(input); - for stripe in stripes { - stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); - } - - let mut temp = [0; 64]; - - let last_stripe = match input.last_chunk() { - Some(chunk) => chunk, - None => { - let n_to_reuse = 64 - input.len(); - let to_reuse = buffer.len() - n_to_reuse; - let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); - temp_head.copy_from_slice(&buffer[to_reuse..]); - temp_tail.copy_from_slice(input); + if total_bytes >= 241 { + let input = &buffer[..buffer_len]; - &temp - } - }; - - Algorithm(vector).finalize( - stripe_accumulator.accumulator, - remainder, - last_stripe, - secret, - total_bytes, - ) + // Ingest final stripes + let (stripes, remainder) = stripes_with_tail(input); + for stripe in stripes { + stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } - 129..=240 => impl_129_to_240_bytes(DEFAULT_SECRET2, seed, input), + let mut temp = [0; 64]; - 17..=128 => impl_17_to_128_bytes(DEFAULT_SECRET2, seed, input), + let last_stripe = match input.last_chunk() { + Some(chunk) => chunk, + None => { + let n_to_reuse = 64 - input.len(); + let to_reuse = buffer.len() - n_to_reuse; - 9..=16 => impl_9_to_16_bytes(DEFAULT_SECRET2, seed, input), + let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); + temp_head.copy_from_slice(&buffer[to_reuse..]); + temp_tail.copy_from_slice(input); - 4..=8 => impl_4_to_8_bytes(DEFAULT_SECRET2, seed, input), - - 1..=3 => impl_1_to_3_bytes(DEFAULT_SECRET2, seed, input), + &temp + } + }; - 0 => impl_0_bytes(DEFAULT_SECRET2, seed), + Algorithm(vector).finalize( + stripe_accumulator.accumulator, + remainder, + last_stripe, + secret, + total_bytes, + ) + } else { + impl_oneshot(&DEFAULT_SECRET2, seed, &buffer[..total_bytes]) } } @@ -706,6 +725,16 @@ fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { } } +macro_rules! 
assert_input_range { + ($min:literal.., $len:expr) => { + assert!($min <= $len); + }; + ($min:literal..=$max:literal, $len:expr) => { + assert!($min <= $len); + assert!($len <= $max); + }; +} + #[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { let secret_words = secret.words_for_0(); @@ -714,6 +743,7 @@ fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { #[inline(always)] fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + assert_input_range!(1..=3, input.len()); let input_length = input.len() as u8; // OK as we checked that the length fits let combined = input[input.len() - 1].into_u32() @@ -731,6 +761,7 @@ fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + assert_input_range!(4..=8, input.len()); let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { input @@ -761,6 +792,7 @@ fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + assert_input_range!(9..=16, input.len()); let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; let input_last = unsafe { input @@ -787,9 +819,10 @@ fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline] fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + assert_input_range!(17..=128, input.len()); let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); - let (secret, _) = secret.0.bp_as_chunks(); + let secret = secret.words_for_17_to_128(); let (secret, _) = secret.bp_as_chunks::<2>(); let (fwd, _) = input.bp_as_chunks(); let (_, bwd) = input.bp_as_rchunks(); @@ -819,6 +852,7 @@ fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline] fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { + 
assert_input_range!(129..=240, input.len()); let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _) = input.bp_as_chunks(); @@ -869,6 +903,7 @@ const INITIAL_ACCUMULATORS: [u64; 8] = [ #[inline] fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { + assert_input_range!(241.., input.len()); dispatch! { fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64 [] @@ -979,6 +1014,8 @@ impl Algorithm { self.0.accumulate(acc, stripe, secret); } + unsafe { assert_unchecked(secret.len() >= SECRET_MINIMUM_LENGTH) }; + let q = secret.0[secret.len() - 71..].first_chunk().unwrap(); self.0.accumulate(acc, last_stripe, q); } From 1e6f66d179e53bb38c75fd1f1b5d718490711fe6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 10:49:18 -0400 Subject: [PATCH 126/166] sum with new --- renu-sum/src/main.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/renu-sum/src/main.rs b/renu-sum/src/main.rs index 7a1055778..b86a5e26c 100644 --- a/renu-sum/src/main.rs +++ b/renu-sum/src/main.rs @@ -7,7 +7,7 @@ use std::{ sync::mpsc::{self, SendError}, thread, }; -use xx_renu::XxHash64; +use xx_renu::XxHash3_64; type Error = Box; type Result = std::result::Result; @@ -53,9 +53,10 @@ fn main() -> Result<()> { Ok(()) } +#[inline(never)] fn hash_one_file(config: &Config, path: &Path, buffer: &mut [u8]) -> Result { let mut file = File::open(path)?; - let mut hasher = XxHash64::with_seed(0); + let mut hasher = XxHash3_64::with_seed(0); let (tx_empty, rx_empty) = mpsc::channel(); let (tx_filled, rx_filled) = mpsc::channel(); From b9fccc5700dda1e7201809d614f9d40ec5b13362 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 10:50:59 -0400 Subject: [PATCH 127/166] categ --- compare/benches/benchmark.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 1c40f1d72..fe2c7f693 100644 --- a/compare/benches/benchmark.rs +++ 
b/compare/benches/benchmark.rs @@ -177,7 +177,9 @@ mod xxhash3_64 { // Visual inspection of all the data points showed these as // examples of thier nearby neighbors. - let categories = [0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230]; + let categories = [ + 0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230, + ]; for size in categories { let data = &data[..size]; From 88ecd94130bc619313abde63e243f607f964d949 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 10:53:22 -0400 Subject: [PATCH 128/166] document LLVM missed optimization --- src/xxhash3_64.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 84fcd96e2..d450313b0 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -595,6 +595,8 @@ where // (which rounds down) and then multiplied back. That's not // evident to the compiler and `split_at` results in a // potential panic. + // + // https://github.com/llvm/llvm-project/issues/104827 let (stripes, remainder) = unsafe { input.split_at_unchecked(full_block_point) }; let (stripes, _) = stripes.bp_as_chunks(); From 80db4d946a4002e139d6472fd55d44a9cbc3b33b Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 13:54:35 -0400 Subject: [PATCH 129/166] safety --- src/xxhash3_64.rs | 416 +++++++++++++++++++++++++++++++--------------- 1 file changed, 281 insertions(+), 135 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index d450313b0..696f8e626 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -18,7 +18,7 @@ const PRIME_MX2: u64 = 0x9FB21C651E98DF25; const DEFAULT_SEED: u64 = 0; -const DEFAULT_SECRET: [u8; 192] = [ +const DEFAULT_SECRET_RAW: [u8; 192] = [ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 
0x72, 0x21, @@ -33,7 +33,8 @@ const DEFAULT_SECRET: [u8; 192] = [ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; -const DEFAULT_SECRET2: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET) }; +// Safety: The default secret is long enough +const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; pub const SECRET_MINIMUM_LENGTH: usize = 136; @@ -43,50 +44,56 @@ struct Secret([u8]); impl Secret { #[inline] fn new(bytes: &[u8]) -> Result<&Self, ()> { - if bytes.len() >= SECRET_MINIMUM_LENGTH { - unsafe { Ok(Self::new_unchecked(bytes)) } - } else { - Err(()) // TODO error + // Safety: We check for validity before returning. + unsafe { + let this = Self::new_unchecked(bytes); + if this.is_valid() { + Ok(this) + } else { + Err(()) // TODO error + } } } + /// # Safety + /// + /// You must ensure that the secret byte length is >= + /// SECRET_MINIMUM_LENGTH. #[inline] const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { + // Safety: We are `#[repr(transparent)]`. 
It's up to the + // caller to ensure the length unsafe { mem::transmute(bytes) } } #[inline] fn words_for_0(&self) -> [u64; 2] { - // unsafe { self.0.as_ptr().add(56).cast::<[u64; 2]>().read_unaligned() } - self.reassert_preconditions(); + let (q, _) = self.0[56..].bp_as_chunks(); [q[0], q[1]].map(u64::from_ne_bytes) } #[inline] fn words_for_1_to_3(&self) -> [u32; 2] { - // unsafe { self.0.as_ptr().cast::<[u32; 2]>().read_unaligned() } - self.reassert_preconditions(); + let (q, _) = self.0.bp_as_chunks(); [q[0], q[1]].map(u32::from_ne_bytes) } #[inline] fn words_for_4_to_8(&self) -> [u64; 2] { - //unsafe { self.0.as_ptr().add(8).cast::<[u64; 2]>().read_unaligned() } - self.reassert_preconditions(); + let (q, _) = self.0[8..].bp_as_chunks(); [q[0], q[1]].map(u64::from_ne_bytes) } #[inline] fn words_for_9_to_16(&self) -> [u64; 4] { - // unsafe { self.0.as_ptr().add(24).cast::<[u64; 4]>().read_unaligned() } - self.reassert_preconditions(); + let (q, _) = self.0[24..].bp_as_chunks(); [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) } @@ -94,21 +101,62 @@ impl Secret { #[inline] fn words_for_17_to_128(&self) -> &[[u8; 16]] { self.reassert_preconditions(); + let (words, _) = self.0.bp_as_chunks(); words } + #[inline] + fn words_for_127_to_240_part1(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (ss, _) = self.0.bp_as_chunks(); + ss + } + + #[inline] + fn words_for_127_to_240_part2(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (ss, _) = self.0[3..].bp_as_chunks(); + ss + } + + #[inline] + fn words_for_127_to_240_part3(&self) -> &[u8; 16] { + self.reassert_preconditions(); + + self.0[119..].first_chunk().unwrap() + } + #[inline] fn stripe(&self, i: usize) -> &[u8; 64] { + self.reassert_preconditions(); + unsafe { &*self.0.get_unchecked(i * 8..).as_ptr().cast() } } #[inline] fn last_stripe(&self) -> &[u8; 64] { self.reassert_preconditions(); + self.0.last_chunk().unwrap() } + #[inline] + fn last_stripe_secret_better_name(&self) -> &[u8; 
64] { + self.reassert_preconditions(); + + self.0[self.0.len() - 71..].first_chunk().unwrap() + } + + #[inline] + fn final_secret(&self) -> &[u8; 64] { + self.reassert_preconditions(); + + self.0[11..].first_chunk().unwrap() + } + #[inline] fn len(&self) -> usize { self.0.len() @@ -116,7 +164,17 @@ impl Secret { #[inline(always)] fn reassert_preconditions(&self) { - unsafe { assert_unchecked(self.0.len() >= SECRET_MINIMUM_LENGTH) } + // Safety: The length of the bytes was checked at value + // construction time. + unsafe { + debug_assert!(self.is_valid()); + assert_unchecked(self.is_valid()); + } + } + + #[inline(always)] + fn is_valid(&self) -> bool { + self.0.len() >= SECRET_MINIMUM_LENGTH } } @@ -129,12 +187,12 @@ pub struct XxHash3_64 { impl XxHash3_64 { #[inline] pub fn oneshot(input: &[u8]) -> u64 { - impl_oneshot(DEFAULT_SECRET2, DEFAULT_SEED, input) + impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input) } #[inline] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { - let mut secret = DEFAULT_SECRET; + let mut secret = DEFAULT_SECRET_RAW; // We know that the secret will only be used if we have more // than 240 bytes, so don't waste time computing it otherwise. @@ -142,9 +200,9 @@ impl XxHash3_64 { derive_secret(seed, &mut secret); } - let s = unsafe { Secret::new_unchecked(&secret) }; + let secret = Secret::new(&secret).expect("The default secret length is invalid"); - impl_oneshot(s, seed, input) + impl_oneshot(secret, seed, input) } #[inline] @@ -157,21 +215,36 @@ impl XxHash3_64 { const STRIPE_BYTES: usize = 64; const BUFFERED_STRIPES: usize = 4; const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; +type Buffer = [u8; BUFFERED_BYTES]; // Ensure that a full buffer always implies we are in the 241+ byte case. const _: () = assert!(BUFFERED_BYTES > 240); +/// # Safety +/// +/// Must always return a slice with the same number of elements. +pub unsafe trait FixedBuffer: AsRef<[u8]> {} + +// Safety: An array will never change size. 
+unsafe impl FixedBuffer for [u8; N] {} + +// Safety: An array will never change size. +unsafe impl FixedBuffer for &[u8; N] {} + +// Safety: A plain slice will never change size. +unsafe impl FixedBuffer for Box<[u8]> {} + /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. pub struct SecretBuffer { seed: u64, secret: S, - buffer: [u8; BUFFERED_BYTES], + buffer: Buffer, } impl SecretBuffer where - S: AsRef<[u8]>, + S: FixedBuffer, { /// Takes the seed, secret, and buffer and performs no /// modifications to them, only validating that the sizes are @@ -190,6 +263,7 @@ where } } + #[inline(always)] fn is_valid(&self) -> bool { let secret = self.secret.as_ref(); @@ -208,6 +282,26 @@ where pub fn decompose(self) -> S { self.secret } + + #[inline] + fn parts(&self) -> (u64, &Secret, &Buffer) { + let secret = self.secret.as_ref(); + // Safety: We established the length at construction and the + // length is not allowed to change. + let secret = unsafe { Secret::new_unchecked(secret) }; + + (self.seed, secret, &self.buffer) + } + + #[inline] + fn parts_mut(&mut self) -> (u64, &Secret, &mut Buffer) { + let secret = self.secret.as_ref(); + // Safety: We established the length at construction and the + // length is not allowed to change. 
+ let secret = unsafe { Secret::new_unchecked(secret) }; + + (self.seed, secret, &mut self.buffer) + } } impl SecretBuffer<&'static [u8; 192]> { @@ -218,7 +312,7 @@ impl SecretBuffer<&'static [u8; 192]> { pub const fn default() -> Self { SecretBuffer { seed: DEFAULT_SEED, - secret: &DEFAULT_SECRET, + secret: &DEFAULT_SECRET_RAW, buffer: [0; BUFFERED_BYTES], } } @@ -280,7 +374,7 @@ mod with_alloc { pub fn allocate_default() -> Self { Self { seed: DEFAULT_SEED, - secret: DEFAULT_SECRET.to_vec().into(), + secret: DEFAULT_SECRET_RAW.to_vec().into(), buffer: [0; BUFFERED_BYTES], } } @@ -288,7 +382,7 @@ mod with_alloc { /// Allocates the secret and temporary buffers and uses the /// provided seed to construct the secret value. pub fn allocate_with_seed(seed: u64) -> Self { - let mut secret = DEFAULT_SECRET; + let mut secret = DEFAULT_SECRET_RAW; derive_secret(seed, &mut secret); Self { @@ -331,7 +425,7 @@ mod with_alloc { impl SecretBuffer where - S: AsRef<[u8]> + AsMut<[u8]>, + S: FixedBuffer + AsMut<[u8]>, { /// Fills the secret buffer with a secret derived from the seed /// and the default secret. @@ -341,7 +435,7 @@ where Err(_) => return Err(secret), }; - *secret_slice = DEFAULT_SECRET; + *secret_slice = DEFAULT_SECRET_RAW; derive_secret(seed, secret_slice); Self::new(seed, secret) @@ -404,7 +498,7 @@ impl StripeAccumulator { /// generic type. pub struct RawHasher { secret_buffer: SecretBuffer, - buffer_len: usize, + buffer_usage: usize, stripe_accumulator: StripeAccumulator, total_bytes: usize, } @@ -413,7 +507,7 @@ impl RawHasher { pub fn new(secret_buffer: SecretBuffer) -> Self { Self { secret_buffer, - buffer_len: 0, + buffer_usage: 0, stripe_accumulator: StripeAccumulator::new(), total_bytes: 0, } @@ -442,7 +536,7 @@ macro_rules! 
dispatch { where $($wheres)* { - // SAFETY: the caller has ensured we have the NEON feature + // Safety: The caller has ensured we have the NEON feature unsafe { $fn_name(neon::Impl::new_unchecked(), $($arg_name),*) } @@ -485,6 +579,7 @@ macro_rules! dispatch { #[cfg(all(target_arch = "aarch64", feature = "std"))] { if std::arch::is_aarch64_feature_detected!("neon") { + // Safety: We just ensured we have the NEON feature return unsafe { do_neon($($arg_name),*) }; } } @@ -492,8 +587,10 @@ macro_rules! dispatch { #[cfg(all(target_arch = "x86_64", feature = "std"))] { if is_x86_feature_detected!("avx2") { + // Safety: We just ensured we have the AVX2 feature return unsafe { do_avx2($($arg_name),*) }; } else if is_x86_feature_detected!("sse2") { + // Safety: We just ensured we have the SSE2 feature return unsafe { do_sse2($($arg_name),*) }; } } @@ -504,14 +601,14 @@ macro_rules! dispatch { impl hash::Hasher for RawHasher where - S: AsRef<[u8]>, + S: FixedBuffer, { #[inline] fn write(&mut self, input: &[u8]) { let this = self; dispatch! { fn write_impl(this: &mut RawHasher, input: &[u8]) - [S: AsRef<[u8]>] + [S: FixedBuffer] } } @@ -520,7 +617,7 @@ where let this = self; dispatch! { fn finish_impl(this: &RawHasher) -> u64 - [S: AsRef<[u8]>] + [S: FixedBuffer] } } } @@ -528,7 +625,7 @@ where #[inline(always)] fn write_impl(vector: impl Vector, this: &mut RawHasher, mut input: &[u8]) where - S: AsRef<[u8]>, + S: FixedBuffer, { if input.is_empty() { return; @@ -536,33 +633,33 @@ where let RawHasher { secret_buffer, - buffer_len, + buffer_usage, stripe_accumulator, total_bytes, .. } = this; let n_stripes = secret_buffer.n_stripes(); - - let SecretBuffer { secret, buffer, .. 
} = secret_buffer; - let secret = secret.as_ref(); - let secret = unsafe { Secret::new_unchecked(secret) }; + let (_, secret, buffer) = secret_buffer.parts_mut(); *total_bytes += input.len(); - debug_assert!(*buffer_len <= buffer.len()); - unsafe { assert_unchecked(*buffer_len <= buffer.len()) }; + // Safety: This is an invariant of the buffer. + unsafe { + debug_assert!(*buffer_usage <= buffer.len()); + assert_unchecked(*buffer_usage <= buffer.len()) + }; // We have some previous data saved; try to fill it up and process it first if !buffer.is_empty() { - let remaining = &mut buffer[*buffer_len..]; + let remaining = &mut buffer[*buffer_usage..]; let n_to_copy = usize::min(remaining.len(), input.len()); let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); let (input_head, input_tail) = input.split_at(n_to_copy); remaining_head.copy_from_slice(input_head); - *buffer_len += n_to_copy; + *buffer_usage += n_to_copy; input = input_tail; @@ -580,10 +677,10 @@ where for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } - *buffer_len = 0; + *buffer_usage = 0; } - debug_assert!(*buffer_len == 0); + debug_assert!(*buffer_usage == 0); // Process as much of the input data in-place as possible, // while leaving at least one full stripe for the @@ -608,44 +705,45 @@ where // Any remaining data has to be less than the buffer, and the // buffer is empty so just fill up the buffer. - debug_assert!(*buffer_len == 0); + debug_assert!(*buffer_usage == 0); debug_assert!(!input.is_empty()); - debug_assert!(input.len() < 2 * STRIPE_BYTES); - debug_assert!(2 * STRIPE_BYTES < buffer.len()); - // SAFETY: We have parsed all the full blocks of input except one + // Safety: We have parsed all the full blocks of input except one // and potentially a full block minus one byte. That amount of // data must be less than the buffer. 
- unsafe { buffer.get_unchecked_mut(..input.len()) }.copy_from_slice(input); - *buffer_len = input.len(); + let buffer_head = unsafe { + debug_assert!(input.len() < 2 * STRIPE_BYTES); + debug_assert!(2 * STRIPE_BYTES < buffer.len()); + buffer.get_unchecked_mut(..input.len()) + }; + + buffer_head.copy_from_slice(input); + *buffer_usage = input.len(); } #[inline(always)] fn finish_impl(vector: impl Vector, this: &RawHasher) -> u64 where - S: AsRef<[u8]>, + S: FixedBuffer, { let RawHasher { ref secret_buffer, - buffer_len, + buffer_usage, mut stripe_accumulator, total_bytes, } = *this; - let n_stripes = secret_buffer.n_stripes(); - let SecretBuffer { - seed, - ref secret, - ref buffer, - } = *secret_buffer; - let secret = secret.as_ref(); - let secret = unsafe { Secret::new_unchecked(secret) }; - let buffer = buffer.as_ref(); - unsafe { assert_unchecked(buffer_len <= buffer.len()) }; + let n_stripes = secret_buffer.n_stripes(); + let (seed, secret, buffer) = secret_buffer.parts(); + // Safety: This is an invariant of the buffer. 
+ unsafe { + debug_assert!(buffer_usage <= buffer.len()); + assert_unchecked(buffer_usage <= buffer.len()) + }; if total_bytes >= 241 { - let input = &buffer[..buffer_len]; + let input = &buffer[..buffer_usage]; // Ingest final stripes let (stripes, remainder) = stripes_with_tail(input); @@ -677,7 +775,7 @@ where total_bytes, ) } else { - impl_oneshot(&DEFAULT_SECRET2, seed, &buffer[..total_bytes]) + impl_oneshot(DEFAULT_SECRET, seed, &buffer[..total_bytes]) } } @@ -691,20 +789,18 @@ fn derive_secret(seed: u64, secret: &mut [u8; 192]) { return; } - let base = secret.as_mut_ptr().cast::(); - - for i in 0..12 { - let a_p = unsafe { base.add(i * 2) }; - let b_p = unsafe { base.add(i * 2 + 1) }; + let (words, _) = secret.bp_as_chunks_mut(); + let (pairs, _) = words.bp_as_chunks_mut(); - let mut a = unsafe { a_p.read_unaligned() }; - let mut b = unsafe { b_p.read_unaligned() }; + for [a_p, b_p] in pairs { + let a = u64::from_ne_bytes(*a_p); + let b = u64::from_ne_bytes(*b_p); - a = a.wrapping_add(seed); - b = b.wrapping_sub(seed); + let a = a.wrapping_add(seed); + let b = b.wrapping_sub(seed); - unsafe { a_p.write_unaligned(a) }; - unsafe { b_p.write_unaligned(b) }; + *a_p = a.to_ne_bytes(); + *b_p = b.to_ne_bytes(); } } @@ -764,15 +860,8 @@ fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(4..=8, input.len()); - let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; + let input_first = input.first_u32().unwrap(); + let input_last = input.last_u32().unwrap(); let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); let secret_words = secret.words_for_4_to_8(); @@ -795,15 +884,8 @@ fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { #[inline(always)] fn 
impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(9..=16, input.len()); - let input_first = unsafe { input.as_ptr().cast::().read_unaligned() }; - let input_last = unsafe { - input - .as_ptr() - .add(input.len()) - .sub(mem::size_of::()) - .cast::() - .read_unaligned() - }; + let input_first = input.first_u64().unwrap(); + let input_last = input.last_u64().unwrap(); let secret_words = secret.words_for_9_to_16(); let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; @@ -858,34 +940,37 @@ fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _) = input.bp_as_chunks(); - let last_chunk = input.last_chunk().unwrap(); let mut head = head.iter(); - let (ss, _) = secret.0.bp_as_chunks(); - let (ss2, _) = secret.0[3..].bp_as_chunks(); - - let qq = head.by_ref().zip(ss); - - for (chunk, s) in qq.take(8) { - acc = acc.wrapping_add(mix_step(chunk, s, seed)); + let ss = secret.words_for_127_to_240_part1(); + for (chunk, secret) in head.by_ref().zip(ss).take(8) { + acc = acc.wrapping_add(mix_step(chunk, secret, seed)); } acc = avalanche(acc); - for (chunk, s) in head.zip(ss2) { - acc = acc.wrapping_add(mix_step(chunk, s, seed)); + let ss = secret.words_for_127_to_240_part2(); + for (chunk, secret) in head.zip(ss) { + acc = acc.wrapping_add(mix_step(chunk, secret, seed)); } - let ss3 = &secret.0[119..].first_chunk().unwrap(); - acc = acc.wrapping_add(mix_step(last_chunk, ss3, seed)); + let last_chunk = input.last_chunk().unwrap(); + let ss = secret.words_for_127_to_240_part3(); + acc = acc.wrapping_add(mix_step(last_chunk, ss, seed)); avalanche(acc) } #[inline] fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { - let data_words = unsafe { data.as_ptr().cast::<[u64; 2]>().read_unaligned() }; - let secret_words = unsafe { secret.as_ptr().cast::<[u64; 2]>().read_unaligned() }; + #[inline] + fn 
to_u64s(bytes: &[u8; 16]) -> [u64; 2] { + let (pair, _) = bytes.bp_as_chunks::<8>(); + [pair[0], pair[1]].map(u64::from_ne_bytes) + } + + let data_words = to_u64s(data); + let secret_words = to_u64s(secret); let mul_result = { let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); @@ -922,18 +1007,16 @@ struct Algorithm(V); impl Algorithm { #[inline] fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { + assert_input_range!(241.., input.len()); let mut acc = INITIAL_ACCUMULATORS; - //assert!(secret.len() >= SECRET_MINIMUM_LENGTH); - assert!(input.len() >= 241); - let stripes_per_block = (secret.len() - 64) / 8; let block_size = 64 * stripes_per_block; let mut blocks = input.chunks_exact(block_size); let last_block = if blocks.remainder().is_empty() { - // SAFETY: We know that `input` is non-empty, which means + // Safety: We know that `input` is non-empty, which means // that either there will be a remainder or one or more // full blocks. That info isn't flowing to the optimizer, // so we use `unwrap_unchecked`. 
@@ -966,7 +1049,7 @@ impl Algorithm { #[inline] fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { - let secret_end = secret.0.last_chunk().unwrap(); + let secret_end = secret.last_stripe(); self.round_accumulate(acc, stripes, secret); self.0.round_scramble(acc, secret_end); @@ -994,7 +1077,7 @@ impl Algorithm { debug_assert!(!last_block.is_empty()); self.last_round(&mut acc, last_block, last_stripe, secret); - self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret, 11) + self.final_merge(&mut acc, len.into_u64().wrapping_mul(PRIME64_1), secret) } #[inline] @@ -1016,22 +1099,14 @@ impl Algorithm { self.0.accumulate(acc, stripe, secret); } - unsafe { assert_unchecked(secret.len() >= SECRET_MINIMUM_LENGTH) }; - - let q = secret.0[secret.len() - 71..].first_chunk().unwrap(); - self.0.accumulate(acc, last_stripe, q); + let last_stripe_secret = secret.last_stripe_secret_better_name(); + self.0.accumulate(acc, last_stripe, last_stripe_secret); } #[inline] - fn final_merge( - &self, - acc: &mut [u64; 8], - init_value: u64, - secret: &Secret, - secret_offset: usize, - ) -> u64 { - let secrets = secret.0[secret_offset..].first_chunk::<64>().unwrap(); - let (secrets, _) = secrets.bp_as_chunks(); + fn final_merge(&self, acc: &mut [u64; 8], init_value: u64, secret: &Secret) -> u64 { + let secret = secret.final_secret(); + let (secrets, _) = secret.bp_as_chunks(); let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result @@ -1117,17 +1192,18 @@ mod scalar { // https://github.com/llvm/llvm-project/issues/98481 #[cfg(target_arch = "aarch64")] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - use core::arch::asm; - let res; + // Safety: We only compute using our argument values and do + // not change memory. 
unsafe { - asm!( + core::arch::asm!( "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", lhs = in(reg) lhs, rhs = in(reg) rhs, acc = in(reg) acc, res = out(reg) res, + options(pure, nomem, nostack), ) } @@ -1146,6 +1222,7 @@ mod neon { impl Impl { /// # Safety + /// /// You must ensure that the CPU has the NEON feature #[inline] pub unsafe fn new_unchecked() -> Self { @@ -1156,11 +1233,13 @@ mod neon { impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // Safety: Type can only be constructed when NEON feature is present unsafe { round_scramble_neon(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // Safety: Type can only be constructed when NEON feature is present unsafe { accumulate_neon(acc, stripe, secret) } } } @@ -1199,7 +1278,7 @@ mod neon { #[inline] unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let (acc2, _) = acc.bp_as_chunks_mut::<4>(); - for (i, acc) in acc2.into_iter().enumerate() { + for (i, acc) in acc2.iter_mut().enumerate() { unsafe { let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); @@ -1289,6 +1368,8 @@ mod neon { // operation. #[inline] pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { + // Safety: We only compute using our argument values and do + // not change memory. unsafe { let input_as_u32 = vreinterpretq_u32_u64(input); let factor = vmov_n_u32(og_factor); @@ -1303,11 +1384,24 @@ mod neon { } } + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature + // // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 #[inline] #[target_feature(enable = "neon")] unsafe fn reordering_barrier(r: uint64x2_t) { - unsafe { core::arch::asm!("/* {r:v} */", r = in(vreg) r) } + // Safety: The caller has ensured we have the NEON feature. 
We + // aren't doing anything with the argument, so we shouldn't be + // able to cause unsafety! + unsafe { + core::arch::asm!( + "/* {r:v} */", + r = in(vreg) r, + options(nomem, nostack), + ) + } } } @@ -1332,13 +1426,13 @@ mod avx2 { impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // SAFETY: Type can only be constructed when AVX2 feature is present + // Safety: Type can only be constructed when AVX2 feature is present unsafe { round_scramble_avx2(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // SAFETY: Type can only be constructed when AVX2 feature is present + // Safety: Type can only be constructed when AVX2 feature is present unsafe { accumulate_avx2(acc, stripe, secret) } } } @@ -1409,13 +1503,13 @@ mod sse2 { impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // SAFETY: Type can only be constructed when SSE2 feature is present + // Safety: Type can only be constructed when SSE2 feature is present unsafe { round_scramble_sse2(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // SAFETY: Type can only be constructed when SSE2 feature is present + // Safety: Type can only be constructed when SSE2 feature is present unsafe { accumulate_sse2(acc, stripe, secret) } } } @@ -1516,6 +1610,38 @@ impl Halves for u128 { } } +trait U8SliceExt { + fn first_u32(&self) -> Option; + + fn last_u32(&self) -> Option; + + fn first_u64(&self) -> Option; + + fn last_u64(&self) -> Option; +} + +impl U8SliceExt for [u8] { + #[inline] + fn first_u32(&self) -> Option { + self.first_chunk().copied().map(u32::from_ne_bytes) + } + + #[inline] + fn last_u32(&self) -> Option { + self.last_chunk().copied().map(u32::from_ne_bytes) + } + + #[inline] + fn first_u64(&self) -> Option { + self.first_chunk().copied().map(u64::from_ne_bytes) + } + + #[inline] 
+ fn last_u64(&self) -> Option { + self.last_chunk().copied().map(u64::from_ne_bytes) + } +} + trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); @@ -1529,7 +1655,12 @@ impl SliceBackport for [T] { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { assert_ne!(N, 0); let len = self.len() / N; + // Safety: `(len / N) * N` has to be less-than-or-equal to `len` let (head, tail) = unsafe { self.split_at_unchecked(len * N) }; + // Safety: (1) `head` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid elements are less-than-or-equal to the original + // slice. let head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; (head, tail) } @@ -1538,7 +1669,12 @@ impl SliceBackport for [T] { fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; + // Safety: `(len / N) * N` has to be less than or equal to `len` let (head, tail) = unsafe { self.split_at_mut_unchecked(len * N) }; + // Safety: (1) `head` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid elements are less-than-or-equal to the original + // slice. let head = unsafe { slice::from_raw_parts_mut(head.as_mut_ptr().cast(), len) }; (head, tail) } @@ -1546,7 +1682,12 @@ impl SliceBackport for [T] { fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { assert_ne!(N, 0); let len = self.len() / N; + // Safety: `(len / N) * N` has to be less than or equal to `len` let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; + // Safety: (1) `tail` points to valid data, (2) the alignment + // of an array and the individual type are the same, (3) the + // valid elements are less-than-or-equal to the original + // slice. 
let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; (head, tail) } @@ -1558,6 +1699,11 @@ mod test { use super::*; + #[test] + fn default_secret_is_valid() { + assert!(DEFAULT_SECRET.is_valid()) + } + #[test] fn secret_buffer_default_is_valid() { assert!(SecretBuffer::default().is_valid()); From bb684842b471dbdaa452cfee28bc17e330fefd85 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 09:07:38 -0400 Subject: [PATCH 130/166] simpler blackbox --- compare/Cargo.toml | 2 +- compare/benches/benchmark.rs | 82 +++++++++--------------------------- 2 files changed, 22 insertions(+), 62 deletions(-) diff --git a/compare/Cargo.toml b/compare/Cargo.toml index c405aefa4..f71c802db 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -11,7 +11,7 @@ name = "benchmark" harness = false [dependencies] -criterion = "0.5.1" +criterion = { version = "0.5.1", features = ["real_blackbox"] } proptest = "1.5.0" rand = "0.8.5" twox-hash = "1.6.3" diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index fe2c7f693..0affcbb89 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,6 +1,6 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use rand::{Rng, RngCore, SeedableRng}; -use std::{hash::Hasher, hint::black_box, iter}; +use std::{hash::Hasher, iter}; use xx_hash_sys as c; use xx_renu as rust; @@ -20,42 +20,26 @@ fn tiny_data(c: &mut Criterion) { g.throughput(Throughput::Bytes(data.len() as _)); let id = format!("impl-c/fn-oneshot/size-{size:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let hash = c::XxHash64::oneshot(seed, data); - black_box(hash); - }) - }); + g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); let id = format!("impl-c/fn-streaming/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { - let hash = { - let mut hasher = c::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - black_box(hash); + let mut hasher 
= c::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() }) }); let id = format!("impl-rust/fn-oneshot/size-{size:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let hash = rust::XxHash64::oneshot(seed, data); - black_box(hash); - }) - }); + g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); let id = format!("impl-rust/fn-streaming/size-{size:02}"); g.bench_function(id, |b| { b.iter(|| { - let hash = { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - black_box(hash); + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() }) }); } @@ -72,20 +56,10 @@ fn oneshot(c: &mut Criterion) { g.throughput(Throughput::Bytes(data.len() as _)); let id = format!("impl-c/size-{size:07}"); - g.bench_function(id, |b| { - b.iter(|| { - let hash = c::XxHash64::oneshot(seed, data); - black_box(hash); - }) - }); + g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); let id = format!("impl-rust/size-{size:07}"); - g.bench_function(id, |b| { - b.iter(|| { - let hash = rust::XxHash64::oneshot(seed, data); - black_box(hash); - }) - }); + g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); } g.finish(); @@ -106,8 +80,7 @@ fn streaming(c: &mut Criterion) { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); @@ -118,8 +91,7 @@ fn streaming(c: &mut Criterion) { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); } @@ -187,18 +159,12 @@ mod xxhash3_64 { let id = format!("impl-c/fn-oneshot/size-{size:03}"); g.bench_function(id, |b| { - b.iter(|| { - let hash = c::XxHash3_64::oneshot_with_seed(seed, data); - black_box(hash); - }) + b.iter(|| c::XxHash3_64::oneshot_with_seed(seed, data)) }); let id = format!("impl-rust/fn-oneshot/size-{size:03}"); g.bench_function(id, |b| { - b.iter(|| { - let hash = 
rust::XxHash3_64::oneshot_with_seed(seed, data); - black_box(hash); - }) + b.iter(|| rust::XxHash3_64::oneshot_with_seed(seed, data)) }); } @@ -268,8 +234,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); @@ -280,8 +245,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); @@ -294,8 +258,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); } @@ -309,8 +272,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); @@ -321,8 +283,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); } @@ -334,8 +295,7 @@ mod xxhash3_64 { for chunk in &chunks { hasher.write(chunk); } - let hash = hasher.finish(); - black_box(hash); + hasher.finish() }) }); } From dc2ba205f635863c18d7ee6fbe17ee30bb52f9cc Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 09:21:37 -0400 Subject: [PATCH 131/166] Inline the sys crate functions --- xx_hash-sys/src/lib.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 0de80c556..6cf194ea1 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -30,10 +30,12 @@ extern "C" { pub struct XxHash32(*mut XXH32_state_t); impl XxHash32 { + #[inline] pub fn oneshot(seed: u32, data: &[u8]) -> u32 { unsafe { XXH32(data.as_ptr().cast(), data.len(), seed) } } + #[inline] pub fn with_seed(seed: u32) -> Self { let state = unsafe { let state = XXH32_createState(); @@ -44,11 +46,13 @@ impl XxHash32 { Self(state) } + #[inline] pub fn write(&mut self, data: &[u8]) { let retval = unsafe { XXH32_update(self.0, data.as_ptr().cast(), data.len()) 
}; assert_eq!(retval, XXH_OK); } + #[inline] pub fn finish(&mut self) -> u32 { unsafe { XXH32_digest(self.0) } } @@ -88,10 +92,12 @@ extern "C" { pub struct XxHash64(*mut XXH64_state_t); impl XxHash64 { + #[inline] pub fn oneshot(seed: u64, data: &[u8]) -> u64 { unsafe { XXH64(data.as_ptr().cast(), data.len(), seed) } } + #[inline] pub fn with_seed(seed: u64) -> Self { let state = unsafe { let state = XXH64_createState(); @@ -102,11 +108,13 @@ impl XxHash64 { Self(state) } + #[inline] pub fn write(&mut self, data: &[u8]) { let retval = unsafe { XXH64_update(self.0, data.as_ptr().cast(), data.len()) }; assert_eq!(retval, XXH_OK); } + #[inline] pub fn finish(&mut self) -> u64 { unsafe { XXH64_digest(self.0) } } @@ -169,14 +177,17 @@ macro_rules! xxh3_64b_template { pub struct XxHash3_64(*mut crate::XXH3_state_t); impl XxHash3_64 { + #[inline] pub fn oneshot(data: &[u8]) -> u64 { unsafe { [<$prefix _64bits>](data.as_ptr().cast(), data.len()) } } + #[inline] pub fn oneshot_with_seed(seed: u64, data: &[u8]) -> u64 { unsafe { [<$prefix _64bits_withSeed>](data.as_ptr().cast(), data.len(), seed) } } + #[inline] pub fn oneshot_with_secret(secret: &[u8], data: &[u8]) -> u64 { unsafe { [<$prefix _64bits_withSecret>]( @@ -188,6 +199,7 @@ macro_rules! xxh3_64b_template { } } + #[inline] pub fn new() -> Self { let state = unsafe { let state = [<$prefix _createState>](); @@ -198,6 +210,7 @@ macro_rules! xxh3_64b_template { Self(state) } + #[inline] pub fn with_seed(seed: u64) -> Self { let state = unsafe { let state = [<$prefix _createState>](); @@ -208,12 +221,14 @@ macro_rules! 
xxh3_64b_template { Self(state) } + #[inline] pub fn write(&mut self, data: &[u8]) { let retval = unsafe { [<$prefix _64bits_update>](self.0, data.as_ptr().cast(), data.len()) }; assert_eq!(retval, crate::XXH_OK); } + #[inline] pub fn finish(&mut self) -> u64 { unsafe { [<$prefix _64bits_digest>](self.0) } } From 902a0a72e638dcfaebfa541960df54e5551a0735 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 20 Aug 2024 20:38:34 -0400 Subject: [PATCH 132/166] x86fixin --- src/xxhash3_64.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 696f8e626..838790af0 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -549,7 +549,9 @@ macro_rules! dispatch { where $($wheres)* { - $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) + unsafe { + $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) + } } #[inline] @@ -559,7 +561,9 @@ macro_rules! dispatch { where $($wheres)* { - $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) + unsafe { + $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) + } } // Now we invoke the right function @@ -1451,6 +1455,7 @@ mod avx2 { let stripe = stripe.as_ptr().cast::<__m256i>(); let secret = secret.as_ptr().cast::<__m256i>(); + unsafe { for i in 0..2 { // [align-acc]: The C code aligns the accumulator to avoid // the unaligned load and store here, but that doesn't @@ -1480,6 +1485,7 @@ mod avx2 { _mm256_storeu_si256(acc.add(i), acc_0); } } + } } #[cfg(target_arch = "x86_64")] @@ -1528,6 +1534,7 @@ mod sse2 { let stripe = stripe.as_ptr().cast::<__m128i>(); let secret = secret.as_ptr().cast::<__m128i>(); + unsafe { for i in 0..4 { // See [align-acc]. 
let mut acc_0 = _mm_loadu_si128(acc.add(i)); @@ -1555,6 +1562,7 @@ mod sse2 { _mm_storeu_si128(acc.add(i), acc_0); } } + } } #[inline] @@ -1645,7 +1653,6 @@ impl U8SliceExt for [u8] { trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); - #[cfg(target_arch = "aarch64")] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); @@ -1665,7 +1672,6 @@ impl SliceBackport for [T] { (head, tail) } - #[cfg(target_arch = "aarch64")] fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; From 31a751c9cdcbc49d45a2282f95c8309ee276f743 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 11:00:18 -0400 Subject: [PATCH 133/166] x86 --- src/xxhash3_64.rs | 80 +++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 838790af0..304788a19 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1456,36 +1456,36 @@ mod avx2 { let secret = secret.as_ptr().cast::<__m256i>(); unsafe { - for i in 0..2 { - // [align-acc]: The C code aligns the accumulator to avoid - // the unaligned load and store here, but that doesn't - // seem to be a big performance loss. - let mut acc_0 = _mm256_loadu_si256(acc.add(i)); - let stripe_0 = _mm256_loadu_si256(stripe.add(i)); - let secret_0 = _mm256_loadu_si256(secret.add(i)); + for i in 0..2 { + // [align-acc]: The C code aligns the accumulator to avoid + // the unaligned load and store here, but that doesn't + // seem to be a big performance loss. 
+ let mut acc_0 = _mm256_loadu_si256(acc.add(i)); + let stripe_0 = _mm256_loadu_si256(stripe.add(i)); + let secret_0 = _mm256_loadu_si256(secret.add(i)); - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm256_xor_si256(stripe_0, secret_0); + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm256_xor_si256(stripe_0, secret_0); - // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); - // acc[i] += stripe_swap[i] - acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); + // acc[i] += stripe_swap[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); - // value_shift[i] = value[i] >> 32 - let value_shift_0 = _mm256_srli_epi64::<32>(value_0); + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm256_srli_epi64::<32>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) - let product_0 = _mm256_mul_epu32(value_0, value_shift_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm256_mul_epu32(value_0, value_shift_0); - // acc[i] += product[i] - acc_0 = _mm256_add_epi64(acc_0, product_0); + // acc[i] += product[i] + acc_0 = _mm256_add_epi64(acc_0, product_0); - _mm256_storeu_si256(acc.add(i), acc_0); + _mm256_storeu_si256(acc.add(i), acc_0); + } } } - } } #[cfg(target_arch = "x86_64")] @@ -1535,34 +1535,34 @@ mod sse2 { let secret = secret.as_ptr().cast::<__m128i>(); unsafe { - for i in 0..4 { - // See [align-acc]. - let mut acc_0 = _mm_loadu_si128(acc.add(i)); - let stripe_0 = _mm_loadu_si128(stripe.add(i)); - let secret_0 = _mm_loadu_si128(secret.add(i)); + for i in 0..4 { + // See [align-acc]. 
+ let mut acc_0 = _mm_loadu_si128(acc.add(i)); + let stripe_0 = _mm_loadu_si128(stripe.add(i)); + let secret_0 = _mm_loadu_si128(secret.add(i)); - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm_xor_si128(stripe_0, secret_0); + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm_xor_si128(stripe_0, secret_0); - // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); - // acc[i] += stripe_swap[i] - acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); + // acc[i] += stripe_swap[i] + acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); - // value_shift[i] = value[i] >> 32 - let value_shift_0 = _mm_srli_epi64::<32>(value_0); + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm_srli_epi64::<32>(value_0); - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) - let product_0 = _mm_mul_epu32(value_0, value_shift_0); + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm_mul_epu32(value_0, value_shift_0); - // acc[i] += product[i] - acc_0 = _mm_add_epi64(acc_0, product_0); + // acc[i] += product[i] + acc_0 = _mm_add_epi64(acc_0, product_0); - _mm_storeu_si128(acc.add(i), acc_0); + _mm_storeu_si128(acc.add(i), acc_0); + } } } - } } #[inline] From 2391766afa1d3f2ad09bafb2395a6a818a604256 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 11:15:11 -0400 Subject: [PATCH 134/166] safety --- src/xxhash3_64.rs | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 304788a19..574fb8c8d 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1248,13 +1248,22 @@ mod neon { } } - #[inline] + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature #[target_feature(enable = "neon")] + #[inline] unsafe fn round_scramble_neon(acc: &mut [u64; 
8], secret_end: &[u8; 64]) { - unsafe { - let secret_base = secret_end.as_ptr().cast::(); - let (acc, _) = acc.bp_as_chunks_mut::<2>(); - for (i, acc) in acc.iter_mut().enumerate() { + let secret_base = secret_end.as_ptr().cast::(); + let (acc, _) = acc.bp_as_chunks_mut::<2>(); + + for (i, acc) in acc.iter_mut().enumerate() { + // Safety: The caller has ensured we have the NEON + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. + unsafe { let mut accv = vld1q_u64(acc.as_ptr()); let secret = vld1q_u64(secret_base.add(i * 2)); @@ -1275,14 +1284,23 @@ mod neon { } } - // We process 4x u64 at a time as that allows us to completely - // fill a `uint64x2_t` with useful values when performing the - // multiplication. + /// We process 4x u64 at a time as that allows us to completely + /// fill a `uint64x2_t` with useful values when performing the + /// multiplication. + /// + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature #[target_feature(enable = "neon")] #[inline] unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let (acc2, _) = acc.bp_as_chunks_mut::<4>(); for (i, acc) in acc2.iter_mut().enumerate() { + // Safety: The caller has ensured we have the NEON + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. 
unsafe { let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); From b6825e696458619ee6ca55634bd3b39111c7ec9e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 11:39:50 -0400 Subject: [PATCH 135/166] safety --- clippy.toml | 1 + src/xxhash3_64.rs | 116 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 88 insertions(+), 29 deletions(-) create mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 000000000..8483b87c6 --- /dev/null +++ b/clippy.toml @@ -0,0 +1 @@ +check-private-items = true diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 574fb8c8d..20532e4a6 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,5 +1,9 @@ #![allow(missing_docs)] -#![deny(unsafe_op_in_unsafe_fn)] +#![deny( + clippy::missing_safety_doc, + clippy::undocumented_unsafe_blocks, + unsafe_op_in_unsafe_fn +)] use core::{hash, hint::assert_unchecked, mem, slice}; @@ -129,11 +133,20 @@ impl Secret { self.0[119..].first_chunk().unwrap() } + /// # Safety + /// + /// `i` must be less than the number of stripes in the secret + /// ([`Self::n_stripes`][]). #[inline] - fn stripe(&self, i: usize) -> &[u8; 64] { + unsafe fn stripe(&self, i: usize) -> &[u8; 64] { self.reassert_preconditions(); - unsafe { &*self.0.get_unchecked(i * 8..).as_ptr().cast() } + // Safety: The caller has ensured that `i` is + // in-bounds. `&[u8]` and `&[u8; 64]` have the same alignment. 
+ unsafe { + debug_assert!(i < self.n_stripes()); + &*self.0.get_unchecked(i * 8..).as_ptr().cast() + } } #[inline] @@ -162,6 +175,12 @@ impl Secret { self.0.len() } + #[inline] + fn n_stripes(&self) -> usize { + // stripes_per_block + (self.len() - 64) / 8 + } + #[inline(always)] fn reassert_preconditions(&self) { // Safety: The length of the bytes was checked at value @@ -272,10 +291,7 @@ where #[inline] fn n_stripes(&self) -> usize { - let secret = self.secret.as_ref(); - - // stripes_per_block - (secret.len() - 64) / 8 + Self::secret(&self.secret).n_stripes() } /// Returns the secret and buffer values. @@ -285,22 +301,19 @@ where #[inline] fn parts(&self) -> (u64, &Secret, &Buffer) { - let secret = self.secret.as_ref(); - // Safety: We established the length at construction and the - // length is not allowed to change. - let secret = unsafe { Secret::new_unchecked(secret) }; - - (self.seed, secret, &self.buffer) + (self.seed, Self::secret(&self.secret), &self.buffer) } #[inline] fn parts_mut(&mut self) -> (u64, &Secret, &mut Buffer) { - let secret = self.secret.as_ref(); + (self.seed, Self::secret(&self.secret), &mut self.buffer) + } + + fn secret(secret: &S) -> &Secret { + let secret = secret.as_ref(); // Safety: We established the length at construction and the // length is not allowed to change. - let secret = unsafe { Secret::new_unchecked(secret) }; - - (self.seed, secret, &mut self.buffer) + unsafe { Secret::new_unchecked(secret) } } } @@ -459,9 +472,9 @@ impl StripeAccumulator { } #[inline] - fn process_stripe( + fn process_stripe( &mut self, - vector: V, + vector: impl Vector, stripe: &[u8; 64], n_stripes: usize, secret: &Secret, @@ -472,17 +485,20 @@ impl StripeAccumulator { .. 
} = self; - let secret_end = secret.last_stripe(); + // For each stripe - // each stripe - let secret = secret.stripe(*current_stripe); - vector.accumulate(accumulator, stripe, secret); + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. + let secret_stripe = unsafe { secret.stripe(*current_stripe) }; + vector.accumulate(accumulator, stripe, secret_stripe); *current_stripe += 1; + // After a full block's worth if *current_stripe == n_stripes { - // after block's worth + let secret_end = secret.last_stripe(); vector.round_scramble(accumulator, secret_end); + *current_stripe = 0; } } @@ -528,6 +544,7 @@ macro_rules! dispatch { } /// # Safety + /// /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] @@ -542,6 +559,9 @@ macro_rules! dispatch { } } + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] #[cfg(target_arch = "x86_64")] @@ -549,11 +569,15 @@ macro_rules! dispatch { where $($wheres)* { + // Safety: The caller has ensured we have the AVX2 feature unsafe { $fn_name(avx2::Impl::new_unchecked(), $($arg_name),*) } } + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] #[cfg(target_arch = "x86_64")] @@ -561,6 +585,7 @@ macro_rules! 
dispatch { where $($wheres)* { + // Safety: The caller has ensured we have the SSE2 feature unsafe { $fn_name(sse2::Impl::new_unchecked(), $($arg_name),*) } @@ -1008,7 +1033,10 @@ fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { struct Algorithm(V); -impl Algorithm { +impl Algorithm +where + V: Vector, +{ #[inline] fn oneshot(&self, secret: &Secret, input: &[u8]) -> u64 { assert_input_range!(241.., input.len()); @@ -1061,8 +1089,11 @@ impl Algorithm { #[inline] fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { - // TODO: [unify] - let secrets = (0..stripes.len()).map(|i| secret.stripe(i)); + let secrets = (0..stripes.len()).map(|i| { + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. + unsafe { secret.stripe(i) } + }); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); @@ -1096,8 +1127,11 @@ impl Algorithm { // except for the last stripe (whether it is full or not) let (stripes, _) = stripes_with_tail(block); - // TODO: [unify] - let secrets = (0..stripes.len()).map(|i| secret.stripe(i)); + let secrets = (0..stripes.len()).map(|i| { + // Safety: The number of stripes is determined by the + // block size, which is determined by the secret size. 
+ unsafe { secret.stripe(i) } + }); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); @@ -1438,6 +1472,7 @@ mod avx2 { impl Impl { /// # Safety + /// /// You must ensure that the CPU has the AVX2 feature #[inline] pub unsafe fn new_unchecked() -> Impl { @@ -1459,6 +1494,9 @@ mod avx2 { } } + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] unsafe fn round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { @@ -1466,6 +1504,9 @@ mod avx2 { scalar::Impl.round_scramble(acc, secret_end) } + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -1473,6 +1514,11 @@ mod avx2 { let stripe = stripe.as_ptr().cast::<__m256i>(); let secret = secret.as_ptr().cast::<__m256i>(); + // Safety: The caller has ensured we have the AVX2 + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. 
unsafe { for i in 0..2 { // [align-acc]: The C code aligns the accumulator to avoid @@ -1517,6 +1563,7 @@ mod sse2 { impl Impl { /// # Safety + /// /// You must ensure that the CPU has the SSE2 feature #[inline] pub unsafe fn new_unchecked() -> Impl { @@ -1538,6 +1585,9 @@ mod sse2 { } } + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { @@ -1545,6 +1595,9 @@ mod sse2 { scalar::Impl.round_scramble(acc, secret_end) } + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { @@ -1552,6 +1605,11 @@ mod sse2 { let stripe = stripe.as_ptr().cast::<__m128i>(); let secret = secret.as_ptr().cast::<__m128i>(); + // Safety: The caller has ensured we have the SSE2 + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. unsafe { for i in 0..4 { // See [align-acc]. 
From c955390e8024fbeb7abb96d9fe0303eccfdb6023 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 12:04:36 -0400 Subject: [PATCH 136/166] Move details to separate modules --- src/xxhash3_64.rs | 643 ++------------------------------------- src/xxhash3_64/avx2.rs | 87 ++++++ src/xxhash3_64/neon.rs | 208 +++++++++++++ src/xxhash3_64/scalar.rs | 68 +++++ src/xxhash3_64/secret.rs | 160 ++++++++++ src/xxhash3_64/sse2.rs | 85 ++++++ 6 files changed, 628 insertions(+), 623 deletions(-) create mode 100644 src/xxhash3_64/avx2.rs create mode 100644 src/xxhash3_64/neon.rs create mode 100644 src/xxhash3_64/scalar.rs create mode 100644 src/xxhash3_64/secret.rs create mode 100644 src/xxhash3_64/sse2.rs diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 20532e4a6..0e4b3a5fe 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -5,10 +5,29 @@ unsafe_op_in_unsafe_fn )] -use core::{hash, hint::assert_unchecked, mem, slice}; +use core::{hash, hint::assert_unchecked, slice}; use crate::{IntoU128, IntoU32, IntoU64}; +mod secret; + +use secret::Secret; + +pub use secret::SECRET_MINIMUM_LENGTH; + +// This module is not `cfg`-gated because it is used by some of the +// SIMD implementations. +mod scalar; + +#[cfg(target_arch = "aarch64")] +mod neon; + +#[cfg(target_arch = "x86_64")] +mod avx2; + +#[cfg(target_arch = "x86_64")] +mod sse2; + const PRIME32_1: u64 = 0x9E3779B1; const PRIME32_2: u64 = 0x85EBCA77; const PRIME32_3: u64 = 0xC2B2AE3D; @@ -40,163 +59,6 @@ const DEFAULT_SECRET_RAW: [u8; 192] = [ // Safety: The default secret is long enough const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; -pub const SECRET_MINIMUM_LENGTH: usize = 136; - -#[repr(transparent)] -struct Secret([u8]); - -impl Secret { - #[inline] - fn new(bytes: &[u8]) -> Result<&Self, ()> { - // Safety: We check for validity before returning. 
- unsafe { - let this = Self::new_unchecked(bytes); - if this.is_valid() { - Ok(this) - } else { - Err(()) // TODO error - } - } - } - - /// # Safety - /// - /// You must ensure that the secret byte length is >= - /// SECRET_MINIMUM_LENGTH. - #[inline] - const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { - // Safety: We are `#[repr(transparent)]`. It's up to the - // caller to ensure the length - unsafe { mem::transmute(bytes) } - } - - #[inline] - fn words_for_0(&self) -> [u64; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0[56..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_ne_bytes) - } - - #[inline] - fn words_for_1_to_3(&self) -> [u32; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0.bp_as_chunks(); - [q[0], q[1]].map(u32::from_ne_bytes) - } - - #[inline] - fn words_for_4_to_8(&self) -> [u64; 2] { - self.reassert_preconditions(); - - let (q, _) = self.0[8..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_ne_bytes) - } - - #[inline] - fn words_for_9_to_16(&self) -> [u64; 4] { - self.reassert_preconditions(); - - let (q, _) = self.0[24..].bp_as_chunks(); - [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) - } - - #[inline] - fn words_for_17_to_128(&self) -> &[[u8; 16]] { - self.reassert_preconditions(); - - let (words, _) = self.0.bp_as_chunks(); - words - } - - #[inline] - fn words_for_127_to_240_part1(&self) -> &[[u8; 16]] { - self.reassert_preconditions(); - - let (ss, _) = self.0.bp_as_chunks(); - ss - } - - #[inline] - fn words_for_127_to_240_part2(&self) -> &[[u8; 16]] { - self.reassert_preconditions(); - - let (ss, _) = self.0[3..].bp_as_chunks(); - ss - } - - #[inline] - fn words_for_127_to_240_part3(&self) -> &[u8; 16] { - self.reassert_preconditions(); - - self.0[119..].first_chunk().unwrap() - } - - /// # Safety - /// - /// `i` must be less than the number of stripes in the secret - /// ([`Self::n_stripes`][]). 
- #[inline] - unsafe fn stripe(&self, i: usize) -> &[u8; 64] { - self.reassert_preconditions(); - - // Safety: The caller has ensured that `i` is - // in-bounds. `&[u8]` and `&[u8; 64]` have the same alignment. - unsafe { - debug_assert!(i < self.n_stripes()); - &*self.0.get_unchecked(i * 8..).as_ptr().cast() - } - } - - #[inline] - fn last_stripe(&self) -> &[u8; 64] { - self.reassert_preconditions(); - - self.0.last_chunk().unwrap() - } - - #[inline] - fn last_stripe_secret_better_name(&self) -> &[u8; 64] { - self.reassert_preconditions(); - - self.0[self.0.len() - 71..].first_chunk().unwrap() - } - - #[inline] - fn final_secret(&self) -> &[u8; 64] { - self.reassert_preconditions(); - - self.0[11..].first_chunk().unwrap() - } - - #[inline] - fn len(&self) -> usize { - self.0.len() - } - - #[inline] - fn n_stripes(&self) -> usize { - // stripes_per_block - (self.len() - 64) / 8 - } - - #[inline(always)] - fn reassert_preconditions(&self) { - // Safety: The length of the bytes was checked at value - // construction time. - unsafe { - debug_assert!(self.is_valid()); - assert_unchecked(self.is_valid()); - } - } - - #[inline(always)] - fn is_valid(&self) -> bool { - self.0.len() >= SECRET_MINIMUM_LENGTH - } -} - pub struct XxHash3_64 { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, @@ -1176,471 +1038,6 @@ trait Vector: Copy { fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); } -// This module is not `cfg`-gated because it is used by some of the -// SIMD implementations. 
-mod scalar { - use super::{SliceBackport as _, Vector, PRIME32_1}; - - #[derive(Copy, Clone)] - pub struct Impl; - - impl Vector for Impl { - #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - let (last, _) = secret_end.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); - - for (acc, secret) in acc.iter_mut().zip(last) { - *acc ^= *acc >> 47; - *acc ^= secret; - *acc = acc.wrapping_mul(PRIME32_1); - } - } - - #[inline] - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let (stripe, _) = stripe.bp_as_chunks(); - let (secret, _) = secret.bp_as_chunks(); - - for i in 0..8 { - let stripe = u64::from_ne_bytes(stripe[i]); - let secret = u64::from_ne_bytes(secret[i]); - - let value = stripe ^ secret; - acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); - acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); - } - } - } - - #[inline] - #[cfg(not(target_arch = "aarch64"))] - fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - use super::IntoU64; - - let lhs = (lhs as u32).into_u64(); - let rhs = (rhs as u32).into_u64(); - - let product = lhs.wrapping_mul(rhs); - acc.wrapping_add(product) - } - - #[inline] - // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 - // https://github.com/llvm/llvm-project/issues/98481 - #[cfg(target_arch = "aarch64")] - fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { - let res; - - // Safety: We only compute using our argument values and do - // not change memory. 
- unsafe { - core::arch::asm!( - "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", - lhs = in(reg) lhs, - rhs = in(reg) rhs, - acc = in(reg) acc, - res = out(reg) res, - options(pure, nomem, nostack), - ) - } - - res - } -} - -#[cfg(target_arch = "aarch64")] -mod neon { - use core::arch::aarch64::*; - - use super::{SliceBackport as _, Vector, PRIME32_1}; - - #[derive(Copy, Clone)] - pub struct Impl(()); - - impl Impl { - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - #[inline] - pub unsafe fn new_unchecked() -> Self { - Self(()) - } - } - - impl Vector for Impl { - #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // Safety: Type can only be constructed when NEON feature is present - unsafe { round_scramble_neon(acc, secret_end) } - } - - #[inline] - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // Safety: Type can only be constructed when NEON feature is present - unsafe { accumulate_neon(acc, stripe, secret) } - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - #[target_feature(enable = "neon")] - #[inline] - unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - let secret_base = secret_end.as_ptr().cast::(); - let (acc, _) = acc.bp_as_chunks_mut::<2>(); - - for (i, acc) in acc.iter_mut().enumerate() { - // Safety: The caller has ensured we have the NEON - // feature. We load from and store to references so we - // know that data is valid. We use unaligned loads / - // stores. Data manipulation is otherwise done on - // intermediate values. 
- unsafe { - let mut accv = vld1q_u64(acc.as_ptr()); - let secret = vld1q_u64(secret_base.add(i * 2)); - - // tmp[i] = acc[i] >> 47 - let shifted = vshrq_n_u64::<47>(accv); - - // acc[i] ^= tmp[i] - accv = veorq_u64(accv, shifted); - - // acc[i] ^= secret[i] - accv = veorq_u64(accv, secret); - - // acc[i] *= PRIME32_1 - accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); - - vst1q_u64(acc.as_mut_ptr(), accv); - } - } - } - - /// We process 4x u64 at a time as that allows us to completely - /// fill a `uint64x2_t` with useful values when performing the - /// multiplication. - /// - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - #[target_feature(enable = "neon")] - #[inline] - unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let (acc2, _) = acc.bp_as_chunks_mut::<4>(); - for (i, acc) in acc2.iter_mut().enumerate() { - // Safety: The caller has ensured we have the NEON - // feature. We load from and store to references so we - // know that data is valid. We use unaligned loads / - // stores. Data manipulation is otherwise done on - // intermediate values. - unsafe { - let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); - let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); - let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); - let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); - let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); - let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); - - // stripe_rot[i ^ 1] = stripe[i]; - let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); - let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); - - // value[i] = stripe[i] ^ secret[i]; - let value_0 = veorq_u64(stripe_0, secret_0); - let value_1 = veorq_u64(stripe_1, secret_1); - - // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] - // - // Each vector has 64-bit values, but we treat them as - // 32-bit and then unzip them. 
This naturally splits - // the upper and lower 32 bits. - let parts_0 = vreinterpretq_u32_u64(value_0); - let parts_1 = vreinterpretq_u32_u64(value_1); - - let hi = vuzp1q_u32(parts_0, parts_1); - let lo = vuzp2q_u32(parts_0, parts_1); - - let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); - let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); - - reordering_barrier(sum_0); - reordering_barrier(sum_1); - - // acc[i] += sum[i] - accv_0 = vaddq_u64(accv_0, sum_0); - accv_1 = vaddq_u64(accv_1, sum_1); - - vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); - vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); - }; - } - } - - // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the - // lower 64 bits of the result) operation, so we have to make our - // own out of 32-bit operations . We can simplify by realizing - // that we are always multiplying by a 32-bit number. - // - // The basic algorithm is traditional long multiplication. `[]` - // denotes groups of 32 bits. - // - // [AAAA][BBBB] - // x [CCCC] - // -------------------- - // [BCBC][BCBC] - // + [ACAC][ACAC] - // -------------------- - // [ACBC][BCBC] // 64-bit truncation occurs - // - // This can be written in NEON as a vectorwise wrapping - // multiplication of the high-order chunk of the input (`A`) - // against the constant and then a multiply-widen-and-accumulate - // of the low-order chunk of the input and the constant: - // - // 1. High-order, vectorwise - // - // [AAAA][BBBB] - // x [CCCC][0000] - // -------------------- - // [ACAC][0000] - // - // 2. Low-order, widening - // - // [BBBB] - // x [CCCC] // widening - // -------------------- - // [BCBC][BCBC] - // - // 3. Accumulation - // - // [ACAC][0000] - // + [BCBC][BCBC] // vectorwise - // -------------------- - // [ACBC][BCBC] - // - // Thankfully, NEON has a single multiply-widen-and-accumulate - // operation. 
- #[inline] - pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { - // Safety: We only compute using our argument values and do - // not change memory. - unsafe { - let input_as_u32 = vreinterpretq_u32_u64(input); - let factor = vmov_n_u32(og_factor); - let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); - let factor_striped = vreinterpretq_u32_u64(factor_striped); - - let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); - let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); - - let input_lo = vmovn_u64(input); - vmlal_u32(high_shifted, input_lo, factor) - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the NEON feature - // - // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 - #[inline] - #[target_feature(enable = "neon")] - unsafe fn reordering_barrier(r: uint64x2_t) { - // Safety: The caller has ensured we have the NEON feature. We - // aren't doing anything with the argument, so we shouldn't be - // able to cause unsafety! 
- unsafe { - core::arch::asm!( - "/* {r:v} */", - r = in(vreg) r, - options(nomem, nostack), - ) - } - } -} - -#[cfg(target_arch = "x86_64")] -mod avx2 { - use core::arch::x86_64::*; - - use super::{scalar, Vector}; - - #[derive(Copy, Clone)] - pub struct Impl(()); - - impl Impl { - /// # Safety - /// - /// You must ensure that the CPU has the AVX2 feature - #[inline] - pub unsafe fn new_unchecked() -> Impl { - Impl(()) - } - } - - impl Vector for Impl { - #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // Safety: Type can only be constructed when AVX2 feature is present - unsafe { round_scramble_avx2(acc, secret_end) } - } - - #[inline] - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // Safety: Type can only be constructed when AVX2 feature is present - unsafe { accumulate_avx2(acc, stripe, secret) } - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - unsafe fn round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // The scalar implementation is autovectorized nicely enough - scalar::Impl.round_scramble(acc, secret_end) - } - - /// # Safety - /// - /// You must ensure that the CPU has the AVX2 feature - #[inline] - #[target_feature(enable = "avx2")] - unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let acc = acc.as_mut_ptr().cast::<__m256i>(); - let stripe = stripe.as_ptr().cast::<__m256i>(); - let secret = secret.as_ptr().cast::<__m256i>(); - - // Safety: The caller has ensured we have the AVX2 - // feature. We load from and store to references so we - // know that data is valid. We use unaligned loads / - // stores. Data manipulation is otherwise done on - // intermediate values. 
- unsafe { - for i in 0..2 { - // [align-acc]: The C code aligns the accumulator to avoid - // the unaligned load and store here, but that doesn't - // seem to be a big performance loss. - let mut acc_0 = _mm256_loadu_si256(acc.add(i)); - let stripe_0 = _mm256_loadu_si256(stripe.add(i)); - let secret_0 = _mm256_loadu_si256(secret.add(i)); - - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm256_xor_si256(stripe_0, secret_0); - - // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); - - // acc[i] += stripe_swap[i] - acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); - - // value_shift[i] = value[i] >> 32 - let value_shift_0 = _mm256_srli_epi64::<32>(value_0); - - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) - let product_0 = _mm256_mul_epu32(value_0, value_shift_0); - - // acc[i] += product[i] - acc_0 = _mm256_add_epi64(acc_0, product_0); - - _mm256_storeu_si256(acc.add(i), acc_0); - } - } - } -} - -#[cfg(target_arch = "x86_64")] -mod sse2 { - use core::arch::x86_64::*; - - use super::{scalar, Vector}; - - #[derive(Copy, Clone)] - pub struct Impl(()); - - impl Impl { - /// # Safety - /// - /// You must ensure that the CPU has the SSE2 feature - #[inline] - pub unsafe fn new_unchecked() -> Impl { - Impl(()) - } - } - - impl Vector for Impl { - #[inline] - fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // Safety: Type can only be constructed when SSE2 feature is present - unsafe { round_scramble_sse2(acc, secret_end) } - } - - #[inline] - fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - // Safety: Type can only be constructed when SSE2 feature is present - unsafe { accumulate_sse2(acc, stripe, secret) } - } - } - - /// # Safety - /// - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { - // The scalar 
implementation is autovectorized nicely enough - scalar::Impl.round_scramble(acc, secret_end) - } - - /// # Safety - /// - /// You must ensure that the CPU has the SSE2 feature - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { - let acc = acc.as_mut_ptr().cast::<__m128i>(); - let stripe = stripe.as_ptr().cast::<__m128i>(); - let secret = secret.as_ptr().cast::<__m128i>(); - - // Safety: The caller has ensured we have the SSE2 - // feature. We load from and store to references so we - // know that data is valid. We use unaligned loads / - // stores. Data manipulation is otherwise done on - // intermediate values. - unsafe { - for i in 0..4 { - // See [align-acc]. - let mut acc_0 = _mm_loadu_si128(acc.add(i)); - let stripe_0 = _mm_loadu_si128(stripe.add(i)); - let secret_0 = _mm_loadu_si128(secret.add(i)); - - // let value[i] = stripe[i] ^ secret[i]; - let value_0 = _mm_xor_si128(stripe_0, secret_0); - - // stripe_swap[i] = stripe[i ^ 1] - let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); - - // acc[i] += stripe_swap[i] - acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); - - // value_shift[i] = value[i] >> 32 - let value_shift_0 = _mm_srli_epi64::<32>(value_0); - - // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) - let product_0 = _mm_mul_epu32(value_0, value_shift_0); - - // acc[i] += product[i] - acc_0 = _mm_add_epi64(acc_0, product_0); - - _mm_storeu_si128(acc.add(i), acc_0); - } - } - } -} - #[inline] fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; diff --git a/src/xxhash3_64/avx2.rs b/src/xxhash3_64/avx2.rs new file mode 100644 index 000000000..8cfb54f15 --- /dev/null +++ b/src/xxhash3_64/avx2.rs @@ -0,0 +1,87 @@ +use core::arch::x86_64::*; + +use super::{scalar, Vector}; + +#[derive(Copy, Clone)] +pub struct Impl(()); + +impl Impl { + /// # Safety + /// + /// You must ensure that the CPU has the AVX2 feature + #[inline] + pub unsafe fn 
new_unchecked() -> Impl { + Impl(()) + } +} + +impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // Safety: Type can only be constructed when AVX2 feature is present + unsafe { round_scramble_avx2(acc, secret_end) } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // Safety: Type can only be constructed when AVX2 feature is present + unsafe { accumulate_avx2(acc, stripe, secret) } + } +} + +/// # Safety +/// +/// You must ensure that the CPU has the AVX2 feature +#[inline] +#[target_feature(enable = "avx2")] +unsafe fn round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // The scalar implementation is autovectorized nicely enough + scalar::Impl.round_scramble(acc, secret_end) +} + +/// # Safety +/// +/// You must ensure that the CPU has the AVX2 feature +#[inline] +#[target_feature(enable = "avx2")] +unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let acc = acc.as_mut_ptr().cast::<__m256i>(); + let stripe = stripe.as_ptr().cast::<__m256i>(); + let secret = secret.as_ptr().cast::<__m256i>(); + + // Safety: The caller has ensured we have the AVX2 + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. + unsafe { + for i in 0..2 { + // [align-acc]: The C code aligns the accumulator to avoid + // the unaligned load and store here, but that doesn't + // seem to be a big performance loss. 
+ let mut acc_0 = _mm256_loadu_si256(acc.add(i)); + let stripe_0 = _mm256_loadu_si256(stripe.add(i)); + let secret_0 = _mm256_loadu_si256(secret.add(i)); + + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm256_xor_si256(stripe_0, secret_0); + + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); + + // acc[i] += stripe_swap[i] + acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); + + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm256_srli_epi64::<32>(value_0); + + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm256_mul_epu32(value_0, value_shift_0); + + // acc[i] += product[i] + acc_0 = _mm256_add_epi64(acc_0, product_0); + + _mm256_storeu_si256(acc.add(i), acc_0); + } + } +} diff --git a/src/xxhash3_64/neon.rs b/src/xxhash3_64/neon.rs new file mode 100644 index 000000000..372bca749 --- /dev/null +++ b/src/xxhash3_64/neon.rs @@ -0,0 +1,208 @@ +use core::arch::aarch64::*; + +use super::{SliceBackport as _, Vector, PRIME32_1}; + +#[derive(Copy, Clone)] +pub struct Impl(()); + +impl Impl { + /// # Safety + /// + /// You must ensure that the CPU has the NEON feature + #[inline] + pub unsafe fn new_unchecked() -> Self { + Self(()) + } +} + +impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // Safety: Type can only be constructed when NEON feature is present + unsafe { round_scramble_neon(acc, secret_end) } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // Safety: Type can only be constructed when NEON feature is present + unsafe { accumulate_neon(acc, stripe, secret) } + } +} + +/// # Safety +/// +/// You must ensure that the CPU has the NEON feature +#[target_feature(enable = "neon")] +#[inline] +unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + let secret_base = secret_end.as_ptr().cast::(); + let (acc, _) = 
acc.bp_as_chunks_mut::<2>(); + + for (i, acc) in acc.iter_mut().enumerate() { + // Safety: The caller has ensured we have the NEON + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. + unsafe { + let mut accv = vld1q_u64(acc.as_ptr()); + let secret = vld1q_u64(secret_base.add(i * 2)); + + // tmp[i] = acc[i] >> 47 + let shifted = vshrq_n_u64::<47>(accv); + + // acc[i] ^= tmp[i] + accv = veorq_u64(accv, shifted); + + // acc[i] ^= secret[i] + accv = veorq_u64(accv, secret); + + // acc[i] *= PRIME32_1 + accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); + + vst1q_u64(acc.as_mut_ptr(), accv); + } + } +} + +/// We process 4x u64 at a time as that allows us to completely +/// fill a `uint64x2_t` with useful values when performing the +/// multiplication. +/// +/// # Safety +/// +/// You must ensure that the CPU has the NEON feature +#[target_feature(enable = "neon")] +#[inline] +unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (acc2, _) = acc.bp_as_chunks_mut::<4>(); + for (i, acc) in acc2.iter_mut().enumerate() { + // Safety: The caller has ensured we have the NEON + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. 
+ unsafe { + let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); + let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); + let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); + let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); + let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); + let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); + + // stripe_rot[i ^ 1] = stripe[i]; + let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); + let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); + + // value[i] = stripe[i] ^ secret[i]; + let value_0 = veorq_u64(stripe_0, secret_0); + let value_1 = veorq_u64(stripe_1, secret_1); + + // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] + // + // Each vector has 64-bit values, but we treat them as + // 32-bit and then unzip them. This naturally splits + // the upper and lower 32 bits. + let parts_0 = vreinterpretq_u32_u64(value_0); + let parts_1 = vreinterpretq_u32_u64(value_1); + + let hi = vuzp1q_u32(parts_0, parts_1); + let lo = vuzp2q_u32(parts_0, parts_1); + + let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); + let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); + + reordering_barrier(sum_0); + reordering_barrier(sum_1); + + // acc[i] += sum[i] + accv_0 = vaddq_u64(accv_0, sum_0); + accv_1 = vaddq_u64(accv_1, sum_1); + + vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); + vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); + }; + } +} + +// There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the +// lower 64 bits of the result) operation, so we have to make our +// own out of 32-bit operations . We can simplify by realizing +// that we are always multiplying by a 32-bit number. +// +// The basic algorithm is traditional long multiplication. `[]` +// denotes groups of 32 bits. 
+// +// [AAAA][BBBB] +// x [CCCC] +// -------------------- +// [BCBC][BCBC] +// + [ACAC][ACAC] +// -------------------- +// [ACBC][BCBC] // 64-bit truncation occurs +// +// This can be written in NEON as a vectorwise wrapping +// multiplication of the high-order chunk of the input (`A`) +// against the constant and then a multiply-widen-and-accumulate +// of the low-order chunk of the input and the constant: +// +// 1. High-order, vectorwise +// +// [AAAA][BBBB] +// x [CCCC][0000] +// -------------------- +// [ACAC][0000] +// +// 2. Low-order, widening +// +// [BBBB] +// x [CCCC] // widening +// -------------------- +// [BCBC][BCBC] +// +// 3. Accumulation +// +// [ACAC][0000] +// + [BCBC][BCBC] // vectorwise +// -------------------- +// [ACBC][BCBC] +// +// Thankfully, NEON has a single multiply-widen-and-accumulate +// operation. +#[inline] +pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { + // Safety: We only compute using our argument values and do + // not change memory. + unsafe { + let input_as_u32 = vreinterpretq_u32_u64(input); + let factor = vmov_n_u32(og_factor); + let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); + let factor_striped = vreinterpretq_u32_u64(factor_striped); + + let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); + let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); + + let input_lo = vmovn_u64(input); + vmlal_u32(high_shifted, input_lo, factor) + } +} + +/// # Safety +/// +/// You must ensure that the CPU has the NEON feature +// +// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 +#[inline] +#[target_feature(enable = "neon")] +unsafe fn reordering_barrier(r: uint64x2_t) { + // Safety: The caller has ensured we have the NEON feature. We + // aren't doing anything with the argument, so we shouldn't be + // able to cause unsafety! 
+ unsafe { + core::arch::asm!( + "/* {r:v} */", + r = in(vreg) r, + options(nomem, nostack), + ) + } +} diff --git a/src/xxhash3_64/scalar.rs b/src/xxhash3_64/scalar.rs new file mode 100644 index 000000000..3a91464b1 --- /dev/null +++ b/src/xxhash3_64/scalar.rs @@ -0,0 +1,68 @@ +use super::{SliceBackport as _, Vector, PRIME32_1}; + +#[derive(Copy, Clone)] +pub struct Impl; + +impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + let (last, _) = secret_end.bp_as_chunks(); + let last = last.iter().copied().map(u64::from_ne_bytes); + + for (acc, secret) in acc.iter_mut().zip(last) { + *acc ^= *acc >> 47; + *acc ^= secret; + *acc = acc.wrapping_mul(PRIME32_1); + } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let (stripe, _) = stripe.bp_as_chunks(); + let (secret, _) = secret.bp_as_chunks(); + + for i in 0..8 { + let stripe = u64::from_ne_bytes(stripe[i]); + let secret = u64::from_ne_bytes(secret[i]); + + let value = stripe ^ secret; + acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); + acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); + } + } +} + +#[inline] +#[cfg(not(target_arch = "aarch64"))] +fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + use super::IntoU64; + + let lhs = (lhs as u32).into_u64(); + let rhs = (rhs as u32).into_u64(); + + let product = lhs.wrapping_mul(rhs); + acc.wrapping_add(product) +} + +#[inline] +// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 +// https://github.com/llvm/llvm-project/issues/98481 +#[cfg(target_arch = "aarch64")] +fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { + let res; + + // Safety: We only compute using our argument values and do + // not change memory. 
+ unsafe { + core::arch::asm!( + "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", + lhs = in(reg) lhs, + rhs = in(reg) rhs, + acc = in(reg) acc, + res = out(reg) res, + options(pure, nomem, nostack), + ) + } + + res +} diff --git a/src/xxhash3_64/secret.rs b/src/xxhash3_64/secret.rs new file mode 100644 index 000000000..14070d8e7 --- /dev/null +++ b/src/xxhash3_64/secret.rs @@ -0,0 +1,160 @@ +use core::{hint::assert_unchecked, mem}; + +use super::SliceBackport as _; + +pub const SECRET_MINIMUM_LENGTH: usize = 136; + +#[repr(transparent)] +pub struct Secret([u8]); + +impl Secret { + #[inline] + pub fn new(bytes: &[u8]) -> Result<&Self, ()> { + // Safety: We check for validity before returning. + unsafe { + let this = Self::new_unchecked(bytes); + if this.is_valid() { + Ok(this) + } else { + Err(()) // TODO error + } + } + } + + /// # Safety + /// + /// You must ensure that the secret byte length is >= + /// SECRET_MINIMUM_LENGTH. + #[inline] + pub const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { + // Safety: We are `#[repr(transparent)]`. 
It's up to the + // caller to ensure the length + unsafe { mem::transmute(bytes) } + } + + #[inline] + pub fn words_for_0(&self) -> [u64; 2] { + self.reassert_preconditions(); + + let (q, _) = self.0[56..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_ne_bytes) + } + + #[inline] + pub fn words_for_1_to_3(&self) -> [u32; 2] { + self.reassert_preconditions(); + + let (q, _) = self.0.bp_as_chunks(); + [q[0], q[1]].map(u32::from_ne_bytes) + } + + #[inline] + pub fn words_for_4_to_8(&self) -> [u64; 2] { + self.reassert_preconditions(); + + let (q, _) = self.0[8..].bp_as_chunks(); + [q[0], q[1]].map(u64::from_ne_bytes) + } + + #[inline] + pub fn words_for_9_to_16(&self) -> [u64; 4] { + self.reassert_preconditions(); + + let (q, _) = self.0[24..].bp_as_chunks(); + [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) + } + + #[inline] + pub fn words_for_17_to_128(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (words, _) = self.0.bp_as_chunks(); + words + } + + #[inline] + pub fn words_for_127_to_240_part1(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (ss, _) = self.0.bp_as_chunks(); + ss + } + + #[inline] + pub fn words_for_127_to_240_part2(&self) -> &[[u8; 16]] { + self.reassert_preconditions(); + + let (ss, _) = self.0[3..].bp_as_chunks(); + ss + } + + #[inline] + pub fn words_for_127_to_240_part3(&self) -> &[u8; 16] { + self.reassert_preconditions(); + + self.0[119..].first_chunk().unwrap() + } + + /// # Safety + /// + /// `i` must be less than the number of stripes in the secret + /// ([`Self::n_stripes`][]). + #[inline] + pub unsafe fn stripe(&self, i: usize) -> &[u8; 64] { + self.reassert_preconditions(); + + // Safety: The caller has ensured that `i` is + // in-bounds. `&[u8]` and `&[u8; 64]` have the same alignment. 
+ unsafe { + debug_assert!(i < self.n_stripes()); + &*self.0.get_unchecked(i * 8..).as_ptr().cast() + } + } + + #[inline] + pub fn last_stripe(&self) -> &[u8; 64] { + self.reassert_preconditions(); + + self.0.last_chunk().unwrap() + } + + #[inline] + pub fn last_stripe_secret_better_name(&self) -> &[u8; 64] { + self.reassert_preconditions(); + + self.0[self.0.len() - 71..].first_chunk().unwrap() + } + + #[inline] + pub fn final_secret(&self) -> &[u8; 64] { + self.reassert_preconditions(); + + self.0[11..].first_chunk().unwrap() + } + + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + + #[inline] + pub fn n_stripes(&self) -> usize { + // stripes_per_block + (self.len() - 64) / 8 + } + + #[inline(always)] + fn reassert_preconditions(&self) { + // Safety: The length of the bytes was checked at value + // construction time. + unsafe { + debug_assert!(self.is_valid()); + assert_unchecked(self.is_valid()); + } + } + + #[inline(always)] + pub fn is_valid(&self) -> bool { + self.0.len() >= SECRET_MINIMUM_LENGTH + } +} diff --git a/src/xxhash3_64/sse2.rs b/src/xxhash3_64/sse2.rs new file mode 100644 index 000000000..0290038e4 --- /dev/null +++ b/src/xxhash3_64/sse2.rs @@ -0,0 +1,85 @@ +use core::arch::x86_64::*; + +use super::{scalar, Vector}; + +#[derive(Copy, Clone)] +pub struct Impl(()); + +impl Impl { + /// # Safety + /// + /// You must ensure that the CPU has the SSE2 feature + #[inline] + pub unsafe fn new_unchecked() -> Impl { + Impl(()) + } +} + +impl Vector for Impl { + #[inline] + fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // Safety: Type can only be constructed when SSE2 feature is present + unsafe { round_scramble_sse2(acc, secret_end) } + } + + #[inline] + fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + // Safety: Type can only be constructed when SSE2 feature is present + unsafe { accumulate_sse2(acc, stripe, secret) } + } +} + +/// # Safety +/// +/// You must ensure that the CPU has 
the SSE2 feature +#[inline] +#[target_feature(enable = "sse2")] +unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { + // The scalar implementation is autovectorized nicely enough + scalar::Impl.round_scramble(acc, secret_end) +} + +/// # Safety +/// +/// You must ensure that the CPU has the SSE2 feature +#[inline] +#[target_feature(enable = "sse2")] +unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { + let acc = acc.as_mut_ptr().cast::<__m128i>(); + let stripe = stripe.as_ptr().cast::<__m128i>(); + let secret = secret.as_ptr().cast::<__m128i>(); + + // Safety: The caller has ensured we have the SSE2 + // feature. We load from and store to references so we + // know that data is valid. We use unaligned loads / + // stores. Data manipulation is otherwise done on + // intermediate values. + unsafe { + for i in 0..4 { + // See [align-acc]. + let mut acc_0 = _mm_loadu_si128(acc.add(i)); + let stripe_0 = _mm_loadu_si128(stripe.add(i)); + let secret_0 = _mm_loadu_si128(secret.add(i)); + + // let value[i] = stripe[i] ^ secret[i]; + let value_0 = _mm_xor_si128(stripe_0, secret_0); + + // stripe_swap[i] = stripe[i ^ 1] + let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); + + // acc[i] += stripe_swap[i] + acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); + + // value_shift[i] = value[i] >> 32 + let value_shift_0 = _mm_srli_epi64::<32>(value_0); + + // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) + let product_0 = _mm_mul_epu32(value_0, value_shift_0); + + // acc[i] += product[i] + acc_0 = _mm_add_epi64(acc_0, product_0); + + _mm_storeu_si128(acc.add(i), acc_0); + } + } +} From f7c221ec73c1735b63f089f3c2af42bb99b93c43 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 13:56:19 -0400 Subject: [PATCH 137/166] Return errors --- src/xxhash3_64.rs | 209 +++++++++++++++++++++++++++++---------- src/xxhash3_64/secret.rs | 18 +++- 2 files changed, 172 insertions(+), 55 
deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 0e4b3a5fe..dc5888318 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -41,7 +41,10 @@ const PRIME_MX2: u64 = 0x9FB21C651E98DF25; const DEFAULT_SEED: u64 = 0; -const DEFAULT_SECRET_RAW: [u8; 192] = [ +pub const DEFAULT_SECRET_LENGTH: usize = 192; +type DefaultSecret = [u8; DEFAULT_SECRET_LENGTH]; + +const DEFAULT_SECRET_RAW: DefaultSecret = [ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, @@ -87,9 +90,22 @@ impl XxHash3_64 { } #[inline] - pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> u64 { - let secret = Secret::new(secret).unwrap(); // TODO: ERROR - impl_oneshot(secret, DEFAULT_SEED, input) + pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result { + let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; + Ok(impl_oneshot(secret, DEFAULT_SEED, input)) + } +} + +/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] +/// bytes. +#[derive(Debug)] +pub struct OneshotWithSecretError(secret::Error); + +impl core::error::Error for OneshotWithSecretError {} + +impl core::fmt::Display for OneshotWithSecretError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.0.fmt(f) } } @@ -101,20 +117,43 @@ type Buffer = [u8; BUFFERED_BYTES]; // Ensure that a full buffer always implies we are in the 241+ byte case. const _: () = assert!(BUFFERED_BYTES > 240); +/// A buffer containing the secret bytes. +/// /// # Safety /// /// Must always return a slice with the same number of elements. pub unsafe trait FixedBuffer: AsRef<[u8]> {} +/// A mutable buffer to contain the secret bytes. +/// +/// # Safety +/// +/// Must always return a slice with the same number of elements. 
The +/// slice must always be the same as that returned from +/// [`AsRef::as_ref`][]. +pub unsafe trait FixedMutBuffer: FixedBuffer + AsMut<[u8]> {} + // Safety: An array will never change size. unsafe impl FixedBuffer for [u8; N] {} +// Safety: An array will never change size. +unsafe impl FixedMutBuffer for [u8; N] {} + // Safety: An array will never change size. unsafe impl FixedBuffer for &[u8; N] {} +// Safety: An array will never change size. +unsafe impl FixedBuffer for &mut [u8; N] {} + +// Safety: An array will never change size. +unsafe impl FixedMutBuffer for &mut [u8; N] {} + // Safety: A plain slice will never change size. unsafe impl FixedBuffer for Box<[u8]> {} +// Safety: A plain slice will never change size. +unsafe impl FixedMutBuffer for Box<[u8]> {} + /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. pub struct SecretBuffer { @@ -130,21 +169,19 @@ where /// Takes the seed, secret, and buffer and performs no /// modifications to them, only validating that the sizes are /// appropriate. - pub fn new(seed: u64, secret: S) -> Result { - let this = Self { - seed, - secret, - buffer: [0; BUFFERED_BYTES], - }; - - if this.is_valid() { - Ok(this) - } else { - Err(this.decompose()) + pub fn new(seed: u64, secret: S) -> Result> { + match Secret::new(secret.as_ref()) { + Ok(_) => Ok(Self { + seed, + secret, + buffer: [0; BUFFERED_BYTES], + }), + Err(e) => Err(SecretTooShortError(e, secret)), } } #[inline(always)] + #[cfg(test)] fn is_valid(&self) -> bool { let secret = self.secret.as_ref(); @@ -156,11 +193,6 @@ where Self::secret(&self.secret).n_stripes() } - /// Returns the secret and buffer values. - pub fn decompose(self) -> S { - self.secret - } - #[inline] fn parts(&self) -> (u64, &Secret, &Buffer) { (self.seed, Self::secret(&self.secret), &self.buffer) @@ -179,7 +211,14 @@ where } } -impl SecretBuffer<&'static [u8; 192]> { +impl SecretBuffer { + /// Returns the secret. 
+ pub fn into_secret(self) -> S { + self.secret + } +} + +impl SecretBuffer<&'static [u8; DEFAULT_SECRET_LENGTH]> { /// Use the default seed and secret values while allocating nothing. /// /// Note that this type may take up a surprising amount of stack space. @@ -215,11 +254,19 @@ mod with_alloc { } } - pub fn with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { - Self { - inner: RawHasher::allocate_with_seed_and_secret(seed, secret), + pub fn with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + Ok(Self { + inner: RawHasher::allocate_with_seed_and_secret(seed, secret)?, _private: (), - } + }) + } + + /// Returns the secret. + pub fn into_secret(self) -> Box<[u8]> { + self.inner.into_secret() } } @@ -269,15 +316,11 @@ mod with_alloc { /// Allocates the temporary buffer and uses the provided seed /// and secret buffer. - pub fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { - let secret = secret.into(); - assert!(secret.len() > SECRET_MINIMUM_LENGTH); // todo result - - Self { - seed, - secret, - buffer: [0; BUFFERED_BYTES], - } + pub fn allocate_with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + Self::new(seed, secret.into()) } } @@ -292,28 +335,89 @@ mod with_alloc { Self::new(SecretBuffer::allocate_with_seed(seed)) } - fn allocate_with_seed_and_secret(seed: u64, secret: impl Into>) -> Self { - Self::new(SecretBuffer::allocate_with_seed_and_secret(seed, secret)) + fn allocate_with_seed_and_secret( + seed: u64, + secret: impl Into>, + ) -> Result>> { + SecretBuffer::allocate_with_seed_and_secret(seed, secret).map(Self::new) } } } impl SecretBuffer where - S: FixedBuffer + AsMut<[u8]>, + S: FixedMutBuffer, { /// Fills the secret buffer with a secret derived from the seed - /// and the default secret. 
- pub fn with_seed(seed: u64, mut secret: S) -> Result { - let secret_slice: &mut [u8; 192] = match secret.as_mut().try_into() { - Ok(s) => s, - Err(_) => return Err(secret), - }; + /// and the default secret. The secret must be exactly + /// [`DEFAULT_SECRET_LENGTH`][] bytes long. + pub fn with_seed(seed: u64, mut secret: S) -> Result> { + match <&mut DefaultSecret>::try_from(secret.as_mut()) { + Ok(secret_slice) => { + *secret_slice = DEFAULT_SECRET_RAW; + derive_secret(seed, secret_slice); + + Ok(Self { + seed, + secret, + buffer: [0; BUFFERED_BYTES], + }) + } + Err(_) => Err(SecretWithSeedError(secret)), + } + } +} + +/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] +/// bytes. +pub struct SecretTooShortError(secret::Error, S); + +impl SecretTooShortError { + /// Returns the secret. + pub fn into_secret(self) -> S { + self.1 + } +} + +impl core::error::Error for SecretTooShortError {} + +impl core::fmt::Debug for SecretTooShortError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("SecretTooShortError").finish() + } +} + +impl core::fmt::Display for SecretTooShortError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.0.fmt(f) + } +} + +/// The provided secret was not exactly [`DEFAULT_SECRET_LENGTH`][] +/// bytes. +pub struct SecretWithSeedError(S); + +impl SecretWithSeedError { + /// Returns the secret. 
+ pub fn into_secret(self) -> S { + self.0 + } +} - *secret_slice = DEFAULT_SECRET_RAW; - derive_secret(seed, secret_slice); +impl core::error::Error for SecretWithSeedError {} - Self::new(seed, secret) +impl core::fmt::Debug for SecretWithSeedError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("SecretWithSeedError").finish() + } +} + +impl core::fmt::Display for SecretWithSeedError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "The secret must be exactly {DEFAULT_SECRET_LENGTH} bytes" + ) } } @@ -390,6 +494,11 @@ impl RawHasher { total_bytes: 0, } } + + /// Returns the secret. + pub fn into_secret(self) -> S { + self.secret_buffer.into_secret() + } } macro_rules! dispatch { @@ -675,7 +784,7 @@ where /// This function assumes that the incoming buffer has been populated /// with the default secret. #[inline] -fn derive_secret(seed: u64, secret: &mut [u8; 192]) { +fn derive_secret(seed: u64, secret: &mut DefaultSecret) { if seed == DEFAULT_SEED { return; } @@ -1198,12 +1307,6 @@ mod test { assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid()) } - #[test] - fn secret_buffer_allocate_with_seed_and_secret_is_valid() { - let secret = [42; 1024]; - assert!(SecretBuffer::allocate_with_seed_and_secret(0xdead_beef, secret).is_valid()) - } - macro_rules! bytes { ($($n: literal),* $(,)?) => { &[$(&gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] diff --git a/src/xxhash3_64/secret.rs b/src/xxhash3_64/secret.rs index 14070d8e7..db55597ed 100644 --- a/src/xxhash3_64/secret.rs +++ b/src/xxhash3_64/secret.rs @@ -9,14 +9,14 @@ pub struct Secret([u8]); impl Secret { #[inline] - pub fn new(bytes: &[u8]) -> Result<&Self, ()> { + pub fn new(bytes: &[u8]) -> Result<&Self, Error> { // Safety: We check for validity before returning. 
unsafe { let this = Self::new_unchecked(bytes); if this.is_valid() { Ok(this) } else { - Err(()) // TODO error + Err(Error(())) } } } @@ -158,3 +158,17 @@ impl Secret { self.0.len() >= SECRET_MINIMUM_LENGTH } } + +#[derive(Debug)] +pub struct Error(()); + +impl core::error::Error for Error {} + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "The secret must have at least {SECRET_MINIMUM_LENGTH} bytes" + ) + } +} From 9329ff80c34704278f9e0e0cddbe97192814fb68 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 13:56:37 -0400 Subject: [PATCH 138/166] Add basic docs --- src/xxhash3_64.rs | 22 +++++++++++++++++++++- src/xxhash3_64/secret.rs | 1 + 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index dc5888318..ab46d0e88 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1,4 +1,5 @@ -#![allow(missing_docs)] +//! The implementation of XXH3_64. + #![deny( clippy::missing_safety_doc, clippy::undocumented_unsafe_blocks, @@ -41,7 +42,9 @@ const PRIME_MX2: u64 = 0x9FB21C651E98DF25; const DEFAULT_SEED: u64 = 0; +/// The length of the default secret. pub const DEFAULT_SECRET_LENGTH: usize = 192; + type DefaultSecret = [u8; DEFAULT_SECRET_LENGTH]; const DEFAULT_SECRET_RAW: DefaultSecret = [ @@ -62,6 +65,7 @@ const DEFAULT_SECRET_RAW: DefaultSecret = [ // Safety: The default secret is long enough const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; +/// Calculates the 64-bit hash. pub struct XxHash3_64 { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, @@ -69,11 +73,18 @@ pub struct XxHash3_64 { } impl XxHash3_64 { + /// Hash all data at once. If you can use this function, you may + /// see noticable speed gains for certain types of input. 
+ #[must_use] #[inline] pub fn oneshot(input: &[u8]) -> u64 { impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input) } + /// Hash all data at once using the provided seed and a secret + /// derived from the seed. If you can use this function, you may + /// see noticable speed gains for certain types of input. + #[must_use] #[inline] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { let mut secret = DEFAULT_SECRET_RAW; @@ -89,6 +100,9 @@ impl XxHash3_64 { impl_oneshot(secret, seed, input) } + /// Hash all data at once using the provided secret. If you can + /// use this function, you may see noticable speed gains for + /// certain types of input. #[inline] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result { let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; @@ -240,6 +254,7 @@ mod with_alloc { use super::*; impl XxHash3_64 { + /// Constructs the hasher using the default seed and secret values. pub fn new() -> Self { Self { inner: RawHasher::allocate_default(), @@ -247,6 +262,8 @@ mod with_alloc { } } + /// Constructs the hasher using the provided seed and a secret + /// derived from the seed. pub fn with_seed(seed: u64) -> Self { Self { inner: RawHasher::allocate_with_seed(seed), @@ -254,6 +271,7 @@ mod with_alloc { } } + /// Constructs the hasher using the provided seed and secret. pub fn with_seed_and_secret( seed: u64, secret: impl Into>, @@ -486,6 +504,8 @@ pub struct RawHasher { } impl RawHasher { + /// Construct the hasher with the provided seed, secret, and + /// temporary buffer. pub fn new(secret_buffer: SecretBuffer) -> Self { Self { secret_buffer, diff --git a/src/xxhash3_64/secret.rs b/src/xxhash3_64/secret.rs index db55597ed..9741fabb0 100644 --- a/src/xxhash3_64/secret.rs +++ b/src/xxhash3_64/secret.rs @@ -2,6 +2,7 @@ use core::{hint::assert_unchecked, mem}; use super::SliceBackport as _; +/// The minimum length of a secret. 
pub const SECRET_MINIMUM_LENGTH: usize = 136; #[repr(transparent)] From 4bebba7ececf7db832a656f318e6dfc03e6b794e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 14:07:26 -0400 Subject: [PATCH 139/166] error --- compare/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index ddda2fea2..44e171c45 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -292,7 +292,7 @@ mod xxhash3_64 { fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { let native = c::XxHash3_64::oneshot_with_secret(secret, data); - let rust = rust::XxHash3_64::oneshot_with_secret(secret, data); + let rust = rust::XxHash3_64::oneshot_with_secret(secret, data).unwrap(); prop_assert_eq!(native, rust); Ok(()) From dec40462cacaec313a7c1a69459a15d52cf782bb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 21 Aug 2024 14:07:45 -0400 Subject: [PATCH 140/166] Rename --- asmasm/src/main.rs | 4 ++-- compare/benches/benchmark.rs | 2 +- src/lib.rs | 2 +- src/xxhash3_64.rs | 34 +++++++++++++++++----------------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/asmasm/src/main.rs b/asmasm/src/main.rs index b1244716b..0561ba274 100644 --- a/asmasm/src/main.rs +++ b/asmasm/src/main.rs @@ -1,6 +1,6 @@ -use std::{hash::Hasher, time::Instant}; +use std::{hash::Hasher as _, time::Instant}; use xx_hash_sys::XxHash3_64 as C; -use xx_renu::xxhash3_64::XxHash3_64; +use xx_renu::XxHash3_64; fn main() { let filename = std::env::args().nth(1).expect("filename"); diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 0affcbb89..0e48f85a4 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,6 +1,6 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use rand::{Rng, RngCore, SeedableRng}; -use std::{hash::Hasher, iter}; +use std::{hash::Hasher as _, iter}; use xx_hash_sys as c; use xx_renu as rust; diff --git 
a/src/lib.rs b/src/lib.rs index ad243166d..524a2cc5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -97,7 +97,7 @@ pub mod xxhash3_64; #[cfg(feature = "xxhash3_64")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] -pub use xxhash3_64::XxHash3_64; +pub use xxhash3_64::Hasher as XxHash3_64; trait IntoU32 { fn into_u32(self) -> u32; diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index ab46d0e88..504f9b0fe 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -66,13 +66,13 @@ const DEFAULT_SECRET_RAW: DefaultSecret = [ const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; /// Calculates the 64-bit hash. -pub struct XxHash3_64 { +pub struct Hasher { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, _private: (), } -impl XxHash3_64 { +impl Hasher { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] @@ -253,7 +253,7 @@ mod with_alloc { use super::*; - impl XxHash3_64 { + impl Hasher { /// Constructs the hasher using the default seed and secret values. pub fn new() -> Self { Self { @@ -288,13 +288,13 @@ mod with_alloc { } } - impl Default for XxHash3_64 { + impl Default for Hasher { fn default() -> Self { Self::new() } } - impl hash::Hasher for XxHash3_64 { + impl hash::Hasher for Hasher { #[inline] fn write(&mut self, input: &[u8]) { self.inner.write(input) @@ -491,7 +491,7 @@ impl StripeAccumulator { /// A lower-level interface for computing a hash from streaming data. /// /// The algorithm requires a secret which can be a reasonably large -/// piece of data. [`XxHash3_64`][] makes one concrete implementation +/// piece of data. [`Hasher`][] makes one concrete implementation /// decision that uses dynamic memory allocation, but specialized /// usages may desire more flexibility. 
This type, combined with /// [`SecretBuffer`][], offer that flexibility at the cost of a @@ -1303,7 +1303,7 @@ impl SliceBackport for [T] { #[cfg(test)] mod test { - use std::{array, hash::Hasher}; + use std::{array, hash::Hasher as _}; use super::*; @@ -1340,7 +1340,7 @@ mod test { } fn hash_byte_by_byte(input: &[u8]) -> u64 { - let mut hasher = XxHash3_64::new(); + let mut hasher = Hasher::new(); for byte in input.chunks(1) { hasher.write(byte) } @@ -1348,7 +1348,7 @@ mod test { } fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 { - let mut hasher = XxHash3_64::with_seed(seed); + let mut hasher = Hasher::with_seed(seed); for byte in input.chunks(1) { hasher.write(byte) } @@ -1357,7 +1357,7 @@ mod test { #[test] fn oneshot_empty() { - let hash = XxHash3_64::oneshot(&[]); + let hash = Hasher::oneshot(&[]); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } @@ -1369,7 +1369,7 @@ mod test { #[test] fn oneshot_1_to_3_bytes() { - test_1_to_3_bytes(XxHash3_64::oneshot) + test_1_to_3_bytes(Hasher::oneshot) } #[test] @@ -1395,7 +1395,7 @@ mod test { #[test] fn oneshot_4_to_8_bytes() { - test_4_to_8_bytes(XxHash3_64::oneshot) + test_4_to_8_bytes(Hasher::oneshot) } #[test] @@ -1423,7 +1423,7 @@ mod test { #[test] fn oneshot_9_to_16_bytes() { - test_9_to_16_bytes(XxHash3_64::oneshot) + test_9_to_16_bytes(Hasher::oneshot) } #[test] @@ -1454,7 +1454,7 @@ mod test { #[test] fn oneshot_17_to_128_bytes() { - test_17_to_128_bytes(XxHash3_64::oneshot) + test_17_to_128_bytes(Hasher::oneshot) } #[test] @@ -1496,7 +1496,7 @@ mod test { #[test] fn oneshot_129_to_240_bytes() { - test_129_to_240_bytes(XxHash3_64::oneshot) + test_129_to_240_bytes(Hasher::oneshot) } #[test] @@ -1530,7 +1530,7 @@ mod test { #[test] fn oneshot_241_plus_bytes() { - test_241_plus_bytes(XxHash3_64::oneshot) + test_241_plus_bytes(Hasher::oneshot) } #[test] @@ -1559,7 +1559,7 @@ mod test { #[test] fn oneshot_with_seed() { - test_with_seed(XxHash3_64::oneshot_with_seed) + 
test_with_seed(Hasher::oneshot_with_seed) } #[test] From 00c5464632b7357e3fa0a61bc7124005b660decb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 22 Aug 2024 12:26:33 -0400 Subject: [PATCH 141/166] Generate comparison graphs from benchmarks --- compare/Cargo.toml | 2 +- compare/README.md | 202 ++++++++++++ compare/benches/benchmark.rs | 305 +++++++++--------- compare/benchmark.sh | 56 ++++ compare/generate-graph.R | 150 +++++++++ compare/prepare-data.jq | 21 ++ .../results/xxhash3_64-streaming-aarch64.svg | 174 ++++++++++ .../results/xxhash3_64-streaming-x86_64.svg | 200 ++++++++++++ .../results/xxhash3_64-tiny_data-aarch64.svg | 126 ++++++++ .../results/xxhash3_64-tiny_data-x86_64.svg | 146 +++++++++ .../results/xxhash64-streaming-aarch64.svg | 116 +++++++ compare/results/xxhash64-streaming-x86_64.svg | 116 +++++++ .../results/xxhash64-tiny_data-aarch64.svg | 120 +++++++ compare/results/xxhash64-tiny_data-x86_64.svg | 120 +++++++ compare/svgo.config.js | 12 + 15 files changed, 1715 insertions(+), 151 deletions(-) create mode 100644 compare/README.md create mode 100755 compare/benchmark.sh create mode 100755 compare/generate-graph.R create mode 100755 compare/prepare-data.jq create mode 100644 compare/results/xxhash3_64-streaming-aarch64.svg create mode 100644 compare/results/xxhash3_64-streaming-x86_64.svg create mode 100644 compare/results/xxhash3_64-tiny_data-aarch64.svg create mode 100644 compare/results/xxhash3_64-tiny_data-x86_64.svg create mode 100644 compare/results/xxhash64-streaming-aarch64.svg create mode 100644 compare/results/xxhash64-streaming-x86_64.svg create mode 100644 compare/results/xxhash64-tiny_data-aarch64.svg create mode 100644 compare/results/xxhash64-tiny_data-x86_64.svg create mode 100644 compare/svgo.config.js diff --git a/compare/Cargo.toml b/compare/Cargo.toml index f71c802db..b6495a7b3 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -11,7 +11,7 @@ name = "benchmark" harness = false [dependencies] -criterion = { 
version = "0.5.1", features = ["real_blackbox"] } +criterion = { version = "0.5.1", features = [] } proptest = "1.5.0" rand = "0.8.5" twox-hash = "1.6.3" diff --git a/compare/README.md b/compare/README.md new file mode 100644 index 000000000..efb868e2b --- /dev/null +++ b/compare/README.md @@ -0,0 +1,202 @@ +# Overview + +Tests compare calling [the reference implementation in +C](https://xxhash.com) against equivalent functions in this crate. No +link-time optimization (LTO) is used, so the C performance numbers +have additional overhead for each function call. + +Click any graph to see it full-size. + +# XXHash64 + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. + +### aarch64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 13.4 | +| C | 13.4 | + +## x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 15.7 | +| C | 15.8 | + + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + XXHash64, streaming data, on an aarch64 processor + + +### x86_64 + + + XXHash64, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 32 bytes of data. + +### aarch64 + + + XXHash64, small data, on an aarch64 processor + + +### x86_64 + + + XXHash64, small data, on an x86_64 processor + + + +# XXHash3 (64-bit) + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. 
+ +### aarch64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 34.8 | +| C | 34.8 | +| C (scalar) | 21.0 | +| C (NEON) | 34.7 | + +### x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 58.3 | +| C | 25.0 | +| C (scalar) | 7.5 | +| C (SSE2) | 25.1 | +| C (AVX2) | 58.1 | + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + XXHash3, 64-bit, streaming data, on an aarch64 processor + + +### x86_64 + + + XXHash3, 64-bit, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 230 bytes of +data. Representative samples are taken from similar times to avoid +cluttering the graph and wasting benchmarking time. + +### aarch64 + + + XXHash3, 64-bit, small data, on an aarch64 processor + + +### x86_64 + + + XXHash3, 64-bit, small data, on an x86_64 processor + + +# Benchmark machines + +## Overview + +| CPU | Memory | C compiler | +|-------------------|--------|--------------------| +| Apple M1 Max | 64 GiB | clang 15.0.0 | +| AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | + +Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. + +## Details + +### aarch64 + + + + + + + + + + + + + + + + +
CPUApple M1 Max
Memory64 GiB
C compilerApple clang version 15.0.0 (clang-1500.3.9.4)
+ +### x86_64 + + + + + + + + + + + + + + + + +
CPUAMD Ryzen 9 3950X 16-Core Processor, 3501 Mhz, 16 Core(s), 32 Logical Processor(s)
Memory32 GiB (3600 MT/s)
C compilerMicrosoft (R) C/C++ Optimizing Compiler Version 19.41.34120 for x86
diff --git a/compare/benches/benchmark.rs b/compare/benches/benchmark.rs index 0e48f85a4..acf5ab8ec 100644 --- a/compare/benches/benchmark.rs +++ b/compare/benches/benchmark.rs @@ -1,79 +1,94 @@ -use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +use criterion::{ + criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion, Throughput, +}; use rand::{Rng, RngCore, SeedableRng}; -use std::{hash::Hasher as _, iter}; +use std::{env::consts::ARCH, hash::Hasher as _, iter}; use xx_hash_sys as c; use xx_renu as rust; -const TINY_DATA_SIZE: usize = 32; const BIG_DATA_SIZE: usize = 4 * 1024 * 1024; const MIN_BIG_DATA_SIZE: usize = 256 * 1024; -const MAX_CHUNKS: usize = 64; const SEED: u64 = 0xc651_4843_1995_363f; -fn tiny_data(c: &mut Criterion) { - let (seed, data) = gen_data(TINY_DATA_SIZE); - let mut g = c.benchmark_group("tiny_data"); - - for size in 0..=data.len() { - let data = &data[..size]; - g.throughput(Throughput::Bytes(data.len() as _)); - - let id = format!("impl-c/fn-oneshot/size-{size:02}"); - g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); - - let id = format!("impl-c/fn-streaming/size-{size:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }) - }); - - let id = format!("impl-rust/fn-oneshot/size-{size:02}"); - g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); - - let id = format!("impl-rust/fn-streaming/size-{size:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }) - }); +trait CriterionExt { + fn my_benchmark_group(&mut self, algo: &str, bench: &str) -> BenchmarkGroup<'_, WallTime>; +} + +impl CriterionExt for Criterion { + fn my_benchmark_group(&mut self, algo: &str, bench: &str) -> BenchmarkGroup<'_, WallTime> { + 
self.benchmark_group(format!("arch-{ARCH}/algo-{algo}/bench-{bench}")) } +} - g.finish(); +fn gen_data(length: usize) -> (u64, Vec) { + let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); + + let seed = rng.gen(); + + let mut data = vec![0; length]; + rng.fill_bytes(&mut data); + + (seed, data) } -fn oneshot(c: &mut Criterion) { - let (seed, data) = gen_data(BIG_DATA_SIZE); - let mut g = c.benchmark_group("oneshot"); +fn half_sizes(max: usize) -> impl Iterator { + iter::successors(Some(max), |&v| if v == 1 { None } else { Some(v / 2) }) +} - for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { - let data = &data[..size]; - g.throughput(Throughput::Bytes(data.len() as _)); +mod xxhash64 { + use super::*; - let id = format!("impl-c/size-{size:07}"); - g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); + const TINY_DATA_SIZE: usize = 32; - let id = format!("impl-rust/size-{size:07}"); - g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); + fn tiny_data(c: &mut Criterion) { + let (seed, data) = gen_data(TINY_DATA_SIZE); + let mut g = c.my_benchmark_group("xxhash64", "tiny_data"); + + for size in 0..=data.len() { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); + + let id = format!("impl-c/size-{size:02}"); + g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); + + let id = format!("impl-rust/size-{size:02}"); + g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); + } + + g.finish(); } - g.finish(); -} + fn oneshot(c: &mut Criterion) { + let (seed, data) = gen_data(BIG_DATA_SIZE); + let mut g = c.my_benchmark_group("xxhash64", "oneshot"); + + for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { + let data = &data[..size]; + g.throughput(Throughput::Bytes(data.len() as _)); -fn streaming(c: &mut Criterion) { - let mut g = c.benchmark_group("streaming_many_chunks"); + let id = format!("impl-c/size-{size:07}"); + 
g.bench_function(id, |b| b.iter(|| c::XxHash64::oneshot(seed, data))); + + let id = format!("impl-rust/size-{size:07}"); + g.bench_function(id, |b| b.iter(|| rust::XxHash64::oneshot(seed, data))); + } + + g.finish(); + } + + fn streaming(c: &mut Criterion) { + let mut g = c.my_benchmark_group("xxhash64", "streaming"); + + let size = 1024 * 1024; + let (seed, data) = gen_data(size); + + for chunk_size in half_sizes(size) { + let chunks = data.chunks(chunk_size).collect::>(); - for size in half_sizes(BIG_DATA_SIZE).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { - for n_chunks in half_sizes(MAX_CHUNKS) { - let (seed, chunks) = gen_chunked_data(size, n_chunks); g.throughput(Throughput::Bytes(size as _)); - let id = format!("impl-c/size-{size:07}/chunks-{n_chunks:02}"); + let id = format!("impl-c/size-{size:07}/chunk_size-{chunk_size:02}"); g.bench_function(id, |b| { b.iter(|| { let mut hasher = c::XxHash64::with_seed(seed); @@ -84,7 +99,7 @@ fn streaming(c: &mut Criterion) { }) }); - let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); + let id = format!("impl-rust/size-{size:07}/chunk_size-{chunk_size:02}"); g.bench_function(id, |b| { b.iter(|| { let mut hasher = rust::XxHash64::with_seed(seed); @@ -95,47 +110,11 @@ fn streaming(c: &mut Criterion) { }) }); } - } - - g.finish(); -} - -fn gen_data(length: usize) -> (u64, Vec) { - let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); - - let seed = rng.gen(); - - let mut data = vec![0; length]; - rng.fill_bytes(&mut data); - - (seed, data) -} - -fn gen_chunked_data(length: usize, n_chunks: usize) -> (u64, Vec>) { - assert!(length >= n_chunks); - - let mut rng = rand::rngs::StdRng::seed_from_u64(SEED); - - let seed = rng.gen(); - - let chunk_size = length / n_chunks; - - let mut total = 0; - let mut chunks = Vec::with_capacity(2 * n_chunks); - - while total < length { - let mut data = vec![0; chunk_size]; - rng.fill_bytes(&mut data); - total += data.len(); - chunks.push(data) + g.finish(); } - (seed, chunks) -} 
- -fn half_sizes(max: usize) -> impl Iterator { - iter::successors(Some(max), |&v| if v == 1 { None } else { Some(v / 2) }) + criterion_group!(benches, tiny_data, oneshot, streaming); } mod xxhash3_64 { @@ -143,7 +122,7 @@ mod xxhash3_64 { fn tiny_data(c: &mut Criterion) { let (seed, data) = gen_data(240); - let mut g = c.benchmark_group("xxhash3_64/tiny_data"); + let mut g = c.my_benchmark_group("xxhash3_64", "tiny_data"); // let categories = 0..=data.len(); @@ -157,12 +136,38 @@ mod xxhash3_64 { let data = &data[..size]; g.throughput(Throughput::Bytes(data.len() as _)); - let id = format!("impl-c/fn-oneshot/size-{size:03}"); + let id = format!("impl-c/size-{size:03}"); g.bench_function(id, |b| { b.iter(|| c::XxHash3_64::oneshot_with_seed(seed, data)) }); - let id = format!("impl-rust/fn-oneshot/size-{size:03}"); + let id = format!("impl-c-scalar/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::scalar::XxHash3_64::oneshot_with_seed(seed, data)) + }); + + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::neon::XxHash3_64::oneshot_with_seed(seed, data)) + }); + } + + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::avx2::XxHash3_64::oneshot_with_seed(seed, data)) + }); + + let id = format!("impl-c-sse2/size-{size:03}"); + g.bench_function(id, |b| { + b.iter(|| c::sse2::XxHash3_64::oneshot_with_seed(seed, data)) + }); + } + + let id = format!("impl-rust/size-{size:03}"); g.bench_function(id, |b| { b.iter(|| rust::XxHash3_64::oneshot_with_seed(seed, data)) }); @@ -173,7 +178,7 @@ mod xxhash3_64 { fn oneshot(c: &mut Criterion) { let (seed, data) = gen_data(BIG_DATA_SIZE); - let mut g = c.benchmark_group("xxhash3_64/oneshot"); + let mut g = c.my_benchmark_group("xxhash3_64", "oneshot"); for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) { let data = &data[..size]; @@ -220,28 
+225,58 @@ mod xxhash3_64 { } fn streaming(c: &mut Criterion) { - let mut g = c.benchmark_group("xxhash3_64/streaming_many_chunks"); + let mut g = c.my_benchmark_group("xxhash3_64", "streaming"); + + let size = 1024 * 1024; + let (seed, data) = gen_data(size); + + for chunk_size in half_sizes(size) { + let chunks = data.chunks(chunk_size).collect::>(); + + g.throughput(Throughput::Bytes(size as _)); - for size in [1024 * 1024] { - for n_chunks in half_sizes(size) { - let (seed, chunks) = gen_chunked_data(size, n_chunks); - g.throughput(Throughput::Bytes(size as _)); + let id = format!("impl-c/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); + + let id = format!("impl-c-scalar/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = c::scalar::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); - let id = format!("impl-c/size-{size:07}/chunks-{n_chunks:02}"); + #[cfg(target_arch = "aarch64")] + { + let id = format!("impl-c-neon/size-{size:07}/chunk_size-{chunk_size:07}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = c::XxHash3_64::with_seed(seed); + let mut hasher = c::neon::XxHash3_64::with_seed(seed); for chunk in &chunks { hasher.write(chunk); } hasher.finish() }) }); + } - let id = format!("impl-c-scalar/size-{size:07}/chunks-{n_chunks:02}"); + #[cfg(target_arch = "x86_64")] + { + let id = format!("impl-c-avx2/size-{size:07}/chunk_size-{chunk_size:07}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = c::scalar::XxHash3_64::with_seed(seed); + let mut hasher = c::avx2::XxHash3_64::with_seed(seed); for chunk in &chunks { hasher.write(chunk); } @@ -249,49 +284,10 @@ mod xxhash3_64 { }) }); - #[cfg(target_arch = "aarch64")] - { - let id = 
format!("impl-c-neon/size-{size:07}/chunks-{n_chunks:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::neon::XxHash3_64::with_seed(seed); - for chunk in &chunks { - hasher.write(chunk); - } - hasher.finish() - }) - }); - } - - #[cfg(target_arch = "x86_64")] - { - let id = format!("impl-c-avx2/size-{size:07}/chunks-{n_chunks:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::avx2::XxHash3_64::with_seed(seed); - for chunk in &chunks { - hasher.write(chunk); - } - hasher.finish() - }) - }); - - let id = format!("impl-c-sse2/size-{size:07}/chunks-{n_chunks:02}"); - g.bench_function(id, |b| { - b.iter(|| { - let mut hasher = c::sse2::XxHash3_64::with_seed(seed); - for chunk in &chunks { - hasher.write(chunk); - } - hasher.finish() - }) - }); - } - - let id = format!("impl-rust/size-{size:07}/chunks-{n_chunks:02}"); + let id = format!("impl-c-sse2/size-{size:07}/chunk_size-{chunk_size:07}"); g.bench_function(id, |b| { b.iter(|| { - let mut hasher = rust::XxHash3_64::with_seed(seed); + let mut hasher = c::sse2::XxHash3_64::with_seed(seed); for chunk in &chunks { hasher.write(chunk); } @@ -299,6 +295,17 @@ mod xxhash3_64 { }) }); } + + let id = format!("impl-rust/size-{size:07}/chunk_size-{chunk_size:07}"); + g.bench_function(id, |b| { + b.iter(|| { + let mut hasher = rust::XxHash3_64::with_seed(seed); + for chunk in &chunks { + hasher.write(chunk); + } + hasher.finish() + }) + }); } g.finish(); @@ -307,6 +314,4 @@ mod xxhash3_64 { criterion_group!(benches, tiny_data, oneshot, streaming); } -criterion_group!(benches, tiny_data, oneshot, streaming); - -criterion_main!(benches, xxhash3_64::benches); +criterion_main!(xxhash64::benches, xxhash3_64::benches); diff --git a/compare/benchmark.sh b/compare/benchmark.sh new file mode 100755 index 000000000..145cabbcd --- /dev/null +++ b/compare/benchmark.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -eu + +SCRIPT_INVOKED_AS="${0}" +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> 
/dev/null && pwd) + +temp_dir=$(mktemp -d) + +function capture() { + subset="${1:-}" + + raw_data="${temp_dir}/raw-data.streaming-json" + + cargo criterion -p compare --message-format=json -- "${subset}" > "${raw_data}" + + echo "Raw benchmark data captured to ${raw_data}" + echo "Next, run \`${SCRIPT_INVOKED_AS} analyze ${raw_data}\`" +} + +function analyze() { + cleaned_data="${temp_dir}/cleaned-data.streaming-json" + + # Capture our input to keep things consistent + cp "${@}" "${temp_dir}" + + "${SCRIPT_DIR}/prepare-data.jq" "${@}" > "${cleaned_data}" + "${SCRIPT_DIR}/generate-graph.R" "${cleaned_data}" "${temp_dir}" + + svgo \ + --quiet \ + --config "${SCRIPT_DIR}/svgo.config.js" \ + --multipass \ + --pretty \ + --indent 2 \ + --final-newline \ + --recursive \ + "${temp_dir}" + + echo "Graphs saved in ${temp_dir}" +} + +mode="${1:-}" +case "${mode}" in + capture) + capture "${@:2}" + ;; + + analyze) + analyze "${@:2}" + ;; + + *) + echo "Unknown command '${mode}'" + exit 1 + ;; +esac diff --git a/compare/generate-graph.R b/compare/generate-graph.R new file mode 100755 index 000000000..aacf9b09b --- /dev/null +++ b/compare/generate-graph.R @@ -0,0 +1,150 @@ +#!/usr/bin/env Rscript + +library(forcats) +library(ggplot2) +library(nlme) +library(rlang) +library(scales) + +args = commandArgs(trailingOnly = TRUE) + +filename = args[1] +output_dir = args[2] + +make_filename = function(algo, bench, arch) { + paste0(output_dir, "/", algo, "-", bench, "-", arch, ".svg") +} + +log2min = function(x) { 2 ^ floor(log2(min(x))) } +log2max = function(x) { 2 ^ ceiling(log2(max(x))) } + +MiB = 2^20 +GiB = 2^30 +TiB = 2^40 +powers_of_two = 2^(0:40) + +byte_labels_raw = label_bytes(units = "auto_binary") +byte_labels = function(x) { + l = byte_labels_raw(x) + l = gsub(" iB", " B", l) # Why would you call them "iB" + gsub(" kiB", " KiB", l) # That K should be capitalized +} +bytes_per_second_labels = function(x) { + paste0(byte_labels(x), "/sec") +} + +## Load the data +data = 
jsonlite::stream_in(file(filename), verbose = FALSE) + +## Reorder and rename the implementation factor +data$impl = fct_relevel(data$impl, "rust", "c", "c-scalar", "c-neon", "c-sse2", "c-avx2") +impl_names = c("rust" = "Rust", "c" = "C", "c-scalar" = "C (scalar)", "c-neon" = "C (NEON)", "c-sse2" = "C (SSE2)" , "c-avx2" = "C (AVX2)") +impl_name = function(n) { impl_names[n] } + +cpus = c(aarch64 = "Apple M1 Max", x86_64 = "AMD Ryzen 9 3950X") + +common_theme = theme(legend.position = "inside", legend.position.inside = c(0.8, 0.2), plot.margin = unit(c(0.1, 1, 0.1, 0.1), 'cm')) + +for (algo in c("xxhash64", "xxhash3_64")) { + message("# ", algo) + + algo_data = data[data$algo == algo,] + + all_tiny_data = algo_data[algo_data$bench == "tiny_data",] + all_oneshot = algo_data[algo_data$bench == "oneshot",] + all_streaming = algo_data[algo_data$bench == "streaming",] + + ## Convert to a duration type + all_tiny_data$mean_estimate = lubridate::dnanoseconds(all_tiny_data$mean_estimate) + + ## Get bytes per second; the time estimate is in nanoseconds + all_oneshot$throughput = all_oneshot$size/(all_oneshot$mean_estimate / 1e9) + + ## Get bytes per second; the time estimate is in nanoseconds + all_streaming$throughput = all_streaming$size / (all_streaming$mean_estimate / 1e9) + + tiny_data_y_limits = c(min(all_tiny_data$mean_estimate), max(all_tiny_data$mean_estimate)) + oneshot_y_limits = c(log2min(all_oneshot$throughput), log2max(all_oneshot$throughput)) + streaming_y_limits = c(log2min(all_streaming$throughput), log2max(all_streaming$throughput)) + + for (arch in c("aarch64", "x86_64")) { + message("## ", arch) + + oneshot = all_oneshot[all_oneshot$arch == arch,] + tiny_data = all_tiny_data[all_tiny_data$arch == arch,] + streaming = all_streaming[all_streaming$arch == arch,] + + cpu = cpus[arch] + subtitle = paste0(arch, " (", cpu, ")") + + if (nrow(tiny_data) != 0) { + message("### Tiny data") + + title = paste0("[", algo, "] Hashing small amounts of bytes (lower is 
better)") + + p = ggplot(tiny_data, aes(x = size, y = mean_estimate, colour = impl)) + + geom_point(alpha = 0.7) + + geom_line(alpha = 0.3) + + scale_x_continuous(labels = byte_labels) + + scale_y_time(labels = label_timespan(), limits = tiny_data_y_limits) + + scale_colour_brewer(labels = impl_name, palette = "Set1") + + labs(title = title, subtitle = subtitle, x = "Size", y = "Time", colour = "Implementation") + + common_theme + + output_filename = make_filename(algo = algo, bench = "tiny_data", arch = arch) + ggsave(output_filename, width = 3000, height = 2000, units = "px", scale = 1.5) + } + + if (nrow(oneshot) != 0) { + message("### Oneshot") + + fit = lmList(throughput ~ size | impl, data = oneshot, pool = FALSE, na.action = na.pass) + coef = as.data.frame(t(sapply(fit, coefficients))) + speeds = round(coef$"(Intercept)" / GiB, digits = 1) + names(speeds) = rownames(coef) + + impl_name_and_speed = function(n) { + name = impl_name(n) + paste(name, "—", speeds[n], "GiB/sec") + } + + title = paste0("[", algo, "] Throughput to hash a buffer (higher is better)") + + p = ggplot(oneshot, aes(x = size, y = throughput, colour = impl)) + + geom_point(alpha = 0.7) + + geom_line(alpha = 0.3) + + scale_x_continuous(transform = transform_log2(), labels = byte_labels, minor_breaks = NULL) + + scale_y_continuous(transform = transform_log2(), labels = bytes_per_second_labels, breaks = powers_of_two, minor_breaks = NULL, limits = oneshot_y_limits) + + scale_colour_brewer(labels = impl_name_and_speed, palette = "Set1") + + labs(title = title, subtitle = subtitle, x = "Buffer Size", y = "Throughput", colour = "Implementation") + + common_theme + + output_filename = make_filename(algo = algo, bench = "oneshot", arch = arch) + ggsave(output_filename, width = 3000, height = 2000, units = "px", scale = 1.5) + + speeds_table = data.frame(speeds) + rownames(speeds_table) = impl_names[rownames(speeds_table)] + print(speeds_table) + } + + if (nrow(streaming) != 0) { + message("### 
Streaming") + + title = paste0("[", algo, "] Throughput of a 1 MiB buffer by chunk size (higher is better)") + + p = ggplot(streaming, aes(x = chunk_size, y = throughput, colour = impl)) + + geom_point(alpha = 0.7) + + geom_line(alpha = 0.3) + + scale_x_continuous(transform = transform_log2(), labels = byte_labels, breaks = powers_of_two, minor_breaks = NULL) + + scale_y_continuous(transform = transform_log2(), labels = bytes_per_second_labels, breaks = powers_of_two, minor_breaks = NULL, limits = streaming_y_limits) + + scale_colour_brewer(palette = "Set1", labels = impl_name) + + labs(title = title , subtitle = subtitle, x = "Chunk Size", y = "Throughput", colour = "Implementation") + + common_theme + + output_filename = make_filename(algo = algo, bench = "streaming", arch = arch) + ggsave(output_filename, width = 3000, height = 2000, units = "px", scale = 1.5) + } + } +} + +warnings() diff --git a/compare/prepare-data.jq b/compare/prepare-data.jq new file mode 100755 index 000000000..99795add5 --- /dev/null +++ b/compare/prepare-data.jq @@ -0,0 +1,21 @@ +#!/usr/bin/env jq --from-file --compact-output + +select(.reason == "benchmark-complete") | + # Split the ID string into separate fields + ( + .id + | split("/") + | map(split("-") | { key: .[0], value: .[1:] | join("-")}) + | from_entries + # Clean up the separate fields + | .size |= tonumber + | if .chunk_size then .chunk_size |= tonumber end + ) + + + + + # Add the benchmark numbers + { + throughput: .throughput[0].per_iteration, + mean_estimate: .mean.estimate, + } diff --git a/compare/results/xxhash3_64-streaming-aarch64.svg b/compare/results/xxhash3_64-streaming-aarch64.svg new file mode 100644 index 000000000..8e21742b4 --- /dev/null +++ b/compare/results/xxhash3_64-streaming-aarch64.svg @@ -0,0 +1,174 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + 8 MiB/sec + 16 MiB/sec + 32 MiB/sec + 64 MiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + 64 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + + + + + + + Rust + C + C (scalar) + C (NEON) + aarch64 (Apple M1 Max) + [xxhash3_64] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/compare/results/xxhash3_64-streaming-x86_64.svg b/compare/results/xxhash3_64-streaming-x86_64.svg new file mode 100644 index 000000000..38428ce59 --- /dev/null +++ b/compare/results/xxhash3_64-streaming-x86_64.svg @@ -0,0 +1,200 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec + 16 MiB/sec + 32 MiB/sec + 64 MiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + 64 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + + + + + + + + + + Rust + C + C (scalar) + C (SSE2) + C (AVX2) + x86_64 (AMD Ryzen 9 3950X) + [xxhash3_64] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/compare/results/xxhash3_64-tiny_data-aarch64.svg b/compare/results/xxhash3_64-tiny_data-aarch64.svg new file mode 100644 index 000000000..50e2a7f14 --- /dev/null +++ b/compare/results/xxhash3_64-tiny_data-aarch64.svg @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4ns + 8ns + 12ns + 16ns + 20ns + 24ns + + 0 B + 50 B + 100 B + 150 B + 200 B + Size + Time + + Implementation + + + + + + + + + + + + + Rust + C + C (scalar) + C (NEON) + aarch64 (Apple M1 Max) + [xxhash3_64] Hashing small amounts of bytes (lower is better) + + diff --git a/compare/results/xxhash3_64-tiny_data-x86_64.svg b/compare/results/xxhash3_64-tiny_data-x86_64.svg new file mode 100644 index 000000000..671c7e0d7 --- /dev/null +++ b/compare/results/xxhash3_64-tiny_data-x86_64.svg @@ -0,0 +1,146 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4ns + 8ns + 12ns + 16ns + 20ns + 24ns + + 0 B + 50 B + 100 B + 150 B + 200 B + Size + Time + + Implementation + + + + + + + + + + + + + + + + Rust + C + C (scalar) + C (SSE2) + C (AVX2) + x86_64 (AMD Ryzen 9 3950X) + [xxhash3_64] Hashing small amounts of bytes (lower is better) + + diff --git a/compare/results/xxhash64-streaming-aarch64.svg b/compare/results/xxhash64-streaming-aarch64.svg new file mode 100644 index 000000000..effd1c847 --- /dev/null +++ b/compare/results/xxhash64-streaming-aarch64.svg @@ -0,0 +1,116 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + Rust + C + aarch64 (Apple M1 Max) + [xxhash64] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/compare/results/xxhash64-streaming-x86_64.svg 
b/compare/results/xxhash64-streaming-x86_64.svg new file mode 100644 index 000000000..636c7eafc --- /dev/null +++ b/compare/results/xxhash64-streaming-x86_64.svg @@ -0,0 +1,116 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec + 32 GiB/sec + + 1 B + 2 B + 4 B + 8 B + 16 B + 32 B + 64 B + 128 B + 256 B + 512 B + 1 KiB + 2 KiB + 4 KiB + 8 KiB + 16 KiB + 32 KiB + 64 KiB + 128 KiB + 256 KiB + 512 KiB + 1 MiB + 2 MiB + Chunk Size + Throughput + + Implementation + + + + + + + Rust + C + x86_64 (AMD Ryzen 9 3950X) + [xxhash64] Throughput of a 1 MiB buffer by chunk size (higher is better) + + diff --git a/compare/results/xxhash64-tiny_data-aarch64.svg b/compare/results/xxhash64-tiny_data-aarch64.svg new file mode 100644 index 000000000..d732a33eb --- /dev/null +++ b/compare/results/xxhash64-tiny_data-aarch64.svg @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 2ns + 4ns + 6ns + 8ns + + 0 B + 10 B + 20 B + 30 B + Size + Time + + Implementation + + + + + + + Rust + C + aarch64 (Apple M1 Max) + [xxhash64] Hashing small amounts of bytes (lower is better) + + diff --git a/compare/results/xxhash64-tiny_data-x86_64.svg b/compare/results/xxhash64-tiny_data-x86_64.svg new file mode 100644 index 000000000..cdbb6396d --- /dev/null +++ b/compare/results/xxhash64-tiny_data-x86_64.svg @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 2ns + 4ns + 6ns + 8ns + + 0 B + 10 B + 20 B + 30 B + Size + Time + + Implementation + + + + + + + Rust + C + x86_64 (AMD Ryzen 9 3950X) + [xxhash64] Hashing small amounts 
of bytes (lower is better) + + diff --git a/compare/svgo.config.js b/compare/svgo.config.js new file mode 100644 index 000000000..1a7c31349 --- /dev/null +++ b/compare/svgo.config.js @@ -0,0 +1,12 @@ +module.exports = { + plugins: [ + { + name: 'preset-default', + params: { + overrides: { + minifyStyles: false, + }, + }, + }, + ], +}; From b0b21c7b8a9acc316ddb6a2cbee80ec4ea6b4189 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 26 Aug 2024 21:04:54 -0400 Subject: [PATCH 142/166] test secret and seed --- compare/src/lib.rs | 45 +++++++++++++++++++++++++++++++++++++++++- src/xxhash3_64.rs | 32 ++++++++++++++++++++++++------ xx_hash-sys/src/lib.rs | 37 ++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 7 deletions(-) diff --git a/compare/src/lib.rs b/compare/src/lib.rs index 44e171c45..a4bdbe0bd 100644 --- a/compare/src/lib.rs +++ b/compare/src/lib.rs @@ -237,10 +237,15 @@ mod xxhash3_64 { } #[test] - fn oneshot_with_a_secret(secret in prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024), data: Vec) { + fn oneshot_with_a_secret(secret in secret(), data: Vec) { oneshot_with_secret_impl(&secret, &data)?; } + #[test] + fn oneshot_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { + oneshot_with_seed_and_secret_impl(seed, &secret, &data)?; + } + #[test] fn streaming_one_chunk(seed: u64, data: Vec) { streaming_one_chunk_impl(seed, &data)?; @@ -250,6 +255,11 @@ mod xxhash3_64 { fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { streaming_one_chunk_impl(seed, &data[offset..])?; } + + #[test] + fn streaming_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { + streaming_with_seed_and_secret_impl(seed, &secret, &data)?; + } } fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { @@ -298,6 +308,14 @@ mod xxhash3_64 { Ok(()) } + fn oneshot_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = 
c::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data); + let rust = rust::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data).unwrap(); + + prop_assert_eq!(native, rust); + Ok(()) + } + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { let native = { let mut hasher = c::XxHash3_64::with_seed(seed); @@ -314,6 +332,31 @@ mod xxhash3_64 { prop_assert_eq!(native, rust); Ok(()) } + + fn streaming_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_64::with_seed_and_secret(seed, secret); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_64::with_seed_and_secret(seed, secret).unwrap(); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn secret() -> impl Strategy> { + prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024) + } } fn vec_and_index() -> impl Strategy, usize)> { diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 504f9b0fe..217ebce56 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -40,6 +40,8 @@ const PRIME64_5: u64 = 0x27D4EB2F165667C5; const PRIME_MX1: u64 = 0x165667919E3779F9; const PRIME_MX2: u64 = 0x9FB21C651E98DF25; +const CUTOFF: usize = 240; + const DEFAULT_SEED: u64 = 0; /// The length of the default secret. @@ -91,7 +93,7 @@ impl Hasher { // We know that the secret will only be used if we have more // than 240 bytes, so don't waste time computing it otherwise. - if input.len() > 240 { + if input.len() > CUTOFF { derive_secret(seed, &mut secret); } @@ -100,14 +102,32 @@ impl Hasher { impl_oneshot(secret, seed, input) } - /// Hash all data at once using the provided secret. If you can - /// use this function, you may see noticable speed gains for - /// certain types of input. 
+ /// Hash all data at once using the provided secret and the + /// default seed. If you can use this function, you may see + /// noticable speed gains for certain types of input. #[inline] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result { let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; Ok(impl_oneshot(secret, DEFAULT_SEED, input)) } + + /// Hash all data at once using the provided seed and secret. If + /// you can use this function, you may see noticable speed gains + /// for certain types of input. + #[inline] + pub fn oneshot_with_seed_and_secret( + seed: u64, + secret: &[u8], + input: &[u8], + ) -> Result { + let secret = if input.len() > CUTOFF { + Secret::new(secret).map_err(OneshotWithSecretError)? + } else { + DEFAULT_SECRET + }; + + Ok(impl_oneshot(secret, seed, input)) + } } /// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] @@ -129,7 +149,7 @@ const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; type Buffer = [u8; BUFFERED_BYTES]; // Ensure that a full buffer always implies we are in the 241+ byte case. -const _: () = assert!(BUFFERED_BYTES > 240); +const _: () = assert!(BUFFERED_BYTES > CUTOFF); /// A buffer containing the secret bytes. /// @@ -762,7 +782,7 @@ where assert_unchecked(buffer_usage <= buffer.len()) }; - if total_bytes >= 241 { + if total_bytes > CUTOFF { let input = &buffer[..buffer_usage]; // Ingest final stripes diff --git a/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs index 6cf194ea1..cbe655c40 100644 --- a/xx_hash-sys/src/lib.rs +++ b/xx_hash-sys/src/lib.rs @@ -158,6 +158,13 @@ macro_rules! 
xxh3_64b_template { secret: *const libc::c_void, secret_length: libc::size_t, ) -> crate::XXH64_hash_t; + fn [<$prefix _64bits_withSecretandSeed>]( + input: *const libc::c_void, + length: libc::size_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH64_hash_t; fn [<$prefix _createState>]() -> *mut crate::XXH3_state_t; fn [<$prefix _64bits_reset>](state: *mut crate::XXH3_state_t) -> crate::XXH_errorcode; @@ -165,6 +172,12 @@ macro_rules! xxh3_64b_template { state: *mut crate::XXH3_state_t, seed: crate::XXH64_hash_t, ) -> crate::XXH_errorcode; + fn [<$prefix _64bits_reset_withSecretandSeed>]( + state: *mut crate::XXH3_state_t, + secret: *const libc::c_void, + secret_length: libc::size_t, + seed: crate::XXH64_hash_t, + ) -> crate::XXH_errorcode; fn [<$prefix _64bits_update>]( state: *mut crate::XXH3_state_t, buffer: *const libc::c_void, @@ -199,6 +212,19 @@ macro_rules! xxh3_64b_template { } } + #[inline] + pub fn oneshot_with_seed_and_secret(seed: u64, secret: &[u8], data: &[u8]) -> u64 { + unsafe { + [<$prefix _64bits_withSecretandSeed>]( + data.as_ptr().cast(), + data.len(), + secret.as_ptr().cast(), + secret.len(), + seed, + ) + } + } + #[inline] pub fn new() -> Self { let state = unsafe { @@ -221,6 +247,17 @@ macro_rules! 
xxh3_64b_template { Self(state) } + #[inline] + pub fn with_seed_and_secret(seed: u64, secret: &[u8]) -> Self { + let state = unsafe { + let state = [<$prefix _createState>](); + [<$prefix _64bits_reset_withSecretandSeed>](state, secret.as_ptr().cast(), secret.len(), seed); + state + }; + + Self(state) + } + #[inline] pub fn write(&mut self, data: &[u8]) { let retval = From 1a2b6580e2011e256ff1d78f0b5eba1cda162ab6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 13:30:45 -0400 Subject: [PATCH 143/166] Rename xx-renu as twox-hash --- renu/Cargo.toml | 15 +++++++++++++-- renu/asmasm/Cargo.toml | 2 +- renu/asmasm/src/main.rs | 2 +- renu/compare/Cargo.toml | 3 +-- renu/compare/benches/benchmark.rs | 2 +- renu/compare/src/lib.rs | 4 ++-- renu/renu-sum/Cargo.toml | 2 +- renu/renu-sum/src/main.rs | 2 +- renu/src/lib.rs | 10 +++++----- 9 files changed, 26 insertions(+), 16 deletions(-) diff --git a/renu/Cargo.toml b/renu/Cargo.toml index 5183cadfb..e62d39cd4 100644 --- a/renu/Cargo.toml +++ b/renu/Cargo.toml @@ -1,8 +1,19 @@ [package] -name = "xx-renu" -version = "0.1.0" +name = "twox-hash" +version = "1.6.3" +authors = ["Jake Goulding "] edition = "2021" +description = "A Rust implementation of the XXHash and XXH3 algorithms" +readme = "README.md" +keywords = ["hash", "hasher", "xxhash", "xxh3"] +categories = ["algorithms"] + +repository = "https://github.com/shepmaster/twox-hash" +documentation = "https://docs.rs/twox-hash/" + +license = "MIT" + [workspace] members = [ "asmasm", diff --git a/renu/asmasm/Cargo.toml b/renu/asmasm/Cargo.toml index 511b782de..1f1f94fde 100644 --- a/renu/asmasm/Cargo.toml +++ b/renu/asmasm/Cargo.toml @@ -4,5 +4,5 @@ version = "0.1.0" edition = "2021" [dependencies] -xx-renu = { path = ".." } +twox-hash = { path = ".." 
} xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/renu/asmasm/src/main.rs b/renu/asmasm/src/main.rs index 0561ba274..db0f82d66 100644 --- a/renu/asmasm/src/main.rs +++ b/renu/asmasm/src/main.rs @@ -1,6 +1,6 @@ use std::{hash::Hasher as _, time::Instant}; +use twox_hash::XxHash3_64; use xx_hash_sys::XxHash3_64 as C; -use xx_renu::XxHash3_64; fn main() { let filename = std::env::args().nth(1).expect("filename"); diff --git a/renu/compare/Cargo.toml b/renu/compare/Cargo.toml index b6495a7b3..bc6b0ee81 100644 --- a/renu/compare/Cargo.toml +++ b/renu/compare/Cargo.toml @@ -14,6 +14,5 @@ harness = false criterion = { version = "0.5.1", features = [] } proptest = "1.5.0" rand = "0.8.5" -twox-hash = "1.6.3" -xx-renu = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } +twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/renu/compare/benches/benchmark.rs b/renu/compare/benches/benchmark.rs index acf5ab8ec..961465ee0 100644 --- a/renu/compare/benches/benchmark.rs +++ b/renu/compare/benches/benchmark.rs @@ -4,8 +4,8 @@ use criterion::{ use rand::{Rng, RngCore, SeedableRng}; use std::{env::consts::ARCH, hash::Hasher as _, iter}; +use twox_hash as rust; use xx_hash_sys as c; -use xx_renu as rust; const BIG_DATA_SIZE: usize = 4 * 1024 * 1024; const MIN_BIG_DATA_SIZE: usize = 256 * 1024; diff --git a/renu/compare/src/lib.rs b/renu/compare/src/lib.rs index a4bdbe0bd..24b28a85b 100644 --- a/renu/compare/src/lib.rs +++ b/renu/compare/src/lib.rs @@ -2,8 +2,8 @@ use proptest::{num, prelude::*}; +use twox_hash as rust; use xx_hash_sys as c; -use xx_renu as rust; mod xxhash32 { use proptest::{prelude::*, test_runner::TestCaseResult}; @@ -206,7 +206,7 @@ mod xxhash64 { mod xxhash3_64 { use proptest::{prelude::*, test_runner::TestCaseResult}; use std::hash::Hasher as _; - use xx_renu::xxhash3_64::SECRET_MINIMUM_LENGTH; + 
use twox_hash::xxhash3_64::SECRET_MINIMUM_LENGTH; use super::*; diff --git a/renu/renu-sum/Cargo.toml b/renu/renu-sum/Cargo.toml index 2677f2ea2..9de3c49a9 100644 --- a/renu/renu-sum/Cargo.toml +++ b/renu/renu-sum/Cargo.toml @@ -4,4 +4,4 @@ version = "0.1.0" edition = "2021" [dependencies] -xx-renu = { path = ".." } +twox-hash = { path = ".." } diff --git a/renu/renu-sum/src/main.rs b/renu/renu-sum/src/main.rs index b86a5e26c..08bbcfdc5 100644 --- a/renu/renu-sum/src/main.rs +++ b/renu/renu-sum/src/main.rs @@ -7,7 +7,7 @@ use std::{ sync::mpsc::{self, SendError}, thread, }; -use xx_renu::XxHash3_64; +use twox_hash::XxHash3_64; type Error = Box; type Result = std::result::Result; diff --git a/renu/src/lib.rs b/renu/src/lib.rs index 524a2cc5a..2ee51fb45 100644 --- a/renu/src/lib.rs +++ b/renu/src/lib.rs @@ -7,7 +7,7 @@ //! ### When all the data is available at once //! //! ```rust -//! use xx_renu::XxHash64; +//! use twox_hash::XxHash64; //! //! let seed = 1234; //! let hash = XxHash64::oneshot(seed, b"some bytes"); @@ -18,7 +18,7 @@ //! //! ```rust //! use std::hash::Hasher as _; -//! use xx_renu::XxHash64; +//! use twox_hash::XxHash64; //! //! let seed = 1234; //! let mut hasher = XxHash64::with_seed(seed); @@ -35,7 +35,7 @@ //! //! ```rust //! use std::{collections::HashMap, hash::BuildHasherDefault}; -//! use xx_renu::XxHash64; +//! use twox_hash::XxHash64; //! //! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); //! hash.insert(42, "the answer"); @@ -46,7 +46,7 @@ //! //! ```rust //! use std::collections::HashMap; -//! use xx_renu::xxhash64; +//! use twox_hash::xxhash64; //! //! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); //! hash.insert(42, "the answer"); @@ -57,7 +57,7 @@ //! //! ```rust //! use std::collections::HashMap; -//! use xx_renu::xxhash64; +//! use twox_hash::xxhash64; //! //! let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); //! 
hash.insert(42, "the answer"); From 58c05665dfdf5022034853e3a892eb3172f5b204 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 13:56:32 -0400 Subject: [PATCH 144/166] Format code that slipped through --- renu/compare/src/lib.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/renu/compare/src/lib.rs b/renu/compare/src/lib.rs index 24b28a85b..7701a8314 100644 --- a/renu/compare/src/lib.rs +++ b/renu/compare/src/lib.rs @@ -333,7 +333,11 @@ mod xxhash3_64 { Ok(()) } - fn streaming_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + fn streaming_with_seed_and_secret_impl( + seed: u64, + secret: &[u8], + data: &[u8], + ) -> TestCaseResult { let native = { let mut hasher = c::XxHash3_64::with_seed_and_secret(seed, secret); for chunk in data.chunks(256) { From 1497bfcbc94ddc34d46b26460f2643ae5c875c98 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 14:15:29 -0400 Subject: [PATCH 145/166] Set minimum Rust version --- renu/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/renu/Cargo.toml b/renu/Cargo.toml index e62d39cd4..837af1d8e 100644 --- a/renu/Cargo.toml +++ b/renu/Cargo.toml @@ -3,6 +3,7 @@ name = "twox-hash" version = "1.6.3" authors = ["Jake Goulding "] edition = "2021" +rust-version = "1.81" description = "A Rust implementation of the XXHash and XXH3 algorithms" readme = "README.md" From 9e3510bc7d3c2480c487e4ffffa688cd9c423a2c Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 14:30:35 -0400 Subject: [PATCH 146/166] Avoid inline assembly when testing with Miri --- renu/src/xxhash3_64/scalar.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renu/src/xxhash3_64/scalar.rs b/renu/src/xxhash3_64/scalar.rs index 3a91464b1..b7a996de5 100644 --- a/renu/src/xxhash3_64/scalar.rs +++ b/renu/src/xxhash3_64/scalar.rs @@ -33,7 +33,7 @@ impl Vector for Impl { } #[inline] -#[cfg(not(target_arch = "aarch64"))] +#[cfg(any(miri, 
not(target_arch = "aarch64")))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use super::IntoU64; @@ -47,7 +47,7 @@ fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 // https://github.com/llvm/llvm-project/issues/98481 -#[cfg(target_arch = "aarch64")] +#[cfg(all(not(miri), target_arch = "aarch64"))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { let res; From c7e348c69521dc862091de9ca3ce620b51769bd5 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 14:32:28 -0400 Subject: [PATCH 147/166] Feature `std` implies `alloc` --- renu/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renu/Cargo.toml b/renu/Cargo.toml index 837af1d8e..b0a4ce722 100644 --- a/renu/Cargo.toml +++ b/renu/Cargo.toml @@ -24,7 +24,7 @@ members = [ ] [features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "alloc", "std"] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] random = ["dep:rand"] @@ -34,8 +34,8 @@ xxhash32 = [] xxhash64 = [] xxhash3_64 = [] -alloc = [] std = ["alloc"] +alloc = [] [lints.rust.unexpected_cfgs] level = "warn" From b664dd08b9a7c77b26b5d64eab89629cd53ea2b2 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 7 Oct 2024 14:40:10 -0400 Subject: [PATCH 148/166] Use correct functions for big- and little-endian --- renu/src/xxhash3_64.rs | 22 +++++++++++----------- renu/src/xxhash3_64/scalar.rs | 6 +++--- renu/src/xxhash3_64/secret.rs | 8 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/renu/src/xxhash3_64.rs b/renu/src/xxhash3_64.rs index 217ebce56..70c84d337 100644 --- a/renu/src/xxhash3_64.rs +++ b/renu/src/xxhash3_64.rs @@ -833,14 +833,14 @@ fn derive_secret(seed: u64, secret: &mut DefaultSecret) { let (pairs, _) = words.bp_as_chunks_mut(); for [a_p, b_p] in pairs { - let a = 
u64::from_ne_bytes(*a_p); - let b = u64::from_ne_bytes(*b_p); + let a = u64::from_le_bytes(*a_p); + let b = u64::from_le_bytes(*b_p); let a = a.wrapping_add(seed); let b = b.wrapping_sub(seed); - *a_p = a.to_ne_bytes(); - *b_p = b.to_ne_bytes(); + *a_p = a.to_le_bytes(); + *b_p = b.to_le_bytes(); } } @@ -1006,7 +1006,7 @@ fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { #[inline] fn to_u64s(bytes: &[u8; 16]) -> [u64; 2] { let (pair, _) = bytes.bp_as_chunks::<8>(); - [pair[0], pair[1]].map(u64::from_ne_bytes) + [pair[0], pair[1]].map(u64::from_le_bytes) } let data_words = to_u64s(data); @@ -1160,8 +1160,8 @@ where for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result let mul_result = { - let sa = u64::from_ne_bytes(secrets[i * 2]); - let sb = u64::from_ne_bytes(secrets[i * 2 + 1]); + let sa = u64::from_le_bytes(secrets[i * 2]); + let sb = u64::from_le_bytes(secrets[i * 2 + 1]); let a = (acc[i * 2] ^ sa).into_u128(); let b = (acc[i * 2 + 1] ^ sb).into_u128(); @@ -1253,22 +1253,22 @@ trait U8SliceExt { impl U8SliceExt for [u8] { #[inline] fn first_u32(&self) -> Option { - self.first_chunk().copied().map(u32::from_ne_bytes) + self.first_chunk().copied().map(u32::from_le_bytes) } #[inline] fn last_u32(&self) -> Option { - self.last_chunk().copied().map(u32::from_ne_bytes) + self.last_chunk().copied().map(u32::from_le_bytes) } #[inline] fn first_u64(&self) -> Option { - self.first_chunk().copied().map(u64::from_ne_bytes) + self.first_chunk().copied().map(u64::from_le_bytes) } #[inline] fn last_u64(&self) -> Option { - self.last_chunk().copied().map(u64::from_ne_bytes) + self.last_chunk().copied().map(u64::from_le_bytes) } } diff --git a/renu/src/xxhash3_64/scalar.rs b/renu/src/xxhash3_64/scalar.rs index b7a996de5..64f6f9fa4 100644 --- a/renu/src/xxhash3_64/scalar.rs +++ b/renu/src/xxhash3_64/scalar.rs @@ -7,7 +7,7 @@ impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { let (last, _) 
= secret_end.bp_as_chunks(); - let last = last.iter().copied().map(u64::from_ne_bytes); + let last = last.iter().copied().map(u64::from_le_bytes); for (acc, secret) in acc.iter_mut().zip(last) { *acc ^= *acc >> 47; @@ -22,8 +22,8 @@ impl Vector for Impl { let (secret, _) = secret.bp_as_chunks(); for i in 0..8 { - let stripe = u64::from_ne_bytes(stripe[i]); - let secret = u64::from_ne_bytes(secret[i]); + let stripe = u64::from_le_bytes(stripe[i]); + let secret = u64::from_le_bytes(secret[i]); let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); diff --git a/renu/src/xxhash3_64/secret.rs b/renu/src/xxhash3_64/secret.rs index 9741fabb0..e7210b977 100644 --- a/renu/src/xxhash3_64/secret.rs +++ b/renu/src/xxhash3_64/secret.rs @@ -38,7 +38,7 @@ impl Secret { self.reassert_preconditions(); let (q, _) = self.0[56..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_ne_bytes) + [q[0], q[1]].map(u64::from_le_bytes) } #[inline] @@ -46,7 +46,7 @@ impl Secret { self.reassert_preconditions(); let (q, _) = self.0.bp_as_chunks(); - [q[0], q[1]].map(u32::from_ne_bytes) + [q[0], q[1]].map(u32::from_le_bytes) } #[inline] @@ -54,7 +54,7 @@ impl Secret { self.reassert_preconditions(); let (q, _) = self.0[8..].bp_as_chunks(); - [q[0], q[1]].map(u64::from_ne_bytes) + [q[0], q[1]].map(u64::from_le_bytes) } #[inline] @@ -62,7 +62,7 @@ impl Secret { self.reassert_preconditions(); let (q, _) = self.0[24..].bp_as_chunks(); - [q[0], q[1], q[2], q[3]].map(u64::from_ne_bytes) + [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) } #[inline] From 39d1b3962d25ac5904ad99c7e39024f387f69e06 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 9 Oct 2024 13:06:02 -0400 Subject: [PATCH 149/166] Overwrite original version with refresh --- .gitignore | 4 +- .gitmodules | 8 +- Cargo.toml | 51 +- {renu/asmasm => asmasm}/Cargo.toml | 0 {renu/asmasm => asmasm}/src/main.rs | 0 renu/clippy.toml => clippy.toml | 0 {renu/compare => comparison}/.gitignore | 0 comparison/Cargo.toml | 25 +- 
comparison/README.md | 207 +- comparison/benches/bench.rs | 105 -- .../benches/benchmark.rs | 0 {renu/compare => comparison}/benchmark.sh | 0 comparison/build.rs | 3 - {renu/compare => comparison}/generate-graph.R | 0 {renu/compare => comparison}/prepare-data.jq | 0 .../results/xxhash3_64-streaming-aarch64.svg | 0 .../results/xxhash3_64-streaming-x86_64.svg | 0 .../results/xxhash3_64-tiny_data-aarch64.svg | 0 .../results/xxhash3_64-tiny_data-x86_64.svg | 0 .../results/xxhash64-streaming-aarch64.svg | 0 .../results/xxhash64-streaming-x86_64.svg | 0 .../results/xxhash64-tiny_data-aarch64.svg | 0 .../results/xxhash64-tiny_data-x86_64.svg | 0 comparison/src/c_xxhash.rs | 52 - comparison/src/lib.rs | 416 +++- {renu/compare => comparison}/svgo.config.js | 0 comparison/xxHash | 1 - compatibility-tests/digest_0_10/Cargo.toml | 9 - compatibility-tests/digest_0_10/src/lib.rs | 142 -- compatibility-tests/digest_0_8/Cargo.toml | 9 - compatibility-tests/digest_0_8/src/lib.rs | 130 -- compatibility-tests/digest_0_9/Cargo.toml | 9 - compatibility-tests/digest_0_9/src/lib.rs | 142 -- renu/.gitignore | 2 - renu/Cargo.toml | 57 - renu/README.md | 16 - renu/compare/Cargo.toml | 18 - renu/compare/README.md | 202 -- renu/compare/src/lib.rs | 378 ---- renu/src/lib.rs | 143 -- src/bin/hash_file.rs | 28 - src/digest_0_10_support.rs | 92 - src/digest_0_9_support.rs | 179 -- src/digest_support.rs | 179 -- src/lib.rs | 196 +- src/sixty_four.rs | 413 ---- src/std_support.rs | 113 -- src/thirty_two.rs | 416 ---- src/xxh3.rs | 1666 ----------------- {renu/src => src}/xxhash32.rs | 0 {renu/src => src}/xxhash3_64.rs | 0 {renu/src => src}/xxhash3_64/avx2.rs | 0 {renu/src => src}/xxhash3_64/neon.rs | 0 {renu/src => src}/xxhash3_64/scalar.rs | 0 {renu/src => src}/xxhash3_64/secret.rs | 0 {renu/src => src}/xxhash3_64/sse2.rs | 0 {renu/src => src}/xxhash64.rs | 0 {renu/renu-sum => twox-hash-sum}/.gitignore | 0 {renu/renu-sum => twox-hash-sum}/Cargo.toml | 2 +- {renu/renu-sum => 
twox-hash-sum}/src/main.rs | 0 {renu/xx_hash-sys => xx_hash-sys}/.gitignore | 0 {renu/xx_hash-sys => xx_hash-sys}/Cargo.toml | 0 {renu/xx_hash-sys => xx_hash-sys}/build.rs | 0 {renu/xx_hash-sys => xx_hash-sys}/src/lib.rs | 0 {renu/xx_hash-sys => xx_hash-sys}/xxHash | 0 65 files changed, 708 insertions(+), 4705 deletions(-) rename {renu/asmasm => asmasm}/Cargo.toml (100%) rename {renu/asmasm => asmasm}/src/main.rs (100%) rename renu/clippy.toml => clippy.toml (100%) rename {renu/compare => comparison}/.gitignore (100%) delete mode 100644 comparison/benches/bench.rs rename {renu/compare => comparison}/benches/benchmark.rs (100%) rename {renu/compare => comparison}/benchmark.sh (100%) delete mode 100644 comparison/build.rs rename {renu/compare => comparison}/generate-graph.R (100%) rename {renu/compare => comparison}/prepare-data.jq (100%) rename {renu/compare => comparison}/results/xxhash3_64-streaming-aarch64.svg (100%) rename {renu/compare => comparison}/results/xxhash3_64-streaming-x86_64.svg (100%) rename {renu/compare => comparison}/results/xxhash3_64-tiny_data-aarch64.svg (100%) rename {renu/compare => comparison}/results/xxhash3_64-tiny_data-x86_64.svg (100%) rename {renu/compare => comparison}/results/xxhash64-streaming-aarch64.svg (100%) rename {renu/compare => comparison}/results/xxhash64-streaming-x86_64.svg (100%) rename {renu/compare => comparison}/results/xxhash64-tiny_data-aarch64.svg (100%) rename {renu/compare => comparison}/results/xxhash64-tiny_data-x86_64.svg (100%) delete mode 100644 comparison/src/c_xxhash.rs rename {renu/compare => comparison}/svgo.config.js (100%) delete mode 160000 comparison/xxHash delete mode 100644 compatibility-tests/digest_0_10/Cargo.toml delete mode 100644 compatibility-tests/digest_0_10/src/lib.rs delete mode 100644 compatibility-tests/digest_0_8/Cargo.toml delete mode 100644 compatibility-tests/digest_0_8/src/lib.rs delete mode 100644 compatibility-tests/digest_0_9/Cargo.toml delete mode 100644 
compatibility-tests/digest_0_9/src/lib.rs delete mode 100644 renu/.gitignore delete mode 100644 renu/Cargo.toml delete mode 100644 renu/README.md delete mode 100644 renu/compare/Cargo.toml delete mode 100644 renu/compare/README.md delete mode 100644 renu/compare/src/lib.rs delete mode 100644 renu/src/lib.rs delete mode 100644 src/bin/hash_file.rs delete mode 100644 src/digest_0_10_support.rs delete mode 100644 src/digest_0_9_support.rs delete mode 100644 src/digest_support.rs delete mode 100644 src/sixty_four.rs delete mode 100644 src/std_support.rs delete mode 100644 src/thirty_two.rs delete mode 100644 src/xxh3.rs rename {renu/src => src}/xxhash32.rs (100%) rename {renu/src => src}/xxhash3_64.rs (100%) rename {renu/src => src}/xxhash3_64/avx2.rs (100%) rename {renu/src => src}/xxhash3_64/neon.rs (100%) rename {renu/src => src}/xxhash3_64/scalar.rs (100%) rename {renu/src => src}/xxhash3_64/secret.rs (100%) rename {renu/src => src}/xxhash3_64/sse2.rs (100%) rename {renu/src => src}/xxhash64.rs (100%) rename {renu/renu-sum => twox-hash-sum}/.gitignore (100%) rename {renu/renu-sum => twox-hash-sum}/Cargo.toml (79%) rename {renu/renu-sum => twox-hash-sum}/src/main.rs (100%) rename {renu/xx_hash-sys => xx_hash-sys}/.gitignore (100%) rename {renu/xx_hash-sys => xx_hash-sys}/Cargo.toml (100%) rename {renu/xx_hash-sys => xx_hash-sys}/build.rs (100%) rename {renu/xx_hash-sys => xx_hash-sys}/src/lib.rs (100%) rename {renu/xx_hash-sys => xx_hash-sys}/xxHash (100%) diff --git a/.gitignore b/.gitignore index a9d37c560..1b72444ae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ -target -Cargo.lock +/Cargo.lock +/target diff --git a/.gitmodules b/.gitmodules index 7ae1d298f..532b0627b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,3 @@ -[submodule "comparison/xxHash"] - path = comparison/xxHash - url = https://github.com/Cyan4973/xxHash.git - -[submodule "renu/xx_hash-sys/xxHash"] - path = renu/xx_hash-sys/xxHash +[submodule "xxHash"] + path = 
xx_hash-sys/xxHash url = https://github.com/Cyan4973/xxHash.git diff --git a/Cargo.toml b/Cargo.toml index 57937abd9..426cd074e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,8 @@ name = "twox-hash" version = "1.6.3" authors = ["Jake Goulding "] -edition = "2018" +edition = "2021" +rust-version = "1.81" description = "A Rust implementation of the XXHash and XXH3 algorithms" readme = "README.md" @@ -14,19 +15,43 @@ documentation = "https://docs.rs/twox-hash/" license = "MIT" +[workspace] +members = [ + "asmasm", + "comparison", + "twox-hash-sum", + "xx_hash-sys", +] + +[features] +default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] + +random = ["dep:rand"] + +serialize = ["dep:serde"] + +xxhash32 = [] +xxhash64 = [] +xxhash3_64 = [] + +std = ["alloc"] +alloc = [] + +[lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = [ + 'cfg(_internal_xxhash3_force_scalar)', + 'cfg(_internal_xxhash3_force_neon)', + 'cfg(_internal_xxhash3_force_sse2)', + 'cfg(_internal_xxhash3_force_avx2)', +] + [dependencies] -cfg-if = { version = ">= 0.1, < 2", default-features = false } -static_assertions = { version = "1.0", default-features = false } -rand = { version = ">= 0.3.10, < 0.9", optional = true } -serde = { version = "1.0", features = ["derive"], optional = true} -digest = { package = "digest", version = "0.8", default-features = false, optional = true } -digest_0_9 = { package = "digest", version = "0.9", default-features = false, optional = true } -digest_0_10 = { package = "digest", version = "0.10", default-features = false, optional = true } +rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } +serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } [dev-dependencies] -serde_json = "1.0" +serde_json = "1.0.117" -[features] -default = ["std"] -serialize = ["serde"] -std = ["rand"] +[package.metadata.docs.rs] +all-features = true diff --git 
a/renu/asmasm/Cargo.toml b/asmasm/Cargo.toml similarity index 100% rename from renu/asmasm/Cargo.toml rename to asmasm/Cargo.toml diff --git a/renu/asmasm/src/main.rs b/asmasm/src/main.rs similarity index 100% rename from renu/asmasm/src/main.rs rename to asmasm/src/main.rs diff --git a/renu/clippy.toml b/clippy.toml similarity index 100% rename from renu/clippy.toml rename to clippy.toml diff --git a/renu/compare/.gitignore b/comparison/.gitignore similarity index 100% rename from renu/compare/.gitignore rename to comparison/.gitignore diff --git a/comparison/Cargo.toml b/comparison/Cargo.toml index b4168647d..80cc76cb6 100644 --- a/comparison/Cargo.toml +++ b/comparison/Cargo.toml @@ -1,23 +1,18 @@ [package] -authors = ["Jake Goulding "] name = "comparison" version = "0.1.0" -edition = "2018" +edition = "2021" + +[lib] +bench = false [[bench]] -name = "bench" +name = "benchmark" harness = false [dependencies] -fnv = "1.0" -rand = "0.6.5" -twox-hash = { path = ".." } -libc = "0.2.53" -criterion = "0.2.11" -proptest = "0.9.3" - -[features] -bench = [] - -[build-dependencies] -cc = "1.0.36" +criterion = { version = "0.5.1", features = [] } +proptest = "1.5.0" +rand = "0.8.5" +twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } +xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/comparison/README.md b/comparison/README.md index 68c29ee51..efb868e2b 100644 --- a/comparison/README.md +++ b/comparison/README.md @@ -1,7 +1,202 @@ -This is just a crate for sanity checks and performance tests. Pay no -attention to the man behind the curtain. +# Overview -``` -cargo test -cargo bench -``` +Tests compare calling [the reference implementation in +C](https://xxhash.com) against equivalent functions in this crate. No +link-time optimization (LTO) is used, so the C performance numbers +have additional overhead for each function call. + +Click any graph to see it full-size. 
+ +# XXHash64 + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. + +### aarch64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 13.4 | +| C | 13.4 | + +## x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 15.7 | +| C | 15.8 | + + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + XXHash64, streaming data, on an aarch64 processor + + +### x86_64 + + + XXHash64, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 32 bytes of data. + +### aarch64 + + + XXHash64, small data, on an aarch64 processor + + +### x86_64 + + + XXHash64, small data, on an x86_64 processor + + + +# XXHash3 (64-bit) + +## Oneshot hashing + +Compares the **speed** of hashing an entire buffer of data in one +function call. Data sizes from 256 KiB to 4 MiB are tested. These +graphs are boring flat lines, so a table is used instead. + +### aarch64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 34.8 | +| C | 34.8 | +| C (scalar) | 21.0 | +| C (NEON) | 34.7 | + +### x86_64 + +| Implementation | Throughput (GiB/s) | +|----------------|--------------------| +| Rust | 58.3 | +| C | 25.0 | +| C (scalar) | 7.5 | +| C (SSE2) | 25.1 | +| C (AVX2) | 58.1 | + +## Streaming data + +Compares the **speed** of hashing a 1 MiB buffer of data split into +various chunk sizes. + +### aarch64 + + + XXHash3, 64-bit, streaming data, on an aarch64 processor + + +### x86_64 + + + XXHash3, 64-bit, streaming data, on an x86_64 processor + + +## Small amounts of data + +Compares the **time taken** to hash 0 to 230 bytes of +data. 
Representative samples are taken from similar times to avoid +cluttering the graph and wasting benchmarking time. + +### aarch64 + + + XXHash3, 64-bit, small data, on an aarch64 processor + + +### x86_64 + + + XXHash3, 64-bit, small data, on an x86_64 processor + + +# Benchmark machines + +## Overview + +| CPU | Memory | C compiler | +|-------------------|--------|--------------------| +| Apple M1 Max | 64 GiB | clang 15.0.0 | +| AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | + +Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. + +## Details + +### aarch64 + + + + + + + + + + + + + + + + +
CPUApple M1 Max
Memory64 GiB
C compilerApple clang version 15.0.0 (clang-1500.3.9.4)
+ +### x86_64 + + + + + + + + + + + + + + + + +
CPUAMD Ryzen 9 3950X 16-Core Processor, 3501 Mhz, 16 Core(s), 32 Logical Processor(s)
Memory32 GiB (3600 MT/s)
C compilerMicrosoft (R) C/C++ Optimizing Compiler Version 19.41.34120 for x86
diff --git a/comparison/benches/bench.rs b/comparison/benches/bench.rs deleted file mode 100644 index 5deee9790..000000000 --- a/comparison/benches/bench.rs +++ /dev/null @@ -1,105 +0,0 @@ -#![deny(rust_2018_idioms)] - -use comparison::{ - c_xxhash::{hash32, hash64, xxh3_hash128, xxh3_hash64}, - hash_once, -}; -use criterion::{ - criterion_group, criterion_main, AxisScale, Bencher, Criterion, ParameterizedBenchmark, - PlotConfiguration, Throughput, -}; -use fnv::FnvHasher; -use rand::{distributions::Standard, rngs::StdRng, Rng, SeedableRng}; -use std::{collections::hash_map::DefaultHasher, env, fmt, hash::Hasher, ops}; -use twox_hash::{xxh3, XxHash32, XxHash64}; - -const INPUT_SIZES: &[usize] = &[0, 1, 4, 16, 23, 32, 47, 128, 256, 512, 1024, 1024 * 1024]; -const OFFSETS: &[usize] = &[0, 1]; - -fn bench_hasher(hasher: impl Fn() -> H) -> impl FnMut(&mut Bencher, &Data) -where - H: Hasher, -{ - move |b, data| b.iter(|| hash_once(hasher(), data)) -} - -fn bench_c(hasher: impl Fn(&[u8]) -> R) -> impl FnMut(&mut Bencher, &Data) { - move |b, data| b.iter(|| hasher(data)) -} - -fn bench_everything(c: &mut Criterion) { - let seed: u64 = env::var("RANDOM_SEED") - .ok() - .and_then(|v| v.parse().ok()) - .unwrap_or_else(rand::random); - eprintln!("Using RANDOM_SEED={}", seed); - - let data: Vec<_> = OFFSETS - .iter() - .flat_map(|&o| { - INPUT_SIZES - .iter() - .map(|&l| Data::new(l, seed, o)) - .collect::>() - }) - .collect(); - - let plot_config = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic); - - let bench = - ParameterizedBenchmark::new("XxHash64", bench_hasher(|| XxHash64::with_seed(0)), data) - .with_function("XxHash32", bench_hasher(|| XxHash32::with_seed(0))) - .with_function("XxHash64 (C)", bench_c(|d| hash64(d, 0))) - .with_function("XxHash32 (C)", bench_c(|d| hash32(d, 0))) - .with_function("xxh3::Hash64", bench_hasher(|| xxh3::Hash64::with_seed(0))) - .with_function( - "xxh3::Hash128", - bench_hasher(|| xxh3::Hash128::with_seed(0)), - ) - 
.with_function("xxh3::hash64", bench_c(|d| xxh3::hash64(d))) - .with_function("xxh3::hash128", bench_c(|d| xxh3::hash128(d))) - .with_function("xxh3::hash64 (C)", bench_c(|d| xxh3_hash64(d, 0))) - .with_function("xxh3::hash128 (C)", bench_c(|d| xxh3_hash128(d, 0))) - .with_function("DefaultHasher", bench_hasher(|| DefaultHasher::new())) - .with_function("FnvHasher", bench_hasher(|| FnvHasher::default())) - .throughput(|data| Throughput::Elements(data.0.len() as u32)) - .plot_config(plot_config); - - c.bench("All Hashers", bench); -} - -struct Data(Vec, usize); - -impl Data { - fn new(len: usize, seed: u64, offset: usize) -> Self { - let mut rng = StdRng::seed_from_u64(seed); - let data = rng.sample_iter(&Standard).take(len + offset).collect(); - Self(data, offset) - } - - #[inline] - fn len(&self) -> usize { - self.0.len() - self.offset() - } - - #[inline] - fn offset(&self) -> usize { - self.1 - } -} - -impl ops::Deref for Data { - type Target = [u8]; - fn deref(&self) -> &[u8] { - &self.0[self.offset()..] 
- } -} - -impl fmt::Debug for Data { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{} bytes/offset {}", self.len(), self.offset()) - } -} - -criterion_group!(benches, bench_everything); -criterion_main!(benches); diff --git a/renu/compare/benches/benchmark.rs b/comparison/benches/benchmark.rs similarity index 100% rename from renu/compare/benches/benchmark.rs rename to comparison/benches/benchmark.rs diff --git a/renu/compare/benchmark.sh b/comparison/benchmark.sh similarity index 100% rename from renu/compare/benchmark.sh rename to comparison/benchmark.sh diff --git a/comparison/build.rs b/comparison/build.rs deleted file mode 100644 index b8c1ef7a5..000000000 --- a/comparison/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - cc::Build::new().file("xxHash/xxhash.c").compile("xxhash"); -} diff --git a/renu/compare/generate-graph.R b/comparison/generate-graph.R similarity index 100% rename from renu/compare/generate-graph.R rename to comparison/generate-graph.R diff --git a/renu/compare/prepare-data.jq b/comparison/prepare-data.jq similarity index 100% rename from renu/compare/prepare-data.jq rename to comparison/prepare-data.jq diff --git a/renu/compare/results/xxhash3_64-streaming-aarch64.svg b/comparison/results/xxhash3_64-streaming-aarch64.svg similarity index 100% rename from renu/compare/results/xxhash3_64-streaming-aarch64.svg rename to comparison/results/xxhash3_64-streaming-aarch64.svg diff --git a/renu/compare/results/xxhash3_64-streaming-x86_64.svg b/comparison/results/xxhash3_64-streaming-x86_64.svg similarity index 100% rename from renu/compare/results/xxhash3_64-streaming-x86_64.svg rename to comparison/results/xxhash3_64-streaming-x86_64.svg diff --git a/renu/compare/results/xxhash3_64-tiny_data-aarch64.svg b/comparison/results/xxhash3_64-tiny_data-aarch64.svg similarity index 100% rename from renu/compare/results/xxhash3_64-tiny_data-aarch64.svg rename to comparison/results/xxhash3_64-tiny_data-aarch64.svg diff --git 
a/renu/compare/results/xxhash3_64-tiny_data-x86_64.svg b/comparison/results/xxhash3_64-tiny_data-x86_64.svg similarity index 100% rename from renu/compare/results/xxhash3_64-tiny_data-x86_64.svg rename to comparison/results/xxhash3_64-tiny_data-x86_64.svg diff --git a/renu/compare/results/xxhash64-streaming-aarch64.svg b/comparison/results/xxhash64-streaming-aarch64.svg similarity index 100% rename from renu/compare/results/xxhash64-streaming-aarch64.svg rename to comparison/results/xxhash64-streaming-aarch64.svg diff --git a/renu/compare/results/xxhash64-streaming-x86_64.svg b/comparison/results/xxhash64-streaming-x86_64.svg similarity index 100% rename from renu/compare/results/xxhash64-streaming-x86_64.svg rename to comparison/results/xxhash64-streaming-x86_64.svg diff --git a/renu/compare/results/xxhash64-tiny_data-aarch64.svg b/comparison/results/xxhash64-tiny_data-aarch64.svg similarity index 100% rename from renu/compare/results/xxhash64-tiny_data-aarch64.svg rename to comparison/results/xxhash64-tiny_data-aarch64.svg diff --git a/renu/compare/results/xxhash64-tiny_data-x86_64.svg b/comparison/results/xxhash64-tiny_data-x86_64.svg similarity index 100% rename from renu/compare/results/xxhash64-tiny_data-x86_64.svg rename to comparison/results/xxhash64-tiny_data-x86_64.svg diff --git a/comparison/src/c_xxhash.rs b/comparison/src/c_xxhash.rs deleted file mode 100644 index 0fa9284c7..000000000 --- a/comparison/src/c_xxhash.rs +++ /dev/null @@ -1,52 +0,0 @@ -mod ffi { - use libc::{c_void, size_t}; - - #[allow(non_camel_case_types)] - type XXH32_hash_t = u32; - - #[allow(non_camel_case_types)] - type XXH64_hash_t = u64; - - #[allow(non_camel_case_types)] - #[repr(C)] - #[derive(Debug, Copy, Clone)] - pub struct XXH128_hash_t { - pub low64: XXH64_hash_t, - pub high64: XXH64_hash_t, - } - - extern "C" { - pub fn XXH32(input: *const c_void, length: size_t, seed: u32) -> XXH32_hash_t; - pub fn XXH64(input: *const c_void, length: size_t, seed: u64) -> XXH64_hash_t; - 
pub fn XXH3_64bits_withSeed( - data: *const ::std::os::raw::c_void, - len: usize, - seed: ::std::os::raw::c_ulonglong, - ) -> XXH64_hash_t; - pub fn XXH3_128bits_withSeed( - data: *const ::std::os::raw::c_void, - len: usize, - seed: ::std::os::raw::c_ulonglong, - ) -> XXH128_hash_t; - } -} - -pub fn hash32(data: &[u8], seed: u32) -> u32 { - unsafe { ffi::XXH32(data.as_ptr() as *const libc::c_void, data.len(), seed) } -} - -pub fn hash64(data: &[u8], seed: u64) -> u64 { - unsafe { ffi::XXH64(data.as_ptr() as *const libc::c_void, data.len(), seed) } -} - -pub fn xxh3_hash64(data: &[u8], seed: u64) -> u64 { - unsafe { ffi::XXH3_64bits_withSeed(data.as_ptr() as *const libc::c_void, data.len(), seed) } -} - -pub fn xxh3_hash128(data: &[u8], seed: u64) -> u128 { - let hash = unsafe { - ffi::XXH3_128bits_withSeed(data.as_ptr() as *const libc::c_void, data.len(), seed) - }; - - u128::from(hash.low64) + (u128::from(hash.high64) << 64) -} diff --git a/comparison/src/lib.rs b/comparison/src/lib.rs index 4a0022ba3..7701a8314 100644 --- a/comparison/src/lib.rs +++ b/comparison/src/lib.rs @@ -1,104 +1,378 @@ -#![deny(rust_2018_idioms)] +#![cfg(test)] -use proptest::{collection::vec as propvec, prelude::*}; -use std::hash::Hasher; -#[cfg(test)] -use twox_hash::{XxHash32, XxHash64}; +use proptest::{num, prelude::*}; -pub mod c_xxhash; +use twox_hash as rust; +use xx_hash_sys as c; -pub fn hash_once(mut hasher: impl Hasher, data: &[u8]) -> u64 { - hasher.write(&data); - hasher.finish() -} +mod xxhash32 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher as _; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u32, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed: u32, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + + #[test] + fn oneshot(seed: u32, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_one_chunk(seed: u32, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } -#[cfg(test)] -fn hash_by_chunks(mut hasher: impl Hasher, mut data: &[u8], chunk_sizes: &[usize]) -> u64 { - for &chunk_size in chunk_sizes { - let (this_chunk, remaining) = data.split_at(chunk_size); - hasher.write(this_chunk); - data = remaining; + #[test] + fn streaming_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } } - hasher.finish() -} + fn oneshot_same_as_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash32::oneshot(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish_32() + }; -prop_compose! 
{ - fn data_and_offset - () - (data in any::>()) - (index in 0..=data.len(), data in Just(data)) - -> (Vec, usize) - { - (data, index) + prop_assert_eq!(oneshot, one_chunk); + Ok(()) + } + + fn oneshot_same_as_many_chunks_impl( + seed: u32, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash32::oneshot(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash32::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish_32() + }; + + prop_assert_eq!(oneshot, many_chunks); + Ok(()) + } + + fn oneshot_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let native = c::XxHash32::oneshot(seed, data); + let rust = rust::XxHash32::oneshot(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) } -} -prop_compose! { - fn data_and_chunk_sizes - () - (sizes in propvec(1..=256usize, 0..=100)) - (data in propvec(any::(), sizes.iter().sum::()), sizes in Just(sizes)) - -> (Vec, Vec) - { - (data, sizes) + fn streaming_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash32::with_seed(seed); + hasher.write(data); + hasher.finish_32() + }; + + prop_assert_eq!(native, rust); + Ok(()) } } -proptest! { - #![proptest_config(ProptestConfig::with_cases(100_000))] +mod xxhash64 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher as _; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } + + #[test] + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; + } + + #[test] + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } - #[test] - fn same_results_as_c_for_64_bit(seed: u64, data: Vec) { - let our_result = hash_once(XxHash64::with_seed(seed), &data); - let their_result = c_xxhash::hash64(&data, seed); + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } - prop_assert_eq!(our_result, their_result); + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } } - #[test] - fn same_results_as_c_with_offset_for_64_bit(seed: u64, (data, offset) in data_and_offset()) { - let data = &data[offset..]; - let our_result = hash_once(XxHash64::with_seed(seed), data); - let their_result = c_xxhash::hash64(data, seed); + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash64::oneshot(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; - prop_assert_eq!(our_result, their_result); + prop_assert_eq!(oneshot, one_chunk); + Ok(()) } - #[test] - fn same_results_as_c_for_32_bit(seed: u32, data: Vec) { - let our_result = hash_once(XxHash32::with_seed(seed), &data); - let their_result = c_xxhash::hash32(&data, seed); + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + 
chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash64::oneshot(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; - prop_assert_eq!(our_result, their_result as u64); + prop_assert_eq!(oneshot, many_chunks); + Ok(()) } - #[test] - fn same_results_as_c_with_offset_for_32_bit(seed: u32, (data, offset) in data_and_offset()) { - let data = &data[offset..]; - let our_result = hash_once(XxHash32::with_seed(seed), data); - let their_result = c_xxhash::hash32(data, seed); + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash64::oneshot(seed, data); + let rust = rust::XxHash64::oneshot(seed, data); - prop_assert_eq!(our_result, their_result as u64); + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) } } -proptest! { - #![proptest_config(ProptestConfig::with_cases(1_000))] +mod xxhash3_64 { + use proptest::{prelude::*, test_runner::TestCaseResult}; + use std::hash::Hasher as _; + use twox_hash::xxhash3_64::SECRET_MINIMUM_LENGTH; + + use super::*; + + proptest! 
{ + #[test] + fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { + oneshot_same_as_one_chunk_impl(seed, &data)?; + } + + #[test] + fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { + oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; + } - #[test] - fn same_results_with_many_chunks_as_one_for_64_bit(seed: u64, (data, chunk_sizes) in data_and_chunk_sizes()) { - let chunked_result = hash_by_chunks(XxHash64::with_seed(seed), &data, &chunk_sizes); - let monolithic_result = hash_once(XxHash64::with_seed(seed), &data); + #[test] + fn oneshot(seed: u64, data: Vec) { + oneshot_impl(seed, &data)?; + } - prop_assert_eq!(chunked_result, monolithic_result); + #[test] + fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + oneshot_impl(seed, &data[offset..])?; + } + + #[test] + fn oneshot_with_a_secret(secret in secret(), data: Vec) { + oneshot_with_secret_impl(&secret, &data)?; + } + + #[test] + fn oneshot_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { + oneshot_with_seed_and_secret_impl(seed, &secret, &data)?; + } + + #[test] + fn streaming_one_chunk(seed: u64, data: Vec) { + streaming_one_chunk_impl(seed, &data)?; + } + + #[test] + fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { + streaming_one_chunk_impl(seed, &data[offset..])?; + } + + #[test] + fn streaming_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { + streaming_with_seed_and_secret_impl(seed, &secret, &data)?; + } + } + + fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); + let one_chunk = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(oneshot, 
one_chunk); + Ok(()) } - #[test] - fn same_results_with_many_chunks_as_one_for_32_bit(seed: u32, (data, chunk_sizes) in data_and_chunk_sizes()) { - let chunked_result = hash_by_chunks(XxHash32::with_seed(seed), &data, &chunk_sizes); - let monolithic_result = hash_once(XxHash32::with_seed(seed), &data); + fn oneshot_same_as_many_chunks_impl( + seed: u64, + data: &[u8], + chunks: &[Vec], + ) -> TestCaseResult { + let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); + let many_chunks = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + for chunk in chunks { + hasher.write(chunk); + } + hasher.finish() + }; - prop_assert_eq!(chunked_result, monolithic_result); + prop_assert_eq!(oneshot, many_chunks); + Ok(()) } + + fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot_with_seed(seed, data); + let rust = rust::XxHash3_64::oneshot_with_seed(seed, data); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot_with_secret(secret, data); + let rust = rust::XxHash3_64::oneshot_with_secret(secret, data).unwrap(); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn oneshot_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { + let native = c::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data); + let rust = rust::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data).unwrap(); + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_64::with_seed(seed); + hasher.write(data); + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn streaming_with_seed_and_secret_impl( + seed: u64, + secret: &[u8], + data: &[u8], + 
) -> TestCaseResult { + let native = { + let mut hasher = c::XxHash3_64::with_seed_and_secret(seed, secret); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + let rust = { + let mut hasher = rust::XxHash3_64::with_seed_and_secret(seed, secret).unwrap(); + for chunk in data.chunks(256) { + hasher.write(chunk); + } + hasher.finish() + }; + + prop_assert_eq!(native, rust); + Ok(()) + } + + fn secret() -> impl Strategy> { + prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024) + } +} + +fn vec_and_index() -> impl Strategy, usize)> { + prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| { + let len = vec.len(); + (Just(vec), 0..len) + }) +} + +fn data_and_chunks() -> impl Strategy, Vec>)> { + prop::collection::vec(prop::collection::vec(num::u8::ANY, 0..100), 0..100).prop_map(|vs| { + let data = vs.iter().flatten().copied().collect(); + (data, vs) + }) } diff --git a/renu/compare/svgo.config.js b/comparison/svgo.config.js similarity index 100% rename from renu/compare/svgo.config.js rename to comparison/svgo.config.js diff --git a/comparison/xxHash b/comparison/xxHash deleted file mode 160000 index d7f47bc3b..000000000 --- a/comparison/xxHash +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d7f47bc3bf1ca767b82eda6ada557ba02dc36e83 diff --git a/compatibility-tests/digest_0_10/Cargo.toml b/compatibility-tests/digest_0_10/Cargo.toml deleted file mode 100644 index 65eaadfd4..000000000 --- a/compatibility-tests/digest_0_10/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "digest_0_10" -version = "0.1.0" -authors = ["Jake Goulding "] -edition = "2018" - -[dependencies] -digest = "0.10" -twox-hash = { path = "../..", features = ["digest_0_10"] } diff --git a/compatibility-tests/digest_0_10/src/lib.rs b/compatibility-tests/digest_0_10/src/lib.rs deleted file mode 100644 index 66f336519..000000000 --- a/compatibility-tests/digest_0_10/src/lib.rs +++ /dev/null @@ -1,142 +0,0 @@ -#![cfg(test)] - -use 
digest::Digest; -use twox_hash::{XxHash32, XxHash64}; - -#[test] -fn it_implements_digest() { - fn implements_digest() {} - - implements_digest::(); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_64() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash64::new(); - for byte in bytes.chunks(1) { - byte_by_byte.update(byte); - } - - let mut one_chunk = XxHash64::new(); - one_chunk.update(&bytes); - - assert_eq!(byte_by_byte.finalize(), one_chunk.finalize()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.update(&[]); - assert_eq!( - hasher.finalize()[..], - 0xef46_db37_51d8_e999_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.update(&[42]); - assert_eq!( - hasher.finalize()[..], - 0x0a9e_dece_beb0_3ae4_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_64() { - assert_eq!( - XxHash64::digest(b"Hello, world!\0")[..], - 0x7b06_c531_ea43_e89f_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!( - XxHash64::digest(&bytes)[..], - 0x6ac1_e580_3216_6597_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_64() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.update(&[]); - assert_eq!( - hasher.finalize()[..], - 0x4b6a_04fc_df7a_4672_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.update(&bytes); - assert_eq!( - hasher.finalize()[..], - 0x567e_355e_0682_e1f1_u64.to_be_bytes() - ); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_32() { - let bytes: Vec<_> = 
(0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash32::new(); - for byte in bytes.chunks(1) { - byte_by_byte.update(byte); - } - - let mut one_chunk = XxHash32::new(); - one_chunk.update(&bytes); - - assert_eq!(byte_by_byte.finalize(), one_chunk.finalize()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.update(&[]); - assert_eq!(hasher.finalize()[..], 0x02cc_5d05_u32.to_be_bytes()); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.update(&[42]); - assert_eq!(hasher.finalize()[..], 0xe0fe_705f_u32.to_be_bytes()); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_32() { - assert_eq!( - XxHash32::digest(b"Hello, world!\0")[..], - 0x9e5e_7e93_u32.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!(XxHash32::digest(&bytes)[..], 0x7f89_ba44_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_32() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.update(&[]); - assert_eq!(hasher.finalize()[..], 0xd6bf_8459_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.update(&bytes); - assert_eq!(hasher.finalize()[..], 0x6d2f_6c17_u32.to_be_bytes()); -} diff --git a/compatibility-tests/digest_0_8/Cargo.toml b/compatibility-tests/digest_0_8/Cargo.toml deleted file mode 100644 index 3b5d3d6ae..000000000 --- a/compatibility-tests/digest_0_8/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "digest_0_8" -version = "0.1.0" -authors = ["Jake Goulding "] -edition = "2018" - -[dependencies] -digest = "0.8" -twox-hash = { path = "../..", features = ["digest"] } diff --git a/compatibility-tests/digest_0_8/src/lib.rs 
b/compatibility-tests/digest_0_8/src/lib.rs deleted file mode 100644 index 59585a049..000000000 --- a/compatibility-tests/digest_0_8/src/lib.rs +++ /dev/null @@ -1,130 +0,0 @@ -#![cfg(test)] - -use digest::Digest; -use twox_hash::{XxHash32, XxHash64}; - -#[test] -fn it_implements_digest() { - fn implements_digest() {} - - implements_digest::(); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_64() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash64::new(); - for byte in bytes.chunks(1) { - byte_by_byte.input(byte); - } - - let mut one_chunk = XxHash64::new(); - one_chunk.input(&bytes); - - assert_eq!(byte_by_byte.result(), one_chunk.result()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.input(&[]); - assert_eq!(hasher.result()[..], 0xef46_db37_51d8_e999_u64.to_be_bytes()); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.input(&[42]); - assert_eq!(hasher.result()[..], 0x0a9e_dece_beb0_3ae4_u64.to_be_bytes()); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_64() { - assert_eq!( - XxHash64::digest(b"Hello, world!\0")[..], - 0x7b06_c531_ea43_e89f_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!( - XxHash64::digest(&bytes)[..], - 0x6ac1_e580_3216_6597_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_64() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.input(&[]); - assert_eq!(hasher.result()[..], 0x4b6a_04fc_df7a_4672_u64.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.input(&bytes); - assert_eq!(hasher.result()[..], 
0x567e_355e_0682_e1f1_u64.to_be_bytes()); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_32() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash32::new(); - for byte in bytes.chunks(1) { - byte_by_byte.input(byte); - } - - let mut one_chunk = XxHash32::new(); - one_chunk.input(&bytes); - - assert_eq!(byte_by_byte.result(), one_chunk.result()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.input(&[]); - assert_eq!(hasher.result()[..], 0x02cc_5d05_u32.to_be_bytes()); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.input(&[42]); - assert_eq!(hasher.result()[..], 0xe0fe_705f_u32.to_be_bytes()); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_32() { - assert_eq!( - XxHash32::digest(b"Hello, world!\0")[..], - 0x9e5e_7e93_u32.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!(XxHash32::digest(&bytes)[..], 0x7f89_ba44_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_32() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.input(&[]); - assert_eq!(hasher.result()[..], 0xd6bf_8459_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.input(&bytes); - assert_eq!(hasher.result()[..], 0x6d2f_6c17_u32.to_be_bytes()); -} diff --git a/compatibility-tests/digest_0_9/Cargo.toml b/compatibility-tests/digest_0_9/Cargo.toml deleted file mode 100644 index 7d072eaf3..000000000 --- a/compatibility-tests/digest_0_9/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "digest_0_9" -version = "0.1.0" -authors = ["Jake Goulding "] -edition = "2018" - -[dependencies] 
-digest = "0.9" -twox-hash = { path = "../..", features = ["digest_0_9"] } diff --git a/compatibility-tests/digest_0_9/src/lib.rs b/compatibility-tests/digest_0_9/src/lib.rs deleted file mode 100644 index 66f336519..000000000 --- a/compatibility-tests/digest_0_9/src/lib.rs +++ /dev/null @@ -1,142 +0,0 @@ -#![cfg(test)] - -use digest::Digest; -use twox_hash::{XxHash32, XxHash64}; - -#[test] -fn it_implements_digest() { - fn implements_digest() {} - - implements_digest::(); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_64() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash64::new(); - for byte in bytes.chunks(1) { - byte_by_byte.update(byte); - } - - let mut one_chunk = XxHash64::new(); - one_chunk.update(&bytes); - - assert_eq!(byte_by_byte.finalize(), one_chunk.finalize()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.update(&[]); - assert_eq!( - hasher.finalize()[..], - 0xef46_db37_51d8_e999_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_64() { - let mut hasher = XxHash64::new(); - hasher.update(&[42]); - assert_eq!( - hasher.finalize()[..], - 0x0a9e_dece_beb0_3ae4_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_64() { - assert_eq!( - XxHash64::digest(b"Hello, world!\0")[..], - 0x7b06_c531_ea43_e89f_u64.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_64() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!( - XxHash64::digest(&bytes)[..], - 0x6ac1_e580_3216_6597_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_64() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.update(&[]); - assert_eq!( - hasher.finalize()[..], - 0x4b6a_04fc_df7a_4672_u64.to_be_bytes() - ); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_64() 
{ - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.update(&bytes); - assert_eq!( - hasher.finalize()[..], - 0x567e_355e_0682_e1f1_u64.to_be_bytes() - ); -} - -#[test] -fn ingesting_byte_by_byte_is_equivalent_to_large_chunks_32() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash32::new(); - for byte in bytes.chunks(1) { - byte_by_byte.update(byte); - } - - let mut one_chunk = XxHash32::new(); - one_chunk.update(&bytes); - - assert_eq!(byte_by_byte.finalize(), one_chunk.finalize()); -} - -#[test] -fn hash_of_nothing_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.update(&[]); - assert_eq!(hasher.finalize()[..], 0x02cc_5d05_u32.to_be_bytes()); -} - -#[test] -fn hash_of_single_byte_matches_c_implementation_32() { - let mut hasher = XxHash32::new(); - hasher.update(&[42]); - assert_eq!(hasher.finalize()[..], 0xe0fe_705f_u32.to_be_bytes()); -} - -#[test] -fn hash_of_multiple_bytes_matches_c_implementation_32() { - assert_eq!( - XxHash32::digest(b"Hello, world!\0")[..], - 0x9e5e_7e93_u32.to_be_bytes() - ); -} - -#[test] -fn hash_of_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - assert_eq!(XxHash32::digest(&bytes)[..], 0x7f89_ba44_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_matches_c_implementation_32() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.update(&[]); - assert_eq!(hasher.finalize()[..], 0xd6bf_8459_u32.to_be_bytes()); -} - -#[test] -fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation_32() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.update(&bytes); - assert_eq!(hasher.finalize()[..], 0x6d2f_6c17_u32.to_be_bytes()); -} diff --git a/renu/.gitignore b/renu/.gitignore deleted file mode 100644 index 1b72444ae..000000000 --- a/renu/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ 
-/Cargo.lock -/target diff --git a/renu/Cargo.toml b/renu/Cargo.toml deleted file mode 100644 index b0a4ce722..000000000 --- a/renu/Cargo.toml +++ /dev/null @@ -1,57 +0,0 @@ -[package] -name = "twox-hash" -version = "1.6.3" -authors = ["Jake Goulding "] -edition = "2021" -rust-version = "1.81" - -description = "A Rust implementation of the XXHash and XXH3 algorithms" -readme = "README.md" -keywords = ["hash", "hasher", "xxhash", "xxh3"] -categories = ["algorithms"] - -repository = "https://github.com/shepmaster/twox-hash" -documentation = "https://docs.rs/twox-hash/" - -license = "MIT" - -[workspace] -members = [ - "asmasm", - "compare", - "renu-sum", - "xx_hash-sys", -] - -[features] -default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] - -random = ["dep:rand"] - -serialize = ["dep:serde"] - -xxhash32 = [] -xxhash64 = [] -xxhash3_64 = [] - -std = ["alloc"] -alloc = [] - -[lints.rust.unexpected_cfgs] -level = "warn" -check-cfg = [ - 'cfg(_internal_xxhash3_force_scalar)', - 'cfg(_internal_xxhash3_force_neon)', - 'cfg(_internal_xxhash3_force_sse2)', - 'cfg(_internal_xxhash3_force_avx2)', -] - -[dependencies] -rand = { version = "0.8.0", optional = true, default-features = false, features = ["std", "std_rng"] } -serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } - -[dev-dependencies] -serde_json = "1.0.117" - -[package.metadata.docs.rs] -all-features = true diff --git a/renu/README.md b/renu/README.md deleted file mode 100644 index ab987c651..000000000 --- a/renu/README.md +++ /dev/null @@ -1,16 +0,0 @@ -cargo test # unit tests -cargo test -p comparison # proptests -cargo miri test # unsafe -cargo miri test --target s390x-unknown-linux-gnu # big-endian - -cargo -Z profile-rustflags --config 'profile.test.package.xx-renu.rustflags=["--cfg=_internal_xxhash3_force_scalar"]' test - -minimal versions -no-features -all-features - -features for 32 / 64 / xx3 - - -rand feature instead of `std`? 
-remove digest as we aren't crypto? diff --git a/renu/compare/Cargo.toml b/renu/compare/Cargo.toml deleted file mode 100644 index bc6b0ee81..000000000 --- a/renu/compare/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -name = "compare" -version = "0.1.0" -edition = "2021" - -[lib] -bench = false - -[[bench]] -name = "benchmark" -harness = false - -[dependencies] -criterion = { version = "0.5.1", features = [] } -proptest = "1.5.0" -rand = "0.8.5" -twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] } -xx_hash-sys = { path = "../xx_hash-sys" } diff --git a/renu/compare/README.md b/renu/compare/README.md deleted file mode 100644 index efb868e2b..000000000 --- a/renu/compare/README.md +++ /dev/null @@ -1,202 +0,0 @@ -# Overview - -Tests compare calling [the reference implementation in -C](https://xxhash.com) against equivalent functions in this crate. No -link-time optimization (LTO) is used, so the C performance numbers -have additional overhead for each function call. - -Click any graph to see it full-size. - -# XXHash64 - -## Oneshot hashing - -Compares the **speed** of hashing an entire buffer of data in one -function call. Data sizes from 256 KiB to 4 MiB are tested. These -graphs are boring flat lines, so a table is used instead. - -### aarch64 - -| Implementation | Throughput (GiB/s) | -|----------------|--------------------| -| Rust | 13.4 | -| C | 13.4 | - -## x86_64 - -| Implementation | Throughput (GiB/s) | -|----------------|--------------------| -| Rust | 15.7 | -| C | 15.8 | - - -## Streaming data - -Compares the **speed** of hashing a 1 MiB buffer of data split into -various chunk sizes. - -### aarch64 - - - XXHash64, streaming data, on an aarch64 processor - - -### x86_64 - - - XXHash64, streaming data, on an x86_64 processor - - -## Small amounts of data - -Compares the **time taken** to hash 0 to 32 bytes of data. 
- -### aarch64 - - - XXHash64, small data, on an aarch64 processor - - -### x86_64 - - - XXHash64, small data, on an x86_64 processor - - - -# XXHash3 (64-bit) - -## Oneshot hashing - -Compares the **speed** of hashing an entire buffer of data in one -function call. Data sizes from 256 KiB to 4 MiB are tested. These -graphs are boring flat lines, so a table is used instead. - -### aarch64 - -| Implementation | Throughput (GiB/s) | -|----------------|--------------------| -| Rust | 34.8 | -| C | 34.8 | -| C (scalar) | 21.0 | -| C (NEON) | 34.7 | - -### x86_64 - -| Implementation | Throughput (GiB/s) | -|----------------|--------------------| -| Rust | 58.3 | -| C | 25.0 | -| C (scalar) | 7.5 | -| C (SSE2) | 25.1 | -| C (AVX2) | 58.1 | - -## Streaming data - -Compares the **speed** of hashing a 1 MiB buffer of data split into -various chunk sizes. - -### aarch64 - - - XXHash3, 64-bit, streaming data, on an aarch64 processor - - -### x86_64 - - - XXHash3, 64-bit, streaming data, on an x86_64 processor - - -## Small amounts of data - -Compares the **time taken** to hash 0 to 230 bytes of -data. Representative samples are taken from similar times to avoid -cluttering the graph and wasting benchmarking time. - -### aarch64 - - - XXHash3, 64-bit, small data, on an aarch64 processor - - -### x86_64 - - - XXHash3, 64-bit, small data, on an x86_64 processor - - -# Benchmark machines - -## Overview - -| CPU | Memory | C compiler | -|-------------------|--------|--------------------| -| Apple M1 Max | 64 GiB | clang 15.0.0 | -| AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | - -Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. - -## Details - -### aarch64 - - - - - - - - - - - - - - - - -
CPUApple M1 Max
Memory64 GiB
C compilerApple clang version 15.0.0 (clang-1500.3.9.4)
- -### x86_64 - - - - - - - - - - - - - - - - -
CPUAMD Ryzen 9 3950X 16-Core Processor, 3501 Mhz, 16 Core(s), 32 Logical Processor(s)
Memory32 GiB (3600 MT/s)
C compilerMicrosoft (R) C/C++ Optimizing Compiler Version 19.41.34120 for x86
diff --git a/renu/compare/src/lib.rs b/renu/compare/src/lib.rs deleted file mode 100644 index 7701a8314..000000000 --- a/renu/compare/src/lib.rs +++ /dev/null @@ -1,378 +0,0 @@ -#![cfg(test)] - -use proptest::{num, prelude::*}; - -use twox_hash as rust; -use xx_hash_sys as c; - -mod xxhash32 { - use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher as _; - - use super::*; - - proptest! { - #[test] - fn oneshot_same_as_one_chunk(seed: u32, data: Vec) { - oneshot_same_as_one_chunk_impl(seed, &data)?; - } - - #[test] - fn oneshot_same_as_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { - oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - } - - #[test] - fn oneshot_same_as_many_chunks(seed: u32, (data, chunks) in data_and_chunks()) { - oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; - } - - #[test] - fn oneshot(seed: u32, data: Vec) { - oneshot_impl(seed, &data)?; - } - - #[test] - fn oneshot_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { - oneshot_impl(seed, &data[offset..])?; - } - - #[test] - fn streaming_one_chunk(seed: u32, data: Vec) { - streaming_one_chunk_impl(seed, &data)?; - } - - #[test] - fn streaming_one_chunk_with_an_offset(seed: u32, (data, offset) in vec_and_index()) { - streaming_one_chunk_impl(seed, &data[offset..])?; - } - } - - fn oneshot_same_as_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { - let oneshot = rust::XxHash32::oneshot(seed, data); - let one_chunk = { - let mut hasher = rust::XxHash32::with_seed(seed); - hasher.write(data); - hasher.finish_32() - }; - - prop_assert_eq!(oneshot, one_chunk); - Ok(()) - } - - fn oneshot_same_as_many_chunks_impl( - seed: u32, - data: &[u8], - chunks: &[Vec], - ) -> TestCaseResult { - let oneshot = rust::XxHash32::oneshot(seed, data); - let many_chunks = { - let mut hasher = rust::XxHash32::with_seed(seed); - for chunk in chunks { - hasher.write(chunk); - } - hasher.finish_32() - }; - - prop_assert_eq!(oneshot, 
many_chunks); - Ok(()) - } - - fn oneshot_impl(seed: u32, data: &[u8]) -> TestCaseResult { - let native = c::XxHash32::oneshot(seed, data); - let rust = rust::XxHash32::oneshot(seed, data); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn streaming_one_chunk_impl(seed: u32, data: &[u8]) -> TestCaseResult { - let native = { - let mut hasher = c::XxHash32::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - let rust = { - let mut hasher = rust::XxHash32::with_seed(seed); - hasher.write(data); - hasher.finish_32() - }; - - prop_assert_eq!(native, rust); - Ok(()) - } -} - -mod xxhash64 { - use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher as _; - - use super::*; - - proptest! { - #[test] - fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { - oneshot_same_as_one_chunk_impl(seed, &data)?; - } - - #[test] - fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - } - - #[test] - fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { - oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; - } - - #[test] - fn oneshot(seed: u64, data: Vec) { - oneshot_impl(seed, &data)?; - } - - #[test] - fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_impl(seed, &data[offset..])?; - } - - #[test] - fn streaming_one_chunk(seed: u64, data: Vec) { - streaming_one_chunk_impl(seed, &data)?; - } - - #[test] - fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - streaming_one_chunk_impl(seed, &data[offset..])?; - } - } - - fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let oneshot = rust::XxHash64::oneshot(seed, data); - let one_chunk = { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(oneshot, one_chunk); - Ok(()) - } - - fn 
oneshot_same_as_many_chunks_impl( - seed: u64, - data: &[u8], - chunks: &[Vec], - ) -> TestCaseResult { - let oneshot = rust::XxHash64::oneshot(seed, data); - let many_chunks = { - let mut hasher = rust::XxHash64::with_seed(seed); - for chunk in chunks { - hasher.write(chunk); - } - hasher.finish() - }; - - prop_assert_eq!(oneshot, many_chunks); - Ok(()) - } - - fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = c::XxHash64::oneshot(seed, data); - let rust = rust::XxHash64::oneshot(seed, data); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = { - let mut hasher = c::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - let rust = { - let mut hasher = rust::XxHash64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(native, rust); - Ok(()) - } -} - -mod xxhash3_64 { - use proptest::{prelude::*, test_runner::TestCaseResult}; - use std::hash::Hasher as _; - use twox_hash::xxhash3_64::SECRET_MINIMUM_LENGTH; - - use super::*; - - proptest! 
{ - #[test] - fn oneshot_same_as_one_chunk(seed: u64, data: Vec) { - oneshot_same_as_one_chunk_impl(seed, &data)?; - } - - #[test] - fn oneshot_same_as_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_same_as_one_chunk_impl(seed, &data[offset..])?; - } - - #[test] - fn oneshot_same_as_many_chunks(seed: u64, (data, chunks) in data_and_chunks()) { - oneshot_same_as_many_chunks_impl(seed, &data, &chunks)?; - } - - #[test] - fn oneshot(seed: u64, data: Vec) { - oneshot_impl(seed, &data)?; - } - - #[test] - fn oneshot_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - oneshot_impl(seed, &data[offset..])?; - } - - #[test] - fn oneshot_with_a_secret(secret in secret(), data: Vec) { - oneshot_with_secret_impl(&secret, &data)?; - } - - #[test] - fn oneshot_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { - oneshot_with_seed_and_secret_impl(seed, &secret, &data)?; - } - - #[test] - fn streaming_one_chunk(seed: u64, data: Vec) { - streaming_one_chunk_impl(seed, &data)?; - } - - #[test] - fn streaming_one_chunk_with_an_offset(seed: u64, (data, offset) in vec_and_index()) { - streaming_one_chunk_impl(seed, &data[offset..])?; - } - - #[test] - fn streaming_with_a_seed_and_secret(seed: u64, secret in secret(), data: Vec) { - streaming_with_seed_and_secret_impl(seed, &secret, &data)?; - } - } - - fn oneshot_same_as_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); - let one_chunk = { - let mut hasher = rust::XxHash3_64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(oneshot, one_chunk); - Ok(()) - } - - fn oneshot_same_as_many_chunks_impl( - seed: u64, - data: &[u8], - chunks: &[Vec], - ) -> TestCaseResult { - let oneshot = rust::XxHash3_64::oneshot_with_seed(seed, data); - let many_chunks = { - let mut hasher = rust::XxHash3_64::with_seed(seed); - for chunk in chunks { - hasher.write(chunk); - } - 
hasher.finish() - }; - - prop_assert_eq!(oneshot, many_chunks); - Ok(()) - } - - fn oneshot_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = c::XxHash3_64::oneshot_with_seed(seed, data); - let rust = rust::XxHash3_64::oneshot_with_seed(seed, data); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn oneshot_with_secret_impl(secret: &[u8], data: &[u8]) -> TestCaseResult { - let native = c::XxHash3_64::oneshot_with_secret(secret, data); - let rust = rust::XxHash3_64::oneshot_with_secret(secret, data).unwrap(); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn oneshot_with_seed_and_secret_impl(seed: u64, secret: &[u8], data: &[u8]) -> TestCaseResult { - let native = c::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data); - let rust = rust::XxHash3_64::oneshot_with_seed_and_secret(seed, secret, data).unwrap(); - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn streaming_one_chunk_impl(seed: u64, data: &[u8]) -> TestCaseResult { - let native = { - let mut hasher = c::XxHash3_64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - let rust = { - let mut hasher = rust::XxHash3_64::with_seed(seed); - hasher.write(data); - hasher.finish() - }; - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn streaming_with_seed_and_secret_impl( - seed: u64, - secret: &[u8], - data: &[u8], - ) -> TestCaseResult { - let native = { - let mut hasher = c::XxHash3_64::with_seed_and_secret(seed, secret); - for chunk in data.chunks(256) { - hasher.write(chunk); - } - hasher.finish() - }; - - let rust = { - let mut hasher = rust::XxHash3_64::with_seed_and_secret(seed, secret).unwrap(); - for chunk in data.chunks(256) { - hasher.write(chunk); - } - hasher.finish() - }; - - prop_assert_eq!(native, rust); - Ok(()) - } - - fn secret() -> impl Strategy> { - prop::collection::vec(num::u8::ANY, SECRET_MINIMUM_LENGTH..1024) - } -} - -fn vec_and_index() -> impl Strategy, usize)> { - prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| 
{ - let len = vec.len(); - (Just(vec), 0..len) - }) -} - -fn data_and_chunks() -> impl Strategy, Vec>)> { - prop::collection::vec(prop::collection::vec(num::u8::ANY, 0..100), 0..100).prop_map(|vs| { - let data = vs.iter().flatten().copied().collect(); - (data, vs) - }) -} diff --git a/renu/src/lib.rs b/renu/src/lib.rs deleted file mode 100644 index 2ee51fb45..000000000 --- a/renu/src/lib.rs +++ /dev/null @@ -1,143 +0,0 @@ -//! A Rust implementation of the [XXHash][] algorithm. -//! -//! [XXHash]: https://github.com/Cyan4973/xxHash -//! -//! ## Hashing arbitrary data -//! -//! ### When all the data is available at once -//! -//! ```rust -//! use twox_hash::XxHash64; -//! -//! let seed = 1234; -//! let hash = XxHash64::oneshot(seed, b"some bytes"); -//! assert_eq!(0xeab5_5659_a496_d78b, hash); -//! ``` -//! -//! ### When the data is streaming -//! -//! ```rust -//! use std::hash::Hasher as _; -//! use twox_hash::XxHash64; -//! -//! let seed = 1234; -//! let mut hasher = XxHash64::with_seed(seed); -//! hasher.write(b"some"); -//! hasher.write(b" "); -//! hasher.write(b"bytes"); -//! let hash = hasher.finish(); -//! assert_eq!(0xeab5_5659_a496_d78b, hash); -//! ``` -//! -//! ## In a [`HashMap`](std::collections::HashMap) -//! -//! ### With a default seed -//! -//! ```rust -//! use std::{collections::HashMap, hash::BuildHasherDefault}; -//! use twox_hash::XxHash64; -//! -//! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` -//! -//! ### With a random seed -//! -//! ```rust -//! use std::collections::HashMap; -//! use twox_hash::xxhash64; -//! -//! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` -//! -//! ### With a fixed seed -//! -//! ```rust -//! use std::collections::HashMap; -//! use twox_hash::xxhash64; -//! -//! 
let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` - -#![deny(rust_2018_idioms)] -#![deny(missing_docs)] -#![cfg_attr(not(feature = "std"), no_std)] -#![cfg_attr(docsrs, feature(doc_cfg))] - -#[cfg(feature = "alloc")] -extern crate alloc; - -#[cfg(any(feature = "std", doc, test))] -extern crate std; - -#[cfg(feature = "xxhash32")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] -pub mod xxhash32; - -#[cfg(feature = "xxhash32")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] -pub use xxhash32::Hasher as XxHash32; - -#[cfg(feature = "xxhash64")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] -pub mod xxhash64; - -#[cfg(feature = "xxhash64")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] -pub use xxhash64::Hasher as XxHash64; - -#[cfg(feature = "xxhash3_64")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] -pub mod xxhash3_64; - -#[cfg(feature = "xxhash3_64")] -#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] -pub use xxhash3_64::Hasher as XxHash3_64; - -trait IntoU32 { - fn into_u32(self) -> u32; -} - -impl IntoU32 for u8 { - fn into_u32(self) -> u32 { - self.into() - } -} - -trait IntoU64 { - fn into_u64(self) -> u64; -} - -impl IntoU64 for u8 { - fn into_u64(self) -> u64 { - self.into() - } -} - -impl IntoU64 for u32 { - fn into_u64(self) -> u64 { - self.into() - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -impl IntoU64 for usize { - fn into_u64(self) -> u64 { - self as u64 - } -} - -trait IntoU128 { - fn into_u128(self) -> u128; -} - -impl IntoU128 for u64 { - fn into_u128(self) -> u128 { - u128::from(self) - } -} diff --git a/src/bin/hash_file.rs b/src/bin/hash_file.rs deleted file mode 100644 index 509b48d68..000000000 --- a/src/bin/hash_file.rs +++ /dev/null @@ -1,28 +0,0 @@ -use std::env; -use std::fs::File; -use std::hash::Hasher; -use std::io::{BufRead, BufReader}; 
-use twox_hash::XxHash64; - -fn main() { - for arg in env::args().skip(1) { - let f = File::open(&arg).unwrap(); - let mut f = BufReader::new(f); - - let mut hasher = XxHash64::with_seed(0); - - loop { - let consumed = { - let bytes = f.fill_buf().unwrap(); - if bytes.is_empty() { - break; - } - hasher.write(bytes); - bytes.len() - }; - f.consume(consumed); - } - - println!("{:16x} {}", hasher.finish(), arg); - } -} diff --git a/src/digest_0_10_support.rs b/src/digest_0_10_support.rs deleted file mode 100644 index 935c09692..000000000 --- a/src/digest_0_10_support.rs +++ /dev/null @@ -1,92 +0,0 @@ -use core::hash::Hasher; - -use digest_0_10::{ - generic_array::typenum::consts::{U16, U4, U8}, - FixedOutput, HashMarker, Output, OutputSizeUser, Update, -}; - -use crate::{xxh3, XxHash32, XxHash64}; - -// ---------- - -impl Update for XxHash32 { - fn update(&mut self, data: &[u8]) { - self.write(data); - } -} - -impl OutputSizeUser for XxHash32 { - type OutputSize = U4; -} - -impl FixedOutput for XxHash32 { - fn finalize_into(self, out: &mut Output) { - let tmp: &mut [u8; 4] = out.as_mut(); - *tmp = self.finish().to_be_bytes(); - } -} - -impl HashMarker for XxHash32 {} - -// ---------- - -impl Update for XxHash64 { - fn update(&mut self, data: &[u8]) { - self.write(data); - } -} - -impl OutputSizeUser for XxHash64 { - type OutputSize = U8; -} - -impl FixedOutput for XxHash64 { - fn finalize_into(self, out: &mut Output) { - let tmp: &mut [u8; 8] = out.as_mut(); - *tmp = self.finish().to_be_bytes(); - } -} - -impl HashMarker for XxHash64 {} - -// ---------- - -impl Update for xxh3::Hash64 { - fn update(&mut self, data: &[u8]) { - self.write(data); - } -} - -impl OutputSizeUser for xxh3::Hash64 { - type OutputSize = U8; -} - -impl FixedOutput for xxh3::Hash64 { - fn finalize_into(self, out: &mut Output) { - let tmp: &mut [u8; 8] = out.as_mut(); - *tmp = self.finish().to_be_bytes(); - } -} - -impl HashMarker for xxh3::Hash64 {} - -// ---------- - -impl Update for 
xxh3::Hash128 { - fn update(&mut self, data: &[u8]) { - self.write(data); - } -} - -impl OutputSizeUser for xxh3::Hash128 { - type OutputSize = U16; -} - -impl FixedOutput for xxh3::Hash128 { - fn finalize_into(self, out: &mut Output) { - let tmp: &mut [u8; 16] = out.as_mut(); - *tmp = xxh3::HasherExt::finish_ext(&self).to_be_bytes(); - } -} - -impl HashMarker for xxh3::Hash128 {} diff --git a/src/digest_0_9_support.rs b/src/digest_0_9_support.rs deleted file mode 100644 index 67788cd6c..000000000 --- a/src/digest_0_9_support.rs +++ /dev/null @@ -1,179 +0,0 @@ -use core::hash::Hasher; - -use digest_0_9::{ - generic_array::{ - typenum::consts::{U16, U4, U8}, - GenericArray, - }, - Digest, -}; - -use crate::{xxh3, XxHash32, XxHash64}; - -impl Digest for XxHash32 { - type OutputSize = U4; - - fn new() -> Self { - Self::default() - } - - fn update(&mut self, data: impl AsRef<[u8]>) { - self.write(data.as_ref()); - } - - fn chain(mut self, data: impl AsRef<[u8]>) -> Self - where - Self: Sized, - { - self.update(data); - self - } - - fn finalize(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn finalize_reset(&mut self) -> GenericArray { - let result = self.finalize(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 4 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).finalize() - } -} - -impl Digest for XxHash64 { - type OutputSize = U8; - - fn new() -> Self { - Self::default() - } - - fn update(&mut self, data: impl AsRef<[u8]>) { - self.write(data.as_ref()); - } - - fn chain(mut self, data: impl AsRef<[u8]>) -> Self - where - Self: Sized, - { - self.update(data); - self - } - - fn finalize(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn finalize_reset(&mut self) -> GenericArray { - let result = self.finalize(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 
8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).finalize() - } -} - -impl Digest for xxh3::Hash64 { - type OutputSize = U8; - - fn new() -> Self { - Self::default() - } - - fn update(&mut self, data: impl AsRef<[u8]>) { - self.write(data.as_ref()); - } - - fn chain(mut self, data: impl AsRef<[u8]>) -> Self - where - Self: Sized, - { - self.update(data); - self - } - - fn finalize(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn finalize_reset(&mut self) -> GenericArray { - let result = self.clone().finalize(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).finalize() - } -} - -impl Digest for xxh3::Hash128 { - type OutputSize = U16; - - fn new() -> Self { - Self::default() - } - - fn update(&mut self, data: impl AsRef<[u8]>) { - self.write(data.as_ref()); - } - - fn chain(mut self, data: impl AsRef<[u8]>) -> Self - where - Self: Sized, - { - self.update(data); - self - } - - fn finalize(self) -> GenericArray { - xxh3::HasherExt::finish_ext(&self).to_be_bytes().into() - } - - fn finalize_reset(&mut self) -> GenericArray { - let result = self.clone().finalize(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).finalize() - } -} diff --git a/src/digest_support.rs b/src/digest_support.rs deleted file mode 100644 index 7b00b9d80..000000000 --- a/src/digest_support.rs +++ /dev/null @@ -1,179 +0,0 @@ -use core::hash::Hasher; - -use digest::{ - generic_array::{ - typenum::consts::{U16, U4, U8}, - GenericArray, - }, - Digest, -}; - -use crate::{xxh3, XxHash32, XxHash64}; - -impl Digest for XxHash32 { - type OutputSize = U4; - - fn new() -> Self { - Self::default() - } - - fn input>(&mut self, data: B) { - 
self.write(data.as_ref()); - } - - fn chain>(mut self, data: B) -> Self - where - Self: Sized, - { - self.input(data); - self - } - - fn result(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn result_reset(&mut self) -> GenericArray { - let result = self.result(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 4 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).result() - } -} - -impl Digest for XxHash64 { - type OutputSize = U8; - - fn new() -> Self { - Self::default() - } - - fn input>(&mut self, data: B) { - self.write(data.as_ref()); - } - - fn chain>(mut self, data: B) -> Self - where - Self: Sized, - { - self.input(data); - self - } - - fn result(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn result_reset(&mut self) -> GenericArray { - let result = self.result(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).result() - } -} - -impl Digest for xxh3::Hash64 { - type OutputSize = U8; - - fn new() -> Self { - Self::default() - } - - fn input>(&mut self, data: B) { - self.write(data.as_ref()); - } - - fn chain>(mut self, data: B) -> Self - where - Self: Sized, - { - self.input(data); - self - } - - fn result(self) -> GenericArray { - self.finish().to_be_bytes().into() - } - - fn result_reset(&mut self) -> GenericArray { - let result = self.clone().result(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).result() - } -} - -impl Digest for xxh3::Hash128 { - type OutputSize = U16; - - fn new() -> Self { - Self::default() - } - - fn input>(&mut self, data: B) { - self.write(data.as_ref()); - } - - fn chain>(mut self, data: B) -> 
Self - where - Self: Sized, - { - self.input(data); - self - } - - fn result(self) -> GenericArray { - xxh3::HasherExt::finish_ext(&self).to_be_bytes().into() - } - - fn result_reset(&mut self) -> GenericArray { - let result = self.clone().result(); - self.reset(); - result - } - - fn reset(&mut self) { - *self = Self::default(); - } - - fn output_size() -> usize { - 8 - } - - fn digest(data: &[u8]) -> GenericArray { - Self::new().chain(data).result() - } -} diff --git a/src/lib.rs b/src/lib.rs index 414dc8d42..2ee51fb45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,43 @@ -//! A Rust implementation of the [XXHash] algorithm. +//! A Rust implementation of the [XXHash][] algorithm. //! //! [XXHash]: https://github.com/Cyan4973/xxHash //! -//! ### With a fixed seed +//! ## Hashing arbitrary data +//! +//! ### When all the data is available at once //! //! ```rust -//! use std::hash::BuildHasherDefault; -//! use std::collections::HashMap; //! use twox_hash::XxHash64; //! -//! let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); +//! let seed = 1234; +//! let hash = XxHash64::oneshot(seed, b"some bytes"); +//! assert_eq!(0xeab5_5659_a496_d78b, hash); +//! ``` +//! +//! ### When the data is streaming +//! +//! ```rust +//! use std::hash::Hasher as _; +//! use twox_hash::XxHash64; +//! +//! let seed = 1234; +//! let mut hasher = XxHash64::with_seed(seed); +//! hasher.write(b"some"); +//! hasher.write(b" "); +//! hasher.write(b"bytes"); +//! let hash = hasher.finish(); +//! assert_eq!(0xeab5_5659_a496_d78b, hash); +//! ``` +//! +//! ## In a [`HashMap`](std::collections::HashMap) +//! +//! ### With a default seed +//! +//! ```rust +//! use std::{collections::HashMap, hash::BuildHasherDefault}; +//! use twox_hash::XxHash64; +//! +//! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` @@ -18,104 +46,98 @@ //! //! ```rust //! 
use std::collections::HashMap; -//! use twox_hash::RandomXxHashBuilder64; +//! use twox_hash::xxhash64; +//! +//! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` +//! +//! ### With a fixed seed +//! +//! ```rust +//! use std::collections::HashMap; +//! use twox_hash::xxhash64; //! -//! let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); +//! let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); //! hash.insert(42, "the answer"); //! assert_eq!(hash.get(&42), Some(&"the answer")); //! ``` -#![no_std] +#![deny(rust_2018_idioms)] +#![deny(missing_docs)] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#[cfg(feature = "alloc")] extern crate alloc; -#[cfg(test)] +#[cfg(any(feature = "std", doc, test))] extern crate std; -use core::{marker::PhantomData, mem}; - -mod sixty_four; -mod thirty_two; -pub mod xxh3; - -#[cfg(feature = "std")] -mod std_support; -#[cfg(feature = "std")] -pub use std_support::sixty_four::RandomXxHashBuilder64; -#[cfg(feature = "std")] -pub use std_support::thirty_two::RandomXxHashBuilder32; -#[cfg(feature = "std")] -pub use std_support::xxh3::{ - RandomHashBuilder128 as RandomXxh3HashBuilder128, - RandomHashBuilder64 as RandomXxh3HashBuilder64, -}; - -#[cfg(feature = "digest")] -mod digest_support; - -#[cfg(feature = "digest_0_9")] -mod digest_0_9_support; - -#[cfg(feature = "digest_0_10")] -mod digest_0_10_support; - -pub use crate::sixty_four::XxHash64; -pub use crate::thirty_two::XxHash32; -pub use crate::xxh3::{Hash128 as Xxh3Hash128, Hash64 as Xxh3Hash64}; - -/// A backwards compatibility type alias. Consider directly using -/// `XxHash64` instead. -pub type XxHash = XxHash64; - -#[cfg(feature = "std")] -/// A backwards compatibility type alias. Consider directly using -/// `RandomXxHashBuilder64` instead. 
-pub type RandomXxHashBuilder = RandomXxHashBuilder64; - -/// An unaligned buffer with iteration support for `UnalignedItem`. -struct UnalignedBuffer<'a, T> { - buf: &'a [u8], - phantom: PhantomData, +#[cfg(feature = "xxhash32")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] +pub mod xxhash32; + +#[cfg(feature = "xxhash32")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] +pub use xxhash32::Hasher as XxHash32; + +#[cfg(feature = "xxhash64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] +pub mod xxhash64; + +#[cfg(feature = "xxhash64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] +pub use xxhash64::Hasher as XxHash64; + +#[cfg(feature = "xxhash3_64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] +pub mod xxhash3_64; + +#[cfg(feature = "xxhash3_64")] +#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] +pub use xxhash3_64::Hasher as XxHash3_64; + +trait IntoU32 { + fn into_u32(self) -> u32; } -/// Types implementing this trait must be transmutable from a `*const -/// u8` to `*const Self` at any possible alignment. -/// -/// The intent is to use this with only primitive integer types (and -/// tightly-packed arrays of those integers). 
-#[allow(clippy::missing_safety_doc)] -unsafe trait UnalignedItem {} - -unsafe impl UnalignedItem for [u64; 4] {} -unsafe impl UnalignedItem for [u32; 4] {} -unsafe impl UnalignedItem for u64 {} -unsafe impl UnalignedItem for u32 {} - -impl<'a, T: UnalignedItem> UnalignedBuffer<'a, T> { - #[inline] - fn new(buf: &'a [u8]) -> Self { - Self { - buf, - phantom: PhantomData, - } +impl IntoU32 for u8 { + fn into_u32(self) -> u32 { + self.into() } +} - #[inline] - fn remaining(&self) -> &[u8] { - self.buf +trait IntoU64 { + fn into_u64(self) -> u64; +} + +impl IntoU64 for u8 { + fn into_u64(self) -> u64 { + self.into() + } +} + +impl IntoU64 for u32 { + fn into_u64(self) -> u64 { + self.into() } } -impl<'a, T: UnalignedItem> Iterator for UnalignedBuffer<'a, T> { - type Item = T; - - fn next(&mut self) -> Option { - let size = mem::size_of::(); - self.buf.get(size..).map(|remaining| { - // `self.buf` has at least `size` bytes that can be read as `T`. - let result = unsafe { (self.buf.as_ptr() as *const T).read_unaligned() }; - self.buf = remaining; - result - }) +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl IntoU64 for usize { + fn into_u64(self) -> u64 { + self as u64 + } +} + +trait IntoU128 { + fn into_u128(self) -> u128; +} + +impl IntoU128 for u64 { + fn into_u128(self) -> u128 { + u128::from(self) } } diff --git a/src/sixty_four.rs b/src/sixty_four.rs deleted file mode 100644 index c15158693..000000000 --- a/src/sixty_four.rs +++ /dev/null @@ -1,413 +0,0 @@ -use crate::UnalignedBuffer; -use core::{cmp, hash::Hasher}; - -#[cfg(feature = "serialize")] -use serde::{Deserialize, Serialize}; - -const CHUNK_SIZE: usize = 32; - -pub const PRIME_1: u64 = 11_400_714_785_074_694_791; -pub const PRIME_2: u64 = 14_029_467_366_897_019_727; -pub const PRIME_3: u64 = 1_609_587_929_392_839_161; -pub const PRIME_4: u64 = 9_650_029_242_287_828_579; -pub const PRIME_5: u64 = 2_870_177_450_012_600_261; - -#[cfg_attr(feature = "serialize", 
derive(Deserialize, Serialize))] -#[derive(Copy, Clone, PartialEq)] -struct XxCore { - v1: u64, - v2: u64, - v3: u64, - v4: u64, -} - -/// Calculates the 64-bit hash. -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct XxHash64 { - total_len: u64, - seed: u64, - core: XxCore, - #[cfg_attr(feature = "serialize", serde(flatten))] - buffer: Buffer, -} - -impl XxCore { - fn with_seed(seed: u64) -> XxCore { - XxCore { - v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), - v2: seed.wrapping_add(PRIME_2), - v3: seed, - v4: seed.wrapping_sub(PRIME_1), - } - } - - #[inline(always)] - fn ingest_chunks(&mut self, values: I) - where - I: IntoIterator, - { - #[inline(always)] - fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 { - value = value.wrapping_mul(PRIME_2); - current_value = current_value.wrapping_add(value); - current_value = current_value.rotate_left(31); - current_value.wrapping_mul(PRIME_1) - } - - // By drawing these out, we can avoid going back and forth to - // memory. It only really helps for large files, when we need - // to iterate multiple times here. - - let mut v1 = self.v1; - let mut v2 = self.v2; - let mut v3 = self.v3; - let mut v4 = self.v4; - - for [n1, n2, n3, n4] in values { - v1 = ingest_one_number(v1, n1.to_le()); - v2 = ingest_one_number(v2, n2.to_le()); - v3 = ingest_one_number(v3, n3.to_le()); - v4 = ingest_one_number(v4, n4.to_le()); - } - - self.v1 = v1; - self.v2 = v2; - self.v3 = v3; - self.v4 = v4; - } - - #[inline(always)] - fn finish(&self) -> u64 { - // The original code pulls out local vars for v[1234] - // here. Performance tests did not show that to be effective - // here, presumably because this method is not called in a - // tight loop. 
- - #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel - let mut hash; - - hash = self.v1.rotate_left(1); - hash = hash.wrapping_add(self.v2.rotate_left(7)); - hash = hash.wrapping_add(self.v3.rotate_left(12)); - hash = hash.wrapping_add(self.v4.rotate_left(18)); - - #[inline(always)] - fn mix_one(mut hash: u64, mut value: u64) -> u64 { - value = value.wrapping_mul(PRIME_2); - value = value.rotate_left(31); - value = value.wrapping_mul(PRIME_1); - hash ^= value; - hash = hash.wrapping_mul(PRIME_1); - hash.wrapping_add(PRIME_4) - } - - hash = mix_one(hash, self.v1); - hash = mix_one(hash, self.v2); - hash = mix_one(hash, self.v3); - hash = mix_one(hash, self.v4); - - hash - } -} - -impl core::fmt::Debug for XxCore { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { - write!( - f, - "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", - self.v1, self.v2, self.v3, self.v4 - ) - } -} - -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, Default, PartialEq)] -#[repr(align(8))] -#[cfg_attr(feature = "serialize", serde(transparent))] -struct AlignToU64(T); - -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, Default, PartialEq)] -struct Buffer { - #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] - data: AlignToU64<[u8; CHUNK_SIZE]>, - #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] - len: usize, -} - -impl Buffer { - fn data(&self) -> &[u8] { - &self.data.0[..self.len] - } - - /// Consumes as much of the parameter as it can, returning the unused part. 
- fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { - let to_use = cmp::min(self.available(), data.len()); - let (data, remaining) = data.split_at(to_use); - self.data.0[self.len..][..to_use].copy_from_slice(data); - self.len += to_use; - remaining - } - - fn set_data(&mut self, data: &[u8]) { - debug_assert!(self.is_empty()); - debug_assert!(data.len() < CHUNK_SIZE); - self.data.0[..data.len()].copy_from_slice(data); - self.len = data.len(); - } - - fn available(&self) -> usize { - CHUNK_SIZE - self.len - } - - fn is_empty(&self) -> bool { - self.len == 0 - } - - fn is_full(&self) -> bool { - self.len == CHUNK_SIZE - } -} - -impl XxHash64 { - /// Constructs the hash with an initial seed - pub fn with_seed(seed: u64) -> XxHash64 { - XxHash64 { - total_len: 0, - seed, - core: XxCore::with_seed(seed), - buffer: Buffer::default(), - } - } - - pub(crate) fn write(&mut self, bytes: &[u8]) { - let remaining = self.maybe_consume_bytes(bytes); - if !remaining.is_empty() { - let mut remaining = UnalignedBuffer::new(remaining); - self.core.ingest_chunks(&mut remaining); - self.buffer.set_data(remaining.remaining()); - } - self.total_len += bytes.len() as u64; - } - - // Consume bytes and try to make `self.buffer` empty. - // If there are not enough bytes, `self.buffer` can be non-empty, and this - // function returns an empty slice. 
- fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { - if self.buffer.is_empty() { - data - } else { - let data = self.buffer.consume(data); - if self.buffer.is_full() { - let mut u64s = UnalignedBuffer::new(self.buffer.data()); - self.core.ingest_chunks(&mut u64s); - debug_assert!(u64s.remaining().is_empty()); - self.buffer.len = 0; - } - data - } - } - - pub(crate) fn finish(&self) -> u64 { - let mut hash = if self.total_len >= CHUNK_SIZE as u64 { - // We have processed at least one full chunk - self.core.finish() - } else { - self.seed.wrapping_add(PRIME_5) - }; - - hash = hash.wrapping_add(self.total_len); - - let mut buffered_u64s = UnalignedBuffer::::new(self.buffer.data()); - for buffered_u64 in &mut buffered_u64s { - let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2); - k1 = k1.rotate_left(31); - k1 = k1.wrapping_mul(PRIME_1); - hash ^= k1; - hash = hash.rotate_left(27); - hash = hash.wrapping_mul(PRIME_1); - hash = hash.wrapping_add(PRIME_4); - } - - let mut buffered_u32s = UnalignedBuffer::::new(buffered_u64s.remaining()); - for buffered_u32 in &mut buffered_u32s { - let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1); - hash ^= k1; - hash = hash.rotate_left(23); - hash = hash.wrapping_mul(PRIME_2); - hash = hash.wrapping_add(PRIME_3); - } - - let buffered_u8s = buffered_u32s.remaining(); - for &buffered_u8 in buffered_u8s { - let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5); - hash ^= k1; - hash = hash.rotate_left(11); - hash = hash.wrapping_mul(PRIME_1); - } - - // The final intermixing - hash ^= hash >> 33; - hash = hash.wrapping_mul(PRIME_2); - hash ^= hash >> 29; - hash = hash.wrapping_mul(PRIME_3); - hash ^= hash >> 32; - - hash - } - - pub fn seed(&self) -> u64 { - self.seed - } - - pub fn total_len(&self) -> u64 { - self.total_len - } -} - -impl Default for XxHash64 { - fn default() -> XxHash64 { - XxHash64::with_seed(0) - } -} - -impl Hasher for XxHash64 { - fn finish(&self) -> u64 { - XxHash64::finish(self) 
- } - - fn write(&mut self, bytes: &[u8]) { - XxHash64::write(self, bytes) - } -} - -#[cfg(feature = "std")] -pub use crate::std_support::sixty_four::RandomXxHashBuilder64; - -#[cfg(test)] -mod test { - use super::{RandomXxHashBuilder64, XxHash64}; - use std::collections::HashMap; - use std::hash::BuildHasherDefault; - use std::prelude::v1::*; - - #[test] - fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash64::with_seed(0); - for byte in bytes.chunks(1) { - byte_by_byte.write(byte); - } - - let mut one_chunk = XxHash64::with_seed(0); - one_chunk.write(&bytes); - - assert_eq!(byte_by_byte.core, one_chunk.core); - } - - #[test] - fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); - } - - #[test] - fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(&[42]); - assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); - } - - #[test] - fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); - } - - #[test] - fn hash_of_multiple_chunks_matches_c_implementation() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); - } - - #[test] - fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); - } - - #[test] - fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 
0x567e_355e_0682_e1f1); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[cfg(feature = "serialize")] - type TestResult = Result>; - - #[cfg(feature = "serialize")] - #[test] - fn test_serialization_cycle() -> TestResult { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash64 = serde_json::from_str(&serialized)?; - assert_eq!(hasher, unserialized); - Ok(()) - } - - #[cfg(feature = "serialize")] - #[test] - fn test_serialization_stability() -> TestResult { - let mut hasher = XxHash64::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = r#"{ - "total_len": 14, - "seed": 0, - "core": { - "v1": 6983438078262162902, - "v2": 14029467366897019727, - "v3": 0, - "v4": 7046029288634856825 - }, - "buffer": [ - 72, 101, 108, 108, 111, 44, 32, 119, - 111, 114, 108, 100, 33, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - ], - "buffer_usage": 14 - }"#; - - let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); - assert_eq!(hasher, unserialized); - Ok(()) - } -} diff --git a/src/std_support.rs b/src/std_support.rs deleted file mode 100644 index d79085e26..000000000 --- a/src/std_support.rs +++ /dev/null @@ -1,113 +0,0 @@ -pub mod sixty_four { - use crate::XxHash64; - use core::hash::BuildHasher; - use rand::{self, Rng}; - - #[derive(Clone)] - /// Constructs a randomized seed and reuses it for multiple hasher instances. 
- pub struct RandomXxHashBuilder64(u64); - - impl RandomXxHashBuilder64 { - fn new() -> RandomXxHashBuilder64 { - RandomXxHashBuilder64(rand::thread_rng().gen()) - } - } - - impl Default for RandomXxHashBuilder64 { - fn default() -> RandomXxHashBuilder64 { - RandomXxHashBuilder64::new() - } - } - - impl BuildHasher for RandomXxHashBuilder64 { - type Hasher = XxHash64; - - fn build_hasher(&self) -> XxHash64 { - XxHash64::with_seed(self.0) - } - } -} - -pub mod thirty_two { - use crate::XxHash32; - use core::hash::BuildHasher; - use rand::{self, Rng}; - - #[derive(Clone)] - /// Constructs a randomized seed and reuses it for multiple hasher instances. See the usage warning on `XxHash32`. - pub struct RandomXxHashBuilder32(u32); - - impl RandomXxHashBuilder32 { - fn new() -> RandomXxHashBuilder32 { - RandomXxHashBuilder32(rand::thread_rng().gen()) - } - } - - impl Default for RandomXxHashBuilder32 { - fn default() -> RandomXxHashBuilder32 { - RandomXxHashBuilder32::new() - } - } - - impl BuildHasher for RandomXxHashBuilder32 { - type Hasher = XxHash32; - - fn build_hasher(&self) -> XxHash32 { - XxHash32::with_seed(self.0) - } - } -} - -pub mod xxh3 { - use crate::xxh3::{Hash128, Hash64}; - use core::hash::BuildHasher; - use rand::{self, Rng}; - - #[derive(Clone)] - /// Constructs a randomized seed and reuses it for multiple hasher instances. - pub struct RandomHashBuilder64(u64); - - impl RandomHashBuilder64 { - fn new() -> RandomHashBuilder64 { - RandomHashBuilder64(rand::thread_rng().gen()) - } - } - - impl Default for RandomHashBuilder64 { - fn default() -> RandomHashBuilder64 { - RandomHashBuilder64::new() - } - } - - impl BuildHasher for RandomHashBuilder64 { - type Hasher = Hash64; - - fn build_hasher(&self) -> Hash64 { - Hash64::with_seed(self.0) - } - } - - #[derive(Clone)] - /// Constructs a randomized seed and reuses it for multiple hasher instances. 
- pub struct RandomHashBuilder128(u64); - - impl RandomHashBuilder128 { - fn new() -> RandomHashBuilder128 { - RandomHashBuilder128(rand::thread_rng().gen()) - } - } - - impl Default for RandomHashBuilder128 { - fn default() -> RandomHashBuilder128 { - RandomHashBuilder128::new() - } - } - - impl BuildHasher for RandomHashBuilder128 { - type Hasher = Hash128; - - fn build_hasher(&self) -> Hash128 { - Hash128::with_seed(self.0) - } - } -} diff --git a/src/thirty_two.rs b/src/thirty_two.rs deleted file mode 100644 index cfa44cdbc..000000000 --- a/src/thirty_two.rs +++ /dev/null @@ -1,416 +0,0 @@ -use crate::UnalignedBuffer; -use core::{cmp, hash::Hasher}; - -#[cfg(feature = "serialize")] -use serde::{Deserialize, Serialize}; - -const CHUNK_SIZE: usize = 16; - -pub const PRIME_1: u32 = 2_654_435_761; -pub const PRIME_2: u32 = 2_246_822_519; -pub const PRIME_3: u32 = 3_266_489_917; -pub const PRIME_4: u32 = 668_265_263; -pub const PRIME_5: u32 = 374_761_393; - -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Copy, Clone, PartialEq)] -struct XxCore { - v1: u32, - v2: u32, - v3: u32, - v4: u32, -} - -/// Calculates the 32-bit hash. Care should be taken when using this -/// hash. -/// -/// Although this struct implements `Hasher`, it only calculates a -/// 32-bit number, leaving the upper bits as 0. This means it is -/// unlikely to be correct to use this in places like a `HashMap`. 
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct XxHash32 { - total_len: u64, - seed: u32, - core: XxCore, - #[cfg_attr(feature = "serialize", serde(flatten))] - buffer: Buffer, -} - -impl XxCore { - fn with_seed(seed: u32) -> XxCore { - XxCore { - v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), - v2: seed.wrapping_add(PRIME_2), - v3: seed, - v4: seed.wrapping_sub(PRIME_1), - } - } - - #[inline(always)] - fn ingest_chunks(&mut self, values: I) - where - I: IntoIterator, - { - #[inline(always)] - fn ingest_one_number(mut current_value: u32, mut value: u32) -> u32 { - value = value.wrapping_mul(PRIME_2); - current_value = current_value.wrapping_add(value); - current_value = current_value.rotate_left(13); - current_value.wrapping_mul(PRIME_1) - } - - // By drawing these out, we can avoid going back and forth to - // memory. It only really helps for large files, when we need - // to iterate multiple times here. - - let mut v1 = self.v1; - let mut v2 = self.v2; - let mut v3 = self.v3; - let mut v4 = self.v4; - - for [n1, n2, n3, n4] in values { - v1 = ingest_one_number(v1, n1.to_le()); - v2 = ingest_one_number(v2, n2.to_le()); - v3 = ingest_one_number(v3, n3.to_le()); - v4 = ingest_one_number(v4, n4.to_le()); - } - - self.v1 = v1; - self.v2 = v2; - self.v3 = v3; - self.v4 = v4; - } - - #[inline(always)] - fn finish(&self) -> u32 { - // The original code pulls out local vars for v[1234] - // here. Performance tests did not show that to be effective - // here, presumably because this method is not called in a - // tight loop. 
- - #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel - let mut hash; - - hash = self.v1.rotate_left(1); - hash = hash.wrapping_add(self.v2.rotate_left(7)); - hash = hash.wrapping_add(self.v3.rotate_left(12)); - hash = hash.wrapping_add(self.v4.rotate_left(18)); - - hash - } -} - -impl core::fmt::Debug for XxCore { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { - write!( - f, - "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", - self.v1, self.v2, self.v3, self.v4 - ) - } -} - -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, Default, PartialEq)] -#[repr(align(4))] -#[cfg_attr(feature = "serialize", serde(transparent))] -struct AlignToU32(T); - -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, Default, PartialEq)] -struct Buffer { - #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] - data: AlignToU32<[u8; CHUNK_SIZE]>, - #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] - len: usize, -} - -impl Buffer { - fn data(&self) -> &[u8] { - &self.data.0[..self.len] - } - - /// Consumes as much of the parameter as it can, returning the unused part. 
- fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { - let to_use = cmp::min(self.available(), data.len()); - let (data, remaining) = data.split_at(to_use); - self.data.0[self.len..][..to_use].copy_from_slice(data); - self.len += to_use; - remaining - } - - fn set_data(&mut self, data: &[u8]) { - debug_assert!(self.is_empty()); - debug_assert!(data.len() < CHUNK_SIZE); - self.data.0[..data.len()].copy_from_slice(data); - self.len = data.len(); - } - - fn available(&self) -> usize { - CHUNK_SIZE - self.len - } - - fn is_empty(&self) -> bool { - self.len == 0 - } - - fn is_full(&self) -> bool { - self.len == CHUNK_SIZE - } -} - -impl XxHash32 { - /// Constructs the hash with an initial seed - pub fn with_seed(seed: u32) -> XxHash32 { - XxHash32 { - total_len: 0, - seed, - core: XxCore::with_seed(seed), - buffer: Buffer::default(), - } - } - - pub(crate) fn write(&mut self, bytes: &[u8]) { - let remaining = self.maybe_consume_bytes(bytes); - if !remaining.is_empty() { - let mut remaining = UnalignedBuffer::new(remaining); - self.core.ingest_chunks(&mut remaining); - self.buffer.set_data(remaining.remaining()); - } - self.total_len += bytes.len() as u64; - } - - // Consume bytes and try to make `self.buffer` empty. - // If there are not enough bytes, `self.buffer` can be non-empty, and this - // function returns an empty slice. 
- fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { - if self.buffer.is_empty() { - data - } else { - let data = self.buffer.consume(data); - if self.buffer.is_full() { - let mut u32s = UnalignedBuffer::new(self.buffer.data()); - self.core.ingest_chunks(&mut u32s); - debug_assert!(u32s.remaining().is_empty()); - self.buffer.len = 0; - } - data - } - } - - pub(crate) fn finish(&self) -> u32 { - let mut hash = if self.total_len >= CHUNK_SIZE as u64 { - // We have processed at least one full chunk - self.core.finish() - } else { - self.seed.wrapping_add(PRIME_5) - }; - - hash = hash.wrapping_add(self.total_len as u32); - - let mut buffered_u32s = UnalignedBuffer::::new(self.buffer.data()); - for buffered_u32 in &mut buffered_u32s { - let k1 = buffered_u32.to_le().wrapping_mul(PRIME_3); - hash = hash.wrapping_add(k1); - hash = hash.rotate_left(17); - hash = hash.wrapping_mul(PRIME_4); - } - - let buffered_u8s = buffered_u32s.remaining(); - for &buffered_u8 in buffered_u8s { - let k1 = u32::from(buffered_u8).wrapping_mul(PRIME_5); - hash = hash.wrapping_add(k1); - hash = hash.rotate_left(11); - hash = hash.wrapping_mul(PRIME_1); - } - - // The final intermixing - hash ^= hash >> 15; - hash = hash.wrapping_mul(PRIME_2); - hash ^= hash >> 13; - hash = hash.wrapping_mul(PRIME_3); - hash ^= hash >> 16; - - hash - } - - pub fn seed(&self) -> u32 { - self.seed - } - - /// Get the total number of bytes hashed, truncated to 32 bits. - /// For the full 64-bit byte count, use `total_len_64` - pub fn total_len(&self) -> u32 { - self.total_len as u32 - } - - /// Get the total number of bytes hashed. 
- pub fn total_len_64(&self) -> u64 { - self.total_len - } -} - -impl Default for XxHash32 { - fn default() -> XxHash32 { - XxHash32::with_seed(0) - } -} - -impl Hasher for XxHash32 { - fn finish(&self) -> u64 { - u64::from(XxHash32::finish(self)) - } - - fn write(&mut self, bytes: &[u8]) { - XxHash32::write(self, bytes) - } -} - -#[cfg(feature = "std")] -pub use crate::std_support::thirty_two::RandomXxHashBuilder32; - -#[cfg(test)] -mod test { - use super::{RandomXxHashBuilder32, XxHash32}; - use std::collections::HashMap; - use std::hash::BuildHasherDefault; - use std::prelude::v1::*; - - #[test] - fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { - let bytes: Vec<_> = (0..32).map(|_| 0).collect(); - - let mut byte_by_byte = XxHash32::with_seed(0); - for byte in bytes.chunks(1) { - byte_by_byte.write(byte); - } - - let mut one_chunk = XxHash32::with_seed(0); - one_chunk.write(&bytes); - - assert_eq!(byte_by_byte.core, one_chunk.core); - } - - #[test] - fn hash_of_nothing_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0x02cc_5d05); - } - - #[test] - fn hash_of_single_byte_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); - hasher.write(&[42]); - assert_eq!(hasher.finish(), 0xe0fe_705f); - } - - #[test] - fn hash_of_multiple_bytes_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0); - hasher.write(b"Hello, world!\0"); - assert_eq!(hasher.finish(), 0x9e5e_7e93); - } - - #[test] - fn hash_of_multiple_chunks_matches_c_implementation() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x7f89_ba44); - } - - #[test] - fn hash_with_different_seed_matches_c_implementation() { - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.write(&[]); - assert_eq!(hasher.finish(), 0xd6bf_8459); - } - - #[test] - fn 
hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { - let bytes: Vec<_> = (0..100).collect(); - let mut hasher = XxHash32::with_seed(0x42c9_1977); - hasher.write(&bytes); - assert_eq!(hasher.finish(), 0x6d2f_6c17); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_default_seed() { - let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[test] - fn can_be_used_in_a_hashmap_with_a_random_seed() { - let mut hash: HashMap<_, _, RandomXxHashBuilder32> = Default::default(); - hash.insert(42, "the answer"); - assert_eq!(hash.get(&42), Some(&"the answer")); - } - - #[cfg(feature = "serialize")] - type TestResult = Result>; - - #[cfg(feature = "serialize")] - #[test] - fn test_serialization_cycle() -> TestResult { - let mut hasher = XxHash32::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = serde_json::to_string(&hasher)?; - let unserialized: XxHash32 = serde_json::from_str(&serialized)?; - assert_eq!(hasher, unserialized); - Ok(()) - } - - #[cfg(feature = "serialize")] - #[test] - fn test_serialization_stability() -> TestResult { - let mut hasher = XxHash32::with_seed(0); - hasher.write(b"Hello, world!\0"); - hasher.finish(); - - let serialized = r#"{ - "total_len": 14, - "seed": 0, - "core": { - "v1": 606290984, - "v2": 2246822519, - "v3": 0, - "v4": 1640531535 - }, - "buffer": [ - 72, 101, 108, 108, 111, 44, 32, 119, - 111, 114, 108, 100, 33, 0, 0, 0 - ], - "buffer_usage": 14 - }"#; - - let unserialized: XxHash32 = serde_json::from_str(serialized).unwrap(); - assert_eq!(hasher, unserialized); - Ok(()) - } - - // This test validates wraparound/truncation behavior for very large inputs - // of a 32-bit hash, but runs very slowly in the normal "cargo test" - // build config since it hashes 4.3GB of data. It runs reasonably quick - // under "cargo test --release". 
- /* - #[test] - fn len_overflow_32bit() { - // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. - let bytes200: Vec = (0..200).collect(); - let mut hasher = XxHash32::with_seed(0); - for _ in 0..(4_300_000_000u64 / 200u64) { - hasher.write(&bytes200); - } - assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00); - assert_eq!(hasher.total_len(), 0x004c_cb00); - // retult is tested against the C implementation - assert_eq!(hasher.finish(), 0x1522_4ca7); - } - */ -} diff --git a/src/xxh3.rs b/src/xxh3.rs deleted file mode 100644 index 0ffc54189..000000000 --- a/src/xxh3.rs +++ /dev/null @@ -1,1666 +0,0 @@ -//! The in-progress XXH3 algorithm. -//! -//! Please read [the notes in original implementation][warning] to -//! learn about when to use these algorithms. Specifically, the -//! version of code this crate reproduces says: -//! -//! > The algorithm is currently in development, meaning its return -//! values might still change in future versions. However, the API -//! is stable, and can be used in production, typically for -//! generation of ephemeral hashes (produced and consumed in same -//! session). -//! -//! 
[warning]: https://github.com/Cyan4973/xxHash#new-hash-algorithms - -use alloc::vec::Vec; - -use core::convert::TryInto; -use core::hash::Hasher; -use core::mem; -use core::ops::{Deref, DerefMut}; -use core::slice; - -#[cfg(target_arch = "x86")] -use core::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64::*; - -use cfg_if::cfg_if; -use static_assertions::{const_assert, const_assert_eq}; - -#[cfg(feature = "serialize")] -use serde::{Deserialize, Serialize}; - -use crate::sixty_four::{ - PRIME_1 as PRIME64_1, PRIME_2 as PRIME64_2, PRIME_3 as PRIME64_3, PRIME_4 as PRIME64_4, - PRIME_5 as PRIME64_5, -}; -use crate::thirty_two::{PRIME_1 as PRIME32_1, PRIME_2 as PRIME32_2, PRIME_3 as PRIME32_3}; - -#[cfg(feature = "std")] -pub use crate::std_support::xxh3::{RandomHashBuilder128, RandomHashBuilder64}; - -#[inline(always)] -pub fn hash64(data: &[u8]) -> u64 { - hash64_with_seed(data, 0) -} - -#[inline(always)] -pub fn hash64_with_seed(data: &[u8], seed: u64) -> u64 { - let len = data.len(); - - if len <= 16 { - hash_len_0to16_64bits(data, len, &SECRET, seed) - } else if len <= 128 { - hash_len_17to128_64bits(data, len, &SECRET, seed) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_64bits(data, len, &SECRET, seed) - } else { - hash_long_64bits_with_seed(data, len, seed) - } -} - -#[inline(always)] -pub fn hash64_with_secret(data: &[u8], secret: &[u8]) -> u64 { - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let len = data.len(); - - if len <= 16 { - hash_len_0to16_64bits(data, len, secret, 0) - } else if len <= 128 { - hash_len_17to128_64bits(data, len, secret, 0) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_64bits(data, len, secret, 0) - } else { - hash_long_64bits_with_secret(data, len, secret) - } -} - -#[inline(always)] -pub fn hash128(data: &[u8]) -> u128 { - hash128_with_seed(data, 0) -} - -#[inline(always)] -pub fn hash128_with_seed(data: &[u8], seed: u64) -> u128 { - let len = data.len(); - - if len <= 16 { - 
hash_len_0to16_128bits(data, len, &SECRET, seed) - } else if len <= 128 { - hash_len_17to128_128bits(data, len, &SECRET, seed) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_128bits(data, len, &SECRET, seed) - } else { - hash_long_128bits_with_seed(data, len, seed) - } -} - -#[inline(always)] -pub fn hash128_with_secret(data: &[u8], secret: &[u8]) -> u128 { - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let len = data.len(); - - if len <= 16 { - hash_len_0to16_128bits(data, len, secret, 0) - } else if len <= 128 { - hash_len_17to128_128bits(data, len, secret, 0) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_128bits(data, len, secret, 0) - } else { - hash_long_128bits_with_secret(data, len, secret) - } -} - -/// Calculates the 64-bit hash. -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Clone, Default)] -pub struct Hash64(State); - -impl Hash64 { - pub fn with_seed(seed: u64) -> Self { - Self(State::with_seed(seed)) - } - - pub fn with_secret>>(secret: S) -> Self { - Self(State::with_secret(secret)) - } -} - -impl Hasher for Hash64 { - #[inline(always)] - fn finish(&self) -> u64 { - self.0.digest64() - } - - #[inline(always)] - fn write(&mut self, bytes: &[u8]) { - self.0.update(bytes, AccWidth::Acc64Bits) - } -} - -/// Calculates the 128-bit hash. 
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Clone, Default)] -pub struct Hash128(State); - -impl Hash128 { - pub fn with_seed(seed: u64) -> Self { - Self(State::with_seed(seed)) - } - - pub fn with_secret>>(secret: S) -> Self { - Self(State::with_secret(secret)) - } -} - -impl Hasher for Hash128 { - #[inline(always)] - fn finish(&self) -> u64 { - self.0.digest128() as u64 - } - - #[inline(always)] - fn write(&mut self, bytes: &[u8]) { - self.0.update(bytes, AccWidth::Acc128Bits) - } -} - -pub trait HasherExt: Hasher { - fn finish_ext(&self) -> u128; -} - -impl HasherExt for Hash128 { - #[inline(always)] - fn finish_ext(&self) -> u128 { - self.0.digest128() - } -} - -/* ========================================== - * XXH3 default settings - * ========================================== */ - -const SECRET_DEFAULT_SIZE: usize = 192; -const SECRET_SIZE_MIN: usize = 136; - -const SECRET: Secret = Secret([ - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, - 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 
0x7a, 0xd0, 0x31, 0xce, - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, -]); - -#[repr(align(64))] -#[derive(Clone)] -struct Secret([u8; SECRET_DEFAULT_SIZE]); - -const_assert_eq!(mem::size_of::() % 16, 0); - -impl Default for Secret { - #[inline(always)] - fn default() -> Self { - SECRET - } -} - -impl Deref for Secret { - type Target = [u8]; - - #[inline(always)] - fn deref(&self) -> &Self::Target { - &self.0[..] - } -} - -cfg_if! { - if #[cfg(feature = "serialize")] { - impl Serialize for Secret { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serializer.serialize_bytes(self) - } - } - - impl<'de> Deserialize<'de> for Secret { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - deserializer.deserialize_bytes(SecretVisitor) - } - } - - struct SecretVisitor; - - impl<'de> serde::de::Visitor<'de> for SecretVisitor { - type Value = Secret; - - fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result { - formatter.write_str("secret with a bytes array") - } - - fn visit_bytes(self, v: &[u8]) -> Result - where - E: serde::de::Error, - { - if v.len() == SECRET_DEFAULT_SIZE { - let mut secret = [0; SECRET_DEFAULT_SIZE]; - - secret.copy_from_slice(v); - - Ok(Secret(secret)) - } else { - Err(E::custom("incomplete secret data")) - } - } - } - } -} - -impl Secret { - #[inline(always)] - pub fn with_seed(seed: u64) -> Self { - let mut secret = [0; SECRET_DEFAULT_SIZE]; - - for off in (0..SECRET_DEFAULT_SIZE).step_by(16) { - secret[off..].write_u64_le(SECRET[off..].read_u64_le().wrapping_add(seed)); - secret[off + 8..].write_u64_le(SECRET[off + 8..].read_u64_le().wrapping_sub(seed)); - } - - Secret(secret) - } -} - -cfg_if! 
{ - if #[cfg(target_feature = "avx2")] { - #[repr(align(32))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } else if #[cfg(target_feature = "sse2")] { - #[repr(align(16))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } else { - #[repr(align(8))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } -} - -const ACC_SIZE: usize = mem::size_of::(); - -const_assert_eq!(ACC_SIZE, 64); - -impl Default for Acc { - #[inline(always)] - fn default() -> Self { - Acc([ - u64::from(PRIME32_3), - PRIME64_1, - PRIME64_2, - PRIME64_3, - PRIME64_4, - u64::from(PRIME32_2), - PRIME64_5, - u64::from(PRIME32_1), - ]) - } -} - -impl Deref for Acc { - type Target = [u64]; - - #[inline(always)] - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for Acc { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -trait Buf { - fn read_u32_le(&self) -> u32; - - fn read_u64_le(&self) -> u64; -} - -trait BufMut { - fn write_u32_le(&mut self, n: u32); - - fn write_u64_le(&mut self, n: u64); -} - -impl Buf for [u8] { - #[inline(always)] - fn read_u32_le(&self) -> u32 { - let buf = &self[..mem::size_of::()]; - u32::from_le_bytes(buf.try_into().unwrap()) - } - - #[inline(always)] - fn read_u64_le(&self) -> u64 { - let buf = &self[..mem::size_of::()]; - u64::from_le_bytes(buf.try_into().unwrap()) - } -} - -impl BufMut for [u8] { - #[inline(always)] - fn write_u32_le(&mut self, n: u32) { - self[..mem::size_of::()].copy_from_slice(&n.to_le_bytes()[..]); - } - - #[inline(always)] - fn write_u64_le(&mut self, n: u64) { - self[..mem::size_of::()].copy_from_slice(&n.to_le_bytes()[..]); - } -} - -/* ========================================== - * Short keys - * ========================================== */ - -#[inline(always)] -fn 
hash_len_0to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { - debug_assert!(len <= 16); - - if len > 8 { - hash_len_9to16_64bits(data, len, key, seed) - } else if len >= 4 { - hash_len_4to8_64bits(data, len, key, seed) - } else if len > 0 { - hash_len_1to3_64bits(data, len, key, seed) - } else { - 0 - } -} - -#[inline(always)] -fn hash_len_9to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { - debug_assert!((9..=16).contains(&len)); - - let ll1 = data.read_u64_le() ^ key.read_u64_le().wrapping_add(seed); - let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed); - let acc = (len as u64) - .wrapping_add(ll1) - .wrapping_add(ll2) - .wrapping_add(mul128_fold64(ll1, ll2)); - - avalanche(acc) -} - -#[inline(always)] -fn hash_len_4to8_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { - debug_assert!((4..=8).contains(&len)); - - let in1 = u64::from(data.read_u32_le()); - let in2 = u64::from(data[len - 4..].read_u32_le()); - let in64 = in1.wrapping_add(in2 << 32); - let keyed = in64 ^ key.read_u64_le().wrapping_add(seed); - let mix64 = - (len as u64).wrapping_add((keyed ^ (keyed >> 51)).wrapping_mul(u64::from(PRIME32_1))); - - avalanche((mix64 ^ (mix64 >> 47)).wrapping_mul(PRIME64_2)) -} - -#[inline(always)] -fn hash_len_1to3_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { - debug_assert!((1..=3).contains(&len)); - - let c1 = u32::from(data[0]); - let c2 = u32::from(data[len >> 1]); - let c3 = u32::from(data[len - 1]); - let combined = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24); - let keyed = u64::from(combined) ^ u64::from(key.read_u32_le()).wrapping_add(seed); - let mixed = keyed.wrapping_mul(PRIME64_1); - - avalanche(mixed) -} - -#[inline(always)] -fn hash_len_17to128_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { - debug_assert!((17..=128).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let mut acc = PRIME64_1.wrapping_mul(len 
as u64); - - if len > 32 { - if len > 64 { - if len > 96 { - acc = acc - .wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed)) - .wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed)); - } - acc = acc - .wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed)) - .wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed)); - } - - acc = acc - .wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed)) - .wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed)); - } - - acc = acc - .wrapping_add(mix_16bytes(data, secret, seed)) - .wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed)); - - avalanche(acc) -} - -const MIDSIZE_MAX: usize = 240; -const MIDSIZE_STARTOFFSET: usize = 3; -const MIDSIZE_LASTOFFSET: usize = 17; - -#[inline(always)] -fn hash_len_129to240_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { - debug_assert!((129..=MIDSIZE_MAX).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let acc = (len as u64).wrapping_mul(PRIME64_1); - let acc = (0..8).fold(acc, |acc, i| { - acc.wrapping_add(mix_16bytes(&data[16 * i..], &secret[16 * i..], seed)) - }); - let acc = avalanche(acc); - - let nb_rounds = len / 16; - debug_assert!(nb_rounds >= 8); - - let acc = (8..nb_rounds).fold(acc, |acc, i| { - acc.wrapping_add(mix_16bytes( - &data[16 * i..], - &secret[16 * (i - 8) + MIDSIZE_STARTOFFSET..], - seed, - )) - }); - - avalanche(acc.wrapping_add(mix_16bytes( - &data[len - 16..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..], - seed, - ))) -} - -/* ========================================== - * Long keys - * ========================================== */ - -const STRIPE_LEN: usize = 64; -const SECRET_CONSUME_RATE: usize = 8; // nb of secret bytes consumed at each accumulation -const SECRET_MERGEACCS_START: usize = 11; // do not align on 8, so that secret is different from accumulator -const SECRET_LASTACC_START: usize = 7; // do not align on 8, so that secret is different from 
scrambler -const ACC_NB: usize = STRIPE_LEN / mem::size_of::(); - -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) enum AccWidth { - Acc64Bits, - Acc128Bits, -} - -#[inline(always)] -fn hash_long_64bits_with_default_secret(data: &[u8], len: usize) -> u64 { - hash_long_internal(data, len, &SECRET) -} - -#[inline(always)] -fn hash_long_64bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u64 { - hash_long_internal(data, len, secret) -} - -/// Generate a custom key, based on alteration of default kSecret with the seed, -/// and then use this key for long mode hashing. -/// -/// This operation is decently fast but nonetheless costs a little bit of time. -/// Try to avoid it whenever possible (typically when `seed.is_none()`). -#[inline(always)] -fn hash_long_64bits_with_seed(data: &[u8], len: usize, seed: u64) -> u64 { - if seed == 0 { - hash_long_64bits_with_default_secret(data, len) - } else { - let secret = Secret::with_seed(seed); - - hash_long_internal(data, len, &secret) - } -} - -#[inline(always)] -fn hash_long_internal(data: &[u8], len: usize, secret: &[u8]) -> u64 { - let mut acc = Acc::default(); - - hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc64Bits); - - merge_accs( - &acc, - &secret[SECRET_MERGEACCS_START..], - (len as u64).wrapping_mul(PRIME64_1), - ) -} - -#[inline(always)] -fn hash_long_internal_loop( - acc: &mut [u64], - data: &[u8], - len: usize, - secret: &[u8], - acc_width: AccWidth, -) { - let secret_len = secret.len(); - let nb_rounds = (secret_len - STRIPE_LEN) / SECRET_CONSUME_RATE; - let block_len = STRIPE_LEN * nb_rounds; - - debug_assert!(secret_len >= SECRET_SIZE_MIN); - - let mut chunks = data.chunks_exact(block_len); - - for chunk in &mut chunks { - accumulate(acc, chunk, secret, nb_rounds, acc_width); - unsafe { - scramble_acc(acc, &secret[secret_len - STRIPE_LEN..]); - } - } - - /* last partial block */ - debug_assert!(len > STRIPE_LEN); - - let nb_stripes = (len % block_len) / STRIPE_LEN; - - 
debug_assert!(nb_stripes < (secret_len / SECRET_CONSUME_RATE)); - - accumulate(acc, chunks.remainder(), secret, nb_stripes, acc_width); - - /* last stripe */ - if (len & (STRIPE_LEN - 1)) != 0 { - unsafe { - accumulate512( - acc, - &data[len - STRIPE_LEN..], - &secret[secret_len - STRIPE_LEN - SECRET_LASTACC_START..], - acc_width, - ); - } - } -} - -#[inline(always)] -fn accumulate(acc: &mut [u64], data: &[u8], secret: &[u8], nb_stripes: usize, acc_width: AccWidth) { - for n in 0..nb_stripes { - unsafe { - accumulate512( - acc, - &data[n * STRIPE_LEN..], - &secret[n * SECRET_CONSUME_RATE..], - acc_width, - ); - } - } -} - -#[inline(always)] -const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 { - ((z << 6) | (y << 4) | (x << 2) | w) as i32 -} - -#[cfg(target_feature = "avx2")] -mod avx2 { - use super::*; - - #[target_feature(enable = "avx2")] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - keys: &[u8], - acc_width: AccWidth, - ) { - let xacc = acc.as_mut_ptr() as *mut __m256i; - let xdata = data.as_ptr() as *const __m256i; - let xkey = keys.as_ptr() as *const __m256i; - - for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() { - let d = _mm256_loadu_si256(xdata.add(i)); - let k = _mm256_loadu_si256(xkey.add(i)); - let dk = _mm256_xor_si256(d, k); // uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} - let mul = _mm256_mul_epu32(dk, _mm256_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} - - xacc.add(i).write(if acc_width == AccWidth::Acc128Bits { - let dswap = _mm256_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2)); - let add = _mm256_add_epi64(xacc.add(i).read(), dswap); - _mm256_add_epi64(mul, add) - } else { - let add = _mm256_add_epi64(xacc.add(i).read(), d); - _mm256_add_epi64(mul, add) - }) - } - } - - #[target_feature(enable = "avx2")] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { - let xacc = acc.as_mut_ptr() as *mut __m256i; - let xkey = key.as_ptr() as *const __m256i; - let prime32 = 
_mm256_set1_epi32(PRIME32_1 as i32); - - for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() { - let data = xacc.add(i).read(); - let shifted = _mm256_srli_epi64(data, 47); - let data = _mm256_xor_si256(data, shifted); - - let k = _mm256_loadu_si256(xkey.add(i)); - let dk = _mm256_xor_si256(data, k); /* U32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ - let dk1 = _mm256_mul_epu32(dk, prime32); - - let d2 = _mm256_shuffle_epi32(dk, 0x31); - let dk2 = _mm256_mul_epu32(d2, prime32); - let dk2h = _mm256_slli_epi64(dk2, 32); - - xacc.add(i).write(_mm256_add_epi64(dk1, dk2h)); - } - } -} - -#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))] -mod sse2 { - use super::*; - - #[target_feature(enable = "sse2")] - #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - keys: &[u8], - acc_width: AccWidth, - ) { - let xacc = acc.as_mut_ptr() as *mut __m128i; - let xdata = data.as_ptr() as *const __m128i; - let xkey = keys.as_ptr() as *const __m128i; - - for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() { - let d = _mm_loadu_si128(xdata.add(i)); - let k = _mm_loadu_si128(xkey.add(i)); - let dk = _mm_xor_si128(d, k); // uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ - let mul = _mm_mul_epu32(dk, _mm_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} */ - xacc.add(i).write(if acc_width == AccWidth::Acc128Bits { - let dswap = _mm_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2)); - let add = _mm_add_epi64(xacc.add(i).read(), dswap); - _mm_add_epi64(mul, add) - } else { - let add = _mm_add_epi64(xacc.add(i).read(), d); - _mm_add_epi64(mul, add) - }) - } - } - - #[target_feature(enable = "sse2")] - #[allow(clippy::cast_ptr_alignment)] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { - let xacc = acc.as_mut_ptr() as *mut __m128i; - let xkey = key.as_ptr() as *const __m128i; - let prime32 = _mm_set1_epi32(PRIME32_1 as i32); - - for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() { - let data = 
xacc.add(i).read(); - let shifted = _mm_srli_epi64(data, 47); - let data = _mm_xor_si128(data, shifted); - - let k = _mm_loadu_si128(xkey.add(i)); - let dk = _mm_xor_si128(data, k); - - let dk1 = _mm_mul_epu32(dk, prime32); - - let d2 = _mm_shuffle_epi32(dk, 0x31); - let dk2 = _mm_mul_epu32(d2, prime32); - let dk2h = _mm_slli_epi64(dk2, 32); - - xacc.add(i).write(_mm_add_epi64(dk1, dk2h)); - } - } -} - -#[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))] -mod generic { - use super::*; - - #[inline(always)] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - key: &[u8], - acc_width: AccWidth, - ) { - for i in (0..ACC_NB).step_by(2) { - let in1 = data[8 * i..].read_u64_le(); - let in2 = data[8 * (i + 1)..].read_u64_le(); - let key1 = key[8 * i..].read_u64_le(); - let key2 = key[8 * (i + 1)..].read_u64_le(); - let data_key1 = key1 ^ in1; - let data_key2 = key2 ^ in2; - acc[i] = acc[i].wrapping_add(mul32_to64(data_key1, data_key1 >> 32)); - acc[i + 1] = acc[i + 1].wrapping_add(mul32_to64(data_key2, data_key2 >> 32)); - - if acc_width == AccWidth::Acc128Bits { - acc[i] = acc[i].wrapping_add(in2); - acc[i + 1] = acc[i + 1].wrapping_add(in1); - } else { - acc[i] = acc[i].wrapping_add(in1); - acc[i + 1] = acc[i + 1].wrapping_add(in2); - } - } - } - - #[inline(always)] - fn mul32_to64(a: u64, b: u64) -> u64 { - (a & 0xFFFFFFFF).wrapping_mul(b & 0xFFFFFFFF) - } - - #[inline(always)] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { - for i in 0..ACC_NB { - let key64 = key[8 * i..].read_u64_le(); - let mut acc64 = acc[i]; - acc64 ^= acc64 >> 47; - acc64 ^= key64; - acc64 = acc64.wrapping_mul(u64::from(PRIME32_1)); - acc[i] = acc64; - } - } -} - -cfg_if! 
{ - if #[cfg(target_feature = "avx2")] { - use avx2::{accumulate512, scramble_acc}; - } else if #[cfg(target_feature = "sse2")] { - use sse2::{accumulate512, scramble_acc}; - } else { - use generic::{accumulate512, scramble_acc}; - } -} - -#[inline(always)] -fn merge_accs(acc: &[u64], secret: &[u8], start: u64) -> u64 { - avalanche( - start - .wrapping_add(mix2accs(acc, secret)) - .wrapping_add(mix2accs(&acc[2..], &secret[16..])) - .wrapping_add(mix2accs(&acc[4..], &secret[32..])) - .wrapping_add(mix2accs(&acc[6..], &secret[48..])), - ) -} - -#[inline(always)] -fn mix2accs(acc: &[u64], secret: &[u8]) -> u64 { - mul128_fold64( - acc[0] ^ secret.read_u64_le(), - acc[1] ^ secret[8..].read_u64_le(), - ) -} - -#[inline(always)] -fn mix_16bytes(data: &[u8], key: &[u8], seed: u64) -> u64 { - let ll1 = data.read_u64_le(); - let ll2 = data[8..].read_u64_le(); - - mul128_fold64( - ll1 ^ key.read_u64_le().wrapping_add(seed), - ll2 ^ key[8..].read_u64_le().wrapping_sub(seed), - ) -} - -#[inline(always)] -fn mul128_fold64(ll1: u64, ll2: u64) -> u64 { - let lll = u128::from(ll1).wrapping_mul(u128::from(ll2)); - - (lll as u64) ^ ((lll >> 64) as u64) -} - -#[inline(always)] -fn avalanche(mut h64: u64) -> u64 { - h64 ^= h64 >> 37; - h64 = h64.wrapping_mul(PRIME64_3); - h64 ^ (h64 >> 32) -} - -/* === XXH3 streaming === */ - -const INTERNAL_BUFFER_SIZE: usize = 256; -const INTERNAL_BUFFER_STRIPES: usize = INTERNAL_BUFFER_SIZE / STRIPE_LEN; - -const_assert!(INTERNAL_BUFFER_SIZE >= MIDSIZE_MAX); -const_assert_eq!(INTERNAL_BUFFER_SIZE % STRIPE_LEN, 0); - -#[repr(align(64))] -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Clone)] -struct State { - acc: Acc, - secret: With, - buf: Vec, - seed: u64, - total_len: usize, - nb_stripes_so_far: usize, -} - -#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] -#[derive(Clone)] -enum With { - Default(Secret), - Custom(Secret), - Ref(Vec), -} - -impl Deref for With { - type Target = [u8]; - - fn 
deref(&self) -> &Self::Target { - match self { - With::Default(secret) | With::Custom(secret) => &secret.0[..], - With::Ref(secret) => secret, - } - } -} - -impl Default for State { - fn default() -> Self { - Self::new(0, With::Default(Secret::default())) - } -} - -impl State { - fn new(seed: u64, secret: With) -> Self { - State { - acc: Acc::default(), - secret, - buf: Vec::with_capacity(INTERNAL_BUFFER_SIZE), - seed, - total_len: 0, - nb_stripes_so_far: 0, - } - } - - fn with_seed(seed: u64) -> Self { - Self::new(seed, With::Custom(Secret::with_seed(seed))) - } - - fn with_secret>>(secret: S) -> State { - let secret = secret.into(); - - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - Self::new(0, With::Ref(secret)) - } - - #[inline(always)] - fn secret_limit(&self) -> usize { - self.secret.len() - STRIPE_LEN - } - - #[inline(always)] - fn nb_stripes_per_block(&self) -> usize { - self.secret_limit() / SECRET_CONSUME_RATE - } - - #[inline(always)] - fn update(&mut self, mut input: &[u8], acc_width: AccWidth) { - let len = input.len(); - - if len == 0 { - return; - } - - self.total_len += len; - - if self.buf.len() + len <= self.buf.capacity() { - self.buf.extend_from_slice(input); - return; - } - - let nb_stripes_per_block = self.nb_stripes_per_block(); - let secret_limit = self.secret_limit(); - - if !self.buf.is_empty() { - // some data within internal buffer: fill then consume it - let (load, rest) = input.split_at(self.buf.capacity() - self.buf.len()); - self.buf.extend_from_slice(load); - input = rest; - self.nb_stripes_so_far = consume_stripes( - &mut self.acc, - self.nb_stripes_so_far, - nb_stripes_per_block, - &self.buf, - INTERNAL_BUFFER_STRIPES, - &self.secret, - secret_limit, - acc_width, - ); - self.buf.clear(); - } - - // consume input by full buffer quantities - let mut chunks = input.chunks_exact(INTERNAL_BUFFER_SIZE); - - for chunk in &mut chunks { - self.nb_stripes_so_far = consume_stripes( - &mut self.acc, - self.nb_stripes_so_far, - 
nb_stripes_per_block, - chunk, - INTERNAL_BUFFER_STRIPES, - &self.secret, - secret_limit, - acc_width, - ); - } - - // some remaining input data : buffer it - self.buf.extend_from_slice(chunks.remainder()) - } - - #[inline(always)] - fn digest_long(&self, acc_width: AccWidth) -> Acc { - let mut acc = self.acc.clone(); - let secret_limit = self.secret_limit(); - - if self.buf.len() >= STRIPE_LEN { - // digest locally, state remains unaltered, and can continue ingesting more data afterwards - let total_nb_stripes = self.buf.len() / STRIPE_LEN; - let _nb_stripes_so_far = consume_stripes( - &mut acc, - self.nb_stripes_so_far, - self.nb_stripes_per_block(), - &self.buf, - total_nb_stripes, - &self.secret, - secret_limit, - acc_width, - ); - if (self.buf.len() % STRIPE_LEN) != 0 { - unsafe { - accumulate512( - &mut acc, - &self.buf[self.buf.len() - STRIPE_LEN..], - &self.secret[secret_limit - SECRET_LASTACC_START..], - acc_width, - ); - } - } - } else if !self.buf.is_empty() { - // one last stripe - let mut last_stripe = [0u8; STRIPE_LEN]; - let catchup_size = STRIPE_LEN - self.buf.len(); - - last_stripe[..catchup_size].copy_from_slice(unsafe { - slice::from_raw_parts( - self.buf.as_ptr().add(self.buf.capacity() - catchup_size), - catchup_size, - ) - }); - last_stripe[catchup_size..].copy_from_slice(&self.buf); - - unsafe { - accumulate512( - &mut acc, - &last_stripe[..], - &self.secret[secret_limit - SECRET_LASTACC_START..], - acc_width, - ); - } - } - - acc - } - - #[inline(always)] - fn digest64(&self) -> u64 { - if self.total_len > MIDSIZE_MAX { - let acc = self.digest_long(AccWidth::Acc64Bits); - - merge_accs( - &acc, - &self.secret[SECRET_MERGEACCS_START..], - (self.total_len as u64).wrapping_mul(PRIME64_1), - ) - } else if self.seed != 0 { - hash64_with_seed(&self.buf, self.seed) - } else { - hash64_with_secret(&self.buf, &self.secret[..self.secret_limit() + STRIPE_LEN]) - } - } - - #[inline(always)] - fn digest128(&self) -> u128 { - let secret_limit = 
self.secret_limit(); - - if self.total_len > MIDSIZE_MAX { - let acc = self.digest_long(AccWidth::Acc128Bits); - - debug_assert!(secret_limit + STRIPE_LEN >= ACC_SIZE + SECRET_MERGEACCS_START); - - let total_len = self.total_len as u64; - - let low64 = merge_accs( - &acc, - &self.secret[SECRET_MERGEACCS_START..], - total_len.wrapping_mul(PRIME64_1), - ); - let high64 = merge_accs( - &acc, - &self.secret[secret_limit + STRIPE_LEN - ACC_SIZE - SECRET_MERGEACCS_START..], - !total_len.wrapping_mul(PRIME64_2), - ); - - u128::from(low64) + (u128::from(high64) << 64) - } else if self.seed != 0 { - hash128_with_seed(&self.buf, self.seed) - } else { - hash128_with_secret(&self.buf, &self.secret[..secret_limit + STRIPE_LEN]) - } - } -} - -#[inline(always)] -#[allow(clippy::too_many_arguments)] -fn consume_stripes( - acc: &mut [u64], - nb_stripes_so_far: usize, - nb_stripes_per_block: usize, - data: &[u8], - total_stripes: usize, - secret: &[u8], - secret_limit: usize, - acc_width: AccWidth, -) -> usize { - debug_assert!(nb_stripes_so_far < nb_stripes_per_block); - - if nb_stripes_per_block - nb_stripes_so_far <= total_stripes { - let nb_stripes = nb_stripes_per_block - nb_stripes_so_far; - - accumulate( - acc, - data, - &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..], - nb_stripes, - acc_width, - ); - unsafe { - scramble_acc(acc, &secret[secret_limit..]); - } - accumulate( - acc, - &data[nb_stripes * STRIPE_LEN..], - secret, - total_stripes - nb_stripes, - acc_width, - ); - - total_stripes - nb_stripes - } else { - accumulate( - acc, - data, - &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..], - total_stripes, - acc_width, - ); - - nb_stripes_so_far + total_stripes - } -} - -/* ========================================== - * XXH3 128 bits (=> XXH128) - * ========================================== */ - -#[inline(always)] -fn hash_len_0to16_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { - debug_assert!(len <= 16); - - if len > 8 { - 
hash_len_9to16_128bits(data, len, secret, seed) - } else if len >= 4 { - hash_len_4to8_128bits(data, len, secret, seed) - } else if len > 0 { - hash_len_1to3_128bits(data, len, secret, seed) - } else { - 0 - } -} - -#[inline(always)] -fn hash_len_1to3_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { - debug_assert!((1..=3).contains(&len)); - - let c1 = u32::from(data[0]); - let c2 = u32::from(data[len >> 1]); - let c3 = u32::from(data[len - 1]); - let combinedl = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24); - let combinedh = combinedl.swap_bytes(); - let keyedl = u64::from(combinedl) ^ u64::from(key.read_u32_le()).wrapping_add(seed); - let keyedh = u64::from(combinedh) ^ u64::from(key[4..].read_u32_le()).wrapping_sub(seed); - let mixedl = keyedl.wrapping_mul(PRIME64_1); - let mixedh = keyedh.wrapping_mul(PRIME64_2); - - u128::from(avalanche(mixedl)) + (u128::from(avalanche(mixedh)) << 64) -} - -#[inline(always)] -fn hash_len_4to8_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { - debug_assert!((4..=8).contains(&len)); - - let in1 = u64::from(data.read_u32_le()); - let in2 = u64::from(data[len - 4..].read_u32_le()); - let in64l = in1.wrapping_add(in2 << 32); - let in64h = in64l.swap_bytes(); - let keyedl = in64l ^ key.read_u64_le().wrapping_add(seed); - let keyedh = in64h ^ key[8..].read_u64_le().wrapping_sub(seed); - let mix64l1 = - (len as u64).wrapping_add((keyedl ^ (keyedl >> 51)).wrapping_mul(u64::from(PRIME32_1))); - let mix64l2 = (mix64l1 ^ (mix64l1 >> 47)).wrapping_mul(PRIME64_2); - let mix64h1 = (keyedh ^ (keyedh >> 47)) - .wrapping_mul(PRIME64_1) - .wrapping_sub(len as u64); - let mix64h2 = (mix64h1 ^ (mix64h1 >> 43)).wrapping_mul(PRIME64_4); - - u128::from(avalanche(mix64l2)) + (u128::from(avalanche(mix64h2)) << 64) -} - -#[inline(always)] -fn hash_len_9to16_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { - debug_assert!((9..=16).contains(&len)); - - let ll1 = data.read_u64_le() ^ 
key.read_u64_le().wrapping_add(seed); - let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed); - let inlow = ll1 ^ ll2; - - let m128 = u128::from(inlow).wrapping_mul(u128::from(PRIME64_1)); - let high64 = ((m128 >> 64) as u64).wrapping_add(ll2.wrapping_mul(PRIME64_1)); - let low64 = (m128 as u64) ^ (high64 >> 32); - - let h128 = u128::from(low64).wrapping_mul(u128::from(PRIME64_2)); - let high64 = ((h128 >> 64) as u64).wrapping_add(high64.wrapping_mul(PRIME64_2)); - let low64 = h128 as u64; - - u128::from(avalanche(low64)) + (u128::from(avalanche(high64)) << 64) -} - -#[inline(always)] -fn hash_len_17to128_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { - debug_assert!((17..=128).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let mut acc1 = PRIME64_1.wrapping_mul(len as u64); - let mut acc2 = 0u64; - - if len > 32 { - if len > 64 { - if len > 96 { - acc1 = acc1.wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed)); - } - acc1 = acc1.wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed)); - } - - acc1 = acc1.wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed)); - } - - acc1 = acc1.wrapping_add(mix_16bytes(data, secret, seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed)); - - let low64 = acc1.wrapping_add(acc2); - let high64 = acc1 - .wrapping_mul(PRIME64_1) - .wrapping_add(acc2.wrapping_mul(PRIME64_4)) - .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2)); - - u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64) -} - -#[inline(always)] -fn hash_len_129to240_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { - 
debug_assert!((129..=MIDSIZE_MAX).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let acc1 = (len as u64).wrapping_mul(PRIME64_1); - let acc2 = 0u64; - - let (acc1, acc2) = (0..4).fold((acc1, acc2), |(acc1, acc2), i| { - ( - acc1.wrapping_add(mix_16bytes(&data[32 * i..], &secret[32 * i..], seed)), - acc2.wrapping_add(mix_16bytes( - &data[32 * i + 16..], - &secret[32 * i + 16..], - 0u64.wrapping_sub(seed), - )), - ) - }); - let acc1 = avalanche(acc1); - let acc2 = avalanche(acc2); - - let nb_rounds = len / 32; - debug_assert!(nb_rounds >= 4); - - let (acc1, acc2) = (4..nb_rounds).fold((acc1, acc2), |(acc1, acc2), i| { - ( - acc1.wrapping_add(mix_16bytes( - &data[32 * i..], - &secret[32 * (i - 4) + MIDSIZE_STARTOFFSET..], - seed, - )), - acc2.wrapping_add(mix_16bytes( - &data[32 * i + 16..], - &secret[32 * (i - 4) + 16 + MIDSIZE_STARTOFFSET..], - 0u64.wrapping_sub(seed), - )), - ) - }); - - // last bytes - let acc1 = acc1.wrapping_add(mix_16bytes( - &data[len - 16..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..], - seed, - )); - let acc2 = acc2.wrapping_add(mix_16bytes( - &data[len - 32..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET - 16..], - 0u64.wrapping_sub(seed), - )); - - let low64 = acc1.wrapping_add(acc2); - let high64 = acc1 - .wrapping_mul(PRIME64_1) - .wrapping_add(acc2.wrapping_mul(PRIME64_4)) - .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2)); - - u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64) -} - -#[inline] -fn hash_long_128bits_with_default_secret(data: &[u8], len: usize) -> u128 { - hash_long_128bits_internal(data, len, &SECRET) -} - -#[inline] -fn hash_long_128bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u128 { - hash_long_128bits_internal(data, len, secret) -} - -#[inline] -fn hash_long_128bits_with_seed(data: &[u8], len: usize, seed: u64) -> u128 { - if seed == 0 { - hash_long_128bits_with_default_secret(data, len) - } else { - let secret 
= Secret::with_seed(seed); - - hash_long_128bits_internal(data, len, &secret) - } -} - -#[inline(always)] -fn hash_long_128bits_internal(data: &[u8], len: usize, secret: &[u8]) -> u128 { - let mut acc = Acc::default(); - - hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc128Bits); - - debug_assert!(secret.len() >= acc.len() + SECRET_MERGEACCS_START); - - let low64 = merge_accs( - &acc, - &secret[SECRET_MERGEACCS_START..], - (len as u64).wrapping_mul(PRIME64_1), - ); - let high64 = merge_accs( - &acc, - &secret[secret.len() - ACC_SIZE - SECRET_MERGEACCS_START..], - !(len as u64).wrapping_mul(PRIME64_2), - ); - - u128::from(low64) + (u128::from(high64) << 64) -} - -/* === XXH3 128-bit streaming === */ - -/* all the functions are actually the same as for 64-bit streaming variant, -just the reset one is different (different initial acc values for 0,5,6,7), -and near the end of the digest function */ - -#[cfg(test)] -mod tests { - use alloc::vec; - - use super::*; - - const PRIME: u64 = 2654435761; - const PRIME64: u64 = 11400714785074694797; - const SANITY_BUFFER_SIZE: usize = 2243; - - fn sanity_buffer() -> [u8; SANITY_BUFFER_SIZE] { - let mut buf = [0; SANITY_BUFFER_SIZE]; - let mut byte_gen: u64 = PRIME; - - for b in buf.iter_mut() { - *b = (byte_gen >> 56) as u8; - byte_gen = byte_gen.wrapping_mul(PRIME64); - } - - buf - } - - #[test] - fn hash_64bits_sanity_check() { - let buf = sanity_buffer(); - - let test_cases = vec![ - (&[][..], 0, 0), /* zero-length hash is always 0 */ - (&[][..], PRIME64, 0), - (&buf[..1], 0, 0x7198D737CFE7F386), /* 1 - 3 */ - (&buf[..1], PRIME64, 0xB70252DB7161C2BD), /* 1 - 3 */ - (&buf[..6], 0, 0x22CBF5F3E1F6257C), /* 4 - 8 */ - (&buf[..6], PRIME64, 0x6398631C12AB94CE), /* 4 - 8 */ - (&buf[..12], 0, 0xD5361CCEEBB5A0CC), /* 9 - 16 */ - (&buf[..12], PRIME64, 0xC4C125E75A808C3D), /* 9 - 16 */ - (&buf[..24], 0, 0x46796F3F78B20F6B), /* 17 - 32 */ - (&buf[..24], PRIME64, 0x60171A7CD0A44C10), /* 17 - 32 */ - (&buf[..48], 0, 
0xD8D4D3590D136E11), /* 33 - 64 */ - (&buf[..48], PRIME64, 0x05441F2AEC2A1296), /* 33 - 64 */ - (&buf[..80], 0, 0xA1DC8ADB3145B86A), /* 65 - 96 */ - (&buf[..80], PRIME64, 0xC9D55256965B7093), /* 65 - 96 */ - (&buf[..112], 0, 0xE43E5717A61D3759), /* 97 -128 */ - (&buf[..112], PRIME64, 0x5A5F89A3FECE44A5), /* 97 -128 */ - (&buf[..195], 0, 0x6F747739CBAC22A5), /* 129-240 */ - (&buf[..195], PRIME64, 0x33368E23C7F95810), /* 129-240 */ - (&buf[..403], 0, 0x4834389B15D981E8), /* one block, last stripe is overlapping */ - (&buf[..403], PRIME64, 0x85CE5DFFC7B07C87), /* one block, last stripe is overlapping */ - (&buf[..512], 0, 0x6A1B982631F059A8), /* one block, finishing at stripe boundary */ - (&buf[..512], PRIME64, 0x10086868CF0ADC99), /* one block, finishing at stripe boundary */ - (&buf[..2048], 0, 0xEFEFD4449323CDD4), /* 2 blocks, finishing at block boundary */ - (&buf[..2048], PRIME64, 0x01C85E405ECA3F6E), /* 2 blocks, finishing at block boundary */ - (&buf[..2240], 0, 0x998C0437486672C7), /* 3 blocks, finishing at stripe boundary */ - (&buf[..2240], PRIME64, 0x4ED38056B87ABC7F), /* 3 blocks, finishing at stripe boundary */ - (&buf[..2243], 0, 0xA559D20581D742D3), /* 3 blocks, last stripe is overlapping */ - (&buf[..2243], PRIME64, 0x96E051AB57F21FC8), /* 3 blocks, last stripe is overlapping */ - ]; - - for (buf, seed, result) in test_cases { - { - let hash = hash64_with_seed(buf, seed); - - assert_eq!( - hash, - result, - "hash64_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - // streaming API test - - // single ingestio - { - let mut hasher = Hash64::with_seed(seed); - hasher.write(buf); - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - if buf.len() > 3 { - // 2 ingestions - let mut hasher = Hash64::with_seed(seed); - hasher.write(&buf[..3]); - 
hasher.write(&buf[3..]); - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - // byte by byte ingestion - { - let mut hasher = Hash64::with_seed(seed); - - for chunk in buf.chunks(1) { - hasher.write(chunk); - } - - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - } - } - - #[test] - fn hash_64bits_with_secret_sanity_check() { - let buf = sanity_buffer(); - let secret = &buf[7..7 + SECRET_SIZE_MIN + 11]; - - let test_cases = vec![ - (&[][..], secret, 0), /* zero-length hash is always 0 */ - (&buf[..1], secret, 0x7F69735D618DB3F0), /* 1 - 3 */ - (&buf[..6], secret, 0xBFCC7CB1B3554DCE), /* 6 - 8 */ - (&buf[..12], secret, 0x8C50DC90AC9206FC), /* 9 - 16 */ - (&buf[..24], secret, 0x1CD2C2EE9B9A0928), /* 17 - 32 */ - (&buf[..48], secret, 0xA785256D9D65D514), /* 33 - 64 */ - (&buf[..80], secret, 0x6F3053360D21BBB7), /* 65 - 96 */ - (&buf[..112], secret, 0x560E82D25684154C), /* 97 -128 */ - (&buf[..195], secret, 0xBA5BDDBC5A767B11), /* 129-240 */ - (&buf[..403], secret, 0xFC3911BBA656DB58), /* one block, last stripe is overlapping */ - (&buf[..512], secret, 0x306137DD875741F1), /* one block, finishing at stripe boundary */ - (&buf[..2048], secret, 0x2836B83880AD3C0C), /* > one block, at least one scrambling */ - (&buf[..2243], secret, 0x3446E248A00CB44A), /* > one block, at least one scrambling, last stripe unaligned */ - ]; - - for (buf, secret, result) in test_cases { - { - let hash = hash64_with_secret(buf, secret); - - assert_eq!( - hash, - result, - "hash64_with_secret(&buf[..{}], secret) failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - hash, - result - ); - } - - // streaming API test - - // single ingestio - { - let mut hasher = Hash64::with_secret(secret); - 
hasher.write(buf); - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}]) with secret failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - hash, - result - ); - } - - // byte by byte ingestion - { - let mut hasher = Hash64::with_secret(secret); - - for chunk in buf.chunks(1) { - hasher.write(chunk); - } - - let hash = hasher.finish(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}].chunks(1)) with secret failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - hash, - result - ); - } - } - } - - #[test] - fn hash_128bits_sanity_check() { - let buf = sanity_buffer(); - - let test_cases = vec![ - (&[][..], 0, 0u64, 0u64), /* zero-length hash is { seed, -seed } by default */ - (&[][..], PRIME, 0, 0), - (&buf[..1], 0, 0x7198D737CFE7F386, 0x3EE70EA338F3F1E8), /* 1-3 */ - (&buf[..1], PRIME, 0x8E05996EC27C0F46, 0x90DFC659A8BDCC0C), /* 1-3 */ - (&buf[..6], 0, 0x22CBF5F3E1F6257C, 0xD4E6C2B94FFC3BFA), /* 4-8 */ - (&buf[..6], PRIME, 0x97B28D3079F8541F, 0xEFC0B954298E6555), /* 4-8 */ - (&buf[..12], 0, 0x0E0CD01F05AC2F0D, 0x2B55C95951070D4B), /* 9-16 */ - (&buf[..12], PRIME, 0xA9DE561CA04CDF37, 0x609E31FDC00A43C9), /* 9-16 */ - (&buf[..24], 0, 0x46796F3F78B20F6B, 0x58FF55C3926C13FA), /* 17-32 */ - (&buf[..24], PRIME, 0x30D5C4E9EB415C55, 0x8868344B3A4645D0), /* 17-32 */ - (&buf[..48], 0, 0xD8D4D3590D136E11, 0x5527A42843020A62), /* 33-64 */ - (&buf[..48], PRIME, 0x1D8834E1A5407A1C, 0x44375B9FB060F541), /* 33-64 */ - (&buf[..81], 0, 0x4B9B448ED8DFD3DD, 0xE805A6D1A43D70E5), /* 65-96 */ - (&buf[..81], PRIME, 0xD2D6B075945617BA, 0xE58BE5736F6E7550), /* 65-96 */ - (&buf[..103], 0, 0xC5A9F97B29EFA44E, 0x254DB7BE881E125C), /* 97-128 */ - (&buf[..103], PRIME, 0xFA2086367CDB177F, 0x0AEDEA68C988B0C0), /* 97-128 */ - (&buf[..192], 0, 0xC3142FDDD9102A3F, 0x06F1747E77185F97), /* 129-240 */ - (&buf[..192], PRIME, 0xA89F07B35987540F, 0xCF1B35FB2C557F54), /* 129-240 */ - (&buf[..222], 0, 0xA61AC4EB3295F86B, 0x33FA7B7598C28A07), /* 129-240 */ - 
(&buf[..222], PRIME, 0x54135EB88AD8B75E, 0xBC45CE6AE50BCF53), /* 129-240 */ - (&buf[..403], 0, 0xB0C48E6D18E9D084, 0xB16FC17E992FF45D), /* one block, last stripe is overlapping */ - (&buf[..403], PRIME64, 0x0A1D320C9520871D, 0xCE11CB376EC93252), /* one block, last stripe is overlapping */ - (&buf[..512], 0, 0xA03428558AC97327, 0x4ECF51281BA406F7), /* one block, finishing at stripe boundary */ - (&buf[..512], PRIME64, 0xAF67A482D6C893F2, 0x1382D92F25B84D90), /* one block, finishing at stripe boundary */ - (&buf[..2048], 0, 0x21901B416B3B9863, 0x212AF8E6326F01E0), /* two blocks, finishing at block boundary */ - (&buf[..2048], PRIME, 0xBDBB2282577DADEC, 0xF78CDDC2C9A9A692), /* two blocks, finishing at block boundary */ - (&buf[..2240], 0, 0x00AD52FA9385B6FE, 0xC705BAD3356CE302), /* two blocks, ends at stripe boundary */ - (&buf[..2240], PRIME, 0x10FD0072EC68BFAA, 0xE1312F3458817F15), /* two blocks, ends at stripe boundary */ - (&buf[..2237], 0, 0x970C91411533862C, 0x4BBD06FF7BFF0AB1), /* two blocks, ends at stripe boundary */ - (&buf[..2237], PRIME, 0xD80282846D814431, 0x14EBB157B84D9785), /* two blocks, ends at stripe boundary */ - ]; - - for (buf, seed, lo, hi) in test_cases { - let result = u128::from(lo) + (u128::from(hi) << 64); - - { - let hash = hash128_with_seed(buf, seed); - - assert_eq!( - hash, - result, - "hash128_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - // streaming API test - - // single ingestio - { - let mut hasher = Hash128::with_seed(seed); - hasher.write(buf); - let hash = hasher.finish_ext(); - - assert_eq!( - hash, - result, - "Hash128::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - if buf.len() > 3 { - // 2 ingestions - let mut hasher = Hash128::with_seed(seed); - hasher.write(&buf[..3]); - hasher.write(&buf[3..]); - let hash = hasher.finish_ext(); - - assert_eq!( - hash, - result, - 
"Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - - // byte by byte ingestion - { - let mut hasher = Hash128::with_seed(seed); - - for chunk in buf.chunks(1) { - hasher.write(chunk); - } - - let hash = hasher.finish_ext(); - - assert_eq!( - hash, - result, - "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}", - buf.len(), - seed, - hash, - result - ); - } - } - } -} diff --git a/renu/src/xxhash32.rs b/src/xxhash32.rs similarity index 100% rename from renu/src/xxhash32.rs rename to src/xxhash32.rs diff --git a/renu/src/xxhash3_64.rs b/src/xxhash3_64.rs similarity index 100% rename from renu/src/xxhash3_64.rs rename to src/xxhash3_64.rs diff --git a/renu/src/xxhash3_64/avx2.rs b/src/xxhash3_64/avx2.rs similarity index 100% rename from renu/src/xxhash3_64/avx2.rs rename to src/xxhash3_64/avx2.rs diff --git a/renu/src/xxhash3_64/neon.rs b/src/xxhash3_64/neon.rs similarity index 100% rename from renu/src/xxhash3_64/neon.rs rename to src/xxhash3_64/neon.rs diff --git a/renu/src/xxhash3_64/scalar.rs b/src/xxhash3_64/scalar.rs similarity index 100% rename from renu/src/xxhash3_64/scalar.rs rename to src/xxhash3_64/scalar.rs diff --git a/renu/src/xxhash3_64/secret.rs b/src/xxhash3_64/secret.rs similarity index 100% rename from renu/src/xxhash3_64/secret.rs rename to src/xxhash3_64/secret.rs diff --git a/renu/src/xxhash3_64/sse2.rs b/src/xxhash3_64/sse2.rs similarity index 100% rename from renu/src/xxhash3_64/sse2.rs rename to src/xxhash3_64/sse2.rs diff --git a/renu/src/xxhash64.rs b/src/xxhash64.rs similarity index 100% rename from renu/src/xxhash64.rs rename to src/xxhash64.rs diff --git a/renu/renu-sum/.gitignore b/twox-hash-sum/.gitignore similarity index 100% rename from renu/renu-sum/.gitignore rename to twox-hash-sum/.gitignore diff --git a/renu/renu-sum/Cargo.toml b/twox-hash-sum/Cargo.toml similarity index 79% rename from 
renu/renu-sum/Cargo.toml rename to twox-hash-sum/Cargo.toml index 9de3c49a9..a175a8119 100644 --- a/renu/renu-sum/Cargo.toml +++ b/twox-hash-sum/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "renu-sum" +name = "twox-hash-sum" version = "0.1.0" edition = "2021" diff --git a/renu/renu-sum/src/main.rs b/twox-hash-sum/src/main.rs similarity index 100% rename from renu/renu-sum/src/main.rs rename to twox-hash-sum/src/main.rs diff --git a/renu/xx_hash-sys/.gitignore b/xx_hash-sys/.gitignore similarity index 100% rename from renu/xx_hash-sys/.gitignore rename to xx_hash-sys/.gitignore diff --git a/renu/xx_hash-sys/Cargo.toml b/xx_hash-sys/Cargo.toml similarity index 100% rename from renu/xx_hash-sys/Cargo.toml rename to xx_hash-sys/Cargo.toml diff --git a/renu/xx_hash-sys/build.rs b/xx_hash-sys/build.rs similarity index 100% rename from renu/xx_hash-sys/build.rs rename to xx_hash-sys/build.rs diff --git a/renu/xx_hash-sys/src/lib.rs b/xx_hash-sys/src/lib.rs similarity index 100% rename from renu/xx_hash-sys/src/lib.rs rename to xx_hash-sys/src/lib.rs diff --git a/renu/xx_hash-sys/xxHash b/xx_hash-sys/xxHash similarity index 100% rename from renu/xx_hash-sys/xxHash rename to xx_hash-sys/xxHash From 19b73deaf2ee7061a28240d0e2574e314bbc58c8 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 9 Oct 2024 14:12:53 -0400 Subject: [PATCH 150/166] Restore tests to working condition when serialization is enabled Primarily, serde_json adds some inference failures. Some traits were missing and some other warnings had popped up. 
--- src/xxhash32.rs | 12 ++++++++---- src/xxhash3_64.rs | 14 ++++++++------ src/xxhash64.rs | 12 ++++++++---- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 097a4c129..67dca2cc5 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -389,6 +389,8 @@ mod test { use super::*; + const EMPTY_BYTES: [u8; 0] = []; + #[test] fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0; 32]; @@ -409,7 +411,7 @@ mod test { #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = Hasher::with_seed(0); - hasher.write(&[]); + hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0x02cc_5d05); } @@ -438,7 +440,7 @@ mod test { #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = Hasher::with_seed(0x42c9_1977); - hasher.write(&[]); + hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0xd6bf_8459); } @@ -628,6 +630,8 @@ mod serialize_impl { #[cfg(test)] mod test { + use std::hash::Hasher as _; + use super::*; type Result = core::result::Result; @@ -636,7 +640,7 @@ mod serialize_impl { fn test_serialization_cycle() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); - hasher.finish(); + let _ = hasher.finish(); let serialized = serde_json::to_string(&hasher)?; let unserialized: Hasher = serde_json::from_str(&serialized)?; @@ -648,7 +652,7 @@ mod serialize_impl { fn test_serialization_stability() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); - hasher.finish(); + let _ = hasher.finish(); let expected_serialized = r#"{ "total_len": 14, diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 70c84d337..5ca1a7e22 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -1327,6 +1327,8 @@ mod test { use super::*; + const EMPTY_BYTES: [u8; 0] = []; + #[test] fn default_secret_is_valid() { assert!(DEFAULT_SECRET.is_valid()) @@ -1377,13 +1379,13 @@ mod test { #[test] fn oneshot_empty() { - let hash 
= Hasher::oneshot(&[]); + let hash = Hasher::oneshot(&EMPTY_BYTES); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } #[test] fn streaming_empty() { - let hash = hash_byte_by_byte(&[]); + let hash = hash_byte_by_byte(&EMPTY_BYTES); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } @@ -1614,7 +1616,7 @@ mod test { let (a, b) = x.bp_as_chunks::<1>(); assert_eq!(a, &[[1], [2], [3], [4], [5]]); - assert_eq!(b, &[]); + assert_eq!(b, &[] as &[i32]); let (a, b) = x.bp_as_chunks::<2>(); assert_eq!(a, &[[1, 2], [3, 4]]); @@ -1630,7 +1632,7 @@ mod test { let (a, b) = x.bp_as_chunks::<5>(); assert_eq!(a, &[[1, 2, 3, 4, 5]]); - assert_eq!(b, &[]); + assert_eq!(b, &[] as &[i32]); let (a, b) = x.bp_as_chunks::<6>(); assert_eq!(a, &[] as &[[i32; 6]]); @@ -1642,7 +1644,7 @@ mod test { let x = [1, 2, 3, 4, 5]; let (a, b) = x.bp_as_rchunks::<1>(); - assert_eq!(a, &[]); + assert_eq!(a, &[] as &[i32]); assert_eq!(b, &[[1], [2], [3], [4], [5]]); let (a, b) = x.bp_as_rchunks::<2>(); @@ -1658,7 +1660,7 @@ mod test { assert_eq!(b, &[[2, 3, 4, 5]]); let (a, b) = x.bp_as_rchunks::<5>(); - assert_eq!(a, &[]); + assert_eq!(a, &[] as &[i32]); assert_eq!(b, &[[1, 2, 3, 4, 5]]); let (a, b) = x.bp_as_rchunks::<6>(); diff --git a/src/xxhash64.rs b/src/xxhash64.rs index f488e429f..58569cf84 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -400,6 +400,8 @@ mod test { use super::*; + const EMPTY_BYTES: [u8; 0] = []; + #[test] fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0x9c; 32]; @@ -420,7 +422,7 @@ mod test { #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = Hasher::with_seed(0); - hasher.write(&[]); + hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); } @@ -449,7 +451,7 @@ mod test { #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91); - hasher.write(&[]); + hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); } @@ -617,6 +619,8 @@ 
mod serialize_impl { #[cfg(test)] mod test { + use std::hash::Hasher as _; + use super::*; type Result = core::result::Result; @@ -625,7 +629,7 @@ mod serialize_impl { fn test_serialization_cycle() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); - hasher.finish(); + let _ = hasher.finish(); let serialized = serde_json::to_string(&hasher)?; let unserialized: Hasher = serde_json::from_str(&serialized)?; @@ -637,7 +641,7 @@ mod serialize_impl { fn test_serialization_stability() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); - hasher.finish(); + let _ = hasher.finish(); let expected_serialized = r#"{ "total_len": 14, From f480537a50aa4f07f74fea6e95e9c9926cd23d6b Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 9 Oct 2024 14:29:27 -0400 Subject: [PATCH 151/166] Move Box-specific trait impls behind the feature gate --- src/xxhash3_64.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 5ca1a7e22..c4a351b1c 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -182,12 +182,6 @@ unsafe impl FixedBuffer for &mut [u8; N] {} // Safety: An array will never change size. unsafe impl FixedMutBuffer for &mut [u8; N] {} -// Safety: A plain slice will never change size. -unsafe impl FixedBuffer for Box<[u8]> {} - -// Safety: A plain slice will never change size. -unsafe impl FixedMutBuffer for Box<[u8]> {} - /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. pub struct SecretBuffer { @@ -273,6 +267,12 @@ mod with_alloc { use super::*; + // Safety: A plain slice will never change size. + unsafe impl FixedBuffer for Box<[u8]> {} + + // Safety: A plain slice will never change size. + unsafe impl FixedMutBuffer for Box<[u8]> {} + impl Hasher { /// Constructs the hasher using the default seed and secret values. 
pub fn new() -> Self { From c0b5ca550cdc38c24063cfdbe297c05cc7b87742 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 9 Oct 2024 14:36:03 -0400 Subject: [PATCH 152/166] Don't warn when we use one of the implementation forcing cfgs --- src/xxhash3_64.rs | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index c4a351b1c..654ad0422 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -616,26 +616,30 @@ macro_rules! dispatch { #[cfg(_internal_xxhash3_force_scalar)] return do_scalar($($arg_name),*); - #[cfg(all(target_arch = "aarch64", feature = "std"))] + // This code can be unreachable if one of the `*_force_*` cfgs + // are set above, but that's the point. + #[allow(unreachable_code)] { - if std::arch::is_aarch64_feature_detected!("neon") { - // Safety: We just ensured we have the NEON feature - return unsafe { do_neon($($arg_name),*) }; + #[cfg(all(target_arch = "aarch64", feature = "std"))] + { + if std::arch::is_aarch64_feature_detected!("neon") { + // Safety: We just ensured we have the NEON feature + return unsafe { do_neon($($arg_name),*) }; + } } - } - #[cfg(all(target_arch = "x86_64", feature = "std"))] - { - if is_x86_feature_detected!("avx2") { - // Safety: We just ensured we have the AVX2 feature - return unsafe { do_avx2($($arg_name),*) }; - } else if is_x86_feature_detected!("sse2") { - // Safety: We just ensured we have the SSE2 feature - return unsafe { do_sse2($($arg_name),*) }; + #[cfg(all(target_arch = "x86_64", feature = "std"))] + { + if is_x86_feature_detected!("avx2") { + // Safety: We just ensured we have the AVX2 feature + return unsafe { do_avx2($($arg_name),*) }; + } else if is_x86_feature_detected!("sse2") { + // Safety: We just ensured we have the SSE2 feature + return unsafe { do_sse2($($arg_name),*) }; + } } + do_scalar($($arg_name),*) } - - do_scalar($($arg_name),*) }; } From 98e3aa6ff7d31ec1fc4a0136d6e0cbb5eab7689f Mon Sep 17 00:00:00 
2001 From: Jake Goulding Date: Wed, 9 Oct 2024 13:51:44 -0400 Subject: [PATCH 153/166] Update CI configuration --- .github/workflows/ci.yml | 124 ++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 66 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 566f1e2c2..cea7e7f56 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,106 +2,98 @@ on: push name: Continuous integration +env: + RUSTFLAGS: -D warnings + RUSTDOCFLAGS: -D warnings + jobs: library: - runs-on: ubuntu-latest strategy: matrix: + platform: + - ubuntu-latest + rust: - stable - beta - nightly - - 1.37.0 # MSRV + - 1.81.0 # MSRV - steps: - - uses: actions/checkout@v2 + include: + - platform: macos-latest # This serves as our aarch64 / arm64 runner + rust: stable - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: ${{ matrix.rust }} - override: true - components: rustfmt, clippy + - platform: windows-latest + rust: stable - - uses: actions-rs/cargo@v1 - with: - command: build + runs-on: ${{ matrix.platform }} - - uses: actions-rs/cargo@v1 - with: - command: test + steps: + - uses: actions/checkout@v4 - - uses: actions-rs/cargo@v1 - with: - command: test - args: --all-features + - run: git submodule update --init --recursive - - uses: actions-rs/cargo@v1 + - uses: dtolnay/rust-toolchain@master with: - command: fmt - args: --all -- --check - if: ${{ matrix.rust == 'stable' }} + toolchain: ${{ matrix.rust }} - - uses: actions-rs/cargo@v1 - with: - command: clippy - args: --all-features -- -D warnings - if: ${{ matrix.rust == 'stable' }} + - name: Unit Tests + run: cargo test --all-features - no-std: + - name: Property Tests + run: cargo test -p comparison --all-features + + miri: runs-on: ubuntu-latest + env: + MIRIFLAGS: --cfg _internal_xxhash3_force_scalar steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 + - uses: dtolnay/rust-toolchain@master with: - 
profile: minimal - toolchain: stable - target: thumbv6m-none-eabi - override: true + toolchain: nightly + components: miri - - uses: actions-rs/cargo@v1 - with: - command: build - args: --no-default-features --target thumbv6m-none-eabi --lib + - name: Unsafe Code + run: cargo miri test --all-features - compatibility-tests: + - name: Big Endian Platform + run: cargo miri test --all-features --target s390x-unknown-linux-gnu + + lints: runs-on: ubuntu-latest - strategy: - matrix: - test: - - digest_0_8 - - digest_0_9 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + + - run: git submodule update --init --recursive - - uses: actions-rs/toolchain@v1 + - uses: dtolnay/rust-toolchain@master with: - profile: minimal toolchain: stable - override: true + components: rustfmt, clippy - - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path "compatibility-tests/${{ matrix.test }}/Cargo.toml" + - run: cargo fmt --all + + - run: cargo clippy --all --all-targets --all-features - big_endian: + - run: cargo doc --all-features + + no-std: runs-on: ubuntu-latest + steps: - - name: Checkout code - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + - uses: dtolnay/rust-toolchain@master with: - platforms: s390x + toolchain: stable + targets: thumbv6m-none-eabi - - name: Cross test - uses: actions-rs/cargo@v1 - with: - use-cross: true - command: test - args: --target s390x-unknown-linux-gnu + - run: > + cargo build + --no-default-features + --features=xxhash32,xxhash64,xxhash3_64 + --target thumbv6m-none-eabi From b4ee2a5aea050ce3b45602ff0a902e6487a03698 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 16 Oct 2024 13:22:19 -0400 Subject: [PATCH 154/166] Ignore dead-code warnings for our integer conversion traits These are tiny and are used in a fun mix of the algorithms. Nothing much will be lost if we accidentally stop using them. 
--- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 2ee51fb45..970d8d8fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -99,6 +99,7 @@ pub mod xxhash3_64; #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub use xxhash3_64::Hasher as XxHash3_64; +#[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU32 { fn into_u32(self) -> u32; } @@ -109,6 +110,7 @@ impl IntoU32 for u8 { } } +#[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU64 { fn into_u64(self) -> u64; } @@ -132,6 +134,7 @@ impl IntoU64 for usize { } } +#[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU128 { fn into_u128(self) -> u128; } From cfab4eb3516ab69cdefe6d825cedb5a2cfa923e0 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 08:55:07 -0400 Subject: [PATCH 155/166] Place SIMD code behind the `std` feature We can only detect enabled features via the standard library's macros (for now), so gate them to avoid warnings. --- src/lib.rs | 2 +- src/xxhash3_64.rs | 6 +++--- src/xxhash3_64/avx2.rs | 1 + src/xxhash3_64/neon.rs | 1 + src/xxhash3_64/sse2.rs | 1 + 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 970d8d8fb..832950509 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,7 +69,7 @@ #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] -#[cfg(feature = "alloc")] +#[cfg(all(feature = "alloc", feature = "xxhash3_64"))] extern crate alloc; #[cfg(any(feature = "std", doc, test))] diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 654ad0422..7bd227ed7 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -559,7 +559,7 @@ macro_rules! dispatch { /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", feature = "std"))] unsafe fn do_neon<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? 
where $($wheres)* @@ -575,7 +575,7 @@ macro_rules! dispatch { /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] - #[cfg(target_arch = "x86_64")] + #[cfg(all(target_arch = "x86_64", feature = "std"))] unsafe fn do_avx2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? where $($wheres)* @@ -591,7 +591,7 @@ macro_rules! dispatch { /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] - #[cfg(target_arch = "x86_64")] + #[cfg(all(target_arch = "x86_64", feature = "std"))] unsafe fn do_sse2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? where $($wheres)* diff --git a/src/xxhash3_64/avx2.rs b/src/xxhash3_64/avx2.rs index 8cfb54f15..752d7aa77 100644 --- a/src/xxhash3_64/avx2.rs +++ b/src/xxhash3_64/avx2.rs @@ -10,6 +10,7 @@ impl Impl { /// /// You must ensure that the CPU has the AVX2 feature #[inline] + #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Impl { Impl(()) } diff --git a/src/xxhash3_64/neon.rs b/src/xxhash3_64/neon.rs index 372bca749..f86da1522 100644 --- a/src/xxhash3_64/neon.rs +++ b/src/xxhash3_64/neon.rs @@ -10,6 +10,7 @@ impl Impl { /// /// You must ensure that the CPU has the NEON feature #[inline] + #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Self { Self(()) } diff --git a/src/xxhash3_64/sse2.rs b/src/xxhash3_64/sse2.rs index 0290038e4..29a9c2ae9 100644 --- a/src/xxhash3_64/sse2.rs +++ b/src/xxhash3_64/sse2.rs @@ -10,6 +10,7 @@ impl Impl { /// /// You must ensure that the CPU has the SSE2 feature #[inline] + #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Impl { Impl(()) } From 36975a07379f1f63c6a9aedd67befa0e5c443d4e Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 16 Oct 2024 16:14:56 -0400 Subject: [PATCH 156/166] Implement `Clone` for the hashers and states --- src/xxhash32.rs | 22 ++++++++++++++++++---- src/xxhash3_64.rs | 8 ++++++++ src/xxhash64.rs | 21 +++++++++++++++++---- 3 files changed, 43 insertions(+), 8 
deletions(-) diff --git a/src/xxhash32.rs b/src/xxhash32.rs index 67dca2cc5..42ff7a19b 100644 --- a/src/xxhash32.rs +++ b/src/xxhash32.rs @@ -21,7 +21,7 @@ type Bytes = [u8; 16]; const BYTES_IN_LANE: usize = mem::size_of::(); -#[derive(PartialEq)] +#[derive(Clone, PartialEq)] struct BufferData(Lanes); impl BufferData { @@ -48,7 +48,7 @@ impl fmt::Debug for BufferData { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] struct Buffer { offset: usize, data: BufferData, @@ -126,7 +126,7 @@ impl Buffer { } } -#[derive(PartialEq)] +#[derive(Clone, PartialEq)] struct Accumulators(Lanes); impl Accumulators { @@ -199,7 +199,7 @@ impl fmt::Debug for Accumulators { /// Although this struct implements [`hash::Hasher`][], it only calculates a /// 32-bit number, leaving the upper bits as 0. This means it is /// unlikely to be correct to use this in places like a [`HashMap`][std::collections::HashMap]. -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct Hasher { seed: u32, accumulators: Accumulators, @@ -362,6 +362,7 @@ const fn round(mut acc: u32, lane: u32) -> u32 { /// Constructs [`Hasher`][] for multiple hasher instances. See /// the [usage warning][Hasher#caution]. +#[derive(Clone)] pub struct State(u32); impl State { @@ -389,6 +390,12 @@ mod test { use super::*; + const _TRAITS: () = { + const fn is_clone() {} + is_clone::(); + is_clone::(); + }; + const EMPTY_BYTES: [u8; 0] = []; #[test] @@ -505,6 +512,7 @@ mod random_impl { /// Constructs a randomized seed and reuses it for multiple hasher /// instances. See the [usage warning][Hasher#caution]. 
+ #[derive(Clone)] pub struct RandomState(State); impl Default for RandomState { @@ -533,6 +541,12 @@ mod random_impl { use super::*; + const _: () = { + const fn is_clone() {} + is_clone::(); + is_clone::(); + }; + #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 7bd227ed7..390c0ec65 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -68,6 +68,7 @@ const DEFAULT_SECRET_RAW: DefaultSecret = [ const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; /// Calculates the 64-bit hash. +#[derive(Clone)] pub struct Hasher { #[cfg(feature = "alloc")] inner: with_alloc::AllocRawHasher, @@ -184,6 +185,7 @@ unsafe impl FixedMutBuffer for &mut [u8; N] {} /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. +#[derive(Clone)] pub struct SecretBuffer { seed: u64, secret: S, @@ -516,6 +518,7 @@ impl StripeAccumulator { /// usages may desire more flexibility. This type, combined with /// [`SecretBuffer`][], offer that flexibility at the cost of a /// generic type. 
+#[derive(Clone)] pub struct RawHasher { secret_buffer: SecretBuffer, buffer_usage: usize, @@ -1331,6 +1334,11 @@ mod test { use super::*; + const _: () = { + const fn is_clone() {} + is_clone::(); + }; + const EMPTY_BYTES: [u8; 0] = []; #[test] diff --git a/src/xxhash64.rs b/src/xxhash64.rs index 58569cf84..d72d57dd8 100644 --- a/src/xxhash64.rs +++ b/src/xxhash64.rs @@ -21,7 +21,7 @@ type Bytes = [u8; 32]; const BYTES_IN_LANE: usize = mem::size_of::(); -#[derive(PartialEq)] +#[derive(Clone, PartialEq)] struct BufferData(Lanes); impl BufferData { @@ -48,7 +48,7 @@ impl fmt::Debug for BufferData { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] struct Buffer { offset: usize, data: BufferData, @@ -126,7 +126,7 @@ impl Buffer { } } -#[derive(PartialEq)] +#[derive(Clone, PartialEq)] struct Accumulators(Lanes); impl Accumulators { @@ -210,7 +210,7 @@ impl fmt::Debug for Accumulators { } /// Calculates the 64-bit hash. -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct Hasher { seed: u64, accumulators: Accumulators, @@ -373,6 +373,7 @@ const fn round(mut acc: u64, lane: u64) -> u64 { } /// Constructs [`Hasher`][] for multiple hasher instances. +#[derive(Clone)] pub struct State(u64); impl State { @@ -400,6 +401,12 @@ mod test { use super::*; + const _TRAITS: () = { + const fn is_clone() {} + is_clone::(); + is_clone::(); + }; + const EMPTY_BYTES: [u8; 0] = []; #[test] @@ -494,6 +501,7 @@ mod random_impl { /// Constructs a randomized seed and reuses it for multiple hasher /// instances. 
+ #[derive(Clone)] pub struct RandomState(State); impl Default for RandomState { @@ -522,6 +530,11 @@ mod random_impl { use super::*; + const _TRAITS: () = { + const fn is_clone() {} + is_clone::(); + }; + #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); From e039f1314a22602013ef0f86900ca9c77a92810a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 16 Oct 2024 17:32:43 -0400 Subject: [PATCH 157/166] CI: features --- .github/workflows/ci.yml | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cea7e7f56..59b165acd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,3 +97,48 @@ jobs: --no-default-features --features=xxhash32,xxhash64,xxhash3_64 --target thumbv6m-none-eabi + + features: + runs-on: ubuntu-latest + + env: + IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64 + FEATURE_SET: random serialize std alloc + + steps: + - uses: actions/checkout@v4 + + - run: git submodule update --init --recursive + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + + - name: Compute Powerset + shell: "ruby {0}" + run: | + features = ENV['FEATURE_SET'] + .split(' ') + .reduce([[]]) { |ps, i| ps + ps.map { |e| e + [i] } } + .map { |s| s.join(',') } + .join(" ") + + File.open(ENV['GITHUB_ENV'], 'a') { |f| f.write("FEATURES=#{features}") } + + - name: Check implementations with features + run: | + for impl in ${IMPLEMENTATIONS}; do + echo "::group::Implementation ${impl}" + + # Check the implementation by itself + cargo check --no-default-features --features="${impl}" + + # And with extra features + for feature in ${FEATURES}; do + echo "::group::Features ${feature}" + cargo check --no-default-features --features="${impl},${feature}" + echo "::endgroup::" + done + + echo ::endgroup:: + done From 0d1105f0c36d7529410382aff8defd46bb3e2fb1 Mon Sep 17 00:00:00 2001 From: Jake 
Goulding Date: Thu, 17 Oct 2024 12:33:04 -0400 Subject: [PATCH 158/166] Pin to xxHash 0.8.2 --- xx_hash-sys/xxHash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xx_hash-sys/xxHash b/xx_hash-sys/xxHash index 805c00b68..bbb27a5ef 160000 --- a/xx_hash-sys/xxHash +++ b/xx_hash-sys/xxHash @@ -1 +1 @@ -Subproject commit 805c00b68fa754200ada0c207ffeaa7a4409377c +Subproject commit bbb27a5efb85b92a0486cf361a8635715a53f6ba From 28f0d836b78a00b1e1804d45c5cb9d5d1300267a Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 12:59:41 -0400 Subject: [PATCH 159/166] Unify the README and crate documentation --- README.md | 140 ++++++++++++++++++++++++++--------------------------- src/lib.rs | 66 +------------------------ 2 files changed, 69 insertions(+), 137 deletions(-) diff --git a/README.md b/README.md index d8656f327..7d0b96f33 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,59 @@ -# TwoX-Hash +A Rust implementation of the [xxHash] algorithm. -A Rust implementation of the [XXHash] algorithm. 
+[![Crates.io][crates-badge]][crates-url] +[![Documentation][docs-badge]][docs-url] +[![Build Status][actions-badge]][actions-url] -[![Build Status](https://travis-ci.org/shepmaster/twox-hash.svg)](https://travis-ci.org/shepmaster/twox-hash) [![Current Version](https://img.shields.io/crates/v/twox-hash.svg)](https://crates.io/crates/twox-hash) +[xxHash]: https://github.com/Cyan4973/xxHash -[Documentation](https://docs.rs/twox-hash/) +[crates-badge]: https://img.shields.io/crates/v/twox-hash.svg +[crates-url]: https://crates.io/crates/twox-hash +[docs-badge]: https://img.shields.io/docsrs/twox-hash +[docs-url]: https://docs.rs/twox-hash/ +[actions-badge]: https://github.com/shepmaster/twox-hash/actions/workflows/ci.yml/badge.svg?branch=main +[actions-url]: https://github.com/shepmaster/twox-hash/actions/workflows/ci.yml?query=branch%3Amain -[XXHash]: https://github.com/Cyan4973/xxHash +# Examples -## Examples +These examples use [`XxHash64`](XxHash64) but the same ideas can be +used for [`XxHash32`](XxHash32) or [`XxHash3_64`](XxHash3_64). 
-### With a fixed seed +## Hashing arbitrary data + +### When all the data is available at once ```rust -use std::hash::BuildHasherDefault; -use std::collections::HashMap; use twox_hash::XxHash64; -let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); +let seed = 1234; +let hash = XxHash64::oneshot(seed, b"some bytes"); +assert_eq!(0xeab5_5659_a496_d78b, hash); +``` + +### When the data is streaming + +```rust +use std::hash::Hasher as _; +use twox_hash::XxHash64; + +let seed = 1234; +let mut hasher = XxHash64::with_seed(seed); +hasher.write(b"some"); +hasher.write(b" "); +hasher.write(b"bytes"); +let hash = hasher.finish(); +assert_eq!(0xeab5_5659_a496_d78b, hash); +``` + +## In a [`HashMap`](std::collections::HashMap) + +### With a default seed + +```rust +use std::{collections::HashMap, hash::BuildHasherDefault}; +use twox_hash::XxHash64; + +let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` @@ -26,73 +62,33 @@ assert_eq!(hash.get(&42), Some(&"the answer")); ```rust use std::collections::HashMap; -use twox_hash::RandomXxHashBuilder64; +use twox_hash::xxhash64; + +let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); +hash.insert(42, "the answer"); +assert_eq!(hash.get(&42), Some(&"the answer")); +``` + +### With a fixed seed + +```rust +use std::collections::HashMap; +use twox_hash::xxhash64; -let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); +let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` -## Benchmarks - -### 64-bit - -| Bytes | SipHasher (MB/s) | XXHash (MB/s) | Ratio | -|---------|------------------|---------------|-------| -| 1 | 52 | 38 | 73% | -| 4 | 210 | 148 | 70% | -| 16 | 615 | 615 | 100% | -| 32 | 914 | 1391 | 152% | -| 128 | 1347 | 3657 | 271% | -| 256 | 1414 | 5019 | 355% | -| 512 | 1546 | 
6168 | 399% | -| 1024 | 1565 | 6206 | 397% | -| 1048576 | 1592 | 7564 | 475% | - -| Bytes | [FnvHasher][fnv] (MB/s) | XXHash (MB/s) | Ratio | -|---------|-------------------------|---------------|-------| -| 1 | 1000 | 38 | 4% | -| 4 | 800 | 148 | 19% | -| 16 | 761 | 615 | 81% | -| 32 | 761 | 1391 | 183% | -| 128 | 727 | 3657 | 503% | -| 256 | 759 | 5019 | 661% | -| 512 | 745 | 6168 | 828% | -| 1024 | 741 | 6206 | 838% | -| 1048576 | 745 | 7564 | 1015% | - -### 32-bit - -| Bytes | SipHasher (MB/s) | XXHash32 (MB/s) | Ratio | -|---------|------------------|-----------------|-------| -| 1 | 52 | 55 | 106% | -| 4 | 210 | 210 | 100% | -| 16 | 615 | 1230 | 200% | -| 32 | 914 | 1882 | 206% | -| 128 | 1347 | 3282 | 244% | -| 256 | 1414 | 3459 | 245% | -| 512 | 1546 | 3792 | 245% | -| 1024 | 1565 | 3938 | 252% | -| 1048576 | 1592 | 4127 | 259% | - -| Bytes | [FnvHasher][fnv] (MB/s) | XXHash32 (MB/s) | Ratio | -|---------|-------------------------|-----------------|-------| -| 1 | 1000 | 55 | 6% | -| 4 | 800 | 210 | 26% | -| 16 | 761 | 1230 | 162% | -| 32 | 761 | 1882 | 247% | -| 128 | 727 | 3282 | 451% | -| 256 | 759 | 3459 | 456% | -| 512 | 745 | 3792 | 509% | -| 1024 | 741 | 3938 | 531% | -| 1048576 | 745 | 4127 | 554% | - - -[fnv]: https://github.com/servo/rust-fnv - -## Contributing - -1. Fork it ( https://github.com/shepmaster/twox-hash/fork ) +# Benchmarks + +See benchmarks in the [comparison][] README. + +[comparison]: https://github.com/shepmaster/twox-hash/tree/main/comparison + +# Contributing + +1. Fork it () 2. Create your feature branch (`git checkout -b my-new-feature`) 3. Add a failing test. 4. Add code to pass the test. diff --git a/src/lib.rs b/src/lib.rs index 832950509..156cafd29 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,68 +1,4 @@ -//! A Rust implementation of the [XXHash][] algorithm. -//! -//! [XXHash]: https://github.com/Cyan4973/xxHash -//! -//! ## Hashing arbitrary data -//! -//! 
### When all the data is available at once -//! -//! ```rust -//! use twox_hash::XxHash64; -//! -//! let seed = 1234; -//! let hash = XxHash64::oneshot(seed, b"some bytes"); -//! assert_eq!(0xeab5_5659_a496_d78b, hash); -//! ``` -//! -//! ### When the data is streaming -//! -//! ```rust -//! use std::hash::Hasher as _; -//! use twox_hash::XxHash64; -//! -//! let seed = 1234; -//! let mut hasher = XxHash64::with_seed(seed); -//! hasher.write(b"some"); -//! hasher.write(b" "); -//! hasher.write(b"bytes"); -//! let hash = hasher.finish(); -//! assert_eq!(0xeab5_5659_a496_d78b, hash); -//! ``` -//! -//! ## In a [`HashMap`](std::collections::HashMap) -//! -//! ### With a default seed -//! -//! ```rust -//! use std::{collections::HashMap, hash::BuildHasherDefault}; -//! use twox_hash::XxHash64; -//! -//! let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` -//! -//! ### With a random seed -//! -//! ```rust -//! use std::collections::HashMap; -//! use twox_hash::xxhash64; -//! -//! let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! ``` -//! -//! ### With a fixed seed -//! -//! ```rust -//! use std::collections::HashMap; -//! use twox_hash::xxhash64; -//! -//! let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); -//! hash.insert(42, "the answer"); -//! assert_eq!(hash.get(&42), Some(&"the answer")); -//! 
``` +#![doc = include_str!("../README.md")] #![deny(rust_2018_idioms)] #![deny(missing_docs)] From 45f21b028f88801af5af9b198f0c14108e784300 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 13:12:25 -0400 Subject: [PATCH 160/166] Document the feature flags --- README.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7d0b96f33..6939cf498 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,8 @@ A Rust implementation of the [xxHash] algorithm. # Examples -These examples use [`XxHash64`](XxHash64) but the same ideas can be -used for [`XxHash32`](XxHash32) or [`XxHash3_64`](XxHash3_64). +These examples use [`XxHash64`][] but the same ideas can be +used for [`XxHash32`][] or [`XxHash3_64`][]. ## Hashing arbitrary data @@ -45,7 +45,7 @@ let hash = hasher.finish(); assert_eq!(0xeab5_5659_a496_d78b, hash); ``` -## In a [`HashMap`](std::collections::HashMap) +## In a [`HashMap`][] ### With a default seed @@ -80,6 +80,18 @@ hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` +# Feature Flags + +| name | description | +|------------|---------------------------------------------------------------------------------------------------------| +| xxhash32 | Include the [`XxHash32`][] algorithm | +| xxhash64 | Include the [`XxHash64`][] algorithm | +| xxhash3_64 | Include the [`XxHash3_64`][] algorithm | +| random | Create random instances of the hashers | +| serialize | Serialize and deserialize hasher state with Serde | +| std | Use the Rust standard library. Enable this if you want SIMD support in [`XxHash3_64`][] | +| alloc | Use the Rust allocator library. Enable this if you want to create [`XxHash3_64`][] with dynamic secrets | + # Benchmarks See benchmarks in the [comparison][] README. @@ -96,3 +108,9 @@ See benchmarks in the [comparison][] README. 6. Ensure tests pass. 7. Push to the branch (`git push origin my-new-feature`) 8. 
Create a new Pull Request + + +[`Hashmap`]: std::collections::HashMap +[`XxHash32`]: crate::XxHash32 +[`XxHash64`]: crate::XxHash64 +[`XxHash3_64`]: crate::XxHash3_64 From 4c577a33d4957e79d6f5d9db38652562251edcc6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 13:53:35 -0400 Subject: [PATCH 161/166] Test for minimal dependency versions --- .github/workflows/ci.yml | 30 ++++++++++++++++++++++++++++++ Cargo.toml | 2 ++ 2 files changed, 32 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 59b165acd..f7806408b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -142,3 +142,33 @@ jobs: echo ::endgroup:: done + + minimal-versions: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - run: git submodule update --init --recursive + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: 1.81.0 # MSRV + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + + - name: Remove non-essential dependencies + run: | + # Remove workspace dependencies + sed -i '/\[workspace]/,/#END-\[workspace]/d' Cargo.toml + + # Remove dev-dependencies + sed -i '/\[dev-dependencies]/,/#END-\[dev-dependencies]/d' Cargo.toml + + - name: Downgrade to minimal dependencies + run: | + cargo +nightly -Z minimal-versions update + + - run: cargo +1.81.0 build --all-features diff --git a/Cargo.toml b/Cargo.toml index 426cd074e..cf7483bfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ members = [ "twox-hash-sum", "xx_hash-sys", ] +#END-[workspace] [features] default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"] @@ -52,6 +53,7 @@ serde = { version = "1.0.0", optional = true, default-features = false, features [dev-dependencies] serde_json = "1.0.117" +#END-[dev-dependencies] [package.metadata.docs.rs] all-features = true From 9bd194326fa76a9cd5f2ec8d48526c88640428fb Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 14:19:38 -0400 Subject: [PATCH 162/166] 
Don't create empty ranges in proptests --- comparison/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comparison/src/lib.rs b/comparison/src/lib.rs index 7701a8314..2e1c04aa9 100644 --- a/comparison/src/lib.rs +++ b/comparison/src/lib.rs @@ -366,7 +366,7 @@ mod xxhash3_64 { fn vec_and_index() -> impl Strategy<Value = (Vec<u8>, usize)> { prop::collection::vec(num::u8::ANY, 0..=32 * 1024).prop_flat_map(|vec| { let len = vec.len(); - (Just(vec), 0..len) + (Just(vec), 0..=len) }) } From 5ce7f4b071a7ad45dd1314320b6afbfe04302db9 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 14:21:01 -0400 Subject: [PATCH 163/166] Introduce a changelog --- CHANGELOG.md | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..6db2c9b23 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,85 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [2.0.0] - Unreleased + +[2.0.0]: https://github.com/shepmaster/twox-hash/tree/v2.0.0 + +This release is a complete rewrite of the crate, including +reorganization of the code. The XXH3 algorithm now matches the 0.8 +release of the reference C xxHash implementation. + +### Added + +- `XxHash32::oneshot` and `XxHash64::oneshot` can perform hashing with + zero allocation and generally improved performance. If you have code + that creates a hasher and hashes a slice of bytes exactly once, you + are strongly encouraged to use the new functions. 
This might look + like: + + ```rust + // Before + let mut hasher = XxHash64::new(); // or XxHash32, or with seeds + some_bytes.hash(&mut hasher); + let hash = hasher.finish(); + + // After + let hash = XxHash64::oneshot(some_bytes); + ``` + +- There is a feature flag for each hashing implementation. It is + recommended that you opt-out of the crate's default features and + only select the implementations you need to improve compile speed. + +### Changed + +- The crate's minimum supported Rust version (MSRV) is now 1.81. + +- Functional and performance comparisons are made against the + reference C xxHash library version 0.8.2, which includes a stable + XXH3 algorithm. + +- Support for randomly-generated hasher instances is now behind the + `random` feature flag. It was previously combined with the `std` + feature flag. + +### Removed + +- The deprecated type aliases `XxHash` and `RandomXxHashBuilder` have + been removed. Replace them with `XxHash64` and + `xxhash64::RandomState` respectively. + +- `RandomXxHashBuilder32` and `RandomXxHashBuilder64` are no longer + available at the top-level of the crate. Replace them with + `xxhash32::RandomState` and `xxhash64::RandomState` respectively. + +- `Xxh3Hash64` and `xx3::Hash64` have been renamed to `XxHash3_64` and + `xxhash3_64::Hasher` respectively. + +- The free functions `xxh3::hash64`, `xxh3::hash64_with_seed`, and + `xxh3::hash64_with_secret` are now associated functions of + `xxhash3_64::Hasher`: `oneshot`, `oneshot_with_seed` and + `oneshot_with_secret`. Note that the argument order has changed. + +- Support for the [digest][] crate has been removed. The digest crate + is for **cryptographic** hash functions and xxHash is + **non-cryptographic**. + +- `XxHash32` and `XxHash64` no longer implement `Copy`. This prevents + accidentally mutating a duplicate instance of the state instead of + the original state. `Clone` is still implemented so you can make + deliberate duplicates. 
+ +- The XXH3 128-bit variant is not yet re-written. Work is in progress + for this. + +- We no longer provide support for randomly-generated instances of the + XXH3 64-bit variant. The XXH3 algorithm takes both a seed and a + secret as input and deciding what to randomize is non-trivial and + can have negative impacts on performance. + +[digest]: https://docs.rs/digest/latest/digest/ From a635afe6fc4b1e4464940c5805b52bce48cb2e9d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 17 Oct 2024 14:33:34 -0400 Subject: [PATCH 164/166] Tweaks to get benchmarking running again after renaming --- comparison/benchmark.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/comparison/benchmark.sh b/comparison/benchmark.sh index 145cabbcd..af793ef55 100755 --- a/comparison/benchmark.sh +++ b/comparison/benchmark.sh @@ -11,7 +11,9 @@ function capture() { raw_data="${temp_dir}/raw-data.streaming-json" - cargo criterion -p compare --message-format=json -- "${subset}" > "${raw_data}" + echo "Benchmarking with $(rustc --version)" + + cargo criterion -p comparison --message-format=json -- "${subset}" > "${raw_data}" echo "Raw benchmark data captured to ${raw_data}" echo "Next, run \`${SCRIPT_INVOKED_AS} analyze ${raw_data}\`" @@ -50,7 +52,8 @@ case "${mode}" in ;; *) - echo "Unknown command '${mode}'" + echo "Unknown command '${mode}'" >&2 + echo "Known commands: capture, analyze" >&2 exit 1 ;; esac From 979e71b5f94d1fe01e62b3b70084993eed0fc9b6 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 18 Oct 2024 08:33:38 -0400 Subject: [PATCH 165/166] Update benchmarks for Rust 1.81 / xxHash 0.8.2 --- comparison/README.md | 24 +- .../results/xxhash3_64-streaming-aarch64.svg | 174 +++++++------- .../results/xxhash3_64-streaming-x86_64.svg | 216 +++++++++--------- .../results/xxhash3_64-tiny_data-aarch64.svg | 143 ++++++------ .../results/xxhash3_64-tiny_data-x86_64.svg | 175 +++++++------- .../results/xxhash64-streaming-aarch64.svg | 107 ++++----- 
.../results/xxhash64-streaming-x86_64.svg | 107 ++++----- .../results/xxhash64-tiny_data-aarch64.svg | 152 ++++++------ .../results/xxhash64-tiny_data-x86_64.svg | 150 ++++++------ 9 files changed, 624 insertions(+), 624 deletions(-) diff --git a/comparison/README.md b/comparison/README.md index efb868e2b..69a16a924 100644 --- a/comparison/README.md +++ b/comparison/README.md @@ -19,15 +19,15 @@ graphs are boring flat lines, so a table is used instead. | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 13.4 | -| C | 13.4 | +| Rust | 13.5 | +| C | 13.5 | ## x86_64 | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 15.7 | -| C | 15.8 | +| Rust | 16.5 | +| C | 16.5 | ## Streaming data @@ -88,20 +88,20 @@ graphs are boring flat lines, so a table is used instead. | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 34.8 | -| C | 34.8 | -| C (scalar) | 21.0 | -| C (NEON) | 34.7 | +| Rust | 35.2 | +| C | 35.0 | +| C (scalar) | 21.2 | +| C (NEON) | 35.1 | ### x86_64 | Implementation | Throughput (GiB/s) | |----------------|--------------------| -| Rust | 58.3 | +| Rust | 58.6 | | C | 25.0 | | C (scalar) | 7.5 | | C (SSE2) | 25.1 | -| C (AVX2) | 58.1 | +| C (AVX2) | 57.8 | ## Streaming data @@ -156,7 +156,7 @@ cluttering the graph and wasting benchmarking time. | CPU | Memory | C compiler | |-------------------|--------|--------------------| -| Apple M1 Max | 64 GiB | clang 15.0.0 | +| Apple M1 Max | 64 GiB | clang 16.0.0 | | AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 | Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. @@ -178,7 +178,7 @@ Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`. 
C compiler - Apple clang version 15.0.0 (clang-1500.3.9.4) + Apple clang version 16.0.0 (clang-1600.0.26.3) diff --git a/comparison/results/xxhash3_64-streaming-aarch64.svg b/comparison/results/xxhash3_64-streaming-aarch64.svg index 8e21742b4..dc46b30db 100644 --- a/comparison/results/xxhash3_64-streaming-aarch64.svg +++ b/comparison/results/xxhash3_64-streaming-aarch64.svg @@ -21,94 +21,94 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec diff --git a/comparison/results/xxhash3_64-streaming-x86_64.svg b/comparison/results/xxhash3_64-streaming-x86_64.svg index 38428ce59..6a3997ed9 100644 --- a/comparison/results/xxhash3_64-streaming-x86_64.svg +++ b/comparison/results/xxhash3_64-streaming-x86_64.svg @@ -22,115 +22,115 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 8 MiB/sec diff --git a/comparison/results/xxhash3_64-tiny_data-aarch64.svg b/comparison/results/xxhash3_64-tiny_data-aarch64.svg index 50e2a7f14..753a36cc5 100644 --- a/comparison/results/xxhash3_64-tiny_data-aarch64.svg +++ b/comparison/results/xxhash3_64-tiny_data-aarch64.svg @@ -20,81 +20,80 @@ - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + - 4ns - 8ns - 12ns - 16ns - 20ns - 24ns - + 4ns + 8ns + 12ns + 16ns + 20ns + 0 B 50 B 100 B diff --git a/comparison/results/xxhash3_64-tiny_data-x86_64.svg b/comparison/results/xxhash3_64-tiny_data-x86_64.svg index 671c7e0d7..81b4fc317 100644 --- a/comparison/results/xxhash3_64-tiny_data-x86_64.svg +++ b/comparison/results/xxhash3_64-tiny_data-x86_64.svg @@ -20,97 +20,96 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + - 4ns - 8ns - 12ns - 16ns - 20ns - 24ns - + 4ns + 8ns + 12ns + 16ns + 20ns + 0 B 50 B 100 B diff --git a/comparison/results/xxhash64-streaming-aarch64.svg b/comparison/results/xxhash64-streaming-aarch64.svg index effd1c847..1f58161a6 100644 --- a/comparison/results/xxhash64-streaming-aarch64.svg +++ b/comparison/results/xxhash64-streaming-aarch64.svg @@ -20,62 +20,63 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 256 MiB/sec - 512 MiB/sec - 1 GiB/sec - 2 GiB/sec - 4 GiB/sec - 8 GiB/sec - 16 GiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec 32 GiB/sec - + 1 B 2 B 4 B diff --git a/comparison/results/xxhash64-streaming-x86_64.svg b/comparison/results/xxhash64-streaming-x86_64.svg index 636c7eafc..064188466 100644 --- a/comparison/results/xxhash64-streaming-x86_64.svg +++ b/comparison/results/xxhash64-streaming-x86_64.svg @@ -20,62 +20,63 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 256 MiB/sec - 512 MiB/sec - 1 
GiB/sec - 2 GiB/sec - 4 GiB/sec - 8 GiB/sec - 16 GiB/sec + 128 MiB/sec + 256 MiB/sec + 512 MiB/sec + 1 GiB/sec + 2 GiB/sec + 4 GiB/sec + 8 GiB/sec + 16 GiB/sec 32 GiB/sec - + 1 B 2 B 4 B diff --git a/comparison/results/xxhash64-tiny_data-aarch64.svg b/comparison/results/xxhash64-tiny_data-aarch64.svg index d732a33eb..df15121a7 100644 --- a/comparison/results/xxhash64-tiny_data-aarch64.svg +++ b/comparison/results/xxhash64-tiny_data-aarch64.svg @@ -20,84 +20,84 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 0 - 2ns - 4ns - 6ns - 8ns - + 0 + 2ns + 4ns + 6ns + 8ns + 0 B 10 B 20 B diff --git a/comparison/results/xxhash64-tiny_data-x86_64.svg b/comparison/results/xxhash64-tiny_data-x86_64.svg index cdbb6396d..ca5185f8c 100644 --- a/comparison/results/xxhash64-tiny_data-x86_64.svg +++ b/comparison/results/xxhash64-tiny_data-x86_64.svg @@ -20,84 +20,84 @@ - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 0 - 2ns - 4ns - 6ns - 8ns - + 0 + 2ns + 4ns + 6ns + 8ns + 0 B 10 B 20 B From 6d4ffd4e846325ed3e51a05524747616a2d251e8 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 18 Oct 2024 09:01:27 -0400 Subject: [PATCH 166/166] Remove vestigial comment --- src/xxhash3_64.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/xxhash3_64.rs b/src/xxhash3_64.rs index 390c0ec65..336ba7aaf 100644 --- a/src/xxhash3_64.rs +++ b/src/xxhash3_64.rs @@ -250,8 +250,6 @@ impl SecretBuffer { impl SecretBuffer<&'static [u8; DEFAULT_SECRET_LENGTH]> { /// Use the default seed and secret values while allocating nothing. 
- /// - /// Note that this type may take up a surprising amount of stack space. #[inline] pub const fn default() -> Self { SecretBuffer {