Skip to content

Commit

Permalink
Faster hex encoding (#144)
Browse files Browse the repository at this point in the history
* faster hex encoding
  • Loading branch information
novacrazy authored Sep 13, 2023
1 parent 5aa4e27 commit 9b12a1a
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 21 deletions.
12 changes: 12 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,23 @@ typenum = { version = "1.16", features = ["const-generics"] }
const-default = { version = "1", optional = true, default-features = false }
serde = { version = "1.0", optional = true, default-features = false }
zeroize = { version = "1", optional = true, default-features = false }
faster-hex = { version = "0.8", optional = true, default-features = false }

[dev-dependencies]
# this can't yet be made optional, see https://github.com/rust-lang/cargo/issues/1596
serde_json = "1.0"
bincode = "1.0"
criterion = { version = "0.5", features = ["html_reports"] }
rand = "0.8"

[[bench]]
name = "hex"
harness = false

[profile.bench]
opt-level = 3
lto = 'fat'
codegen-units = 1

[package.metadata.docs.rs]
# all but "internals", don't show those on docs.rs
Expand Down
46 changes: 46 additions & 0 deletions benches/hex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use criterion::{
criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion,
};
use generic_array::{typenum::*, ArrayLength, GenericArray};
use rand::RngCore;

use std::{fmt::UpperHex, io::Write};

fn criterion_benchmark(c: &mut Criterion) {
let mut hex = c.benchmark_group("hex");

let mut rng = rand::thread_rng();

macro_rules! all_hex_benches {
($($len:ty,)*) => {
$(bench_hex::<$len>(&mut rng, &mut hex);)*
}
}

all_hex_benches!(
U1, U2, U4, U8, U12, U15, U16, U32, U64, U100, U128, U160, U255, U256, U500, U512, U900,
U1023, U1024, Sum<U1024, U1>, U2048, U4096, Prod<U1000, U5>, U10000,
);

hex.finish();
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

fn bench_hex<N: ArrayLength>(mut rng: impl RngCore, g: &mut BenchmarkGroup<'_, WallTime>)
where
GenericArray<u8, N>: UpperHex,
{
let mut fixture = Box::<GenericArray<u8, N>>::default();
rng.fill_bytes(fixture.as_mut_slice());

g.bench_function(format!("N{:08}", N::USIZE), |b| {
let mut out = Vec::with_capacity(N::USIZE * 2);

b.iter(|| {
_ = write!(out, "{:X}", &*fixture);
out.clear();
});
});
}
81 changes: 60 additions & 21 deletions src/hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,40 @@ use typenum::*;

use crate::{ArrayLength, GenericArray};

static LOWER_CHARS: [u8; 16] = *b"0123456789abcdef";
static UPPER_CHARS: [u8; 16] = *b"0123456789ABCDEF";
/// Portable hex encoder: writes two ASCII hex digits into `dst` for every
/// byte of `src`, high nibble first.
///
/// `UPPER` selects the alphabet at compile time (`A-F` when `true`, `a-f`
/// when `false`). Only the first `src.len() * 2` bytes of `dst` are written;
/// any remaining bytes are left untouched.
///
/// # Preconditions
///
/// `dst.len() >= src.len() * 2`. This is checked in debug builds only. In
/// release builds the condition is handed to the optimizer as a fact, so
/// violating it is undefined behavior even though the function is not
/// marked `unsafe`.
#[inline(always)]
fn hex_encode_fallback<const UPPER: bool>(src: &[u8], dst: &mut [u8]) {
    // Catch contract violations loudly in debug builds instead of silently
    // reaching the optimizer hint below.
    debug_assert!(
        dst.len() >= src.len() * 2,
        "hex_encode_fallback: dst must hold at least 2 * src.len() bytes"
    );

    if dst.len() < src.len() * 2 {
        // SAFETY: relies on the caller contract documented above
        // (`dst.len() >= src.len() * 2`); this branch must never be taken.
        // It exists only so the optimizer can assume the zip below is
        // bounded by `src`.
        unsafe { core::hint::unreachable_unchecked() };
    }

    // Fixed-size array reference, so indexing with a nibble (0..=15) needs
    // no bounds check.
    let alphabet: &[u8; 16] = match UPPER {
        true => b"0123456789ABCDEF",
        false => b"0123456789abcdef",
    };

    dst.chunks_exact_mut(2).zip(src).for_each(|(pair, &byte)| {
        pair[0] = alphabet[usize::from(byte >> 4)];
        pair[1] = alphabet[usize::from(byte & 0xF)];
    });
}

/// Encodes `src` as hex digits into `dst`, using the upper-case alphabet
/// when `UPPER` is `true` and the lower-case one otherwise.
///
/// The implementation is chosen at compile time: under Miri, or when the
/// `faster-hex` feature is disabled, it delegates to `hex_encode_fallback`;
/// otherwise it calls into the `faster_hex` crate.
///
/// Callers must provide `dst.len() >= src.len() * 2`; this is only asserted
/// in debug builds.
#[inline]
fn hex_encode<const UPPER: bool>(src: &[u8], dst: &mut [u8]) {
// Debug-only check of the length precondition; compiled out in release.
debug_assert!(dst.len() >= (src.len() * 2));

// NOTE(review): the next line is an unterminated header matching the
// pre-change signature of `generic_hex`; it looks like a removed line left
// over from the diff rendering rather than part of this function — confirm
// against the real file.
fn generic_hex<N: ArrayLength>(
// Portable path: used under Miri or when `faster-hex` is not enabled.
#[cfg(any(miri, not(feature = "faster-hex")))]
hex_encode_fallback::<UPPER>(src, dst);

// the `unwrap_unchecked` is to avoid the length checks
// NOTE(review): this is sound only if the `faster_hex` calls cannot return
// an error once `dst` is large enough — confirm against the faster-hex docs.
#[cfg(all(feature = "faster-hex", not(miri)))]
match UPPER {
true => unsafe { faster_hex::hex_encode_upper(src, dst).unwrap_unchecked() },
false => unsafe { faster_hex::hex_encode(src, dst).unwrap_unchecked() },
};
}

fn generic_hex<N: ArrayLength, const UPPER: bool>(
arr: &GenericArray<u8, N>,
alphabet: &[u8; 16], // use fixed-length array to avoid slice index checks
f: &mut fmt::Formatter<'_>,
) -> fmt::Result
where
Expand All @@ -36,32 +64,43 @@ where
_ => max_digits,
};

let max_hex = (max_digits >> 1) + (max_digits & 1);
// ceil(max_digits / 2)
let max_bytes = (max_digits >> 1) + (max_digits & 1);

let input = {
// LLVM can't seem to automatically prove this
if max_bytes > N::USIZE {
unsafe { core::hint::unreachable_unchecked() };
}

&arr[..max_bytes]
};

if N::USIZE <= 1024 {
// For small arrays use a stack allocated
// buffer of 2x number of bytes
let mut res = GenericArray::<u8, Sum<N, N>>::default();
// For small arrays use a stack allocated buffer of 2x number of bytes
let mut buf = GenericArray::<u8, Sum<N, N>>::default();

arr.iter().take(max_hex).enumerate().for_each(|(i, c)| {
res[i * 2] = alphabet[(c >> 4) as usize];
res[i * 2 + 1] = alphabet[(c & 0xF) as usize];
});
if N::USIZE < 16 {
// for the smallest inputs, don't bother limiting to max_bytes,
// just process the entire array. When "faster-hex" is enabled,
// this avoids its logic that winds up going to the fallback anyway
hex_encode_fallback::<UPPER>(arr, &mut buf);
} else {
hex_encode::<UPPER>(input, &mut buf);
}

f.write_str(unsafe { str::from_utf8_unchecked(&res[..max_digits]) })?;
f.write_str(unsafe { str::from_utf8_unchecked(buf.get_unchecked(..max_digits)) })?;
} else {
// For large array use chunks of up to 1024 bytes (2048 hex chars)
let mut buf = [0u8; 2048];
let mut digits_left = max_digits;

for chunk in arr[..max_hex].chunks(1024) {
chunk.iter().enumerate().for_each(|(i, c)| {
buf[i * 2] = alphabet[(c >> 4) as usize];
buf[i * 2 + 1] = alphabet[(c & 0xF) as usize];
});
for chunk in input.chunks(1024) {
hex_encode::<UPPER>(chunk, &mut buf);

let n = min(chunk.len() * 2, digits_left);
f.write_str(unsafe { str::from_utf8_unchecked(&buf[..n]) })?;
// SAFETY: n will always be within bounds due to the above min
f.write_str(unsafe { str::from_utf8_unchecked(buf.get_unchecked(..n)) })?;
digits_left -= n;
}
}
Expand All @@ -74,7 +113,7 @@ where
Sum<N, N>: ArrayLength,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
generic_hex(self, &LOWER_CHARS, f)
generic_hex::<_, false>(self, f)
}
}

Expand All @@ -84,6 +123,6 @@ where
Sum<N, N>: ArrayLength,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
generic_hex(self, &UPPER_CHARS, f)
generic_hex::<_, true>(self, f)
}
}

0 comments on commit 9b12a1a

Please sign in to comment.