Skip to content

Commit

Permalink
Deterministic HashSet (#276)
Browse files Browse the repository at this point in the history
  • Loading branch information
Vrixyz authored Nov 15, 2024
1 parent 4654845 commit 2314d28
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 81 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/parry-ci-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ jobs:
run: cd crates/parry3d; cargo build --verbose --features simd-stable;
- name: Check serialization
run: cargo check --features bytemuck-serialize,serde-serialize,rkyv-serialize;
- name: Check enhanced-determinism
run: cargo check --features enhanced-determinism
- name: Run tests
run: cargo test --features wavefront
build-wasm:
Expand Down
80 changes: 80 additions & 0 deletions src/utils/fx_hasher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//! FxHasher taken from rustc_hash, except that it does not depend on the pointer size.
/// Multiplier applied at every mixing step of [`FxHasher32`].
const K: u32 = 0x9e3779b9;

/// This is the same as FxHasher, but with the guarantee that the internal hash is
/// an u32 instead of something that depends on the platform.
///
/// The default value starts from an internal state of `0`.
#[derive(Clone, Debug, Default)]
pub struct FxHasher32 {
    hash: u32,
}

impl FxHasher32 {
    /// Mixes one 32-bit word into the state: rotate left by 5, xor, multiply by `K`.
    #[inline]
    fn add_to_hash(&mut self, word: u32) {
        self.hash = (self.hash.rotate_left(5) ^ word).wrapping_mul(K);
    }
}

impl std::hash::Hasher for FxHasher32 {
    /// Feeds an arbitrary byte string into the hasher.
    ///
    /// The bytes are consumed as 32-bit words, followed by at most one 16-bit
    /// word and at most one single byte for the tail. Words are always decoded
    /// as little-endian so that a given byte string produces the same hash
    /// whatever the endianness of the target.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let mut words = bytes.chunks_exact(4);
        for chunk in words.by_ref() {
            // `chunks_exact(4)` guarantees that every chunk holds exactly 4 bytes.
            let word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
            self.add_to_hash(word);
        }

        // At most 3 bytes are left at this point.
        let mut tail = words.remainder();
        if tail.len() >= 2 {
            self.add_to_hash(u32::from(u16::from_le_bytes([tail[0], tail[1]])));
            tail = &tail[2..];
        }
        if let Some(&last) = tail.first() {
            self.add_to_hash(u32::from(last));
        }
    }

    /// Mixes a single byte, zero-extended to 32 bits.
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(u32::from(i));
    }

    /// Mixes a 16-bit value, zero-extended to 32 bits.
    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(u32::from(i));
    }

    /// Mixes a 32-bit value as a single word.
    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i);
    }

    /// Mixes a 64-bit value as two words: the low 32 bits first, then the high 32 bits.
    #[inline]
    fn write_u64(&mut self, i: u64) {
        // Truncation is intended here: this keeps only the low half.
        self.add_to_hash(i as u32);
        self.add_to_hash((i >> 32) as u32);
    }

    /// Mixes only the low 32 bits of `i`.
    ///
    /// Any upper bits of a 64-bit `usize` are discarded, so a value that fits
    /// in 32 bits hashes identically whatever the pointer width.
    #[inline]
    fn write_usize(&mut self, i: usize) {
        // Truncation is intended here, see the doc comment above.
        self.add_to_hash(i as u32);
    }

    /// Returns the current 32-bit state widened to `u64` (the upper 32 bits are zero).
    #[inline]
    fn finish(&self) -> u64 {
        u64::from(self.hash)
    }
}
84 changes: 3 additions & 81 deletions src/utils/hashmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
use indexmap::IndexMap as StdHashMap;
#[cfg(all(not(feature = "enhanced-determinism"), feature = "serde-serialize"))]
use std::collections::HashMap as StdHashMap;
use std::mem::size_of;

/// Serializes only the capacity of a hash-map instead of its actual content.
#[cfg(feature = "serde-serialize")]
Expand Down Expand Up @@ -50,88 +49,11 @@ pub fn deserialize_hashmap_capacity<
/*
* FxHasher taken from rustc_hash, except that it does not depend on the pointer size.
*/
/// Deterministic hashmap using [`indexmap::IndexMap`]
// NOTE(review): two `FxHashMap32` definitions appear below under the same cfg; this
// looks like the old and the new line of a diff shown together. Only one of them can
// exist in the real file — presumably the one pointing at `super::fx_hasher` — confirm.
#[cfg(feature = "enhanced-determinism")]
pub type FxHashMap32<K, V> = indexmap::IndexMap<K, V, std::hash::BuildHasherDefault<FxHasher32>>;
pub type FxHashMap32<K, V> =
indexmap::IndexMap<K, V, std::hash::BuildHasherDefault<super::fx_hasher::FxHasher32>>;
// With `enhanced-determinism`, `HashMap` is the indexmap-backed alias above and `Entry`
// is indexmap's entry type.
#[cfg(feature = "enhanced-determinism")]
pub use {self::FxHashMap32 as HashMap, indexmap::map::Entry};
// Without `enhanced-determinism`, `HashMap` is `rustc_hash::FxHashMap` and `Entry` is
// the standard library's hash-map entry type.
#[cfg(not(feature = "enhanced-determinism"))]
pub use {rustc_hash::FxHashMap as HashMap, std::collections::hash_map::Entry};

/// Multiplier applied at every mixing step of [`FxHasher32`].
const K: u32 = 0x9e3779b9;

/// This is the same as FxHasher, but with the guarantee that the internal hash is
/// an u32 instead of something that depends on the platform.
///
/// The default value starts from an internal state of `0`.
#[derive(Clone, Debug, Default)]
pub struct FxHasher32 {
    hash: u32,
}

impl FxHasher32 {
    /// Mixes one 32-bit word into the state: rotate left by 5, xor, multiply by `K`.
    #[inline]
    fn add_to_hash(&mut self, word: u32) {
        self.hash = (self.hash.rotate_left(5) ^ word).wrapping_mul(K);
    }
}

impl std::hash::Hasher for FxHasher32 {
    /// Feeds an arbitrary byte string into the hasher.
    ///
    /// The bytes are consumed as 32-bit words, followed by at most one 16-bit
    /// word and at most one single byte for the tail. Words are always decoded
    /// as little-endian so that a given byte string produces the same hash
    /// whatever the endianness of the target.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let mut words = bytes.chunks_exact(4);
        for chunk in words.by_ref() {
            // `chunks_exact(4)` guarantees that every chunk holds exactly 4 bytes.
            let word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
            self.add_to_hash(word);
        }

        // At most 3 bytes are left at this point.
        let mut tail = words.remainder();
        if tail.len() >= 2 {
            self.add_to_hash(u32::from(u16::from_le_bytes([tail[0], tail[1]])));
            tail = &tail[2..];
        }
        if let Some(&last) = tail.first() {
            self.add_to_hash(u32::from(last));
        }
    }

    /// Mixes a single byte, zero-extended to 32 bits.
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(u32::from(i));
    }

    /// Mixes a 16-bit value, zero-extended to 32 bits.
    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(u32::from(i));
    }

    /// Mixes a 32-bit value as a single word.
    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i);
    }

    /// Mixes a 64-bit value as two words: the low 32 bits first, then the high 32 bits.
    #[inline]
    fn write_u64(&mut self, i: u64) {
        // Truncation is intended here: this keeps only the low half.
        self.add_to_hash(i as u32);
        self.add_to_hash((i >> 32) as u32);
    }

    /// Mixes only the low 32 bits of `i`.
    ///
    /// Any upper bits of a 64-bit `usize` are discarded, so a value that fits
    /// in 32 bits hashes identically whatever the pointer width.
    #[inline]
    fn write_usize(&mut self, i: usize) {
        // Truncation is intended here, see the doc comment above.
        self.add_to_hash(i as u32);
    }

    /// Returns the current 32-bit state widened to `u64` (the upper 32 bits are zero).
    #[inline]
    fn finish(&self) -> u64 {
        u64::from(self.hash)
    }
}
56 changes: 56 additions & 0 deletions src/utils/hashset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//! A hash-set that behaves deterministically when the
//! `enhanced-determinism` feature is enabled.
#[cfg(all(feature = "enhanced-determinism", feature = "serde-serialize"))]
use indexmap::IndexSet as StdHashSet;
#[cfg(all(not(feature = "enhanced-determinism"), feature = "serde-serialize"))]
use std::collections::HashSet as StdHashSet;

/// Writes the capacity of `set` as a single `u64` through the serializer `s`.
///
/// The elements of the set are not serialized; only `set.capacity()` is.
#[cfg(feature = "serde-serialize")]
pub fn serialize_hashset_capacity<S: serde::Serializer, K, H: std::hash::BuildHasher>(
    set: &StdHashSet<K, H>,
    s: S,
) -> Result<S::Ok, S::Error> {
    let capacity = set.capacity() as u64;
    s.serialize_u64(capacity)
}

/// Creates a new, empty hash-set with its capacity deserialized from `d`.
///
/// The deserializer is asked for a single `u64`, which is used as the capacity
/// of the returned set; no element is read.
///
/// # Errors
///
/// Returns the deserializer's error if no `u64` can be read, or a custom error
/// if the value does not fit in a `usize` on the current target (instead of
/// silently truncating it).
// NOTE(review): the type parameter `V` is not used anywhere in the signature, so it
// can never be inferred and callers have to name it explicitly. It looks like a
// left-over from the hash-map version; it is kept here only to avoid changing the
// generic parameter list — confirm whether it can be dropped.
#[cfg(feature = "serde-serialize")]
pub fn deserialize_hashset_capacity<
    'de,
    D: serde::Deserializer<'de>,
    K,
    V,
    H: std::hash::BuildHasher + Default,
>(
    d: D,
) -> Result<StdHashSet<K, H>, D::Error> {
    /// Visitor that accepts a `u64` and returns it unchanged.
    struct CapacityVisitor;
    impl<'de> serde::de::Visitor<'de> for CapacityVisitor {
        type Value = u64;

        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
            write!(formatter, "an integer between 0 and 2^64")
        }

        fn visit_u64<E: serde::de::Error>(self, val: u64) -> Result<Self::Value, E> {
            Ok(val)
        }
    }

    let raw_capacity = d.deserialize_u64(CapacityVisitor)?;
    // A plain `as usize` would wrap on targets where `usize` is narrower than 64 bits.
    let capacity =
        usize::try_from(raw_capacity).map_err(<D::Error as serde::de::Error>::custom)?;
    Ok(StdHashSet::with_capacity_and_hasher(capacity, H::default()))
}

/// Deterministic hashset using [`indexmap::IndexSet`]
///
/// The hasher is built with `BuildHasherDefault<FxHasher32>`.
#[cfg(feature = "enhanced-determinism")]
pub type FxHashSet32<K> =
indexmap::IndexSet<K, std::hash::BuildHasherDefault<super::fx_hasher::FxHasher32>>;
// With `enhanced-determinism`, `HashSet` is the indexmap-backed alias above.
#[cfg(feature = "enhanced-determinism")]
pub use self::FxHashSet32 as HashSet;
// Without `enhanced-determinism`, `HashSet` is `rustc_hash::FxHashSet`.
#[cfg(not(feature = "enhanced-determinism"))]
pub use rustc_hash::FxHashSet as HashSet;
4 changes: 4 additions & 0 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,14 @@ mod consts;
mod cov;
#[cfg(feature = "std")]
mod deterministic_state;
#[cfg(feature = "enhanced-determinism")]
mod fx_hasher;
mod hashable_partial_eq;
#[cfg(feature = "std")]
pub mod hashmap;
#[cfg(feature = "std")]
pub mod hashset;
#[cfg(feature = "std")]
mod interval;
mod inv;
mod isometry_ops;
Expand Down

0 comments on commit 2314d28

Please sign in to comment.