Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deterministic HashSet #276

Merged
merged 3 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/parry-ci-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ jobs:
run: cd crates/parry3d; cargo build --verbose --features simd-stable;
- name: Check serialization
run: cargo check --features bytemuck-serialize,serde-serialize,rkyv-serialize;
- name: Check enhanced-determinism
run: cargo check --features enhanced-determinism
- name: Run tests
run: cargo test --features wavefront
build-wasm:
Expand Down
80 changes: 80 additions & 0 deletions src/utils/fx_hasher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//! FxHasher taken from rustc_hash, except that it does not depend on the pointer size.

/// Multiplier applied (with wrap-around) each time a word is folded into the hash.
const K: u32 = 0x9e3779b9;

/// This is the same as FxHasher, but with the guarantee that the internal hash is
/// a `u32` instead of something that depends on the platform.
///
/// The default value starts from a zero state.
#[derive(Clone, Debug, Default)]
pub struct FxHasher32 {
    /// Running hash state.
    hash: u32,
}

impl FxHasher32 {
    /// Folds one 32-bit word into the state: rotate the state left by 5 bits,
    /// xor it with `i`, then multiply by `K` with wrap-around.
    #[inline]
    fn add_to_hash(&mut self, i: u32) {
        self.hash = (self.hash.rotate_left(5) ^ i).wrapping_mul(K);
    }
}

impl std::hash::Hasher for FxHasher32 {
    /// Hashes an arbitrary byte slice.
    ///
    /// The bytes are consumed as 32-bit words, followed by at most one 16-bit
    /// and one 8-bit zero-extended remainder. Words are always decoded as
    /// little-endian so that the same bytes produce the same hash on every
    /// target, regardless of its native byte order.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let mut words = bytes.chunks_exact(4);
        for word in &mut words {
            self.add_to_hash(u32::from_le_bytes([word[0], word[1], word[2], word[3]]));
        }

        // At most 3 bytes are left once all the full words have been consumed.
        let mut tail = words.remainder();
        if tail.len() >= 2 {
            self.add_to_hash(u32::from(u16::from_le_bytes([tail[0], tail[1]])));
            tail = &tail[2..];
        }
        if let Some(&last) = tail.first() {
            self.add_to_hash(u32::from(last));
        }
    }

    /// Folds `i` into the hash as a single zero-extended word.
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(u32::from(i));
    }

    /// Folds `i` into the hash as a single zero-extended word.
    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(u32::from(i));
    }

    /// Folds `i` into the hash as a single word.
    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i);
    }

    /// Folds `i` into the hash as two words: the low 32 bits first, then the
    /// high 32 bits.
    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.add_to_hash(i as u32);
        self.add_to_hash((i >> 32) as u32);
    }

    /// Folds only the low 32 bits of `i` into the hash.
    ///
    /// The truncation means the result for a given value is the same whatever
    /// the pointer width is; values that differ only above bit 31 hash
    /// identically.
    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.add_to_hash(i as u32);
    }

    /// Returns the current 32-bit state, zero-extended to `u64`.
    #[inline]
    fn finish(&self) -> u64 {
        u64::from(self.hash)
    }
}
84 changes: 3 additions & 81 deletions src/utils/hashmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
use indexmap::IndexMap as StdHashMap;
#[cfg(all(not(feature = "enhanced-determinism"), feature = "serde-serialize"))]
use std::collections::HashMap as StdHashMap;
use std::mem::size_of;

/// Serializes only the capacity of a hash-map instead of its actual content.
#[cfg(feature = "serde-serialize")]
Expand Down Expand Up @@ -50,88 +49,11 @@ pub fn deserialize_hashmap_capacity<
/*
* FxHasher taken from rustc_hash, except that it does not depend on the pointer size.
*/
/// Deterministic hashmap using [`indexmap::IndexMap`]
#[cfg(feature = "enhanced-determinism")]
pub type FxHashMap32<K, V> = indexmap::IndexMap<K, V, std::hash::BuildHasherDefault<FxHasher32>>;
pub type FxHashMap32<K, V> =
indexmap::IndexMap<K, V, std::hash::BuildHasherDefault<super::fx_hasher::FxHasher32>>;
#[cfg(feature = "enhanced-determinism")]
pub use {self::FxHashMap32 as HashMap, indexmap::map::Entry};
#[cfg(not(feature = "enhanced-determinism"))]
pub use {rustc_hash::FxHashMap as HashMap, std::collections::hash_map::Entry};

/// Multiplier applied (with wrap-around) each time a word is folded into the hash.
const K: u32 = 0x9e3779b9;

/// This is the same as FxHasher, but with the guarantee that the internal hash is
/// a `u32` instead of something that depends on the platform.
///
/// The default value starts from a zero state.
#[derive(Clone, Debug, Default)]
pub struct FxHasher32 {
    /// Running hash state.
    hash: u32,
}

impl FxHasher32 {
    /// Folds one 32-bit word into the state: rotate the state left by 5 bits,
    /// xor it with `i`, then multiply by `K` with wrap-around.
    #[inline]
    fn add_to_hash(&mut self, i: u32) {
        self.hash = (self.hash.rotate_left(5) ^ i).wrapping_mul(K);
    }
}

impl std::hash::Hasher for FxHasher32 {
    /// Hashes an arbitrary byte slice.
    ///
    /// The bytes are consumed as 32-bit words, followed by at most one 16-bit
    /// and one 8-bit zero-extended remainder. Words are always decoded as
    /// little-endian so that the same bytes produce the same hash on every
    /// target, regardless of its native byte order.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let mut words = bytes.chunks_exact(4);
        for word in &mut words {
            self.add_to_hash(u32::from_le_bytes([word[0], word[1], word[2], word[3]]));
        }

        // At most 3 bytes are left once all the full words have been consumed.
        let mut tail = words.remainder();
        if tail.len() >= 2 {
            self.add_to_hash(u32::from(u16::from_le_bytes([tail[0], tail[1]])));
            tail = &tail[2..];
        }
        if let Some(&last) = tail.first() {
            self.add_to_hash(u32::from(last));
        }
    }

    /// Folds `i` into the hash as a single zero-extended word.
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(u32::from(i));
    }

    /// Folds `i` into the hash as a single zero-extended word.
    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(u32::from(i));
    }

    /// Folds `i` into the hash as a single word.
    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i);
    }

    /// Folds `i` into the hash as two words: the low 32 bits first, then the
    /// high 32 bits.
    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.add_to_hash(i as u32);
        self.add_to_hash((i >> 32) as u32);
    }

    /// Folds only the low 32 bits of `i` into the hash.
    ///
    /// The truncation means the result for a given value is the same whatever
    /// the pointer width is; values that differ only above bit 31 hash
    /// identically.
    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.add_to_hash(i as u32);
    }

    /// Returns the current 32-bit state, zero-extended to `u64`.
    #[inline]
    fn finish(&self) -> u64 {
        u64::from(self.hash)
    }
}
56 changes: 56 additions & 0 deletions src/utils/hashset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//! A hash-set that behaves deterministically when the
//! `enhanced-determinism` feature is enabled.

#[cfg(all(feature = "enhanced-determinism", feature = "serde-serialize"))]
use indexmap::IndexSet as StdHashSet;
#[cfg(all(not(feature = "enhanced-determinism"), feature = "serde-serialize"))]
use std::collections::HashSet as StdHashSet;

/// Serializes only the capacity of a hash-set instead of its actual content.
///
/// The elements of `set` are never visited: the only value handed to the
/// serializer `s` is `set.capacity()`, written as a single `u64`.
#[cfg(feature = "serde-serialize")]
pub fn serialize_hashset_capacity<S: serde::Serializer, K, H: std::hash::BuildHasher>(
    set: &StdHashSet<K, H>,
    s: S,
) -> Result<S::Ok, S::Error> {
    let capacity = set.capacity() as u64;
    serde::Serializer::serialize_u64(s, capacity)
}

/// Creates a new hash-set with its capacity deserialized from `d`.
///
/// A single `u64` is read from `d` and used as the capacity of the returned
/// set, which is built with `H`'s default hasher and contains no elements.
///
/// NOTE(review): the type parameter `V` appears neither in the arguments nor in
/// the return type, so it cannot be inferred and callers have to spell it out
/// explicitly. It looks like a leftover from the hash-map variant of this
/// function — confirm before removing it, since that would change the
/// signature.
///
/// # Errors
///
/// Returns the deserializer's error if no unsigned integer can be read, or a
/// custom error if the stored capacity does not fit in `usize` on the current
/// target (instead of silently truncating it).
#[cfg(feature = "serde-serialize")]
pub fn deserialize_hashset_capacity<
    'de,
    D: serde::Deserializer<'de>,
    K,
    V,
    H: std::hash::BuildHasher + Default,
>(
    d: D,
) -> Result<StdHashSet<K, H>, D::Error> {
    /// Visitor that accepts an unsigned integer and returns it unchanged.
    struct CapacityVisitor;
    impl<'de> serde::de::Visitor<'de> for CapacityVisitor {
        type Value = u64;

        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
            write!(formatter, "an integer between 0 and 2^64")
        }

        fn visit_u64<E: serde::de::Error>(self, val: u64) -> Result<Self::Value, E> {
            Ok(val)
        }
    }

    let raw_capacity = d.deserialize_u64(CapacityVisitor)?;
    // A capacity written on a 64-bit target may not be representable on a
    // 32-bit one; report that instead of wrapping around with an `as` cast.
    let capacity = usize::try_from(raw_capacity).map_err(|_| {
        <D::Error as serde::de::Error>::custom("hash-set capacity does not fit in usize")
    })?;
    Ok(StdHashSet::with_capacity_and_hasher(capacity, H::default()))
}

/// Deterministic hashset using [`indexmap::IndexSet`]
#[cfg(feature = "enhanced-determinism")]
pub type FxHashSet32<K> =
indexmap::IndexSet<K, std::hash::BuildHasherDefault<super::fx_hasher::FxHasher32>>;
#[cfg(feature = "enhanced-determinism")]
pub use self::FxHashSet32 as HashSet;
#[cfg(not(feature = "enhanced-determinism"))]
pub use rustc_hash::FxHashSet as HashSet;
4 changes: 4 additions & 0 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,14 @@ mod consts;
mod cov;
#[cfg(feature = "std")]
mod deterministic_state;
#[cfg(feature = "enhanced-determinism")]
mod fx_hasher;
mod hashable_partial_eq;
#[cfg(feature = "std")]
pub mod hashmap;
#[cfg(feature = "std")]
pub mod hashset;
#[cfg(feature = "std")]
mod interval;
mod inv;
mod isometry_ops;
Expand Down