From bceae1e83dcf7e3fde298343085896f7e7c840cf Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras
Date: Wed, 31 May 2023 22:47:15 +0100
Subject: [PATCH 1/4] Remove redundant `make_insert_hash` internal function

---
 src/map.rs         | 37 +++++++------------------------------
 src/rustc_entry.rs |  4 ++--
 2 files changed, 9 insertions(+), 32 deletions(-)

diff --git a/src/map.rs b/src/map.rs
index 9d374d060..27caef811 100644
--- a/src/map.rs
+++ b/src/map.rs
@@ -260,29 +260,6 @@ where
     hash_builder.hash_one(val)
 }
 
-#[cfg(not(feature = "nightly"))]
-#[cfg_attr(feature = "inline-more", inline)]
-pub(crate) fn make_insert_hash<K, S>(hash_builder: &S, val: &K) -> u64
-where
-    K: Hash,
-    S: BuildHasher,
-{
-    use core::hash::Hasher;
-    let mut state = hash_builder.build_hasher();
-    val.hash(&mut state);
-    state.finish()
-}
-
-#[cfg(feature = "nightly")]
-#[cfg_attr(feature = "inline-more", inline)]
-pub(crate) fn make_insert_hash<K, S>(hash_builder: &S, val: &K) -> u64
-where
-    K: Hash,
-    S: BuildHasher,
-{
-    hash_builder.hash_one(val)
-}
-
 #[cfg(feature = "ahash")]
 impl<K, V> HashMap<K, V, DefaultHashBuilder> {
     /// Creates an empty `HashMap`.
@@ -1262,7 +1239,7 @@ where
     /// ```
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S, A> {
-        let hash = make_insert_hash::<K, S>(&self.hash_builder, &key);
+        let hash = make_hash::<K, S>(&self.hash_builder, &key);
         if let Some(elem) = self.table.find(hash, equivalent_key(&key)) {
             Entry::Occupied(OccupiedEntry {
                 hash,
@@ -1782,7 +1759,7 @@ where
     /// ```
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn insert(&mut self, k: K, v: V) -> Option<V> {
-        let hash = make_insert_hash::<K, S>(&self.hash_builder, &k);
+        let hash = make_hash::<K, S>(&self.hash_builder, &k);
         let hasher = make_hasher::<_, V, S>(&self.hash_builder);
         match self
             .table
@@ -1849,7 +1826,7 @@ where
     /// ```
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn insert_unique_unchecked(&mut self, k: K, v: V) -> (&K, &mut V) {
-        let hash = make_insert_hash::<K, S>(&self.hash_builder, &k);
+        let hash = make_hash::<K, S>(&self.hash_builder, &k);
         let bucket = self
             .table
             .insert(hash, (k, v), make_hasher::<_, V, S>(&self.hash_builder));
@@ -4003,7 +3980,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> {
         K: Hash,
         S: BuildHasher,
     {
-        let hash = make_insert_hash::<K, S>(self.hash_builder, &key);
+        let hash = make_hash::<K, S>(self.hash_builder, &key);
         self.insert_hashed_nocheck(hash, key, value)
     }
 
@@ -4111,7 +4088,7 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> {
         K: Hash,
         S: BuildHasher,
     {
-        let hash = make_insert_hash::<K, S>(self.hash_builder, &key);
+        let hash = make_hash::<K, S>(self.hash_builder, &key);
         let elem = self.table.insert(
             hash,
             (key, value),
@@ -8194,7 +8171,7 @@ mod test_map {
         let mut map: HashMap<_, _> = xs.iter().copied().collect();
 
         let compute_hash = |map: &HashMap<i32, i32>, k: i32| -> u64 {
-            super::make_insert_hash::<i32, _>(map.hasher(), &k)
+            super::make_hash::<i32, _>(map.hasher(), &k)
         };
 
         // Existing key (insert)
@@ -8356,7 +8333,7 @@ mod test_map {
         loop {
             // occasionally remove some elements
             if i < n && rng.gen_bool(0.1) {
-                let hash_value = super::make_insert_hash(&hash_builder, &i);
+                let hash_value = super::make_hash(&hash_builder, &i);
 
                 unsafe {
                     let e = map.table.find(hash_value, |q| q.0.eq(&i));
diff --git a/src/rustc_entry.rs b/src/rustc_entry.rs
index ebb481e27..89447d27d 100644
--- a/src/rustc_entry.rs
+++ b/src/rustc_entry.rs
@@ -1,5 +1,5 @@
 use self::RustcEntry::*;
-use crate::map::{make_insert_hash, Drain, HashMap, IntoIter, Iter, IterMut};
+use crate::map::{make_hash, Drain, HashMap, IntoIter, Iter, IterMut};
 use crate::raw::{Allocator, Bucket, Global, RawTable};
 use core::fmt::{self, Debug};
 use core::hash::{BuildHasher, Hash};
@@ -32,7 +32,7 @@ where
     /// ```
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn rustc_entry(&mut self, key: K) -> RustcEntry<'_, K, V, A> {
-        let hash = make_insert_hash(&self.hash_builder, &key);
+        let hash = make_hash(&self.hash_builder, &key);
         if let Some(elem) = self.table.find(hash, |q| q.0.eq(&key)) {
             RustcEntry::Occupied(RustcOccupiedEntry {
                 key: Some(key),
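A note on why the helper is redundant: the stable fallback of `make_insert_hash` hashed by hand via `build_hasher`/`hash`/`finish`, which is exactly what `BuildHasher::hash_one` does, so every call site can go through `make_hash` instead. A minimal standalone sketch of that equivalence (not part of the patch; it assumes a toolchain where `hash_one` is stable, i.e. Rust 1.71+):

```rust
use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher, Hash, Hasher};

fn main() {
    let hash_builder = RandomState::new();
    let key = "example-key";

    // What the removed stable fallback did by hand:
    let mut state = hash_builder.build_hasher();
    key.hash(&mut state);
    let manual = state.finish();

    // What the surviving `make_hash` helper does in one call:
    let one_shot = hash_builder.hash_one(key);

    // The two paths agree, which is why a single helper suffices.
    assert_eq!(manual, one_shot);
}
```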
From 4d8c0594550459152100728fba6a7cfc4d0118ad Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras
Date: Wed, 31 May 2023 22:53:04 +0100
Subject: [PATCH 2/4] Enable `bumpalo/allocator-api2` in dev-dependencies for doc-tests

---
 Cargo.toml |  2 +-
 src/map.rs | 14 --------------
 2 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 768870529..8c29cfc5e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -36,7 +36,7 @@ rayon = "1.0"
 fnv = "1.0.7"
 serde_test = "1.0"
 doc-comment = "0.3.1"
-bumpalo = "3.6.0"
+bumpalo = { version = "3.13.0", features = ["allocator-api2"] }
 rkyv = { version = "0.7.42", features = ["validation"] }
 
 [features]
diff --git a/src/map.rs b/src/map.rs
index 27caef811..548ca0f9e 100644
--- a/src/map.rs
+++ b/src/map.rs
@@ -345,8 +345,6 @@ impl<K, V, A: Allocator + Clone> HashMap<K, V, DefaultHashBuilder, A> {
     /// # Examples
     ///
     /// ```
-    /// # #[cfg(feature = "nightly")]
-    /// # fn test() {
     /// use hashbrown::HashMap;
     /// use bumpalo::Bump;
     ///
@@ -365,11 +363,6 @@ impl<K, V, A: Allocator + Clone> HashMap<K, V, DefaultHashBuilder, A> {
     /// assert_eq!(map.len(), 1);
     /// // And it also allocates some capacity
     /// assert!(map.capacity() > 1);
-    /// # }
-    /// # fn main() {
-    /// #     #[cfg(feature = "nightly")]
-    /// #     test()
-    /// # }
     /// ```
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn new_in(alloc: A) -> Self {
@@ -396,8 +389,6 @@ impl<K, V, A: Allocator + Clone> HashMap<K, V, DefaultHashBuilder, A> {
     /// # Examples
     ///
     /// ```
-    /// # #[cfg(feature = "nightly")]
-    /// # fn test() {
     /// use hashbrown::HashMap;
     /// use bumpalo::Bump;
     ///
@@ -421,11 +412,6 @@ impl<K, V, A: Allocator + Clone> HashMap<K, V, DefaultHashBuilder, A> {
     /// assert_eq!(map.len(), 5);
     /// // But its capacity isn't changed
     /// assert_eq!(map.capacity(), empty_map_capacity)
-    /// # }
-    /// # fn main() {
-    /// #     #[cfg(feature = "nightly")]
-    /// #     test()
-    /// # }
     /// ```
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn with_capacity_in(capacity: usize, alloc: A) -> Self {
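For reference, this is the kind of stable-toolchain usage the un-gated doc-tests now cover: with bumpalo's `allocator-api2` feature enabled, `&Bump` implements the `Allocator` trait that hashbrown uses on stable, so the examples no longer need the `nightly` feature. A small downstream-style sketch (not part of the patch; the dependency versions in the comment are assumptions):

```rust
// Assumed downstream dependencies (illustrative only):
//   hashbrown = "0.14"
//   bumpalo   = { version = "3.13", features = ["allocator-api2"] }
use bumpalo::Bump;
use hashbrown::HashMap;

fn main() {
    // Create a bump arena and a map whose storage lives inside it.
    let bump = Bump::new();
    let mut map = HashMap::new_in(&bump);

    // The map behaves as usual; only the allocation source changes.
    map.insert("key", 1);
    assert_eq!(map.get("key"), Some(&1));
}
```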
From d677fd42df6978522045a9561242588ef970ed0c Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras
Date: Wed, 31 May 2023 23:10:59 +0100
Subject: [PATCH 3/4] Remove backup implementation of likely/unlikely that didn't work

---
 src/raw/mod.rs | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/src/raw/mod.rs b/src/raw/mod.rs
index eaebaf719..770823761 100644
--- a/src/raw/mod.rs
+++ b/src/raw/mod.rs
@@ -44,32 +44,12 @@ use self::imp::Group;
 
 // Branch prediction hint. This is currently only available on nightly but it
 // consistently improves performance by 10-15%.
-#[cfg(feature = "nightly")]
-use core::intrinsics::{likely, unlikely};
-
-// On stable we can use #[cold] to get a equivalent effect: this attributes
-// suggests that the function is unlikely to be called
 #[cfg(not(feature = "nightly"))]
-#[inline]
-#[cold]
-fn cold() {}
-
-#[cfg(not(feature = "nightly"))]
-#[inline]
-fn likely(b: bool) -> bool {
-    if !b {
-        cold();
-    }
-    b
-}
+use core::convert::identity as likely;
 #[cfg(not(feature = "nightly"))]
-#[inline]
-fn unlikely(b: bool) -> bool {
-    if b {
-        cold();
-    }
-    b
-}
+use core::convert::identity as unlikely;
+#[cfg(feature = "nightly")]
+use core::intrinsics::{likely, unlikely};
 
 // Use strict provenance functions if available.
 #[cfg(feature = "nightly")]
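The pattern the patch settles on is worth spelling out: on stable, `likely`/`unlikely` become plain aliases for `core::convert::identity`, so the hints cost nothing and simply compile away, while nightly builds keep the real intrinsics behind `cfg(feature = "nightly")`. A minimal sketch of the stable side (not part of the patch; the `search` function and its data are made up for illustration):

```rust
// On stable the hints are just `identity`, so call sites keep reading the same
// while the wrapper call optimizes away entirely.
use core::convert::identity as likely;
use core::convert::identity as unlikely;

fn search(haystack: &[u64], needle: u64) -> Option<usize> {
    for (i, &x) in haystack.iter().enumerate() {
        // With the nightly feature this would be `core::intrinsics::unlikely`.
        if unlikely(x == needle) {
            return Some(i);
        }
    }
    None
}

fn main() {
    let data = [3, 7, 42, 9];
    assert!(likely(search(&data, 42) == Some(2)));
    assert_eq!(search(&data, 1), None);
}
```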
From 9f20bd03377ed725ff125db1bcbabf8c11cd81c7 Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras
Date: Wed, 31 May 2023 23:08:49 +0100
Subject: [PATCH 4/4] Replace `intrinsics::cttz_nonzero` with `NonZero::trailing_zeros`

---
 src/raw/bitmask.rs | 37 ++++++++++++++++++-------------------
 src/raw/generic.rs | 30 ++++++++++++++++--------------
 src/raw/mod.rs     |  8 ++++----
 src/raw/neon.rs    |  2 ++
 src/raw/sse2.rs    |  2 ++
 5 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/src/raw/bitmask.rs b/src/raw/bitmask.rs
index 16af3d6d2..6576b3c5c 100644
--- a/src/raw/bitmask.rs
+++ b/src/raw/bitmask.rs
@@ -1,6 +1,6 @@
-use super::imp::{BitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE};
-#[cfg(feature = "nightly")]
-use core::intrinsics;
+use super::imp::{
+    BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
+};
 
 /// A bit mask which contains the result of a `Match` operation on a `Group` and
 /// allows iterating through them.
@@ -47,26 +47,13 @@ impl BitMask {
     /// Returns the first set bit in the `BitMask`, if there is one.
     #[inline]
    pub(crate) fn lowest_set_bit(self) -> Option<usize> {
-        if self.0 == 0 {
-            None
+        if let Some(nonzero) = NonZeroBitMaskWord::new(self.0) {
+            Some(Self::nonzero_trailing_zeros(nonzero))
         } else {
-            Some(unsafe { self.lowest_set_bit_nonzero() })
+            None
         }
     }
 
-    /// Returns the first set bit in the `BitMask`, if there is one. The
-    /// bitmask must not be empty.
-    #[inline]
-    #[cfg(feature = "nightly")]
-    pub(crate) unsafe fn lowest_set_bit_nonzero(self) -> usize {
-        intrinsics::cttz_nonzero(self.0) as usize / BITMASK_STRIDE
-    }
-    #[inline]
-    #[cfg(not(feature = "nightly"))]
-    pub(crate) unsafe fn lowest_set_bit_nonzero(self) -> usize {
-        self.trailing_zeros()
-    }
-
     /// Returns the number of trailing zeroes in the `BitMask`.
     #[inline]
     pub(crate) fn trailing_zeros(self) -> usize {
@@ -82,6 +69,18 @@ impl BitMask {
         }
     }
 
+    /// Same as above but takes a `NonZeroBitMaskWord`.
+    #[inline]
+    fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize {
+        if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 {
+            // SAFETY: A byte-swapped non-zero value is still non-zero.
+            let swapped = unsafe { NonZeroBitMaskWord::new_unchecked(nonzero.get().swap_bytes()) };
+            swapped.leading_zeros() as usize / BITMASK_STRIDE
+        } else {
+            nonzero.trailing_zeros() as usize / BITMASK_STRIDE
+        }
+    }
+
     /// Returns the number of leading zeroes in the `BitMask`.
     #[inline]
     pub(crate) fn leading_zeros(self) -> usize {
diff --git a/src/raw/generic.rs b/src/raw/generic.rs
index 97c6923cd..c668b0642 100644
--- a/src/raw/generic.rs
+++ b/src/raw/generic.rs
@@ -5,22 +5,24 @@ use core::{mem, ptr};
 
 // Use the native word size as the group size. Using a 64-bit group size on
 // a 32-bit architecture will just end up being more expensive because
 // shifts and multiplies will need to be emulated.
-#[cfg(any(
-    target_pointer_width = "64",
-    target_arch = "aarch64",
-    target_arch = "x86_64",
-    target_arch = "wasm32",
-))]
-type GroupWord = u64;
-#[cfg(all(
-    any(target_pointer_width = "32", target_pointer_width = "16"),
-    not(target_arch = "aarch64"),
-    not(target_arch = "x86_64"),
-    not(target_arch = "wasm32"),
-))]
-type GroupWord = u32;
+
+cfg_if! {
+    if #[cfg(any(
+        target_pointer_width = "64",
+        target_arch = "aarch64",
+        target_arch = "x86_64",
+        target_arch = "wasm32",
+    ))] {
+        type GroupWord = u64;
+        type NonZeroGroupWord = core::num::NonZeroU64;
+    } else {
+        type GroupWord = u32;
+        type NonZeroGroupWord = core::num::NonZeroU32;
+    }
+}
 pub(crate) type BitMaskWord = GroupWord;
+pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord;
 pub(crate) const BITMASK_STRIDE: usize = 8;
 // We only care about the highest bit of each byte for the mask.
 #[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)]
diff --git a/src/raw/mod.rs b/src/raw/mod.rs
index 770823761..1a6dced4b 100644
--- a/src/raw/mod.rs
+++ b/src/raw/mod.rs
@@ -1648,14 +1648,14 @@ impl RawTableInner {
             // we will never end up in the given branch, since
             // `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group` cannot
             // return a full bucket index. For tables smaller than the group width, calling the
-            // `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
+            // `unwrap_unchecked` function is also
             // safe, as the trailing control bytes outside the range of the table are filled
             // with EMPTY bytes, so this second scan either finds an empty slot (due to the
-            // load factor) or hits the trailing control bytes (containing EMPTY). See
-            // `intrinsics::cttz_nonzero` for more information.
+            // load factor) or hits the trailing control bytes (containing EMPTY).
             index = Group::load_aligned(self.ctrl(0))
                 .match_empty_or_deleted()
-                .lowest_set_bit_nonzero();
+                .lowest_set_bit()
+                .unwrap_unchecked();
         }
         InsertSlot { index }
     }
diff --git a/src/raw/neon.rs b/src/raw/neon.rs
index 732052ebb..44e82d57d 100644
--- a/src/raw/neon.rs
+++ b/src/raw/neon.rs
@@ -2,8 +2,10 @@ use super::bitmask::BitMask;
 use super::EMPTY;
 use core::arch::aarch64 as neon;
 use core::mem;
+use core::num::NonZeroU64;
 
 pub(crate) type BitMaskWord = u64;
+pub(crate) type NonZeroBitMaskWord = NonZeroU64;
 pub(crate) const BITMASK_STRIDE: usize = 8;
 pub(crate) const BITMASK_MASK: BitMaskWord = !0;
 pub(crate) const BITMASK_ITER_MASK: BitMaskWord = 0x8080_8080_8080_8080;
diff --git a/src/raw/sse2.rs b/src/raw/sse2.rs
index 436d15c35..956ba5d26 100644
--- a/src/raw/sse2.rs
+++ b/src/raw/sse2.rs
@@ -1,6 +1,7 @@
 use super::bitmask::BitMask;
 use super::EMPTY;
 use core::mem;
+use core::num::NonZeroU16;
 
 #[cfg(target_arch = "x86")]
 use core::arch::x86;
@@ -8,6 +9,7 @@ use core::arch::x86;
 use core::arch::x86_64 as x86;
 
 pub(crate) type BitMaskWord = u16;
+pub(crate) type NonZeroBitMaskWord = NonZeroU16;
 pub(crate) const BITMASK_STRIDE: usize = 1;
 pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff;
 pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
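The equivalence behind this last patch: for a non-zero word, `NonZero*::trailing_zeros` can compile down to the same count-trailing-zeros instruction that the `cttz_nonzero` intrinsic emitted, and wrapping the zero check in `NonZero*::new` keeps the whole thing safe on stable. A minimal sketch (not part of the patch; the mask value and a stride of 1 are made up for illustration):

```rust
use core::num::NonZeroU64;

fn lowest_set_bit(mask: u64) -> Option<usize> {
    // `NonZeroU64::new` performs the zero check that previously guarded the
    // unsafe `lowest_set_bit_nonzero` call; `trailing_zeros` on the NonZero
    // type lets the compiler assume the value is non-zero, as the intrinsic did.
    NonZeroU64::new(mask).map(|nonzero| nonzero.trailing_zeros() as usize)
}

fn main() {
    assert_eq!(lowest_set_bit(0b0000_1000), Some(3));
    assert_eq!(lowest_set_bit(0), None);
}
```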