Replace intrinsics::cttz_nonzero with NonZero::trailing_zeros
Amanieu committed May 31, 2023
1 parent d677fd4 commit 9f20bd0
Showing 5 changed files with 42 additions and 37 deletions.
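For context: `intrinsics::cttz_nonzero` is a nightly-only intrinsic that counts trailing zeros under the assumption that its argument is non-zero. The stable replacement encodes that assumption in the type system instead: wrapping the word in a `NonZero` integer lets `trailing_zeros` lower to a single count-trailing-zeros instruction with no zero check. A minimal sketch of the pattern (standalone, not part of the diff):

    use core::num::NonZeroU64;

    // `NonZeroU64` carries the "never zero" invariant in the type, so
    // `trailing_zeros` needs no runtime check for the zero case.
    fn lowest_set_bit(word: u64) -> Option<usize> {
        NonZeroU64::new(word).map(|nz| nz.trailing_zeros() as usize)
    }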
37 changes: 18 additions & 19 deletions src/raw/bitmask.rs
@@ -1,6 +1,6 @@
-use super::imp::{BitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE};
-#[cfg(feature = "nightly")]
-use core::intrinsics;
+use super::imp::{
+    BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
+};
 
 /// A bit mask which contains the result of a `Match` operation on a `Group` and
 /// allows iterating through them.
@@ -47,26 +47,13 @@ impl BitMask {
     /// Returns the first set bit in the `BitMask`, if there is one.
     #[inline]
     pub(crate) fn lowest_set_bit(self) -> Option<usize> {
-        if self.0 == 0 {
-            None
+        if let Some(nonzero) = NonZeroBitMaskWord::new(self.0) {
+            Some(Self::nonzero_trailing_zeros(nonzero))
         } else {
-            Some(unsafe { self.lowest_set_bit_nonzero() })
+            None
         }
     }
 
-    /// Returns the first set bit in the `BitMask`, if there is one. The
-    /// bitmask must not be empty.
-    #[inline]
-    #[cfg(feature = "nightly")]
-    pub(crate) unsafe fn lowest_set_bit_nonzero(self) -> usize {
-        intrinsics::cttz_nonzero(self.0) as usize / BITMASK_STRIDE
-    }
-    #[inline]
-    #[cfg(not(feature = "nightly"))]
-    pub(crate) unsafe fn lowest_set_bit_nonzero(self) -> usize {
-        self.trailing_zeros()
-    }
-
     /// Returns the number of trailing zeroes in the `BitMask`.
     #[inline]
     pub(crate) fn trailing_zeros(self) -> usize {
@@ -82,6 +69,18 @@ impl BitMask {
         }
     }
 
+    /// Same as above but takes a `NonZeroBitMaskWord`.
+    #[inline]
+    fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize {
+        if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 {
+            // SAFETY: A byte-swapped non-zero value is still non-zero.
+            let swapped = unsafe { NonZeroBitMaskWord::new_unchecked(nonzero.get().swap_bytes()) };
+            swapped.leading_zeros() as usize / BITMASK_STRIDE
+        } else {
+            nonzero.trailing_zeros() as usize / BITMASK_STRIDE
+        }
+    }
+
     /// Returns the number of leading zeroes in the `BitMask`.
     #[inline]
     pub(crate) fn leading_zeros(self) -> usize {
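A note on the `target_arch = "arm"` branch added above: 32-bit ARM has a fast CLZ instruction but, on older revisions, no direct count-trailing-zeros, so when each group slot occupies a whole byte the code byte-swaps the word and counts leading zeros instead, which yields the same byte index. A self-contained check of that equivalence (my own sketch, assuming a byte-granular mask as in the generic and NEON backends):

    use core::num::NonZeroU64;

    const BITMASK_STRIDE: usize = 8; // one byte per slot, as in generic/neon

    // Index of the lowest non-zero byte, computed via CLZ of the
    // byte-swapped word rather than CTZ of the original.
    fn lowest_byte_via_clz(nz: NonZeroU64) -> usize {
        // SAFETY: a byte-swapped non-zero value is still non-zero.
        let swapped = unsafe { NonZeroU64::new_unchecked(nz.get().swap_bytes()) };
        swapped.leading_zeros() as usize / BITMASK_STRIDE
    }

    fn main() {
        for i in 0..64 {
            let nz = NonZeroU64::new(1u64 << i).unwrap();
            assert_eq!(
                lowest_byte_via_clz(nz),
                nz.trailing_zeros() as usize / BITMASK_STRIDE,
            );
        }
    }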
30 changes: 16 additions & 14 deletions src/raw/generic.rs
@@ -5,22 +5,24 @@ use core::{mem, ptr};
 // Use the native word size as the group size. Using a 64-bit group size on
 // a 32-bit architecture will just end up being more expensive because
 // shifts and multiplies will need to be emulated.
-#[cfg(any(
-    target_pointer_width = "64",
-    target_arch = "aarch64",
-    target_arch = "x86_64",
-    target_arch = "wasm32",
-))]
-type GroupWord = u64;
-#[cfg(all(
-    any(target_pointer_width = "32", target_pointer_width = "16"),
-    not(target_arch = "aarch64"),
-    not(target_arch = "x86_64"),
-    not(target_arch = "wasm32"),
-))]
-type GroupWord = u32;
+cfg_if! {
+    if #[cfg(any(
+        target_pointer_width = "64",
+        target_arch = "aarch64",
+        target_arch = "x86_64",
+        target_arch = "wasm32",
+    ))] {
+        type GroupWord = u64;
+        type NonZeroGroupWord = core::num::NonZeroU64;
+    } else {
+        type GroupWord = u32;
+        type NonZeroGroupWord = core::num::NonZeroU32;
+    }
+}
 
 pub(crate) type BitMaskWord = GroupWord;
+pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord;
 pub(crate) const BITMASK_STRIDE: usize = 8;
 // We only care about the highest bit of each byte for the mask.
 #[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)]
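The move to `cfg_if!` (from the `cfg-if` crate, which hashbrown already uses for backend selection) keeps each `GroupWord` paired with its `NonZero` counterpart under a single condition, rather than maintaining two mirrored `#[cfg]` attribute stacks that could drift apart. The shape of the pattern in isolation (a sketch with a simplified condition):

    use cfg_if::cfg_if;

    cfg_if! {
        if #[cfg(target_pointer_width = "64")] {
            // 64-bit targets: 8-byte group word plus its non-zero twin.
            type GroupWord = u64;
            type NonZeroGroupWord = core::num::NonZeroU64;
        } else {
            // Narrower targets fall back to 32-bit words.
            type GroupWord = u32;
            type NonZeroGroupWord = core::num::NonZeroU32;
        }
    }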
8 changes: 4 additions & 4 deletions src/raw/mod.rs
@@ -1648,14 +1648,14 @@ impl<A: Allocator + Clone> RawTableInner<A> {
             // we will never end up in the given branch, since
             // `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group` cannot
             // return a full bucket index. For tables smaller than the group width, calling the
-            // `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
+            // `unwrap_unchecked` function is also
             // safe, as the trailing control bytes outside the range of the table are filled
             // with EMPTY bytes, so this second scan either finds an empty slot (due to the
-            // load factor) or hits the trailing control bytes (containing EMPTY). See
-            // `intrinsics::cttz_nonzero` for more information.
+            // load factor) or hits the trailing control bytes (containing EMPTY).
             index = Group::load_aligned(self.ctrl(0))
                 .match_empty_or_deleted()
-                .lowest_set_bit_nonzero();
+                .lowest_set_bit()
+                .unwrap_unchecked();
         }
         InsertSlot { index }
     }
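With `lowest_set_bit_nonzero` gone, this caller keeps the branch-free codegen through `Option::unwrap_unchecked`, which is undefined behavior on `None` and therefore lets the optimizer delete the zero check, the same effect the intrinsic had. The shape of that pattern (a standalone sketch, not hashbrown's API):

    use core::num::NonZeroU16;

    fn lowest_set_bit(mask: u16) -> Option<usize> {
        NonZeroU16::new(mask).map(|nz| nz.trailing_zeros() as usize)
    }

    /// # Safety
    /// `mask` must have at least one bit set.
    unsafe fn lowest_set_bit_unchecked(mask: u16) -> usize {
        // UB on `None`, so the compiler may assume `Some` and emit a
        // bare count-trailing-zeros, as `cttz_nonzero` did.
        lowest_set_bit(mask).unwrap_unchecked()
    }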
2 changes: 2 additions & 0 deletions src/raw/neon.rs
@@ -2,8 +2,10 @@ use super::bitmask::BitMask;
 use super::EMPTY;
 use core::arch::aarch64 as neon;
 use core::mem;
+use core::num::NonZeroU64;
 
 pub(crate) type BitMaskWord = u64;
+pub(crate) type NonZeroBitMaskWord = NonZeroU64;
 pub(crate) const BITMASK_STRIDE: usize = 8;
 pub(crate) const BITMASK_MASK: BitMaskWord = !0;
 pub(crate) const BITMASK_ITER_MASK: BitMaskWord = 0x8080_8080_8080_8080;
2 changes: 2 additions & 0 deletions src/raw/sse2.rs
@@ -1,13 +1,15 @@
 use super::bitmask::BitMask;
 use super::EMPTY;
 use core::mem;
+use core::num::NonZeroU16;
 
 #[cfg(target_arch = "x86")]
 use core::arch::x86;
 #[cfg(target_arch = "x86_64")]
 use core::arch::x86_64 as x86;
 
 pub(crate) type BitMaskWord = u16;
+pub(crate) type NonZeroBitMaskWord = NonZeroU16;
 pub(crate) const BITMASK_STRIDE: usize = 1;
 pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff;
 pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
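For orientation on the per-backend widths: SSE2's `_mm_movemask_epi8` packs one bit per lane of a 16-byte group into a `u16`, hence `NonZeroU16` and `BITMASK_STRIDE = 1`, while the NEON and generic backends keep one byte per slot in a `u64` (stride 8). A sketch of how such a mask word is produced (assuming x86_64, where SSE2 is baseline; `match_byte` here is an illustrative name, not the crate's exact signature):

    #[cfg(target_arch = "x86_64")]
    fn match_byte(group: core::arch::x86_64::__m128i, byte: u8) -> u16 {
        use core::arch::x86_64::{_mm_cmpeq_epi8, _mm_movemask_epi8, _mm_set1_epi8};
        // SAFETY: SSE2 is part of the x86_64 baseline.
        unsafe {
            let cmp = _mm_cmpeq_epi8(group, _mm_set1_epi8(byte as i8));
            // One bit per byte lane: bit i is set iff lane i matched.
            _mm_movemask_epi8(cmp) as u16
        }
    }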
