-
Notifications
You must be signed in to change notification settings - Fork 356
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of #3674 - TDecking:bmi, r=RalfJung
Implement LLVM x86 bmi intrinsics. This implements the intrinsics for both the bmi1 and bmi2 ISA extensions. All of these intrinsics live inside the same namespace as far as LLVM is concerned, which is why it is arguably better to bundle the implementations of these two extensions.
- Loading branch information
Showing
3 changed files
with
330 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
use rustc_span::Symbol; | ||
use rustc_target::spec::abi::Abi; | ||
|
||
use crate::*; | ||
|
||
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} | ||
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { | ||
fn emulate_x86_bmi_intrinsic( | ||
&mut self, | ||
link_name: Symbol, | ||
abi: Abi, | ||
args: &[OpTy<'tcx>], | ||
dest: &MPlaceTy<'tcx>, | ||
) -> InterpResult<'tcx, EmulateItemResult> { | ||
let this = self.eval_context_mut(); | ||
|
||
// Prefix should have already been checked. | ||
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap(); | ||
|
||
// The intrinsics are suffixed with the bit size of their operands. | ||
let (is_64_bit, unprefixed_name) = if unprefixed_name.ends_with("64") { | ||
(true, unprefixed_name.strip_suffix(".64").unwrap_or("")) | ||
} else { | ||
(false, unprefixed_name.strip_suffix(".32").unwrap_or("")) | ||
}; | ||
|
||
// All intrinsics of the "bmi" namespace belong to the "bmi2" ISA extension. | ||
// The exception is "bextr", which belongs to "bmi1". | ||
let target_feature = if unprefixed_name == "bextr" { "bmi1" } else { "bmi2" }; | ||
this.expect_target_feature_for_intrinsic(link_name, target_feature)?; | ||
|
||
if is_64_bit && this.tcx.sess.target.arch != "x86_64" { | ||
return Ok(EmulateItemResult::NotSupported); | ||
} | ||
|
||
let [left, right] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; | ||
let left = this.read_scalar(left)?; | ||
let right = this.read_scalar(right)?; | ||
|
||
let left = if is_64_bit { left.to_u64()? } else { u64::from(left.to_u32()?) }; | ||
let right = if is_64_bit { right.to_u64()? } else { u64::from(right.to_u32()?) }; | ||
|
||
let result = match unprefixed_name { | ||
// Extract a contigous range of bits from an unsigned integer. | ||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32 | ||
"bextr" => { | ||
let start = u32::try_from(right & 0xff).unwrap(); | ||
let len = u32::try_from((right >> 8) & 0xff).unwrap(); | ||
let shifted = left.checked_shr(start).unwrap_or(0); | ||
// Keep the `len` lowest bits of `shifted`, or all bits if `len` is too big. | ||
if len >= 64 { shifted } else { shifted & 1u64.wrapping_shl(len).wrapping_sub(1) } | ||
} | ||
// Create a copy of an unsigned integer with bits above a certain index cleared. | ||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32 | ||
"bzhi" => { | ||
let index = u32::try_from(right & 0xff).unwrap(); | ||
// Keep the `index` lowest bits of `left`, or all bits if `index` is too big. | ||
if index >= 64 { left } else { left & 1u64.wrapping_shl(index).wrapping_sub(1) } | ||
} | ||
// Extract bit values of an unsigned integer at positions marked by a mask. | ||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32 | ||
"pext" => { | ||
let mut mask = right; | ||
let mut i = 0u32; | ||
let mut result = 0; | ||
// Iterate over the mask one 1-bit at a time, from | ||
// the least significant bit to the most significant bit. | ||
while mask != 0 { | ||
// Extract the bit marked by the mask's least significant set bit | ||
// and put it at position `i` of the result. | ||
result |= u64::from(left & (1 << mask.trailing_zeros()) != 0) << i; | ||
i = i.wrapping_add(1); | ||
// Clear the least significant set bit. | ||
mask &= mask.wrapping_sub(1); | ||
} | ||
result | ||
} | ||
// Deposit bit values of an unsigned integer to positions marked by a mask. | ||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32 | ||
"pdep" => { | ||
let mut mask = right; | ||
let mut set = left; | ||
let mut result = 0; | ||
// Iterate over the mask one 1-bit at a time, from | ||
// the least significant bit to the most significant bit. | ||
while mask != 0 { | ||
// Put rightmost bit of `set` at the position of the current `mask` bit. | ||
result |= (set & 1) << mask.trailing_zeros(); | ||
// Go to next bit of `set`. | ||
set >>= 1; | ||
// Clear the least significant set bit. | ||
mask &= mask.wrapping_sub(1); | ||
} | ||
result | ||
} | ||
_ => return Ok(EmulateItemResult::NotSupported), | ||
}; | ||
|
||
let result = if is_64_bit { | ||
Scalar::from_u64(result) | ||
} else { | ||
Scalar::from_u32(u32::try_from(result).unwrap()) | ||
}; | ||
this.write_scalar(result, dest)?; | ||
|
||
Ok(EmulateItemResult::NeedsReturn) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
// Ignore everything except x86 and x86_64 | ||
// Any new targets that are added to CI should be ignored here. | ||
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) | ||
//@ignore-target-aarch64 | ||
//@ignore-target-arm | ||
//@ignore-target-avr | ||
//@ignore-target-s390x | ||
//@ignore-target-thumbv7em | ||
//@ignore-target-wasm32 | ||
//@compile-flags: -C target-feature=+bmi1,+bmi2 | ||
|
||
#[cfg(target_arch = "x86")] | ||
use std::arch::x86::*; | ||
#[cfg(target_arch = "x86_64")] | ||
use std::arch::x86_64::*; | ||
|
||
fn main() { | ||
// BMI1 and BMI2 are independent from each other, so both must be checked. | ||
assert!(is_x86_feature_detected!("bmi1")); | ||
assert!(is_x86_feature_detected!("bmi2")); | ||
|
||
unsafe { | ||
test_bmi_32(); | ||
test_bmi_64(); | ||
} | ||
} | ||
|
||
/// Exercises the 32-bit variants of the intrinsics: `bextr`, `pext`, `pdep` and `bzhi`.
unsafe fn test_bmi_32() {
    unsafe fn test_bextr_u32() {
        assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);

        for i in 0..16 {
            assert_eq!(_bextr_u32(u32::MAX, i, 4), 0b1111);
            assert_eq!(_bextr_u32(u32::MAX, 4, i), (1 << i) - 1);
        }

        // Lengths beyond the bit count must be covered as well; going above 32
        // is what verifies the bit selection of the instruction.
        for len in 0..256 {
            // An out-of-range length leaves the input untouched, hence the `min(32)`.
            assert_eq!(_bextr_u32(u32::MAX, 0, len).count_ones(), len.min(32));
        }

        // A zero length extracts nothing, whatever the start index is.
        for start in 0..256 {
            assert_eq!(_bextr_u32(u32::MAX, start, 0), 0);
        }

        // Completely random values, as (input, control, expected). These also check
        // that set upper bits of the control value do not disturb the result.
        let random_cases = [
            (0x7408a392u32, 0x54ef705u32, 0x3a0451cu32),
            (0xbc5a3494, 0xdd193203, 0x178b4692),
            (0xc0332325, 0xf96e207, 0x1806646),
        ];
        for &(input, control, expected) in &random_cases {
            assert_eq!(_bextr2_u32(input, control), expected);
        }
    }
    test_bextr_u32();

    unsafe fn test_pext_u32() {
        let source = 0b1011_1110_1001_0011u32;

        // Each case is (input, mask, expected).
        let cases = [
            // Random values.
            (source, 0b0110_0011_1000_0101u32, 0b0000_0000_0011_0101u32),
            (source, 0b1110_1011_1110_1111u32, 0b0001_0111_0100_0011u32),
            (0x12345678, 0xff00fff0, 0x00012567),
            // Various identities.
            (u32::MAX, u32::MAX, u32::MAX),
            (u32::MAX, 0, 0),
            (0, u32::MAX, 0),
        ];
        for &(input, mask, expected) in &cases {
            assert_eq!(_pext_u32(input, mask), expected);
        }
    }
    test_pext_u32();

    unsafe fn test_pdep_u32() {
        let source = 0b1011_1110_1001_0011u32;

        // Each case is (input, mask, expected).
        let cases = [
            // Random values.
            (source, 0b0110_0011_1000_0101u32, 0b0000_0010_0000_0101u32),
            (source, 0b1110_1011_1110_1111u32, 0b1110_1001_0010_0011u32),
            (0x00012567, 0xff00fff0, 0x12005670),
            // Various identities.
            (u32::MAX, u32::MAX, u32::MAX),
            (0, u32::MAX, 0),
            (u32::MAX, 0, 0),
        ];
        for &(input, mask, expected) in &cases {
            assert_eq!(_pdep_u32(input, mask), expected);
        }
    }
    test_pdep_u32();

    unsafe fn test_bzhi_u32() {
        assert_eq!(_bzhi_u32(0b1111_0010u32, 5), 0b0001_0010u32);

        // Indices beyond the bit count must be covered as well, in order to
        // verify the bit selection of the instruction.
        for i in 0..=512 {
            // Only the lowest eight bits of the operand form the index, hence `i & 0xff`.
            // An out-of-range index leaves the input untouched, hence the `min(32)`.
            let index = (i & 0xff).min(32);
            let expected = if index >= 32 { u32::MAX } else { (1u32 << index) - 1 };
            let actual = _bzhi_u32(u32::MAX, i);
            assert_eq!(expected, actual);
        }
    }
    test_bzhi_u32();
}
|
||
/// Stand-in that does nothing on targets other than x86_64.
#[cfg(not(target_arch = "x86_64"))]
unsafe fn test_bmi_64() {}

/// Exercises the 64-bit variants of the intrinsics: `bextr`, `pext`, `pdep` and `bzhi`.
#[cfg(target_arch = "x86_64")]
unsafe fn test_bmi_64() {
    unsafe fn test_bextr_u64() {
        assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);

        for i in 0..16 {
            assert_eq!(_bextr_u64(u64::MAX, i, 4), 0b1111);
            assert_eq!(_bextr_u64(u64::MAX, 32, i), (1 << i) - 1);
        }

        // Lengths beyond the bit count must be covered as well; going above 64
        // is what verifies the bit selection of the instruction.
        for len in 0..256 {
            // An out-of-range length leaves the input untouched, hence the `min(64)`.
            assert_eq!(_bextr_u64(u64::MAX, 0, len).count_ones(), len.min(64));
        }

        // A zero length extracts nothing, whatever the start index is.
        for start in 0..256 {
            assert_eq!(_bextr_u64(u64::MAX, start, 0), 0);
        }

        // Completely random values, as (input, control, expected). These also check
        // that set upper bits of the control value do not disturb the result.
        let random_cases = [
            (0x4ff6cfbcea75f055u64, 0x216642e228425719u64, 0x27fb67de75u64),
            (0xb05e991e6f6e1b6, 0xc76dd5d7f67dfc14, 0xb05e991e6f),
            (0x5a3a629e323d848f, 0x95ac507d20e7719, 0x2d1d314f19),
        ];
        for &(input, control, expected) in &random_cases {
            assert_eq!(_bextr2_u64(input, control), expected);
        }
    }
    test_bextr_u64();

    unsafe fn test_pext_u64() {
        let source = 0b1011_1110_1001_0011u64;

        // Each case is (input, mask, expected).
        let cases = [
            // Random values.
            (source, 0b0110_0011_1000_0101u64, 0b0000_0000_0011_0101u64),
            (source, 0b1110_1011_1110_1111u64, 0b0001_0111_0100_0011u64),
            (0x12345678, 0xff00fff0, 0x00012567),
            // Various identities.
            (u64::MAX, u64::MAX, u64::MAX),
            (u64::MAX, 0, 0),
            (0, u64::MAX, 0),
        ];
        for &(input, mask, expected) in &cases {
            assert_eq!(_pext_u64(input, mask), expected);
        }
    }
    test_pext_u64();

    unsafe fn test_pdep_u64() {
        let source = 0b1011_1110_1001_0011u64;

        // Each case is (input, mask, expected).
        let cases = [
            // Random values.
            (source, 0b0110_0011_1000_0101u64, 0b0000_0010_0000_0101u64),
            (source, 0b1110_1011_1110_1111u64, 0b1110_1001_0010_0011u64),
            (0x00012567, 0xff00fff0, 0x12005670),
            // Various identities.
            (u64::MAX, u64::MAX, u64::MAX),
            (0, u64::MAX, 0),
            (u64::MAX, 0, 0),
        ];
        for &(input, mask, expected) in &cases {
            assert_eq!(_pdep_u64(input, mask), expected);
        }
    }
    test_pdep_u64();

    unsafe fn test_bzhi_u64() {
        assert_eq!(_bzhi_u64(0b1111_0010u64, 5), 0b0001_0010u64);

        // Indices beyond the bit count must be covered as well; going above 255
        // is what verifies the bit selection of the instruction.
        for i in 0..=512 {
            // Only the lowest eight bits of the operand form the index, hence `i & 0xff`.
            // An out-of-range index leaves the input untouched, hence the `min(64)`.
            let index = (i & 0xff).min(64);
            let expected = if index >= 64 { u64::MAX } else { (1u64 << index) - 1 };
            let actual = _bzhi_u64(u64::MAX, i);
            assert_eq!(expected, actual);
        }
    }
    test_bzhi_u64();
}