diff --git a/.travis.yml b/.travis.yml index a23108135d..95be95f59f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,8 +26,6 @@ matrix: script: | cargo install clippy cargo clippy --all -- -D clippy-pedantic - cd coresimd - cargo clippy --all -- -D clippy-pedantic allow_failures: - env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1 - env: CLIPPY=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1 diff --git a/coresimd/src/runtime/x86.rs b/coresimd/src/runtime/x86.rs index 3fb4f0af03..54dda2eeca 100644 --- a/coresimd/src/runtime/x86.rs +++ b/coresimd/src/runtime/x86.rs @@ -472,8 +472,14 @@ mod tests { assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw()); assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq()); assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl()); - assert_eq!(cfg_feature_enabled!("avx512ifma"), information.avx512_ifma()); - assert_eq!(cfg_feature_enabled!("avx512vbmi"), information.avx512_vbmi()); + assert_eq!( + cfg_feature_enabled!("avx512ifma"), + information.avx512_ifma() + ); + assert_eq!( + cfg_feature_enabled!("avx512vbmi"), + information.avx512_vbmi() + ); assert_eq!( cfg_feature_enabled!("avx512vpopcntdq"), information.avx512_vpopcntdq() diff --git a/coresimd/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs index 66b00271f4..7bf376b95f 100644 --- a/coresimd/src/x86/i586/avx2.rs +++ b/coresimd/src/x86/i586/avx2.rs @@ -695,400 +695,496 @@ pub unsafe fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 { phsubsw(a, b) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] -pub unsafe fn _mm_i32gather_epi32(slice: *const i32, offsets: i32x4, scale: i8) -> i32x4 { +pub unsafe fn _mm_i32gather_epi32( + slice: *const i32, offsets: i32x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (pgatherdd(i32x4::splat(0), slice as *const i8, offsets, i32x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] -pub unsafe fn _mm_mask_i32gather_epi32(src: i32x4, slice: *const i32, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4 { +pub unsafe fn _mm_mask_i32gather_epi32( + src: i32x4, slice: *const i32, offsets: i32x4, mask: i32x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (pgatherdd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] -pub unsafe fn _mm256_i32gather_epi32(slice: *const i32, offsets: i32x8, scale: i8) -> i32x8 { +pub unsafe fn _mm256_i32gather_epi32( + slice: *const i32, offsets: i32x8, scale: i8 +) -> i32x8 { macro_rules! call { ($imm8:expr) => (vpgatherdd(i32x8::splat(0), slice as *const i8, offsets, i32x8::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] -pub unsafe fn _mm256_mask_i32gather_epi32(src: i32x8, slice: *const i32, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8 { +pub unsafe fn _mm256_mask_i32gather_epi32( + src: i32x8, slice: *const i32, offsets: i32x8, mask: i32x8, scale: i8 +) -> i32x8 { macro_rules! call { ($imm8:expr) => (vpgatherdd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] -pub unsafe fn _mm_i32gather_ps(slice: *const f32, offsets: i32x4, scale: i8) -> f32x4 { +pub unsafe fn _mm_i32gather_ps( + slice: *const f32, offsets: i32x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (pgatherdps(f32x4::splat(0.0), slice as *const i8, offsets, f32x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] -pub unsafe fn _mm_mask_i32gather_ps(src: f32x4, slice: *const f32, offsets: i32x4, mask: f32x4, scale: i8) -> f32x4 { +pub unsafe fn _mm_mask_i32gather_ps( + src: f32x4, slice: *const f32, offsets: i32x4, mask: f32x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (pgatherdps(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] -pub unsafe fn _mm256_i32gather_ps(slice: *const f32, offsets: i32x8, scale: i8) -> f32x8 { +pub unsafe fn _mm256_i32gather_ps( + slice: *const f32, offsets: i32x8, scale: i8 +) -> f32x8 { macro_rules! call { ($imm8:expr) => (vpgatherdps(f32x8::splat(0.0), slice as *const i8, offsets, f32x8::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] -pub unsafe fn _mm256_mask_i32gather_ps(src: f32x8, slice: *const f32, offsets: i32x8, mask: f32x8, scale: i8) -> f32x8 { +pub unsafe fn _mm256_mask_i32gather_ps( + src: f32x8, slice: *const f32, offsets: i32x8, mask: f32x8, scale: i8 +) -> f32x8 { macro_rules! call { ($imm8:expr) => (vpgatherdps(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] -pub unsafe fn _mm_i32gather_epi64(slice: *const i64, offsets: i32x4, scale: i8) -> i64x2 { +pub unsafe fn _mm_i32gather_epi64( + slice: *const i64, offsets: i32x4, scale: i8 +) -> i64x2 { macro_rules! call { ($imm8:expr) => (pgatherdq(i64x2::splat(0), slice as *const i8, offsets, i64x2::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] -pub unsafe fn _mm_mask_i32gather_epi64(src: i64x2, slice: *const i64, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2 { +pub unsafe fn _mm_mask_i32gather_epi64( + src: i64x2, slice: *const i64, offsets: i32x4, mask: i64x2, scale: i8 +) -> i64x2 { macro_rules! call { ($imm8:expr) => (pgatherdq(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] -pub unsafe fn _mm256_i32gather_epi64(slice: *const i64, offsets: i32x4, scale: i8) -> i64x4 { +pub unsafe fn _mm256_i32gather_epi64( + slice: *const i64, offsets: i32x4, scale: i8 +) -> i64x4 { macro_rules! call { ($imm8:expr) => (vpgatherdq(i64x4::splat(0), slice as *const i8, offsets, i64x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] -pub unsafe fn _mm256_mask_i32gather_epi64(src: i64x4, slice: *const i64, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4 { +pub unsafe fn _mm256_mask_i32gather_epi64( + src: i64x4, slice: *const i64, offsets: i32x4, mask: i64x4, scale: i8 +) -> i64x4 { macro_rules! call { ($imm8:expr) => (vpgatherdq(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] -pub unsafe fn _mm_i32gather_pd(slice: *const f64, offsets: i32x4, scale: i8) -> f64x2 { +pub unsafe fn _mm_i32gather_pd( + slice: *const f64, offsets: i32x4, scale: i8 +) -> f64x2 { macro_rules! call { ($imm8:expr) => (pgatherdpd(f64x2::splat(0.0), slice as *const i8, offsets, f64x2::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] -pub unsafe fn _mm_mask_i32gather_pd(src: f64x2, slice: *const f64, offsets: i32x4, mask: f64x2, scale: i8) -> f64x2 { +pub unsafe fn _mm_mask_i32gather_pd( + src: f64x2, slice: *const f64, offsets: i32x4, mask: f64x2, scale: i8 +) -> f64x2 { macro_rules! call { ($imm8:expr) => (pgatherdpd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] -pub unsafe fn _mm256_i32gather_pd(slice: *const f64, offsets: i32x4, scale: i8) -> f64x4 { +pub unsafe fn _mm256_i32gather_pd( + slice: *const f64, offsets: i32x4, scale: i8 +) -> f64x4 { macro_rules! call { ($imm8:expr) => (vpgatherdpd(f64x4::splat(0.0), slice as *const i8, offsets, f64x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] -pub unsafe fn _mm256_mask_i32gather_pd(src: f64x4, slice: *const f64, offsets: i32x4, mask: f64x4, scale: i8) -> f64x4 { +pub unsafe fn _mm256_mask_i32gather_pd( + src: f64x4, slice: *const f64, offsets: i32x4, mask: f64x4, scale: i8 +) -> f64x4 { macro_rules! call { ($imm8:expr) => (vpgatherdpd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] -pub unsafe fn _mm_i64gather_epi32(slice: *const i32, offsets: i64x2, scale: i8) -> i32x4 { +pub unsafe fn _mm_i64gather_epi32( + slice: *const i32, offsets: i64x2, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (pgatherqd(i32x4::splat(0), slice as *const i8, offsets, i32x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] -pub unsafe fn _mm_mask_i64gather_epi32(src: i32x4, slice: *const i32, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4 { +pub unsafe fn _mm_mask_i64gather_epi32( + src: i32x4, slice: *const i32, offsets: i64x2, mask: i32x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (pgatherqd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] -pub unsafe fn _mm256_i64gather_epi32(slice: *const i32, offsets: i64x4, scale: i8) -> i32x4 { +pub unsafe fn _mm256_i64gather_epi32( + slice: *const i32, offsets: i64x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (vpgatherqd(i32x4::splat(0), slice as *const i8, offsets, i32x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] -pub unsafe fn _mm256_mask_i64gather_epi32(src: i32x4, slice: *const i32, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4 { +pub unsafe fn _mm256_mask_i64gather_epi32( + src: i32x4, slice: *const i32, offsets: i64x4, mask: i32x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (vpgatherqd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] -pub unsafe fn _mm_i64gather_ps(slice: *const f32, offsets: i64x2, scale: i8) -> f32x4 { +pub unsafe fn _mm_i64gather_ps( + slice: *const f32, offsets: i64x2, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (pgatherqps(f32x4::splat(0.0), slice as *const i8, offsets, f32x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] -pub unsafe fn _mm_mask_i64gather_ps(src: f32x4, slice: *const f32, offsets: i64x2, mask: f32x4, scale: i8) -> f32x4 { +pub unsafe fn _mm_mask_i64gather_ps( + src: f32x4, slice: *const f32, offsets: i64x2, mask: f32x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (pgatherqps(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] -pub unsafe fn _mm256_i64gather_ps(slice: *const f32, offsets: i64x4, scale: i8) -> f32x4 { +pub unsafe fn _mm256_i64gather_ps( + slice: *const f32, offsets: i64x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (vpgatherqps(f32x4::splat(0.0), slice as *const i8, offsets, f32x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] -pub unsafe fn _mm256_mask_i64gather_ps(src: f32x4, slice: *const f32, offsets: i64x4, mask: f32x4, scale: i8) -> f32x4 { +pub unsafe fn _mm256_mask_i64gather_ps( + src: f32x4, slice: *const f32, offsets: i64x4, mask: f32x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (vpgatherqps(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] -pub unsafe fn _mm_i64gather_epi64(slice: *const i64, offsets: i64x2, scale: i8) -> i64x2 { +pub unsafe fn _mm_i64gather_epi64( + slice: *const i64, offsets: i64x2, scale: i8 +) -> i64x2 { macro_rules! call { ($imm8:expr) => (pgatherqq(i64x2::splat(0), slice as *const i8, offsets, i64x2::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] -pub unsafe fn _mm_mask_i64gather_epi64(src: i64x2, slice: *const i64, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2 { +pub unsafe fn _mm_mask_i64gather_epi64( + src: i64x2, slice: *const i64, offsets: i64x2, mask: i64x2, scale: i8 +) -> i64x2 { macro_rules! call { ($imm8:expr) => (pgatherqq(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] -pub unsafe fn _mm256_i64gather_epi64(slice: *const i64, offsets: i64x4, scale: i8) -> i64x4 { +pub unsafe fn _mm256_i64gather_epi64( + slice: *const i64, offsets: i64x4, scale: i8 +) -> i64x4 { macro_rules! call { ($imm8:expr) => (vpgatherqq(i64x4::splat(0), slice as *const i8, offsets, i64x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] -pub unsafe fn _mm256_mask_i64gather_epi64(src: i64x4, slice: *const i64, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4 { +pub unsafe fn _mm256_mask_i64gather_epi64( + src: i64x4, slice: *const i64, offsets: i64x4, mask: i64x4, scale: i8 +) -> i64x4 { macro_rules! call { ($imm8:expr) => (vpgatherqq(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] -pub unsafe fn _mm_i64gather_pd(slice: *const f64, offsets: i64x2, scale: i8) -> f64x2 { +pub unsafe fn _mm_i64gather_pd( + slice: *const f64, offsets: i64x2, scale: i8 +) -> f64x2 { macro_rules! call { ($imm8:expr) => (pgatherqpd(f64x2::splat(0.0), slice as *const i8, offsets, f64x2::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] -pub unsafe fn _mm_mask_i64gather_pd(src: f64x2, slice: *const f64, offsets: i64x2, mask: f64x2, scale: i8) -> f64x2 { +pub unsafe fn _mm_mask_i64gather_pd( + src: f64x2, slice: *const f64, offsets: i64x2, mask: f64x2, scale: i8 +) -> f64x2 { macro_rules! call { ($imm8:expr) => (pgatherqpd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] -pub unsafe fn _mm256_i64gather_pd(slice: *const f64, offsets: i64x4, scale: i8) -> f64x4 { +pub unsafe fn _mm256_i64gather_pd( + slice: *const f64, offsets: i64x4, scale: i8 +) -> f64x4 { macro_rules! call { ($imm8:expr) => (vpgatherqpd(f64x4::splat(0.0), slice as *const i8, offsets, f64x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] -pub unsafe fn _mm256_mask_i64gather_pd(src: f64x4, slice: *const f64, offsets: i64x4, mask: f64x4, scale: i8) -> f64x4 { +pub unsafe fn _mm256_mask_i64gather_pd( + src: f64x4, slice: *const f64, offsets: i64x4, mask: f64x4, scale: i8 +) -> f64x4 { macro_rules! call { ($imm8:expr) => (vpgatherqpd(src, slice as *const i8, offsets, mask, $imm8)) } @@ -2531,37 +2627,69 @@ extern "C" { #[link_name = "llvm.x86.avx2.permd"] fn permd(a: u32x8, b: u32x8) -> u32x8; #[link_name = "llvm.x86.avx2.gather.d.d"] - fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4; + fn pgatherdd( + src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8 + ) -> i32x4; #[link_name = "llvm.x86.avx2.gather.d.d.256"] - fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8; + fn vpgatherdd( + src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8 + ) -> i32x8; #[link_name = "llvm.x86.avx2.gather.d.q"] - fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2; + fn pgatherdq( + src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8 + ) -> i64x2; #[link_name = "llvm.x86.avx2.gather.d.q.256"] - fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4; + fn vpgatherdq( + src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8 + ) -> i64x4; #[link_name = "llvm.x86.avx2.gather.q.d"] - fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4; + fn pgatherqd( + src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8 + ) -> i32x4; #[link_name = "llvm.x86.avx2.gather.q.d.256"] - fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4; + fn vpgatherqd( + src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8 + ) -> i32x4; #[link_name = "llvm.x86.avx2.gather.q.q"] - fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2; + fn pgatherqq( + src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8 + ) -> i64x2; #[link_name = "llvm.x86.avx2.gather.q.q.256"] - fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4; + fn vpgatherqq( + src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8 + ) -> i64x4; #[link_name = "llvm.x86.avx2.gather.d.pd"] - fn pgatherdpd (src: f64x2, slice: *const i8, offsets: i32x4, mask: f64x2, scale: i8) -> f64x2; + fn pgatherdpd( + src: f64x2, slice: *const i8, offsets: i32x4, mask: f64x2, scale: i8 + ) -> f64x2; #[link_name = "llvm.x86.avx2.gather.d.pd.256"] - fn vpgatherdpd (src: f64x4, slice: *const i8, offsets: i32x4, mask: f64x4, scale: i8) -> f64x4; + fn vpgatherdpd( + src: f64x4, slice: *const i8, offsets: i32x4, mask: f64x4, scale: i8 + ) -> f64x4; #[link_name = "llvm.x86.avx2.gather.q.pd"] - fn pgatherqpd (src: f64x2, slice: *const i8, offsets: i64x2, mask: f64x2, scale: i8) -> f64x2; + fn pgatherqpd( + src: f64x2, slice: *const i8, offsets: i64x2, mask: f64x2, scale: i8 + ) -> f64x2; #[link_name = "llvm.x86.avx2.gather.q.pd.256"] - fn vpgatherqpd (src: f64x4, slice: *const i8, offsets: i64x4, mask: f64x4, scale: i8) -> f64x4; + fn vpgatherqpd( + src: f64x4, slice: *const i8, offsets: i64x4, mask: f64x4, scale: i8 + ) -> f64x4; #[link_name = "llvm.x86.avx2.gather.d.ps"] - fn pgatherdps (src: f32x4, slice: *const i8, offsets: i32x4, mask: f32x4, scale: i8) -> f32x4; + fn pgatherdps( + src: f32x4, slice: *const i8, offsets: i32x4, mask: f32x4, scale: i8 + ) -> f32x4; #[link_name = "llvm.x86.avx2.gather.d.ps.256"] - fn vpgatherdps (src: f32x8, slice: *const i8, offsets: i32x8, mask: f32x8, scale: i8) -> f32x8; + fn vpgatherdps( + src: f32x8, slice: *const i8, offsets: i32x8, mask: f32x8, scale: i8 + ) -> f32x8; #[link_name = "llvm.x86.avx2.gather.q.ps"] - fn pgatherqps (src: f32x4, slice: *const i8, offsets: i64x2, mask: f32x4, scale: i8) -> f32x4; + fn pgatherqps( + src: f32x4, slice: *const i8, offsets: i64x2, mask: f32x4, scale: i8 + ) -> f32x4; #[link_name = "llvm.x86.avx2.gather.q.ps.256"] - fn vpgatherqps (src: f32x4, slice: *const i8, offsets: i64x4, mask: f32x4, scale: i8) -> f32x4; + fn vpgatherqps( + src: f32x4, slice: *const i8, offsets: i64x4, mask: f32x4, scale: i8 + ) -> f32x4; } @@ -3998,7 +4126,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm_i32gather_epi32(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 4); + let r = avx2::_mm_i32gather_epi32( + arr.as_ptr(), + i32x4::new(0, 16, 32, 48), + 4, + ); assert_eq!(r, i32x4::new(0, 16, 32, 48)); } @@ -4009,10 +4141,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm_mask_i32gather_epi32(i32x4::splat(256), arr.as_ptr(), - i32x4::new(0, 16, 64, 96), - i32x4::new(-1, -1, -1, 0), - 4); + let r = avx2::_mm_mask_i32gather_epi32( + i32x4::splat(256), + arr.as_ptr(), + i32x4::new(0, 16, 64, 96), + i32x4::new(-1, -1, -1, 0), + 4, + ); assert_eq!(r, i32x4::new(0, 16, 64, 256)); } @@ -4023,7 +4158,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm256_i32gather_epi32(arr.as_ptr(), i32x8::new(0, 16, 32, 48, 1, 2, 3, 4), 4); + let r = avx2::_mm256_i32gather_epi32( + arr.as_ptr(), + i32x8::new(0, 16, 32, 48, 1, 2, 3, 4), + 4, + ); assert_eq!(r, i32x8::new(0, 16, 32, 48, 1, 2, 3, 4)); } @@ -4034,10 +4173,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm256_mask_i32gather_epi32(i32x8::splat(256), arr.as_ptr(), - i32x8::new(0, 16, 64, 96, 0, 0, 0, 0), - i32x8::new(-1, -1, -1, 0, 0, 0, 0, 0), - 4); + let r = avx2::_mm256_mask_i32gather_epi32( + i32x8::splat(256), + arr.as_ptr(), + i32x8::new(0, 16, 64, 96, 0, 0, 0, 0), + i32x8::new(-1, -1, -1, 0, 0, 0, 0, 0), + 4, + ); assert_eq!(r, i32x8::new(0, 16, 64, 256, 256, 256, 256, 256)); } @@ -4050,7 +4192,8 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm_i32gather_ps(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 4); + let r = + avx2::_mm_i32gather_ps(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 4); assert_eq!(r, f32x4::new(0.0, 16.0, 32.0, 48.0)); } @@ -4063,10 +4206,13 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm_mask_i32gather_ps(f32x4::splat(256.0), arr.as_ptr(), - i32x4::new(0, 16, 64, 96), - f32x4::new(-1.0, -1.0, -1.0, 0.0), - 4); + let r = avx2::_mm_mask_i32gather_ps( + f32x4::splat(256.0), + arr.as_ptr(), + i32x4::new(0, 16, 64, 96), + f32x4::new(-1.0, -1.0, -1.0, 0.0), + 4, + ); assert_eq!(r, f32x4::new(0.0, 16.0, 64.0, 256.0)); } @@ -4079,7 +4225,11 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm256_i32gather_ps(arr.as_ptr(), i32x8::new(0, 16, 32, 48, 1, 2, 3, 4), 4); + let r = avx2::_mm256_i32gather_ps( + arr.as_ptr(), + i32x8::new(0, 16, 32, 48, 1, 2, 3, 4), + 4, + ); assert_eq!(r, f32x8::new(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0)); } @@ -4092,11 +4242,17 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm256_mask_i32gather_ps(f32x8::splat(256.0), arr.as_ptr(), - i32x8::new(0, 16, 64, 96, 0, 0, 0, 0), - f32x8::new(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0), - 4); - assert_eq!(r, f32x8::new(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0)); + let r = avx2::_mm256_mask_i32gather_ps( + f32x8::splat(256.0), + arr.as_ptr(), + i32x8::new(0, 16, 64, 96, 0, 0, 0, 0), + f32x8::new(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0), + 4, + ); + assert_eq!( + r, + f32x8::new(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0) + ); } @@ -4107,7 +4263,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm_i32gather_epi64(arr.as_ptr(), i32x4::new(0, 16, 0, 0), 8); + let r = avx2::_mm_i32gather_epi64( + arr.as_ptr(), + i32x4::new(0, 16, 0, 0), + 8, + ); assert_eq!(r, i64x2::new(0, 16)); } @@ -4118,10 +4278,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm_mask_i32gather_epi64(i64x2::splat(256), arr.as_ptr(), - i32x4::new(16, 16, 16, 16), - i64x2::new(-1, 0), - 8); + let r = avx2::_mm_mask_i32gather_epi64( + i64x2::splat(256), + arr.as_ptr(), + i32x4::new(16, 16, 16, 16), + i64x2::new(-1, 0), + 8, + ); assert_eq!(r, i64x2::new(16, 256)); } @@ -4132,7 +4295,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm256_i32gather_epi64(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 8); + let r = avx2::_mm256_i32gather_epi64( + arr.as_ptr(), + i32x4::new(0, 16, 32, 48), + 8, + ); assert_eq!(r, i64x4::new(0, 16, 32, 48)); } @@ -4143,10 +4310,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm256_mask_i32gather_epi64(i64x4::splat(256), arr.as_ptr(), - i32x4::new(0, 16, 64, 96), - i64x4::new(-1, -1, -1, 0), - 8); + let r = avx2::_mm256_mask_i32gather_epi64( + i64x4::splat(256), + arr.as_ptr(), + i32x4::new(0, 16, 64, 96), + i64x4::new(-1, -1, -1, 0), + 8, + ); assert_eq!(r, i64x4::new(0, 16, 64, 256)); } @@ -4159,7 +4329,8 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm_i32gather_pd(arr.as_ptr(), i32x4::new(0, 16, 0, 0), 8); + let r = + avx2::_mm_i32gather_pd(arr.as_ptr(), i32x4::new(0, 16, 0, 0), 8); assert_eq!(r, f64x2::new(0.0, 16.0)); } @@ -4172,10 +4343,13 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm_mask_i32gather_pd(f64x2::splat(256.0), arr.as_ptr(), - i32x4::new(16, 16, 16, 16), - f64x2::new(-1.0, 0.0), - 8); + let r = avx2::_mm_mask_i32gather_pd( + f64x2::splat(256.0), + arr.as_ptr(), + i32x4::new(16, 16, 16, 16), + f64x2::new(-1.0, 0.0), + 8, + ); assert_eq!(r, f64x2::new(16.0, 256.0)); } @@ -4188,7 +4362,11 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm256_i32gather_pd(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 8); + let r = avx2::_mm256_i32gather_pd( + arr.as_ptr(), + i32x4::new(0, 16, 32, 48), + 8, + ); assert_eq!(r, f64x4::new(0.0, 16.0, 32.0, 48.0)); } @@ -4201,10 +4379,13 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm256_mask_i32gather_pd(f64x4::splat(256.0), arr.as_ptr(), - i32x4::new(0, 16, 64, 96), - f64x4::new(-1.0, -1.0, -1.0, 0.0), - 8); + let r = avx2::_mm256_mask_i32gather_pd( + f64x4::splat(256.0), + arr.as_ptr(), + i32x4::new(0, 16, 64, 96), + f64x4::new(-1.0, -1.0, -1.0, 0.0), + 8, + ); assert_eq!(r, f64x4::new(0.0, 16.0, 64.0, 256.0)); } @@ -4226,10 +4407,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm_mask_i64gather_epi32(i32x4::splat(256), arr.as_ptr(), - i64x2::new(0, 16), - i32x4::new(-1, 0, -1, 0), - 4); + let r = avx2::_mm_mask_i64gather_epi32( + i32x4::splat(256), + arr.as_ptr(), + i64x2::new(0, 16), + i32x4::new(-1, 0, -1, 0), + 4, + ); assert_eq!(r, i32x4::new(0, 256, 0, 0)); } @@ -4240,7 +4424,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm256_i64gather_epi32(arr.as_ptr(), i64x4::new(0, 16, 32, 48), 4); + let r = avx2::_mm256_i64gather_epi32( + arr.as_ptr(), + i64x4::new(0, 16, 32, 48), + 4, + ); assert_eq!(r, i32x4::new(0, 16, 32, 48)); } @@ -4251,10 +4439,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm256_mask_i64gather_epi32(i32x4::splat(256), arr.as_ptr(), - i64x4::new(0, 16, 64, 96), - i32x4::new(-1, -1, -1, 0), - 4); + let r = avx2::_mm256_mask_i64gather_epi32( + i32x4::splat(256), + arr.as_ptr(), + i64x4::new(0, 16, 64, 96), + i32x4::new(-1, -1, -1, 0), + 4, + ); assert_eq!(r, i32x4::new(0, 16, 64, 256)); } @@ -4280,10 +4471,13 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm_mask_i64gather_ps(f32x4::splat(256.0), arr.as_ptr(), - i64x2::new(0, 16), - f32x4::new(-1.0, 0.0, -1.0, 0.0), - 4); + let r = avx2::_mm_mask_i64gather_ps( + f32x4::splat(256.0), + arr.as_ptr(), + i64x2::new(0, 16), + f32x4::new(-1.0, 0.0, -1.0, 0.0), + 4, + ); assert_eq!(r, f32x4::new(0.0, 256.0, 0.0, 0.0)); } @@ -4296,7 +4490,11 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm256_i64gather_ps(arr.as_ptr(), i64x4::new(0, 16, 32, 48), 4); + let r = avx2::_mm256_i64gather_ps( + arr.as_ptr(), + i64x4::new(0, 16, 32, 48), + 4, + ); assert_eq!(r, f32x4::new(0.0, 16.0, 32.0, 48.0)); } @@ -4309,10 +4507,13 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm256_mask_i64gather_ps(f32x4::splat(256.0), arr.as_ptr(), - i64x4::new(0, 16, 64, 96), - f32x4::new(-1.0, -1.0, -1.0, 0.0), - 4); + let r = avx2::_mm256_mask_i64gather_ps( + f32x4::splat(256.0), + arr.as_ptr(), + i64x4::new(0, 16, 64, 96), + f32x4::new(-1.0, -1.0, -1.0, 0.0), + 4, + ); assert_eq!(r, f32x4::new(0.0, 16.0, 64.0, 256.0)); } @@ -4335,10 +4536,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm_mask_i64gather_epi64(i64x2::splat(256), arr.as_ptr(), - i64x2::new(16, 16), - i64x2::new(-1, 0), - 8); + let r = avx2::_mm_mask_i64gather_epi64( + i64x2::splat(256), + arr.as_ptr(), + i64x2::new(16, 16), + i64x2::new(-1, 0), + 8, + ); assert_eq!(r, i64x2::new(16, 256)); } @@ -4349,7 +4553,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm256_i64gather_epi64(arr.as_ptr(), i64x4::new(0, 16, 32, 48), 8); + let r = avx2::_mm256_i64gather_epi64( + arr.as_ptr(), + i64x4::new(0, 16, 32, 48), + 8, + ); assert_eq!(r, i64x4::new(0, 16, 32, 48)); } @@ -4360,10 +4568,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm256_mask_i64gather_epi64(i64x4::splat(256), arr.as_ptr(), - i64x4::new(0, 16, 64, 96), - i64x4::new(-1, -1, -1, 0), - 8); + let r = avx2::_mm256_mask_i64gather_epi64( + i64x4::splat(256), + arr.as_ptr(), + i64x4::new(0, 16, 64, 96), + i64x4::new(-1, -1, -1, 0), + 8, + ); assert_eq!(r, i64x4::new(0, 16, 64, 256)); } @@ -4389,10 +4600,13 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm_mask_i64gather_pd(f64x2::splat(256.0), arr.as_ptr(), - i64x2::new(16, 16), - f64x2::new(-1.0, 0.0), - 8); + let r = avx2::_mm_mask_i64gather_pd( + f64x2::splat(256.0), + arr.as_ptr(), + i64x2::new(16, 16), + f64x2::new(-1.0, 0.0), + 8, + ); assert_eq!(r, f64x2::new(16.0, 256.0)); } @@ -4405,7 +4619,11 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm256_i64gather_pd(arr.as_ptr(), i64x4::new(0, 16, 32, 48), 8); + let r = avx2::_mm256_i64gather_pd( + arr.as_ptr(), + i64x4::new(0, 16, 32, 48), + 8, + ); assert_eq!(r, f64x4::new(0.0, 16.0, 32.0, 48.0)); } @@ -4418,10 +4636,13 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm256_mask_i64gather_pd(f64x4::splat(256.0), arr.as_ptr(), - i64x4::new(0, 16, 64, 96), - f64x4::new(-1.0, -1.0, -1.0, 0.0), - 8); + let r = avx2::_mm256_mask_i64gather_pd( + f64x4::splat(256.0), + arr.as_ptr(), + i64x4::new(0, 16, 64, 96), + f64x4::new(-1.0, -1.0, -1.0, 0.0), + 8, + ); assert_eq!(r, f64x4::new(0.0, 16.0, 64.0, 256.0)); } diff --git a/src/runtime/linux/cpuinfo.rs b/src/runtime/linux/cpuinfo.rs index b65a2b8034..777be3de56 100644 --- a/src/runtime/linux/cpuinfo.rs +++ b/src/runtime/linux/cpuinfo.rs @@ -151,7 +151,8 @@ power management: assert!(!cpuinfo.field("flags").has("avx")); } - const ARM_CORTEX_A53: &str = r"Processor : AArch64 Processor rev 3 (aarch64) + const ARM_CORTEX_A53: &str = + r"Processor : AArch64 Processor rev 3 (aarch64) processor : 0 processor : 1 processor : 2 diff --git a/stdsimd-test/src/lib.rs b/stdsimd-test/src/lib.rs index ce52ea5d27..bd497b6cd5 100644 --- a/stdsimd-test/src/lib.rs +++ b/stdsimd-test/src/lib.rs @@ -5,6 +5,8 @@ //! assertions about the disassembly of a function. #![feature(proc_macro)] +#![cfg_attr(feature = "cargo-clippy", + allow(missing_docs_in_private_items, print_stdout))] extern crate assert_instr_macro; extern crate backtrace; @@ -71,9 +73,10 @@ fn disassemble_myself() -> HashMap> { ); assert!(output.status.success()); - parse_otool(&str::from_utf8(&output.stdout).expect("stdout not utf8")) + parse_otool(str::from_utf8(&output.stdout).expect("stdout not utf8")) } else { - let objdump = env::var("OBJDUMP").unwrap_or("objdump".to_string()); + let objdump = + env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string()); let output = Command::new(objdump) .arg("--disassemble") .arg(&me) @@ -86,21 +89,18 @@ fn disassemble_myself() -> HashMap> { ); assert!(output.status.success()); - parse_objdump( - &str::from_utf8(&output.stdout).expect("stdout not utf8"), - ) + parse_objdump(str::from_utf8(&output.stdout).expect("stdout not utf8")) } } fn parse_objdump(output: &str) -> HashMap> { let mut lines = output.lines(); - let expected_len = if cfg!(target_arch = "arm") { - 8 - } else if cfg!(target_arch = "aarch64") { - 8 - } else { - 2 - }; + let expected_len = + if cfg!(target_arch = "arm") || cfg!(target_arch = "aarch64") { + 8 + } else { + 2 + }; for line in output.lines().take(100) { println!("{}", line); @@ -112,7 +112,8 @@ fn parse_objdump(output: &str) -> HashMap> { if !header.ends_with(">:") { continue; } - let start = header.find("<").unwrap(); + let start = header.find('<') + .expect(&format!("\"<\" not found in symbol pattern of the form \"$hex_addr <$name>\": {}", header)); let symbol = &header[start + 1..header.len() - 2]; let mut instructions = Vec::new(); @@ -136,13 +137,13 @@ fn parse_objdump(output: &str) -> HashMap> { } ret.entry(normalize(symbol)) - .or_insert(Vec::new()) + .or_insert_with(Vec::new) .push(Function { instrs: instructions, }); } - return ret; + ret } fn parse_otool(output: &str) -> HashMap> { @@ -154,13 +155,9 @@ fn parse_otool(output: &str) -> HashMap> { let mut ret = HashMap::new(); let mut cached_header = None; - loop { - let header = match cached_header.take().or_else(|| lines.next()) { - Some(header) => header, - None => break, - }; + while let Some(header) = cached_header.take().or_else(|| lines.next()) { // symbols should start with `$symbol:` - if !header.ends_with(":") { + if !header.ends_with(':') { continue; } // strip the leading underscore and the trailing colon @@ -168,7 +165,7 @@ fn parse_otool(output: &str) -> HashMap> { let mut instructions = Vec::new(); while let Some(instruction) = lines.next() { - if instruction.ends_with(":") { + if instruction.ends_with(':') { cached_header = Some(instruction); break; } @@ -184,13 +181,13 @@ fn parse_otool(output: &str) -> HashMap> { } ret.entry(normalize(symbol)) - .or_insert(Vec::new()) + .or_insert_with(Vec::new) .push(Function { instrs: instructions, }); } - return ret; + ret } fn parse_dumpbin(output: &str) -> HashMap> { @@ -202,13 +199,9 @@ fn parse_dumpbin(output: &str) -> HashMap> { let mut ret = HashMap::new(); let mut cached_header = None; - loop { - let header = match cached_header.take().or_else(|| lines.next()) { - Some(header) => header, - None => break, - }; + while let Some(header) = cached_header.take().or_else(|| lines.next()) { // symbols should start with `$symbol:` - if !header.ends_with(":") { + if !header.ends_with(':') { continue; } // strip the trailing colon @@ -239,13 +232,13 @@ fn parse_dumpbin(output: &str) -> HashMap> { } ret.entry(normalize(symbol)) - .or_insert(Vec::new()) + .or_insert_with(Vec::new) .push(Function { instrs: instructions, }); } - return ret; + ret } fn normalize(symbol: &str) -> String { @@ -268,9 +261,10 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { sym = name.name().and_then(|s| s.as_str()).map(normalize); }); - let functions = match sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) { - Some(s) => s, - None => { + let functions = + if let Some(s) = sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) { + s + } else { if let Some(sym) = sym { println!("assumed symbol name: `{}`", sym); } @@ -279,8 +273,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { println!("\t- {}", f); } panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname); - } - }; + }; assert_eq!(functions.len(), 1); let function = &functions[0]; @@ -288,7 +281,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // Look for `expected` as the first part of any instruction in this // function, returning if we do indeed find it. let mut found = false; - for instr in function.instrs.iter() { + for instr in &function.instrs { // Gets the first instruction, e.g. tzcntl in tzcntl %rax,%rax if let Some(part) = instr.parts.get(0) { // Truncates the instruction with the length of the expected @@ -308,13 +301,13 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // Help debug by printing out the found disassembly, and then panic as we // didn't find the instruction. - println!("disassembly for {}: ", sym.as_ref().unwrap()); + println!("disassembly for {}: ", sym.as_ref().expect("symbol not found")); for (i, instr) in function.instrs.iter().enumerate() { print!("\t{:2}: ", i); - for part in instr.parts.iter() { + for part in &instr.parts { print!("{} ", part); } - println!(""); + println!(); } if !found {