From 86a0198240f37df7f1a34681655ac7a25af36650 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= <eduardosm-dev@e64.io>
Date: Tue, 10 Oct 2023 18:43:42 +0200
Subject: [PATCH 1/2] Remove from miri LLVM intrinsics that are no longer
 needed

---
 src/shims/x86/sse.rs  |  19 -----
 src/shims/x86/sse2.rs | 176 +-----------------------------------------
 src/shims/x86/sse3.rs |  26 -------
 3 files changed, 1 insertion(+), 220 deletions(-)

diff --git a/src/shims/x86/sse.rs b/src/shims/x86/sse.rs
index 6f0b76059f..831228b7a2 100644
--- a/src/shims/x86/sse.rs
+++ b/src/shims/x86/sse.rs
@@ -209,25 +209,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     )?;
                 }
             }
-            // Used to implement the _mm_movemask_ps function.
-            // Returns a scalar integer where the i-th bit is the highest
-            // bit of the i-th component of `op`.
-            // https://www.felixcloutier.com/x86/movmskps
-            "movmsk.ps" => {
-                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-                let (op, op_len) = this.operand_to_simd(op)?;
-
-                let mut res = 0;
-                for i in 0..op_len {
-                    let op = this.read_scalar(&this.project_index(&op, i)?)?;
-                    let op = op.to_u32()?;
-
-                    // Extract the highest bit of `op` and place it in the `i`-th bit of `res`
-                    res |= (op >> 31) << i;
-                }
-
-                this.write_scalar(Scalar::from_u32(res), dest)?;
-            }
             _ => return Ok(EmulateForeignItemResult::NotSupported),
         }
         Ok(EmulateForeignItemResult::NeedsJumping)
diff --git a/src/shims/x86/sse2.rs b/src/shims/x86/sse2.rs
index c6a847b5cf..3f2b9f5f0a 100644
--- a/src/shims/x86/sse2.rs
+++ b/src/shims/x86/sse2.rs
@@ -1,8 +1,4 @@
-use rustc_apfloat::{
-    ieee::{Double, Single},
-    Float as _,
-};
-use rustc_middle::mir;
+use rustc_apfloat::ieee::Double;
 use rustc_middle::ty::layout::LayoutOf as _;
 use rustc_middle::ty::Ty;
 use rustc_span::Symbol;
@@ -39,49 +35,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
         // Intrinsincs sufixed with "epiX" or "epuX" operate with X-bit signed or unsigned
         // vectors.
         match unprefixed_name {
-            // Used to implement the _mm_avg_epu8 and _mm_avg_epu16 functions.
-            // Averages packed unsigned 8/16-bit integers in `left` and `right`.
-            "pavg.b" | "pavg.w" => {
-                let [left, right] =
-                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-
-                let (left, left_len) = this.operand_to_simd(left)?;
-                let (right, right_len) = this.operand_to_simd(right)?;
-                let (dest, dest_len) = this.place_to_simd(dest)?;
-
-                assert_eq!(dest_len, left_len);
-                assert_eq!(dest_len, right_len);
-
-                for i in 0..dest_len {
-                    let left = this.read_immediate(&this.project_index(&left, i)?)?;
-                    let right = this.read_immediate(&this.project_index(&right, i)?)?;
-                    let dest = this.project_index(&dest, i)?;
-
-                    // Widen the operands to avoid overflow
-                    let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?;
-                    let left = this.int_to_int_or_float(&left, twice_wide)?;
-                    let right = this.int_to_int_or_float(&right, twice_wide)?;
-
-                    // Calculate left + right + 1
-                    let added = this.wrapping_binary_op(mir::BinOp::Add, &left, &right)?;
-                    let added = this.wrapping_binary_op(
-                        mir::BinOp::Add,
-                        &added,
-                        &ImmTy::from_uint(1u32, twice_wide),
-                    )?;
-
-                    // Calculate (left + right + 1) / 2
-                    let divided = this.wrapping_binary_op(
-                        mir::BinOp::Div,
-                        &added,
-                        &ImmTy::from_uint(2u32, twice_wide),
-                    )?;
-
-                    // Narrow back to the original type
-                    let res = this.int_to_int_or_float(&divided, dest.layout)?;
-                    this.write_immediate(*res, &dest)?;
-                }
-            }
             // Used to implement the _mm_madd_epi16 function.
             // Multiplies packed signed 16-bit integers in `left` and `right`, producing
             // intermediate signed 32-bit integers. Horizontally add adjacent pairs of
@@ -118,70 +71,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     this.write_scalar(Scalar::from_i32(res), &dest)?;
                 }
             }
-            // Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions.
-            "pmulh.w" | "pmulhu.w" => {
-                let [left, right] =
-                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-
-                let (left, left_len) = this.operand_to_simd(left)?;
-                let (right, right_len) = this.operand_to_simd(right)?;
-                let (dest, dest_len) = this.place_to_simd(dest)?;
-
-                assert_eq!(dest_len, left_len);
-                assert_eq!(dest_len, right_len);
-
-                for i in 0..dest_len {
-                    let left = this.read_immediate(&this.project_index(&left, i)?)?;
-                    let right = this.read_immediate(&this.project_index(&right, i)?)?;
-                    let dest = this.project_index(&dest, i)?;
-
-                    // Widen the operands to avoid overflow
-                    let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?;
-                    let left = this.int_to_int_or_float(&left, twice_wide)?;
-                    let right = this.int_to_int_or_float(&right, twice_wide)?;
-
-                    // Multiply
-                    let multiplied = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
-                    // Keep the high half
-                    let high = this.wrapping_binary_op(
-                        mir::BinOp::Shr,
-                        &multiplied,
-                        &ImmTy::from_uint(dest.layout.size.bits(), twice_wide),
-                    )?;
-
-                    // Narrow back to the original type
-                    let res = this.int_to_int_or_float(&high, dest.layout)?;
-                    this.write_immediate(*res, &dest)?;
-                }
-            }
-            // Used to implement the _mm_mul_epu32 function.
-            // Multiplies the the low unsigned 32-bit integers from each packed
-            // 64-bit element and stores the result as 64-bit unsigned integers.
-            "pmulu.dq" => {
-                let [left, right] =
-                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-
-                let (left, left_len) = this.operand_to_simd(left)?;
-                let (right, right_len) = this.operand_to_simd(right)?;
-                let (dest, dest_len) = this.place_to_simd(dest)?;
-
-                // left and right are u32x4, dest is u64x2
-                assert_eq!(left_len, 4);
-                assert_eq!(right_len, 4);
-                assert_eq!(dest_len, 2);
-
-                for i in 0..dest_len {
-                    let op_i = i.checked_mul(2).unwrap();
-                    let left = this.read_scalar(&this.project_index(&left, op_i)?)?.to_u32()?;
-                    let right = this.read_scalar(&this.project_index(&right, op_i)?)?.to_u32()?;
-                    let dest = this.project_index(&dest, i)?;
-
-                    // The multiplication will not overflow because stripping the
-                    // operands are expanded from 32-bit to 64-bit.
-                    let res = u64::from(left).checked_mul(u64::from(right)).unwrap();
-                    this.write_scalar(Scalar::from_u64(res), &dest)?;
-                }
-            }
             // Used to implement the _mm_sad_epu8 function.
             // Computes the absolute differences of packed unsigned 8-bit integers in `a`
             // and `b`, then horizontally sum each consecutive 8 differences to produce
@@ -370,25 +259,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     this.write_scalar(Scalar::from_u64(res), &dest)?;
                 }
             }
-            // Used to implement the _mm_cvtepi32_ps function.
-            // Converts packed i32 to packed f32.
-            // FIXME: Can we get rid of this intrinsic and just use simd_as?
-            "cvtdq2ps" => {
-                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-
-                let (op, op_len) = this.operand_to_simd(op)?;
-                let (dest, dest_len) = this.place_to_simd(dest)?;
-
-                assert_eq!(dest_len, op_len);
-
-                for i in 0..dest_len {
-                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_i32()?;
-                    let dest = this.project_index(&dest, i)?;
-
-                    let res = Scalar::from_f32(Single::from_i128(op.into()).value);
-                    this.write_scalar(res, &dest)?;
-                }
-            }
             // Used to implement the _mm_cvtps_epi32 and _mm_cvttps_epi32 functions.
             // Converts packed f32 to packed i32.
             "cvtps2dq" | "cvttps2dq" => {
@@ -652,31 +522,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                 };
                 this.write_scalar(Scalar::from_i32(i32::from(res)), dest)?;
             }
-            // Used to implement the _mm_cvtpd_ps and _mm_cvtps_pd functions.
-            // Converts packed f32/f64 to packed f64/f32.
-            "cvtpd2ps" | "cvtps2pd" => {
-                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-
-                let (op, op_len) = this.operand_to_simd(op)?;
-                let (dest, dest_len) = this.place_to_simd(dest)?;
-
-                // For cvtpd2ps: op is f64x2, dest is f32x4
-                // For cvtps2pd: op is f32x4, dest is f64x2
-                // In either case, the two first values are converted
-                for i in 0..op_len.min(dest_len) {
-                    let op = this.read_immediate(&this.project_index(&op, i)?)?;
-                    let dest = this.project_index(&dest, i)?;
-
-                    let res = this.float_to_float_or_int(&op, dest.layout)?;
-                    this.write_immediate(*res, &dest)?;
-                }
-                // For f32 -> f64, ignore the remaining
-                // For f64 -> f32, fill the remaining with zeros
-                for i in op_len..dest_len {
-                    let dest = this.project_index(&dest, i)?;
-                    this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
-                }
-            }
             // Used to implement the _mm_cvtpd_epi32 and _mm_cvttpd_epi32 functions.
             // Converts packed f64 to packed i32.
             "cvtpd2dq" | "cvttpd2dq" => {
@@ -772,25 +617,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     )?;
                 }
             }
-            // Used to implement the _mm_movemask_pd function.
-            // Returns a scalar integer where the i-th bit is the highest
-            // bit of the i-th component of `op`.
-            // https://www.felixcloutier.com/x86/movmskpd
-            "movmsk.pd" => {
-                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-                let (op, op_len) = this.operand_to_simd(op)?;
-
-                let mut res = 0;
-                for i in 0..op_len {
-                    let op = this.read_scalar(&this.project_index(&op, i)?)?;
-                    let op = op.to_u64()?;
-
-                    // Extract the highest bit of `op` and place it in the `i`-th bit of `res`
-                    res |= (op >> 63) << i;
-                }
-
-                this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?;
-            }
             // Used to implement the `_mm_pause` function.
             // The intrinsic is used to hint the processor that the code is in a spin-loop.
             "pause" => {
diff --git a/src/shims/x86/sse3.rs b/src/shims/x86/sse3.rs
index a41de5dbf7..20a4b56074 100644
--- a/src/shims/x86/sse3.rs
+++ b/src/shims/x86/sse3.rs
@@ -23,32 +23,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
         let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap();
 
         match unprefixed_name {
-            // Used to implement the _mm_addsub_ps and _mm_addsub_pd functions.
-            // Alternatingly add and subtract floating point (f32 or f64) from
-            // `left` and `right`
-            "addsub.ps" | "addsub.pd" => {
-                let [left, right] =
-                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-
-                let (left, left_len) = this.operand_to_simd(left)?;
-                let (right, right_len) = this.operand_to_simd(right)?;
-                let (dest, dest_len) = this.place_to_simd(dest)?;
-
-                assert_eq!(dest_len, left_len);
-                assert_eq!(dest_len, right_len);
-
-                for i in 0..dest_len {
-                    let left = this.read_immediate(&this.project_index(&left, i)?)?;
-                    let right = this.read_immediate(&this.project_index(&right, i)?)?;
-                    let dest = this.project_index(&dest, i)?;
-
-                    // Even elements are subtracted and odd elements are added.
-                    let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add };
-                    let res = this.wrapping_binary_op(op, &left, &right)?;
-
-                    this.write_immediate(*res, &dest)?;
-                }
-            }
             // Used to implement the _mm_h{add,sub}_p{s,d} functions.
             // Horizontally add/subtract adjacent floating point values
             // in `left` and `right`.

From 0043cc9274d7b8c2ca642afcbb8f5a60496413f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= <eduardosm-dev@e64.io>
Date: Tue, 10 Oct 2023 18:44:53 +0200
Subject: [PATCH 2/2] Fix indentation of a `rustfmt::skip`ed statement

---
 tests/pass/intrinsics-x86-sse2.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/pass/intrinsics-x86-sse2.rs b/tests/pass/intrinsics-x86-sse2.rs
index 2c7665bc73..e636d6c8aa 100644
--- a/tests/pass/intrinsics-x86-sse2.rs
+++ b/tests/pass/intrinsics-x86-sse2.rs
@@ -117,12 +117,12 @@ mod tests {
         #[target_feature(enable = "sse2")]
         unsafe fn test_mm_sad_epu8() {
             #[rustfmt::skip]
-        let a = _mm_setr_epi8(
-            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
-            1, 2, 3, 4,
-            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
-            1, 2, 3, 4,
-        );
+            let a = _mm_setr_epi8(
+                255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
+                1, 2, 3, 4,
+                155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
+                1, 2, 3, 4,
+            );
             let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
             let r = _mm_sad_epu8(a, b);
             let e = _mm_setr_epi64x(1020, 614);