From 1b62efb2a184d6d534c060dc78a6cf4084bd59ee Mon Sep 17 00:00:00 2001
From: Dan Robertson <danlrobertson89@gmail.com>
Date: Thu, 28 Sep 2017 12:16:52 +0000
Subject: [PATCH] [x86] Add _mm_cvtps_epi32 (cvtps2dq) function

_mm_cvtepi32_ps has been implemented, but _mm_cvtps_epi32 is missing.
Use the implementation of _mm_cvtepi32_ps as a guide for implementing
_mm_cvtps_epi32.
---
 src/x86/sse2.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index 162ac313b8..979e5efdd6 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -691,6 +691,15 @@ pub unsafe fn _mm_cvtepi32_ps(a: i32x4) -> f32x4 {
     cvtdq2ps(a)
 }
 
+/// Convert the four packed single-precision (32-bit) floating-point elements
+/// in `a` to packed signed 32-bit integers using the `cvtps2dq` intrinsic.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(cvtps2dq))]
+pub unsafe fn _mm_cvtps_epi32(a: f32x4) -> i32x4 {
+    cvtps2dq(a)
+}
+
 /// Return a vector whose lowest element is `a` and all higher elements are
 /// `0`.
 #[inline(always)]
@@ -1819,6 +1828,8 @@ extern {
     fn psrlq(a: i64x2, count: i64x2) -> i64x2;
     #[link_name = "llvm.x86.sse2.cvtdq2ps"]
     fn cvtdq2ps(a: i32x4) -> f32x4;
+    #[link_name = "llvm.x86.sse2.cvtps2dq"]
+    fn cvtps2dq(a: f32x4) -> i32x4;
     #[link_name = "llvm.x86.sse2.maskmov.dqu"]
     fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
     #[link_name = "llvm.x86.sse2.packsswb.128"]
@@ -2565,6 +2576,13 @@ mod tests {
         assert_eq!(r, f32x4::new(1.0, 2.0, 3.0, 4.0));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_cvtps_epi32() {
+        let a = f32x4::new(1.0, 2.0, 3.0, 4.0); // whole-number lanes only
+        let r = sse2::_mm_cvtps_epi32(a);
+        assert_eq!(r, i32x4::new(1, 2, 3, 4)); // same values, lane by lane
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_cvtsi32_si128() {
         let r = sse2::_mm_cvtsi32_si128(5);