From feb4fcd2cac625e20dd77126d2bbda7a8ab53188 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 14 Apr 2020 14:45:35 -0700 Subject: [PATCH] Simplifying the special importation logic for Sse/Sse2 compare GreaterThan functions --- src/coreclr/src/jit/hwintrinsic.h | 1 + src/coreclr/src/jit/hwintrinsiclistxarch.h | 32 +- src/coreclr/src/jit/hwintrinsicxarch.cpp | 410 +++++++-------------- 3 files changed, 141 insertions(+), 302 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsic.h b/src/coreclr/src/jit/hwintrinsic.h index ca58012c5a483..65ce34d7d24fe 100644 --- a/src/coreclr/src/jit/hwintrinsic.h +++ b/src/coreclr/src/jit/hwintrinsic.h @@ -150,6 +150,7 @@ struct HWIntrinsicInfo #ifdef TARGET_XARCH static bool isAVX2GatherIntrinsic(NamedIntrinsic id); + static int lookupFloatingComparisonForSwappedArgs(int comparison); #endif // Member lookup diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 6ceaacd8bf61d..02dde9c4e94be 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -91,13 +91,13 @@ HARDWARE_INTRINSIC(SSE_CompareEqual, "CompareEqua HARDWARE_INTRINSIC(SSE_CompareScalarOrderedEqual, "CompareScalarOrderedEqual", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareScalarEqual, "CompareScalarEqual", SSE, _CMP_EQ_OQ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedEqual, "CompareScalarUnorderedEqual", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, 
HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, _CMP_LT_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) HARDWARE_INTRINSIC(SSE_CompareScalarOrderedGreaterThan, "CompareScalarOrderedGreaterThan", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE_CompareScalarGreaterThan, "CompareScalarGreaterThan", SSE, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(SSE_CompareScalarGreaterThan, "CompareScalarGreaterThan", SSE, _CMP_LT_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedGreaterThan, "CompareScalarUnorderedGreaterThan", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE, _CMP_NLT_US, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE, _CMP_LE_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) HARDWARE_INTRINSIC(SSE_CompareScalarOrderedGreaterThanOrEqual, "CompareScalarOrderedGreaterThanOrEqual", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE_CompareScalarGreaterThanOrEqual, "CompareScalarGreaterThanOrEqual", SSE, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(SSE_CompareScalarGreaterThanOrEqual, "CompareScalarGreaterThanOrEqual", SSE, _CMP_LE_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedGreaterThanOrEqual, "CompareScalarUnorderedGreaterThanOrEqual", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareLessThan, "CompareLessThan", SSE, _CMP_LT_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
HARDWARE_INTRINSIC(SSE_CompareScalarOrderedLessThan, "CompareScalarOrderedLessThan", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -111,10 +111,10 @@ HARDWARE_INTRINSIC(SSE_CompareNotEqual, "CompareNotE HARDWARE_INTRINSIC(SSE_CompareScalarOrderedNotEqual, "CompareScalarOrderedNotEqual", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareScalarNotEqual, "CompareScalarNotEqual", SSE, _CMP_NEQ_UQ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedNotEqual, "CompareScalarUnorderedNotEqual", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan, "CompareNotGreaterThan", SSE, _CMP_LE_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(SSE_CompareScalarNotGreaterThan, "CompareScalarNotGreaterThan", SSE, _CMP_LE_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE, 
_CMP_LT_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(SSE_CompareScalarNotGreaterThanOrEqual, "CompareScalarNotGreaterThanOrEqual", SSE, _CMP_LT_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan, "CompareNotGreaterThan", SSE, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE_CompareScalarNotGreaterThan, "CompareScalarNotGreaterThan", SSE, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE_CompareScalarNotGreaterThanOrEqual, "CompareScalarNotGreaterThanOrEqual", SSE, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE_CompareNotLessThan, "CompareNotLessThan", SSE, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
HARDWARE_INTRINSIC(SSE_CompareScalarNotLessThan, "CompareScalarNotLessThan", SSE, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqual, "CompareNotLessThanOrEqual", SSE, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -192,13 +192,13 @@ HARDWARE_INTRINSIC(SSE2_CompareEqual, "CompareEqua HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedEqual, "CompareScalarOrderedEqual", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareScalarEqual, "CompareScalarEqual", SSE2, _CMP_EQ_OQ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedEqual, "CompareScalarUnorderedEqual", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_CompareGreaterThan, "CompareGreaterThan", SSE2, _CMP_NLE_US, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2_CompareGreaterThan, "CompareGreaterThan", SSE2, _CMP_LT_OS, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedGreaterThan, "CompareScalarOrderedGreaterThan", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_CompareScalarGreaterThan, "CompareScalarGreaterThan", SSE2, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(SSE2_CompareScalarGreaterThan, "CompareScalarGreaterThan", SSE2, _CMP_LT_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedGreaterThan, "CompareScalarUnorderedGreaterThan", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE2, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE2, _CMP_LE_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedGreaterThanOrEqual, 
"CompareScalarOrderedGreaterThanOrEqual", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_CompareScalarGreaterThanOrEqual, "CompareScalarGreaterThanOrEqual", SSE2, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(SSE2_CompareScalarGreaterThanOrEqual, "CompareScalarGreaterThanOrEqual", SSE2, _CMP_LE_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedGreaterThanOrEqual, "CompareScalarUnorderedGreaterThanOrEqual", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareLessThan, "CompareLessThan", SSE2, _CMP_LT_OS, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_Special, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedLessThan, "CompareScalarOrderedLessThan", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -212,10 +212,10 @@ HARDWARE_INTRINSIC(SSE2_CompareNotEqual, "CompareNotE HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedNotEqual, "CompareScalarOrderedNotEqual", SSE2, -1, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareScalarNotEqual, "CompareScalarNotEqual", SSE2, _CMP_NEQ_UQ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedNotEqual, "CompareScalarUnorderedNotEqual", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThan, "CompareNotGreaterThan", SSE2, _CMP_LE_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(SSE2_CompareScalarNotGreaterThan, "CompareScalarNotGreaterThan", SSE2, _CMP_LE_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE2, _CMP_LT_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(SSE2_CompareScalarNotGreaterThanOrEqual, "CompareScalarNotGreaterThanOrEqual", SSE2, _CMP_LT_OS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, 
HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThan, "CompareNotGreaterThan", SSE2, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2_CompareScalarNotGreaterThan, "CompareScalarNotGreaterThan", SSE2, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE2, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2_CompareScalarNotGreaterThanOrEqual, "CompareScalarNotGreaterThanOrEqual", SSE2, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2_CompareNotLessThan, "CompareNotLessThan", SSE2, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_CompareScalarNotLessThan, "CompareScalarNotLessThan", SSE2, _CMP_NLT_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2_CompareNotLessThanOrEqual, "CompareNotLessThanOrEqual", SSE2, _CMP_NLE_US, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/src/jit/hwintrinsicxarch.cpp b/src/coreclr/src/jit/hwintrinsicxarch.cpp index 0799de4b2ea75..13a86ec6a4ea0 100644 --- a/src/coreclr/src/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsicxarch.cpp @@ -245,6 +245,89 @@ bool HWIntrinsicInfo::isAVX2GatherIntrinsic(NamedIntrinsic id) } } +//------------------------------------------------------------------------ +// lookupFloatingComparisonForSwappedArgs: Get the floating-point comparison +// mode to use when the operands are swapped. +// +// Arguments: +// comparison -- The comparison mode used for (op1, op2) +// +// Return Value: +// The comparison mode to use for (op2, op1) so that the result matches the original comparison of (op1, op2) +// +int HWIntrinsicInfo::lookupFloatingComparisonForSwappedArgs(int comparison) +{ + switch (comparison) + { + case _CMP_EQ_OQ: + return _CMP_EQ_OQ; + case _CMP_LT_OS: + return _CMP_GT_OS; + case _CMP_LE_OS: + return _CMP_GE_OS; + case _CMP_UNORD_Q: + return _CMP_UNORD_Q; + case _CMP_NEQ_UQ: + return _CMP_NEQ_UQ; + case _CMP_NLT_US: + return _CMP_NGT_US; + case _CMP_NLE_US: + return _CMP_NGE_US; + case _CMP_ORD_Q: + return _CMP_ORD_Q; + case _CMP_EQ_UQ: + return _CMP_EQ_UQ; + case _CMP_NGE_US: + return _CMP_NLE_US; + case _CMP_NGT_US: + return _CMP_NLT_US; + case _CMP_FALSE_OQ: + return _CMP_FALSE_OQ; + case _CMP_NEQ_OQ: + return _CMP_NEQ_OQ; + case _CMP_GE_OS: + return _CMP_LE_OS; + case _CMP_GT_OS: + return _CMP_LT_OS; + case _CMP_TRUE_UQ: + return _CMP_TRUE_UQ; + case _CMP_EQ_OS: + return _CMP_EQ_OS; + case _CMP_LT_OQ: + return _CMP_GT_OQ; + case _CMP_LE_OQ: + return _CMP_GE_OQ; + case _CMP_UNORD_S: + return _CMP_UNORD_S; + case _CMP_NEQ_US: + return _CMP_NEQ_US; + case _CMP_NLT_UQ: + return _CMP_NGT_UQ; + case _CMP_NLE_UQ: + return _CMP_NGE_UQ; + case _CMP_ORD_S: + return _CMP_ORD_S; + case _CMP_EQ_US: + return _CMP_EQ_US; + case _CMP_NGE_UQ: + return _CMP_NLE_UQ; + case _CMP_NGT_UQ: + return _CMP_NLT_UQ; + case 
_CMP_FALSE_OS: + return _CMP_FALSE_OS; + case _CMP_NEQ_OS: + return _CMP_NEQ_OS; + case _CMP_GE_OQ: + return _CMP_LE_OQ; + case _CMP_GT_OQ: + return _CMP_LT_OQ; + case _CMP_TRUE_US: + return _CMP_TRUE_US; + default: + unreached(); + } +} + //------------------------------------------------------------------------ // isFullyImplementedIsa: Gets a value that indicates whether the InstructionSet is fully implemented // @@ -1186,66 +1269,8 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND switch (intrinsic) { case NI_SSE_CompareGreaterThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_GT_OS), NI_AVX_Compare, - baseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_CompareLessThan, baseType, simdSize); - } - break; - } - case NI_SSE_CompareGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_GE_OS), NI_AVX_Compare, - baseType, simdSize); - } - else - { - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_CompareLessThanOrEqual, baseType, simdSize); - } - break; - } - case NI_SSE_CompareNotGreaterThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = 
gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_NGT_US), NI_AVX_Compare, - baseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_CompareNotLessThan, baseType, simdSize); - } - break; - } - case NI_SSE_CompareNotGreaterThanOrEqual: { assert(sig->numArgs == 2); @@ -1256,99 +1281,24 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_NGE_US), NI_AVX_Compare, + // These intrinsics are "special import" because the non-AVX path isn't directly + // hardware supported. Instead, they start with "swapped operands" and we fix that here. + + int comparison = HWIntrinsicInfo::lookupIval(intrinsic); + comparison = HWIntrinsicInfo::lookupFloatingComparisonForSwappedArgs(comparison); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(comparison), NI_AVX_Compare, baseType, simdSize); } else { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_CompareNotLessThanOrEqual, baseType, - simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize); } break; } case NI_SSE_CompareScalarGreaterThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_GT_OS), - NI_AVX_CompareScalar, baseType, simdSize); - } - else - { - GenTree* clonedOp1 = nullptr; - op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse.CompareScalarGreaterThan")); - - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_CompareScalarLessThan, baseType, 
simdSize); - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE_MoveScalar, baseType, simdSize); - } - break; - } - case NI_SSE_CompareScalarGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_GE_OS), - NI_AVX_CompareScalar, baseType, simdSize); - } - else - { - GenTree* clonedOp1 = nullptr; - op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse.CompareScalarGreaterThanOrEqual")); - - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_CompareScalarLessThanOrEqual, baseType, - simdSize); - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE_MoveScalar, baseType, simdSize); - } - break; - } - case NI_SSE_CompareScalarNotGreaterThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_NGT_US), - NI_AVX_CompareScalar, baseType, simdSize); - } - else - { - - GenTree* clonedOp1 = nullptr; - op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse.CompareScalarNotGreaterThan")); - - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_CompareScalarNotLessThan, baseType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE_MoveScalar, baseType, simdSize); - } - break; - } - case NI_SSE_CompareScalarNotGreaterThanOrEqual: { assert(sig->numArgs == 2); @@ 
-1359,17 +1309,21 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_NGE_US), + // These intrinsics are "special import" because the non-AVX path isn't directly + // hardware supported. Instead, they start with "swapped operands" and we fix that here. + + int comparison = HWIntrinsicInfo::lookupIval(intrinsic); + comparison = HWIntrinsicInfo::lookupFloatingComparisonForSwappedArgs(comparison); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(comparison), NI_AVX_CompareScalar, baseType, simdSize); } else { GenTree* clonedOp1 = nullptr; op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse.CompareScalarNotGreaterThanOrEqual")); + nullptr DEBUGARG("Clone op1 for Sse.CompareScalarGreaterThan")); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_CompareScalarNotLessThanOrEqual, - baseType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize); retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE_MoveScalar, baseType, simdSize); } @@ -1421,29 +1375,25 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN { case NI_SSE2_CompareGreaterThan: { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - if (baseType != TYP_DOUBLE) { + assert(sig->numArgs == 2); + op2 = impSIMDPopStack(TYP_SIMD16); + op1 = impSIMDPopStack(TYP_SIMD16); + baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_CompareGreaterThan, baseType, simdSize); + + break; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = 
gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_GT_OS), NI_AVX_Compare, - baseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareLessThan, baseType, simdSize); - } - break; + + __fallthrough; } case NI_SSE2_CompareGreaterThanOrEqual: + case NI_SSE2_CompareNotGreaterThan: + case NI_SSE2_CompareNotGreaterThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(TYP_SIMD16); @@ -1453,13 +1403,17 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_GE_OS), NI_AVX_Compare, + // These intrinsics are "special import" because the non-AVX path isn't directly + // hardware supported. Instead, they start with "swapped operands" and we fix that here. + + int comparison = HWIntrinsicInfo::lookupIval(intrinsic); + comparison = HWIntrinsicInfo::lookupFloatingComparisonForSwappedArgs(comparison); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(comparison), NI_AVX_Compare, baseType, simdSize); } else { - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareLessThanOrEqual, baseType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize); } break; } @@ -1483,129 +1437,9 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN break; } - case NI_SSE2_CompareNotGreaterThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_NGT_US), NI_AVX_Compare, - baseType, simdSize); - } - else - { - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, 
op1, NI_SSE2_CompareNotLessThan, baseType, simdSize); - } - break; - } - - case NI_SSE2_CompareNotGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_NGE_US), NI_AVX_Compare, - baseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareNotLessThanOrEqual, baseType, - simdSize); - } - break; - } - case NI_SSE2_CompareScalarGreaterThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_GT_OS), - NI_AVX_CompareScalar, baseType, simdSize); - } - else - { - GenTree* clonedOp1 = nullptr; - op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarGreaterThan")); - - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareScalarLessThan, baseType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE2_MoveScalar, baseType, simdSize); - } - break; - } - case NI_SSE2_CompareScalarGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_GE_OS), - NI_AVX_CompareScalar, baseType, simdSize); - } - else - { - GenTree* 
clonedOp1 = nullptr; - op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarGreaterThanOrEqual")); - - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareScalarLessThanOrEqual, baseType, - simdSize); - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE2_MoveScalar, baseType, simdSize); - } - break; - } - case NI_SSE2_CompareScalarNotGreaterThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_NGT_US), - NI_AVX_CompareScalar, baseType, simdSize); - } - else - { - GenTree* clonedOp1 = nullptr; - op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarNotGreaterThan")); - - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareScalarNotLessThan, baseType, - simdSize); - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE2_MoveScalar, baseType, simdSize); - } - break; - } - case NI_SSE2_CompareScalarNotGreaterThanOrEqual: { assert(sig->numArgs == 2); @@ -1616,17 +1450,21 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(_CMP_NGE_US), + // These intrinsics are "special import" because the non-AVX path isn't directly + // hardware supported. Instead, they start with "swapped operands" and we fix that here. 
+ + int comparison = HWIntrinsicInfo::lookupIval(intrinsic); + comparison = HWIntrinsicInfo::lookupFloatingComparisonForSwappedArgs(comparison); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(comparison), NI_AVX_CompareScalar, baseType, simdSize); } else { GenTree* clonedOp1 = nullptr; op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarNotGreaterThanOrEqual")); + nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarGreaterThan")); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareScalarNotLessThanOrEqual, - baseType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize); retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE2_MoveScalar, baseType, simdSize); }