From 7f6fc8fcf8515964899a733d38f70901336cd770 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 1 Aug 2024 17:35:46 -0400 Subject: [PATCH 1/7] Fix overshifting for RSZ folding --- src/coreclr/jit/simd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 0e10abd29fcd8..35f34e35533c7 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -668,10 +668,10 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* template TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1) { -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) if ((arg1 < 0) || (arg1 >= (sizeof(TBase) * 8))) { - // For SIMD, xarch allows overshifting and treats + // For SIMD, xarch and ARM64 allow overshifting and treat // it as zeroing. So ensure we do the same here. // // The xplat APIs ensure the shiftAmount is masked @@ -683,7 +683,7 @@ TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1) // Other platforms enforce masking in their encoding unsigned shiftCountMask = (sizeof(TBase) * 8) - 1; arg1 &= shiftCountMask; -#endif +#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) return arg0 >> arg1; } From 227197c160d536291d58211c5166e2d578699156 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 1 Aug 2024 18:08:13 -0400 Subject: [PATCH 2/7] Add tests --- .../JitBlue/Runtime_105817/Runtime_105817.cs | 60 +++++++++++++++++++ .../Runtime_105817/Runtime_105817.csproj | 8 +++ 2 files changed, 68 insertions(+) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.csproj diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs b/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs new file mode 100644 index 0000000000000..84c38d22f8dd7 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Generated by Fuzzlyn v2.2 on 2024-08-01 14:37:47 +// Run on Arm64 MacOS +// Seed: 14773448547728333023-vectort,vector64,vector128,armadvsimd,armadvsimdarm64,armaes,armarmbase,armarmbasearm64,armcrc32,armcrc32arm64,armdp,armrdm,armrdmarm64,armsha1,armsha256 +// Reduced from 270.2 KiB to 0.4 KiB in 00:01:48 +// Debug: Outputs 0 +// Release: Outputs 1 +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +public class Runtime_105817 +{ + [Fact] + public static void TestOverShiftRightLogical() + { + if (AdvSimd.IsSupported) + { + var vr6 = Vector128.Create(1); + var vr7 = AdvSimd.ShiftRightLogical(vr6, 16); + Assert.Equal(vr7, Vector128.Zero); + } + } + + [Fact] + public static void TestOverShiftRightLogicalScalar() + { + if (AdvSimd.IsSupported) + { + var vr6 = Vector64.Create(1); + var vr7 = AdvSimd.ShiftRightLogicalScalar(vr6, 128); + Assert.Equal(vr7, Vector64.Zero); + } + } + + [Fact] + public static void TestOverShiftRightArithmetic() + { + if (AdvSimd.IsSupported) + { + var vr6 = Vector128.Create(1); + var vr7 = AdvSimd.ShiftRightArithmetic(vr6, 16); + Assert.Equal(vr7, Vector128.Zero); + } + } + + [Fact] + public static void TestOverShiftRightArithmeticScalar() + { + if (AdvSimd.IsSupported) + { + var vr6 = Vector64.Create(1); + var vr7 = AdvSimd.ShiftRightArithmeticScalar(vr6, 128); + Assert.Equal(vr7, Vector64.Zero); + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.csproj new file mode 100644 index 0000000000000..15edd99711a1a --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.csproj @@ -0,0 +1,8 @@ + + + True + + + + + \ No newline at end of file From 1c58151fc4e7d27e2122967c2cafb911c934d401 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 1 Aug 2024 18:23:13 -0400 Subject: [PATCH 3/7] Fix for RSH folding too --- src/coreclr/jit/simd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 35f34e35533c7..5f4f00249cd7c 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -683,7 +683,7 @@ TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1) // Other platforms enforce masking in their encoding unsigned shiftCountMask = (sizeof(TBase) * 8) - 1; arg1 &= shiftCountMask; -#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) +#endif return arg0 >> arg1; } @@ -817,10 +817,10 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1) case GT_RSH: { -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) if ((arg1 < 0) || (arg1 >= (sizeof(TBase) * 8))) { - // For SIMD, xarch allows overshifting and treats + // For SIMD, xarch and ARM64 allow overshifting and treat // it as propagating the sign bit (returning Zero // or AllBitsSet). So ensure we do the same here. // From 1e257b8834b45cf271195145f4adc5cd31804c63 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 1 Aug 2024 18:34:59 -0400 Subject: [PATCH 4/7] Fix imms --- .../JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs b/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs index 84c38d22f8dd7..122a67b382847 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs @@ -31,7 +31,7 @@ public static void TestOverShiftRightLogicalScalar() if (AdvSimd.IsSupported) { var vr6 = Vector64.Create(1); - var vr7 = AdvSimd.ShiftRightLogicalScalar(vr6, 128); + var vr7 = AdvSimd.ShiftRightLogicalScalar(vr6, 64); Assert.Equal(vr7, Vector64.Zero); } } @@ -53,7 +53,7 @@ public static void TestOverShiftRightArithmeticScalar() if (AdvSimd.IsSupported) { var vr6 = Vector64.Create(1); - var vr7 = AdvSimd.ShiftRightArithmeticScalar(vr6, 128); + var vr7 = AdvSimd.ShiftRightArithmeticScalar(vr6, 64); Assert.Equal(vr7, Vector64.Zero); } } From a83832eabe050ffe1464a62cea36800ce64ee0a7 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 1 Aug 2024 22:36:13 -0400 Subject: [PATCH 5/7] Add fallbacks for ShiftRight APIs; use them when imm is non-const/OOB --- src/coreclr/jit/hwintrinsic.cpp | 7 +++--- src/coreclr/jit/hwintrinsicarm64.cpp | 30 ++++++++++++++++++++++++-- src/coreclr/jit/hwintrinsiclistarm64.h | 8 +++---- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 174637d5be6e2..d3545759d9c91 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1667,11 +1667,12 @@ bool Compiler::CheckHWIntrinsicImmRange(NamedIntrinsic intrinsic, switch (intrinsic) { case NI_AdvSimd_ShiftRightLogical: + case NI_AdvSimd_ShiftRightLogicalScalar: + case NI_AdvSimd_ShiftRightArithmetic: + case NI_AdvSimd_ShiftRightArithmeticScalar: *useFallback = true; break; - // TODO: Implement more AdvSimd fallbacks in Compiler::impNonConstFallback - default: assert(*useFallback == false); break; @@ -1702,7 +1703,7 @@ bool Compiler::CheckHWIntrinsicImmRange(NamedIntrinsic intrinsic, } } else -#endif // TARGET_XARCH +#endif // TARGET_X86 { *useFallback = true; return false; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 9647cc826ca08..32d533b9a7538 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -567,17 +567,43 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT switch (intrinsic) { case NI_AdvSimd_ShiftRightLogical: + case NI_AdvSimd_ShiftRightLogicalScalar: + case NI_AdvSimd_ShiftRightArithmetic: + case NI_AdvSimd_ShiftRightArithmeticScalar: { - // AdvSimd.ShiftRightLogical be replaced with AdvSimd.ShiftLogical, which takes op2 in a simd register + // AdvSimd.ShiftRight* be replaced with AdvSimd.Shift*, which takes op2 in a simd register GenTree* op2 = impPopStack().val; GenTree* op1 = impSIMDPopStack(); + NamedIntrinsic fallbackIntrinsic; + switch (intrinsic) + { + case NI_AdvSimd_ShiftRightLogical: + fallbackIntrinsic = NI_AdvSimd_ShiftLogical; + break; + + case NI_AdvSimd_ShiftRightLogicalScalar: + fallbackIntrinsic = NI_AdvSimd_ShiftLogicalScalar; + break; + + case NI_AdvSimd_ShiftRightArithmetic: + fallbackIntrinsic = NI_AdvSimd_ShiftArithmetic; + break; + + case NI_AdvSimd_ShiftRightArithmeticScalar: + fallbackIntrinsic = NI_AdvSimd_ShiftArithmeticScalar; + break; + + default: + unreached(); + } + // AdvSimd.ShiftLogical does right-shifts with negative immediates, hence the negation GenTree* tmpOp = gtNewSimdCreateBroadcastNode(simdType, gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2), simdBaseJitType, genTypeSize(simdType)); - return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, NI_AdvSimd_ShiftLogical, simdBaseJitType, + return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, fallbackIntrinsic, simdBaseJitType, genTypeSize(simdType)); } diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index e5c54aabfd276..24c4fc2b1879c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -451,7 +451,7 @@ HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalSaturateScalar, HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ushl, INS_ushl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, ShiftRightAndInsert, -1, 3, true, {INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, ShiftRightAndInsertScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sri, INS_sri, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmetic, -1, 2, true, {INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmetic, -1, 2, true, {INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticAdd, -1, 3, true, {INS_ssra, INS_invalid, INS_ssra, INS_invalid, INS_ssra, INS_invalid, INS_ssra, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticAddScalar, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ssra, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateLower, 8, 2, true, {INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) @@ -466,8 +466,8 @@ HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUn HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedUpper, 16, 3, true, {INS_invalid, INS_sqrshrun2, INS_invalid, INS_sqrshrun2, INS_invalid, INS_sqrshrun2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUpper, 16, 3, true, {INS_sqrshrn2, INS_invalid, INS_sqrshrn2, INS_invalid, INS_sqrshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedScalar, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_srshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticScalar, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogical, -1, 2, true, {INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticScalar, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogical, -1, 2, true, {INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalAdd, -1, 3, true, {INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalAddScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_usra, INS_usra, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingLower, 8, 2, true, {INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) @@ -482,7 +482,7 @@ HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingSaturateLower HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingSaturateUpper, 16, 3, true, {INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingUpper, 16, 3, true, {INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_urshr, INS_urshr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ushr, INS_ushr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ushr, INS_ushr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, 8, 1, true, {INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar) From 9c808d5545b9d7c47c7d1b50d66b5aab2dbb910c Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 1 Aug 2024 22:37:43 -0400 Subject: [PATCH 6/7] Test coverage --- .../JitBlue/Runtime_105621/Runtime_105621.cs | 120 +++++++++++++++++- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_105621/Runtime_105621.cs b/src/tests/JIT/Regression/JitBlue/Runtime_105621/Runtime_105621.cs index ee772943aef89..2713dcec04a44 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_105621/Runtime_105621.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_105621/Runtime_105621.cs @@ -14,19 +14,23 @@ public class Runtime_105621 { + private static byte getByteImmOOB() => 9; + private static byte getShortImmOOB() => 17; + private static byte getLongImmOOB() => 65; + [Fact] - public static void TestShiftByZero() + public static void TestShiftRightLogicalByZero() { if (AdvSimd.IsSupported) { - var vr3 = Vector64.Create(0); + var vr3 = Vector64.Create(1); var vr4 = AdvSimd.ShiftRightLogical(vr3, 0); Assert.Equal(vr3, vr4); } } [Fact] - public static void TestShiftToZero() + public static void TestShiftRightLogicalToZero() { if (AdvSimd.IsSupported) { @@ -35,4 +39,114 @@ public static void TestShiftToZero() Assert.Equal(vr4, Vector64.Zero); } } + + [Fact] + public static void TestShiftRightLogicalToZeroNonConst() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftRightLogical(vr3, getByteImmOOB()); + Assert.Equal(vr4, Vector64.Zero); + } + } + + [Fact] + public static void TestShiftRightLogicalScalarByZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(1); + var vr4 = AdvSimd.ShiftRightLogicalScalar(vr3, 0); + Assert.Equal(vr3, vr4); + } + } + + [Fact] + public static void TestShiftRightLogicalScalarToZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftRightLogicalScalar(vr3, 65); + Assert.Equal(vr4, Vector64.Zero); + } + } + + [Fact] + public static void TestShiftRightLogicalScalarToZeroNonConst() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftRightLogicalScalar(vr3, getLongImmOOB()); + Assert.Equal(vr4, Vector64.Zero); + } + } + + [Fact] + public static void TestShiftRightArithmeticByZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector128.Create(1); + var vr4 = AdvSimd.ShiftRightArithmetic(vr3, 0); + Assert.Equal(vr3, vr4); + } + } + + [Fact] + public static void TestShiftRightArithmeticToZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector128.Create(128); + var vr4 = AdvSimd.ShiftRightArithmetic(vr3, 17); + Assert.Equal(vr4, Vector128.Zero); + } + } + + [Fact] + public static void TestShiftRightArithmeticToZeroNonConst() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector128.Create(128); + var vr4 = AdvSimd.ShiftRightArithmetic(vr3, getShortImmOOB()); + Assert.Equal(vr4, Vector128.Zero); + } + } + + [Fact] + public static void TestShiftRightArithmeticScalarByZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(1); + var vr4 = AdvSimd.ShiftRightArithmeticScalar(vr3, 0); + Assert.Equal(vr3, vr4); + } + } + + [Fact] + public static void TestShiftRightArithmeticScalarToZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftRightArithmeticScalar(vr3, 65); + Assert.Equal(vr4, Vector64.Zero); + } + } + + [Fact] + public static void TestShiftRightArithmeticScalarToZeroNonConst() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftRightArithmeticScalar(vr3, getLongImmOOB()); + Assert.Equal(vr4, Vector64.Zero); + } + } } From a2cbdd3fd4d11e827483de03d60dd943757f2721 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 2 Aug 2024 12:44:13 -0400 Subject: [PATCH 7/7] Add ShiftLeft fallback + tests --- src/coreclr/jit/hwintrinsic.cpp | 2 + src/coreclr/jit/hwintrinsicarm64.cpp | 23 +++++-- src/coreclr/jit/hwintrinsiclistarm64.h | 4 +- src/coreclr/jit/simd.h | 4 +- .../JitBlue/Runtime_105621/Runtime_105621.cs | 66 +++++++++++++++++++ .../JitBlue/Runtime_105817/Runtime_105817.cs | 22 +++++++ 6 files changed, 112 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index d3545759d9c91..d574cf0118bb2 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1666,6 +1666,8 @@ bool Compiler::CheckHWIntrinsicImmRange(NamedIntrinsic intrinsic, #ifdef TARGET_ARM64 switch (intrinsic) { + case NI_AdvSimd_ShiftLeftLogical: + case NI_AdvSimd_ShiftLeftLogicalScalar: case NI_AdvSimd_ShiftRightLogical: case NI_AdvSimd_ShiftRightLogicalScalar: case NI_AdvSimd_ShiftRightArithmetic: diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 32d533b9a7538..e7bde27b11d12 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -564,25 +564,41 @@ void HWIntrinsicInfo::lookupImmBounds( // GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, CorInfoType simdBaseJitType) { + bool isRightShift = true; + switch (intrinsic) { + case NI_AdvSimd_ShiftLeftLogical: + case NI_AdvSimd_ShiftLeftLogicalScalar: + isRightShift = false; + FALLTHROUGH; + case NI_AdvSimd_ShiftRightLogical: case NI_AdvSimd_ShiftRightLogicalScalar: case NI_AdvSimd_ShiftRightArithmetic: case NI_AdvSimd_ShiftRightArithmeticScalar: { - // AdvSimd.ShiftRight* be replaced with AdvSimd.Shift*, which takes op2 in a simd register + // AdvSimd.ShiftLeft* and AdvSimd.ShiftRight* can be replaced with AdvSimd.Shift*, which takes op2 in a simd + // register GenTree* op2 = impPopStack().val; GenTree* op1 = impSIMDPopStack(); + // AdvSimd.ShiftLogical does right-shifts with negative immediates, hence the negation + if (isRightShift) + { + op2 = gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2); + } + NamedIntrinsic fallbackIntrinsic; switch (intrinsic) { + case NI_AdvSimd_ShiftLeftLogical: case NI_AdvSimd_ShiftRightLogical: fallbackIntrinsic = NI_AdvSimd_ShiftLogical; break; + case NI_AdvSimd_ShiftLeftLogicalScalar: case NI_AdvSimd_ShiftRightLogicalScalar: fallbackIntrinsic = NI_AdvSimd_ShiftLogicalScalar; break; @@ -599,10 +615,7 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT unreached(); } - // AdvSimd.ShiftLogical does right-shifts with negative immediates, hence the negation - GenTree* tmpOp = - gtNewSimdCreateBroadcastNode(simdType, gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2), - simdBaseJitType, genTypeSize(simdType)); + GenTree* tmpOp = gtNewSimdCreateBroadcastNode(simdType, op2, simdBaseJitType, genTypeSize(simdType)); return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, fallbackIntrinsic, simdBaseJitType, genTypeSize(simdType)); } diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 24c4fc2b1879c..3b97cab722782 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -433,12 +433,12 @@ HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticSaturateScalar, HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticScalar, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, ShiftLeftAndInsert, -1, 3, true, {INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, ShiftLeftAndInsertScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sli, INS_sli, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogical, -1, 2, true, {INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogical, -1, 2, true, {INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturate, -1, 2, true, {INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqshl, INS_uqshl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateUnsigned, -1, 2, true, {INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateUnsignedScalar, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqshlu, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shl, INS_shl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shl, INS_shl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalWideningLower, 8, 2, true, {INS_sshll, INS_ushll, INS_sshll, INS_ushll, INS_sshll, INS_ushll, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalWideningUpper, 16, 2, true, {INS_sshll2, INS_ushll2, INS_sshll2, INS_ushll2, INS_sshll2, INS_ushll2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(AdvSimd, ShiftLogical, -1, 2, true, {INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 5f4f00249cd7c..507fbd62406de 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -760,10 +760,10 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1) case GT_LSH: { -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) if ((arg1 < 0) || (arg1 >= (sizeof(TBase) * 8))) { - // For SIMD, xarch allows overshifting and treats + // For SIMD, xarch and ARM64 allow overshifting and treat // it as zeroing. So ensure we do the same here. // // The xplat APIs ensure the shiftAmount is masked diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_105621/Runtime_105621.cs b/src/tests/JIT/Regression/JitBlue/Runtime_105621/Runtime_105621.cs index 2713dcec04a44..cf06e3e71144e 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_105621/Runtime_105621.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_105621/Runtime_105621.cs @@ -18,6 +18,72 @@ public class Runtime_105621 private static byte getShortImmOOB() => 17; private static byte getLongImmOOB() => 65; + [Fact] + public static void TestShiftLeftLogicalByZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(1); + var vr4 = AdvSimd.ShiftLeftLogical(vr3, 0); + Assert.Equal(vr3, vr4); + } + } + + [Fact] + public static void TestShiftLeftLogicalToZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftLeftLogical(vr3, 9); + Assert.Equal(vr4, Vector64.Zero); + } + } + + [Fact] + public static void TestShiftLeftLogicalToZeroNonConst() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftLeftLogical(vr3, getByteImmOOB()); + Assert.Equal(vr4, Vector64.Zero); + } + } + + [Fact] + public static void TestShiftLeftLogicalScalarByZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(1); + var vr4 = AdvSimd.ShiftLeftLogicalScalar(vr3, 0); + Assert.Equal(vr3, vr4); + } + } + + [Fact] + public static void TestShiftLeftLogicalScalarToZero() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftLeftLogicalScalar(vr3, 65); + Assert.Equal(vr4, Vector64.Zero); + } + } + + [Fact] + public static void TestShiftLeftLogicalScalarToZeroNonConst() + { + if (AdvSimd.IsSupported) + { + var vr3 = Vector64.Create(128); + var vr4 = AdvSimd.ShiftLeftLogicalScalar(vr3, getLongImmOOB()); + Assert.Equal(vr4, Vector64.Zero); + } + } + [Fact] public static void TestShiftRightLogicalByZero() { diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs b/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs index 122a67b382847..1b6f4b206861d 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_105817/Runtime_105817.cs @@ -14,6 +14,28 @@ public class Runtime_105817 { + [Fact] + public static void TestOverShiftLeftLogical() + { + if (AdvSimd.IsSupported) + { + var vr6 = Vector128.Create(1); + var vr7 = AdvSimd.ShiftLeftLogical(vr6, 16); + Assert.Equal(vr7, Vector128.Zero); + } + } + + [Fact] + public static void TestOverShiftLeftLogicalScalar() + { + if (AdvSimd.IsSupported) + { + var vr6 = Vector64.Create(1); + var vr7 = AdvSimd.ShiftLeftLogicalScalar(vr6, 64); + Assert.Equal(vr7, Vector64.Zero); + } + } + [Fact] public static void TestOverShiftRightLogical() {