From 6f94411cae34df61dde7f8408647728815892162 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 31 Jan 2024 16:29:39 +0000 Subject: [PATCH 01/14] JIT ARM64-SVE: Add TrueMask Change-Id: I285f8aba668409ca94e11be2489a6d9b50a4ec6e --- src/coreclr/jit/emitarm64.cpp | 28 ++++++++++++ src/coreclr/jit/emitarm64.h | 3 ++ src/coreclr/jit/hwintrinsic.h | 21 +++++++-- src/coreclr/jit/hwintrinsicarm64.cpp | 5 +++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 27 ++++++++++++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 + src/coreclr/jit/lowerarmarch.cpp | 9 ++++ src/coreclr/jit/lsraarm64.cpp | 23 +++++++++- src/coreclr/jit/targetarm64.h | 4 ++ .../System.Private.CoreLib.Shared.projitems | 1 + .../System/Runtime/Intrinsics/Arm/Enums.cs | 26 +++++++++++ .../Arm/Sve.PlatformNotSupported.cs | 15 +++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 16 +++++++ .../ref/System.Runtime.Intrinsics.cs | 23 ++++++++++ src/tests/JIT/opt/Sve/Sve_mine.cs | 44 +++++++++++++++++++ src/tests/JIT/opt/Sve/Sve_mine.csproj | 18 ++++++++ 16 files changed, 260 insertions(+), 5 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs create mode 100644 src/tests/JIT/opt/Sve/Sve_mine.cs create mode 100644 src/tests/JIT/opt/Sve/Sve_mine.csproj diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 2d15e3ae1ee1bd..2da290e11be22e 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -6157,6 +6157,34 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) } } +// For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement. +// Asserts and returns INS_OPTS_NONE if an invalid 'elemsize' is passed +// +/*static*/ insOpts emitter::optGetSveInsOpt(emitAttr elemsize) +{ + switch (elemsize) + { + case EA_1BYTE: + return INS_OPTS_SCALABLE_B; + + case EA_2BYTE: + return INS_OPTS_SCALABLE_H; + + case EA_4BYTE: + return INS_OPTS_SCALABLE_S; + + case EA_8BYTE: + return INS_OPTS_SCALABLE_D; + + case EA_16BYTE: + return INS_OPTS_SCALABLE_Q; + + default: + assert(!"Invalid emitAttr for sve vector register"); + return INS_OPTS_NONE; + } +} + // For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement // asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed // diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 68cd25723af5bb..fd14c013d763a5 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -786,6 +786,9 @@ static emitAttr optGetDatasize(insOpts arrangement); // For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement static emitAttr optGetElemsize(insOpts arrangement); +// For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement. +static insOpts optGetSveInsOpt(emitAttr elemsize); + // For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement static emitAttr optGetSveElemsize(insOpts arrangement); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 4df1aace5287a4..3088457f2744be 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -58,6 +58,7 @@ enum HWIntrinsicCategory : uint8_t HW_Category_ShiftLeftByImmediate, HW_Category_ShiftRightByImmediate, HW_Category_SIMDByIndexedElement, + HW_Category_EnumPattern, // Helper intrinsics // - do not directly correspond to a instruction, such as Vector64.AllBitsSet @@ -175,6 +176,15 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic needs consecutive registers HW_Flag_NeedsConsecutiveRegisters = 0x4000, + + // The intrinsic uses scalable registers + HW_Flag_Scalable = 0x8000, + + // Returns Per-Element Mask + // the intrinsic returns a vector containing elements that are either "all bits set" or "all bits clear" + // this output can be used as a per-element mask + HW_Flag_ReturnsPerElementMask = 0x10000, + #else #error Unsupported platform #endif @@ -669,10 +679,8 @@ struct HWIntrinsicInfo static bool ReturnsPerElementMask(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) return (flags & HW_Flag_ReturnsPerElementMask) != 0; -#elif defined(TARGET_ARM64) - unreached(); #else #error Unsupported platform #endif @@ -863,6 +871,13 @@ struct HWIntrinsicInfo const HWIntrinsicFlag flags = lookupFlags(id); return (flags & HW_Flag_HasImmediateOperand) != 0; } + + static bool IsScalable(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_Scalable) != 0; + } + #endif // TARGET_ARM64 static bool HasSpecialSideEffect(NamedIntrinsic id) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 0561ac2adadd69..8faf346803abce 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -280,6 +280,11 @@ void HWIntrinsicInfo::lookupImmBounds( immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1; break; + case NI_Sve_TrueMask: + immLowerBound = (int) SVE_PATTERN_POW2; + immUpperBound = (int) SVE_PATTERN_ALL; + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index eba1b6f33a09c4..2a08629b489778 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -265,6 +265,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) emitSize = EA_UNKNOWN; opt = INS_OPTS_NONE; } + else if (HWIntrinsicInfo::IsScalable(intrin.id)) + { + emitSize = EA_SCALABLE; + opt = emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)); + } else { emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); @@ -371,6 +376,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { emitShift(intrin.op2, op1Reg); } + + } + else if (intrin.category == HW_Category_EnumPattern) + { + assert(hasImmediateOperand); + + switch (intrin.numOperands) + { + case 1: + { + HWIntrinsicImmOpHelper helper(this, intrin.op1, node); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + const insSvePattern pattern = (insSvePattern) helper.ImmValue(); + GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, pattern); + } + }; + break; + + default: + unreached(); + } } else { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index f8263c40bb0c66..7fe506fb5fdd8e 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -16,6 +16,8 @@ // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SVE Intrinsics +// Sve +HARDWARE_INTRINSIC(Sve, TrueMask, -1, 1, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 5c0acbbdc40115..a81e36d4b8fa2d 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3188,6 +3188,15 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Sve_TrueMask: + assert(hasImmediateOperand); + assert(varTypeIsIntegral(intrin.op1)); + if (intrin.op1->IsCnsIntOrI()) + { + MakeSrcContained(node, intrin.op1); + } + break; + default: unreached(); } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ea3bc9d7fb37e0..b77d53c90e7426 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1318,6 +1318,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou int srcCount = 0; int dstCount = 0; + regMaskTP dstCandidates = RBM_NONE; if (HWIntrinsicInfo::IsMultiReg(intrin.id)) { @@ -1430,6 +1431,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op4->isContainedIntOrIImmed()); break; + case NI_Sve_TrueMask: + needBranchTargetReg = !intrin.op1->isContainedIntOrIImmed(); + break; + default: unreached(); } @@ -1716,6 +1721,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } return srcCount; } + + else if (HWIntrinsicInfo::ReturnsPerElementMask(intrin.id)) + { + switch (intrin.id) + { + case NI_Sve_TrueMask: + dstCandidates = RBM_ALLMASK; + break; + + default: + noway_assert(!"Not a supported ReturnsPerElementMask operation"); + } + } + else if (intrin.op2 != nullptr) { // RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg @@ -1770,11 +1789,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if ((dstCount == 1) || (dstCount == 2)) { - BuildDef(intrinsicTree); + BuildDef(intrinsicTree, dstCandidates); if (dstCount == 2) { - BuildDef(intrinsicTree, RBM_NONE, 1); + BuildDef(intrinsicTree, dstCandidates, 1); } } else diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 3646ecb4407bf7..71302f5febf005 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -140,6 +140,10 @@ #define REG_JUMP_THUNK_PARAM REG_R12 #define RBM_JUMP_THUNK_PARAM RBM_R12 + #define RBM_LOWMASK (RBM_P0 | RBM_P1 | RBM_P2 | RBM_P3 | RBM_P4 | RBM_P5 | RBM_P6 | RBM_P7) + #define RBM_HIGHMASK (RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15) + #define RBM_ALLMASK (RBM_LOWMASK | RBM_HIGHMASK) + // ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S): // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): // On entry: diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 261968ae7f8f3d..42ee6da22ebf37 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1025,6 +1025,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs new file mode 100644 index 00000000000000..5aafb6d2c5a6a3 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace System.Runtime.Intrinsics.Arm +{ + public enum SveMaskPattern : byte + { + LargestPowerOf2 = 0, // The largest power of 2. + VectorCount1 = 1, // 1 element. + VectorCount2 = 2, // 2 elements. + VectorCount3 = 3, // 3 elements. + VectorCount4 = 4, // 4 elements. + VectorCount5 = 5, // 5 elements. + VectorCount6 = 6, // 6 elements. + VectorCount7 = 7, // 7 elements. + VectorCount8 = 8, // 8 elements. + VectorCount16 = 9, // 16 elements. + VectorCount32 = 10, // 32 elements. + VectorCount64 = 11, // 64 elements. + VectorCount128 = 12, // 128 elements. + VectorCount256 = 13, // 256 elements. + LargestMultipleOf4 = 29, // The largest multiple of 4. + LargestMultipleOf3 = 30, // The largest multiple of 3. + All = 31 // All available (implicitly a multiple of two). + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 3eeb40d5d9de19..f24101ae83ae2e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -30,5 +30,20 @@ internal Arm64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } + + /// TrueMask : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// svbool_t svptrue_pat_b16(enum svpattern pattern) + /// PTRUE Presult.H, pattern + /// svbool_t svptrue_pat_b32(enum svpattern pattern) + /// PTRUE Presult.S, pattern + /// svbool_t svptrue_pat_b64(enum svpattern pattern) + /// PTRUE Presult.D, pattern + /// + public static unsafe Vector TrueMask([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 7a71144e0bc33f..9fb34a3bfdfc2b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -27,5 +27,21 @@ internal Arm64() { } public static new bool IsSupported { get => IsSupported; } } + + /// TrueMask : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// svbool_t svptrue_pat_b16(enum svpattern pattern) + /// PTRUE Presult.H, pattern + /// svbool_t svptrue_pat_b32(enum svpattern pattern) + /// PTRUE Presult.S, pattern + /// svbool_t svptrue_pat_b64(enum svpattern pattern) + /// PTRUE Presult.D, pattern + /// + + public static unsafe Vector TrueMask([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => TrueMask(pattern); + } } diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 9eb01deebe0f4b..75ba8c419c7df7 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4162,7 +4162,30 @@ internal Sve() { } internal Arm64() { } public static new bool IsSupported { get { throw null; } } } + + public static System.Numerics.Vector TrueMask([ConstantExpected] System.Runtime.Intrinsics.Arm.SveMaskPattern pattern = SveMaskPattern.All) { throw null; } } + + public enum SveMaskPattern : byte + { + LargestPowerOf2 = 0, // The largest power of 2. + VectorCount1 = 1, // 1 element. + VectorCount2 = 2, // 2 elements. + VectorCount3 = 3, // 3 elements. + VectorCount4 = 4, // 4 elements. + VectorCount5 = 5, // 5 elements. + VectorCount6 = 6, // 6 elements. + VectorCount7 = 7, // 7 elements. + VectorCount8 = 8, // 8 elements. + VectorCount16 = 9, // 16 elements. + VectorCount32 = 10, // 32 elements. + VectorCount64 = 11, // 64 elements. + VectorCount128 = 12, // 128 elements. + VectorCount256 = 13, // 256 elements. + LargestMultipleOf4 = 29, // The largest multiple of 4. + LargestMultipleOf3 = 30, // The largest multiple of 3. + All = 31 // All available (implicitly a multiple of two). + }; } namespace System.Runtime.Intrinsics.X86 { diff --git a/src/tests/JIT/opt/Sve/Sve_mine.cs b/src/tests/JIT/opt/Sve/Sve_mine.cs new file mode 100644 index 00000000000000..edff0fe3d3557a --- /dev/null +++ b/src/tests/JIT/opt/Sve/Sve_mine.cs @@ -0,0 +1,44 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.Globalization; +using System.Runtime.CompilerServices; +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace CodeGenTests +{ + public static class Sve_mine + { + [MethodImpl(MethodImplOptions.NoInlining)] + public static Vector TrueMask(SveMaskPattern mask) + { + return Sve.TrueMask(mask); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static Vector TrueMask_VectorCount4() + { + return Sve.TrueMask(SveMaskPattern.VectorCount4); + } + + [Fact] + public static int TestEntryPoint() + { + Console.WriteLine($" Sve: {Sve.IsSupported}"); + if (!Sve.IsSupported) + { + return 0; + } + Vector mask1 = TrueMask(SveMaskPattern.VectorCount1); + Vector mask2 = TrueMask_VectorCount4(); + Console.WriteLine($"Done {mask1} {mask2}"); + + return 100; + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/opt/Sve/Sve_mine.csproj b/src/tests/JIT/opt/Sve/Sve_mine.csproj new file mode 100644 index 00000000000000..dbc3ab7f2f9596 --- /dev/null +++ b/src/tests/JIT/opt/Sve/Sve_mine.csproj @@ -0,0 +1,18 @@ + + + + true + + + None + True + + + + true + + + + + + From 864b925f42545d8b8b3a31b00ccb6a4378ce7c3f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 7 Feb 2024 12:39:05 +0000 Subject: [PATCH 02/14] LoadVector Change-Id: I3ad4fd9a8d823cb43a9546ba6356006a0907ac57 --- src/coreclr/jit/emitarm64.cpp | 6 ++ src/coreclr/jit/gentree.cpp | 5 +- src/coreclr/jit/hwintrinsic.h | 8 ++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + src/coreclr/jit/lsraarm64.cpp | 14 ++++ .../Arm/Sve.PlatformNotSupported.cs | 73 ++++++++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 74 +++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 11 +++ src/tests/JIT/opt/Sve/Sve_mine.cs | 47 ++++++++++++ src/tests/JIT/opt/Sve/Sve_mine.csproj | 1 + 10 files changed, 239 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 2da290e11be22e..d21f349434c68f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -10482,6 +10482,12 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_CZ_4A; break; + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ld1w: + case INS_sve_ld1d: + return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt); + default: unreached(); break; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 444bed3606994d..b3de96fd684277 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26089,9 +26089,12 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2: case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3: case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4: - addr = Op(3); break; + + case NI_Sve_LoadVector: + addr = Op(2); + break; #endif // TARGET_ARM64 default: diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 3088457f2744be..7634dc3b2ab7c0 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -185,6 +185,9 @@ enum HWIntrinsicFlag : unsigned int // this output can be used as a per-element mask HW_Flag_ReturnsPerElementMask = 0x10000, + // The intrinsic uses a mask in arg1 to select elements present in the result + HW_Flag_MaskedOperation = 0x20000, + #else #error Unsupported platform #endif @@ -878,6 +881,11 @@ struct HWIntrinsicInfo return (flags & HW_Flag_Scalable) != 0; } + static bool IsMaskedOperation(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_MaskedOperation) != 0; + } #endif // TARGET_ARM64 static bool HasSpecialSideEffect(NamedIntrinsic id) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 7fe506fb5fdd8e..9fb73da40341d8 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -17,6 +17,7 @@ // SVE Intrinsics // Sve +HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_MaskedOperation) HARDWARE_INTRINSIC(Sve, TrueMask, -1, 1, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index b77d53c90e7426..86bfea3a96f252 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1523,6 +1523,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount++; } } + else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) + { + regMaskTP srcCandidates = RBM_NONE; + switch (intrin.id) + { + case NI_Sve_LoadVector: + srcCandidates = RBM_LOWMASK; + break; + + default: + noway_assert(!"Not a supported masked operation"); + } + srcCount += BuildOperandUses(intrin.op1, srcCandidates); + } else if (intrinsicTree->OperIsMemoryLoadOrStore()) { srcCount += BuildAddrUses(intrin.op1); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index f24101ae83ae2e..bfdb14ba619e72 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -31,6 +31,79 @@ internal Arm64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } + /// LoadVector : Unextended load + + /// + /// svint8_t svld1[_s8](svbool_t pg, const int8_t *base) + /// LD1B Zresult.B, Pg/Z, [Xarray, Xindex] + /// LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, sbyte* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint16_t svld1[_s16](svbool_t pg, const int16_t *base) + /// LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1] + /// LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, short* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint32_t svld1[_s32](svbool_t pg, const int32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, int* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint64_t svld1[_s64](svbool_t pg, const int64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, long* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint8_t svld1[_u8](svbool_t pg, const uint8_t *base) + /// LD1B Zresult.B, Pg/Z, [Xarray, Xindex] + /// LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, byte* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svld1[_u16](svbool_t pg, const uint16_t *base) + /// LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1] + /// LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, ushort* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svld1[_u32](svbool_t pg, const uint32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, uint* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svld1[_u64](svbool_t pg, const uint64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, ulong* address) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svld1[_f32](svbool_t pg, const float32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, float* address) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat64_t svld1[_f64](svbool_t pg, const float64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, double* address) { throw new PlatformNotSupportedException(); } + + /// TrueMask : Set predicate elements to true /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 9fb34a3bfdfc2b..855e119e503966 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -28,6 +28,80 @@ internal Arm64() { } public static new bool IsSupported { get => IsSupported; } } + + /// LoadVector : Unextended load + + /// + /// svint8_t svld1[_s8](svbool_t pg, const int8_t *base) + /// LD1B Zresult.B, Pg/Z, [Xarray, Xindex] + /// LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, sbyte* address) => LoadVector(mask, address); + + /// + /// svint16_t svld1[_s16](svbool_t pg, const int16_t *base) + /// LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1] + /// LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, short* address) => LoadVector(mask, address); + + /// + /// svint32_t svld1[_s32](svbool_t pg, const int32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, int* address) => LoadVector(mask, address); + + /// + /// svint64_t svld1[_s64](svbool_t pg, const int64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, long* address) => LoadVector(mask, address); + + /// + /// svuint8_t svld1[_u8](svbool_t pg, const uint8_t *base) + /// LD1B Zresult.B, Pg/Z, [Xarray, Xindex] + /// LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, byte* address) => LoadVector(mask, address); + + /// + /// svuint16_t svld1[_u16](svbool_t pg, const uint16_t *base) + /// LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1] + /// LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, ushort* address) => LoadVector(mask, address); + + /// + /// svuint32_t svld1[_u32](svbool_t pg, const uint32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, uint* address) => LoadVector(mask, address); + + /// + /// svuint64_t svld1[_u64](svbool_t pg, const uint64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, ulong* address) => LoadVector(mask, address); + + /// + /// svfloat32_t svld1[_f32](svbool_t pg, const float32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, float* address) => LoadVector(mask, address); + + /// + /// svfloat64_t svld1[_f64](svbool_t pg, const float64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, double* address) => LoadVector(mask, address); + + /// TrueMask : Set predicate elements to true /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 75ba8c419c7df7..32fd4c030d80ad 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4163,6 +4163,17 @@ internal Arm64() { } public static new bool IsSupported { get { throw null; } } } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, sbyte* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, short* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, int* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, long* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, byte* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, ushort* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, uint* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, ulong* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, float* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, double* address) { throw null; } + public static System.Numerics.Vector TrueMask([ConstantExpected] System.Runtime.Intrinsics.Arm.SveMaskPattern pattern = SveMaskPattern.All) { throw null; } } diff --git a/src/tests/JIT/opt/Sve/Sve_mine.cs b/src/tests/JIT/opt/Sve/Sve_mine.cs index edff0fe3d3557a..adc6cef05f14af 100644 --- a/src/tests/JIT/opt/Sve/Sve_mine.cs +++ b/src/tests/JIT/opt/Sve/Sve_mine.cs @@ -8,12 +8,19 @@ using System.Numerics; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.InteropServices; using Xunit; namespace CodeGenTests { public static class Sve_mine { + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + [MethodImpl(MethodImplOptions.NoInlining)] public static Vector TrueMask(SveMaskPattern mask) { @@ -26,6 +33,41 @@ public static Vector TrueMask_VectorCount4() return Sve.TrueMask(SveMaskPattern.VectorCount4); } + + [MethodImpl(MethodImplOptions.NoInlining)] + public unsafe static Vector LoadVector_ImplicitMask(byte* address) + { + Vector mask = Sve.TrueMask(SveMaskPattern.All); + return Sve.LoadVector(mask, address); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public unsafe static bool do_LoadVector_ImplicitMask() + { + int elemsInVector = 16; + int OpElementCount = elemsInVector * 2; + byte[] inArray1 = new byte[OpElementCount]; + for (var i = 0; i < OpElementCount; i++) { inArray1[i] = (byte)(i+1); } + + GCHandle inHandle1; + inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned); + byte* inArray1Ptr = (byte*)Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), 128); + + Vector outVector1 = LoadVector_ImplicitMask(inArray1Ptr); + + for (var i = 0; i < elemsInVector; i++) + { + if (inArray1[i] != outVector1[i]) + { + Console.WriteLine("{0} {1} != {2}", i, inArray1[i], outVector1[i]); + // return false; + } + Console.WriteLine(outVector1[i]); + } + + return true; + } + [Fact] public static int TestEntryPoint() { @@ -38,6 +80,11 @@ public static int TestEntryPoint() Vector mask2 = TrueMask_VectorCount4(); Console.WriteLine($"Done {mask1} {mask2}"); + if(!do_LoadVector_ImplicitMask()) + { + return 0; + } + return 100; } } diff --git a/src/tests/JIT/opt/Sve/Sve_mine.csproj b/src/tests/JIT/opt/Sve/Sve_mine.csproj index dbc3ab7f2f9596..cbeb4a6075e9cb 100644 --- a/src/tests/JIT/opt/Sve/Sve_mine.csproj +++ b/src/tests/JIT/opt/Sve/Sve_mine.csproj @@ -6,6 +6,7 @@ None True + True From c2031ca59104127afaf39cc23e416e35a557fc76 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 19 Feb 2024 11:44:46 +0000 Subject: [PATCH 03/14] Add SveLoadUnOpMaskedTest.template --- .../GenerateHWIntrinsicTests_Arm.cs | 37 +-- .../Arm/Shared/SveLoadMaskedUnOpTest.template | 214 ++++++++++++++++++ src/tests/JIT/opt/Sve/Sve_mine.cs | 91 -------- src/tests/JIT/opt/Sve/Sve_mine.csproj | 19 -- 4 files changed, 237 insertions(+), 124 deletions(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template delete mode 100644 src/tests/JIT/opt/Sve/Sve_mine.cs delete mode 100644 src/tests/JIT/opt/Sve/Sve_mine.csproj diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 48761b8619c353..a862635226d735 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -2893,9 +2893,18 @@ ("SecureHashTernOpTest.template", new Dictionary { ["TestName"] = "ScheduleUpdate1_Vector128_UInt32", ["Isa"] = "Sha256", ["LoadIsa"] = "AdvSimd", ["Method"] = "ScheduleUpdate1", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "0x00112233", ["NextValueOp2"] = "0x44556677", ["NextValueOp3"] = "0x8899AABB", ["ExpectedResult"] = "{0x248F1BDF, 0x248F1BDF, 0xB303DDBA, 0xF74821FE}"}), }; -(string templateFileName, Dictionary templateData)[] SveInputs = Array.Empty<(string templateFileName, Dictionary templateData)>(); +(string templateFileName, Dictionary templateData)[] SveInputs = new [] { - //TODO-SVE: Add SVE tests + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_float", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "float", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "float", ["Op2BaseType"] = "float", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_double", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "double", ["Op2BaseType"] = "double", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_sbyte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "sbyte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "sbyte", ["Op2BaseType"] = "sbyte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_short", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "short", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "short", ["Op2BaseType"] = "short", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_int", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "int", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "int", ["Op2BaseType"] = "int", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_long", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "long", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "long", ["Op2BaseType"] = "long", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_byte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "byte", ["Op2BaseType"] = "byte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_ushort", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "ushort", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "ushort", ["Op2BaseType"] = "ushort", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_uint", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "uint", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "uint", ["Op2BaseType"] = "uint", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_ulong", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "ulong", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "ulong", ["Op2BaseType"] = "ulong", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), }; @@ -2904,18 +2913,18 @@ string outputDirectory = args[2]; string testListFileName = args[3]; -ProcessInputs("AdvSimd", AdvSimdInputs); -ProcessInputs("AdvSimd.Arm64", AdvSimd_Arm64Inputs); -ProcessInputs("Aes", AesInputs); -ProcessInputs("ArmBase", ArmBaseInputs); -ProcessInputs("ArmBase.Arm64", ArmBase_Arm64Inputs); -ProcessInputs("Crc32", Crc32Inputs); -ProcessInputs("Crc32.Arm64", Crc32_Arm64Inputs); -ProcessInputs("Dp", DpInputs); -ProcessInputs("Rdm", RdmInputs); -ProcessInputs("Rdm.Arm64", Rdm_Arm64Inputs); -ProcessInputs("Sha1", Sha1Inputs); -ProcessInputs("Sha256", Sha256Inputs); +// ProcessInputs("AdvSimd", AdvSimdInputs); +// ProcessInputs("AdvSimd.Arm64", AdvSimd_Arm64Inputs); +// ProcessInputs("Aes", AesInputs); +// ProcessInputs("ArmBase", ArmBaseInputs); +// ProcessInputs("ArmBase.Arm64", ArmBase_Arm64Inputs); +// ProcessInputs("Crc32", Crc32Inputs); +// ProcessInputs("Crc32.Arm64", Crc32_Arm64Inputs); +// ProcessInputs("Dp", DpInputs); +// ProcessInputs("Rdm", RdmInputs); +// ProcessInputs("Rdm.Arm64", Rdm_Arm64Inputs); +// ProcessInputs("Sha1", Sha1Inputs); +// ProcessInputs("Sha256", Sha256Inputs); ProcessInputs("Sve", SveInputs); void ProcessInputs(string groupName, (string templateFileName, Dictionary templateData)[] inputs) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template new file mode 100644 index 00000000000000..a1ce9bc8cdc2a5 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template @@ -0,0 +1,214 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new LoadUnaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works + test.RunBasicScenario_Load(); + + // Validates calling via reflection works + // TODO-SVE: Enable once register allocation exists for predicates. + // test.RunReflectionScenario_Load(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class LoadUnaryOpTest__{TestName} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op2BaseType}[] inArray1, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op2ElementCount = Unsafe.SizeOf<{RetVectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op2BaseType}[] _data = new {Op2BaseType}[Op2ElementCount]; + + private DataTable _dataTable; + + public LoadUnaryOpTest__{TestName}() + { + Succeeded = true; + + for (var i = 0; i < Op2ElementCount; i++) { _data[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + //TODO-SVE: Once register allocation exists for predicates, move loadMask into DataTable + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.TrueMask(SveMaskPattern.All); + + var result = {Isa}.{Method}( + loadMask, + ({Op2BaseType}*)(_dataTable.inArray1Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.TrueMask(SveMaskPattern.All); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof(Vector<{Op2BaseType}>), typeof({Op2BaseType}*) }) + .Invoke(null, new object[] { + loadMask, + Pointer.Box(_dataTable.inArray1Ptr, typeof({Op2BaseType}*)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + Succeeded = false; + + try + { + RunBasicScenario_Load(); + } + catch (PlatformNotSupportedException) + { + Succeeded = true; + } + } + + // private void ValidateResult({Op2VectorType}<{Op2BaseType}> firstOp, void* result, [CallerMemberName] string method = "") + // { + // {Op2BaseType}[] inArray = new {Op2BaseType}[Op2ElementCount]; + // {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + // Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray[0]), firstOp); + // Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + // ValidateResult(inArray, outArray, method); + // } + + private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray[0]), ref Unsafe.AsRef(firstOp), (uint)Unsafe.SizeOf<{RetVectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray, outArray, method); + } + + private void ValidateResult({Op2BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op2BaseType}): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/opt/Sve/Sve_mine.cs b/src/tests/JIT/opt/Sve/Sve_mine.cs deleted file mode 100644 index adc6cef05f14af..00000000000000 --- a/src/tests/JIT/opt/Sve/Sve_mine.cs +++ /dev/null @@ -1,91 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using System.Globalization; -using System.Runtime.CompilerServices; -using System.Numerics; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.InteropServices; -using Xunit; - -namespace CodeGenTests -{ - public static class Sve_mine - { - - private static unsafe void* Align(byte* buffer, ulong expectedAlignment) - { - return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public static Vector TrueMask(SveMaskPattern mask) - { - return Sve.TrueMask(mask); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public static Vector TrueMask_VectorCount4() - { - return Sve.TrueMask(SveMaskPattern.VectorCount4); - } - - - [MethodImpl(MethodImplOptions.NoInlining)] - public unsafe static Vector LoadVector_ImplicitMask(byte* address) - { - Vector mask = Sve.TrueMask(SveMaskPattern.All); - return Sve.LoadVector(mask, address); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public unsafe static bool do_LoadVector_ImplicitMask() - { - int elemsInVector = 16; - int OpElementCount = elemsInVector * 2; - byte[] inArray1 = new byte[OpElementCount]; - for (var i = 0; i < OpElementCount; i++) { inArray1[i] = (byte)(i+1); } - - GCHandle inHandle1; - inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned); - byte* inArray1Ptr = (byte*)Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), 128); - - Vector outVector1 = LoadVector_ImplicitMask(inArray1Ptr); - - for (var i = 0; i < elemsInVector; i++) - { - if (inArray1[i] != outVector1[i]) - { - Console.WriteLine("{0} {1} != {2}", i, inArray1[i], outVector1[i]); - // return false; - } - Console.WriteLine(outVector1[i]); - } - - return true; - } - - [Fact] - public static int TestEntryPoint() - { - Console.WriteLine($" Sve: {Sve.IsSupported}"); - if (!Sve.IsSupported) - { - return 0; - } - Vector mask1 = TrueMask(SveMaskPattern.VectorCount1); - Vector mask2 = TrueMask_VectorCount4(); - Console.WriteLine($"Done {mask1} {mask2}"); - - if(!do_LoadVector_ImplicitMask()) - { - return 0; - } - - return 100; - } - } -} \ No newline at end of file diff --git a/src/tests/JIT/opt/Sve/Sve_mine.csproj b/src/tests/JIT/opt/Sve/Sve_mine.csproj deleted file mode 100644 index cbeb4a6075e9cb..00000000000000 --- a/src/tests/JIT/opt/Sve/Sve_mine.csproj +++ /dev/null @@ -1,19 +0,0 @@ - - - - true - - - None - True - True - - - - true - - - - - - From 83194f35ad5d13cb57e67a7a21866467746e8dad Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 21 Feb 2024 10:10:20 +0000 Subject: [PATCH 04/14] Add CreateTrueMaskByte etc --- src/coreclr/jit/hwintrinsicarm64.cpp | 11 +- src/coreclr/jit/hwintrinsiclistarm64sve.h | 12 +- src/coreclr/jit/lowerarmarch.cpp | 11 +- src/coreclr/jit/lsraarm64.cpp | 22 +++- .../Arm/Sve.PlatformNotSupported.cs | 105 ++++++++++++++--- .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 107 +++++++++++++++--- .../ref/System.Runtime.Intrinsics.cs | 12 +- 7 files changed, 244 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 8faf346803abce..ddada225c81f89 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -280,7 +280,16 @@ void HWIntrinsicInfo::lookupImmBounds( immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1; break; - case NI_Sve_TrueMask: + case NI_Sve_CreateTrueMaskByte: + case NI_Sve_CreateTrueMaskDouble: + case NI_Sve_CreateTrueMaskInt16: + case NI_Sve_CreateTrueMaskInt32: + case NI_Sve_CreateTrueMaskInt64: + case NI_Sve_CreateTrueMaskSByte: + case NI_Sve_CreateTrueMaskSingle: + case NI_Sve_CreateTrueMaskUInt16: + case NI_Sve_CreateTrueMaskUInt32: + case NI_Sve_CreateTrueMaskUInt64: immLowerBound = (int) SVE_PATTERN_POW2; immUpperBound = (int) SVE_PATTERN_ALL; break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 9fb73da40341d8..93b7425dc1b29d 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -17,8 +17,18 @@ // SVE Intrinsics // Sve +HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, false, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) + HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_MaskedOperation) -HARDWARE_INTRINSIC(Sve, TrueMask, -1, 1, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index a81e36d4b8fa2d..dac552fdd39481 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3188,7 +3188,16 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Sve_TrueMask: + case NI_Sve_CreateTrueMaskByte: + case NI_Sve_CreateTrueMaskDouble: + case NI_Sve_CreateTrueMaskInt16: + case NI_Sve_CreateTrueMaskInt32: + case NI_Sve_CreateTrueMaskInt64: + case NI_Sve_CreateTrueMaskSByte: + case NI_Sve_CreateTrueMaskSingle: + case NI_Sve_CreateTrueMaskUInt16: + case NI_Sve_CreateTrueMaskUInt32: + case NI_Sve_CreateTrueMaskUInt64: assert(hasImmediateOperand); assert(varTypeIsIntegral(intrin.op1)); if (intrin.op1->IsCnsIntOrI()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 86bfea3a96f252..af93ea7e5cda66 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1431,7 +1431,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op4->isContainedIntOrIImmed()); break; - case NI_Sve_TrueMask: + case NI_Sve_CreateTrueMaskByte: + case NI_Sve_CreateTrueMaskDouble: + case NI_Sve_CreateTrueMaskInt16: + case NI_Sve_CreateTrueMaskInt32: + case NI_Sve_CreateTrueMaskInt64: + case NI_Sve_CreateTrueMaskSByte: + case NI_Sve_CreateTrueMaskSingle: + case NI_Sve_CreateTrueMaskUInt16: + case NI_Sve_CreateTrueMaskUInt32: + case NI_Sve_CreateTrueMaskUInt64: needBranchTargetReg = !intrin.op1->isContainedIntOrIImmed(); break; @@ -1740,7 +1749,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { switch (intrin.id) { - case NI_Sve_TrueMask: + case NI_Sve_CreateTrueMaskByte: + case NI_Sve_CreateTrueMaskDouble: + case NI_Sve_CreateTrueMaskInt16: + case NI_Sve_CreateTrueMaskInt32: + case NI_Sve_CreateTrueMaskInt64: + case NI_Sve_CreateTrueMaskSByte: + case NI_Sve_CreateTrueMaskSingle: + case NI_Sve_CreateTrueMaskUInt16: + case NI_Sve_CreateTrueMaskUInt32: + case NI_Sve_CreateTrueMaskUInt64: dstCandidates = RBM_ALLMASK; break; diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index bfdb14ba619e72..fbd5ee65ca748f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -31,6 +31,97 @@ internal Arm64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } + /// CreateTrueMaskByte : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskDouble : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskInt16 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskInt32 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskInt64 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskSByte : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskSingle : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskUInt16 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b16(enum svpattern pattern) + /// PTRUE Presult.H, pattern + /// + public static unsafe Vector CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskUInt32 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b32(enum svpattern pattern) + /// PTRUE Presult.S, pattern + /// + public static unsafe Vector CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskUInt64 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b64(enum svpattern pattern) + /// PTRUE Presult.D, pattern + /// + public static unsafe Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// LoadVector : Unextended load /// @@ -104,19 +195,5 @@ internal Arm64() { } public static unsafe Vector LoadVector(Vector mask, double* address) { throw new PlatformNotSupportedException(); } - /// TrueMask : Set predicate elements to true - - /// - /// svbool_t svptrue_pat_b8(enum svpattern pattern) - /// PTRUE Presult.B, pattern - /// svbool_t svptrue_pat_b16(enum svpattern pattern) - /// PTRUE Presult.H, pattern - /// svbool_t svptrue_pat_b32(enum svpattern pattern) - /// PTRUE Presult.S, pattern - /// svbool_t svptrue_pat_b64(enum svpattern pattern) - /// PTRUE Presult.D, pattern - /// - public static unsafe Vector TrueMask([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } - } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 855e119e503966..6ba2a2c67bc8a7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -29,6 +29,97 @@ internal Arm64() { } } + /// CreateTrueMaskByte : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskByte(pattern); + + + /// CreateTrueMaskDouble : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskDouble(pattern); + + + /// CreateTrueMaskInt16 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt16(pattern); + + + /// CreateTrueMaskInt32 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt32(pattern); + + + /// CreateTrueMaskInt64 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt64(pattern); + + + /// CreateTrueMaskSByte : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskSByte(pattern); + + + /// CreateTrueMaskSingle : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskSingle(pattern); + + + /// CreateTrueMaskUInt16 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b16(enum svpattern pattern) + /// PTRUE Presult.H, pattern + /// + public static unsafe Vector CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt16(pattern); + + + /// CreateTrueMaskUInt32 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b32(enum svpattern pattern) + /// PTRUE Presult.S, pattern + /// + public static unsafe Vector CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt32(pattern); + + + /// CreateTrueMaskUInt64 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b64(enum svpattern pattern) + /// PTRUE Presult.D, pattern + /// + public static unsafe Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt64(pattern); + + + /// LoadVector : Unextended load /// @@ -101,21 +192,5 @@ internal Arm64() { } /// public static unsafe Vector LoadVector(Vector mask, double* address) => LoadVector(mask, address); - - /// TrueMask : Set predicate elements to true - - /// - /// svbool_t svptrue_pat_b8(enum svpattern pattern) - /// PTRUE Presult.B, pattern - /// svbool_t svptrue_pat_b16(enum svpattern pattern) - /// PTRUE Presult.H, pattern - /// svbool_t svptrue_pat_b32(enum svpattern pattern) - /// PTRUE Presult.S, pattern - /// svbool_t svptrue_pat_b64(enum svpattern pattern) - /// PTRUE Presult.D, pattern - /// - - public static unsafe Vector TrueMask([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => TrueMask(pattern); - } } diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 32fd4c030d80ad..7ff2b3268e2ca9 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4163,6 +4163,17 @@ internal Arm64() { } public static new bool IsSupported { get { throw null; } } } + public static System.Numerics.Vector CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, sbyte* address) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, short* address) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, int* address) { throw null; } @@ -4174,7 +4185,6 @@ internal Arm64() { } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, float* address) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, double* address) { throw null; } - public static System.Numerics.Vector TrueMask([ConstantExpected] System.Runtime.Intrinsics.Arm.SveMaskPattern pattern = SveMaskPattern.All) { throw null; } } public enum SveMaskPattern : byte From 1c66d456310ccff7420025b2b49d84525f8b7e10 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 21 Feb 2024 11:27:43 +0000 Subject: [PATCH 05/14] Fix up tests --- .../GenerateHWIntrinsicTests_Arm.cs | 44 +++++++++---------- .../Arm/Shared/SveLoadMaskedUnOpTest.template | 4 +- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index a862635226d735..0dad1df16655ea 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -2895,16 +2895,16 @@ (string templateFileName, Dictionary templateData)[] SveInputs = new [] { - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_float", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "float", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "float", ["Op2BaseType"] = "float", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_double", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "double", ["Op2BaseType"] = "double", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_sbyte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "sbyte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "sbyte", ["Op2BaseType"] = "sbyte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_short", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "short", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "short", ["Op2BaseType"] = "short", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_int", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "int", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "int", ["Op2BaseType"] = "int", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_long", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "long", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "long", ["Op2BaseType"] = "long", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_byte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "byte", ["Op2BaseType"] = "byte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_ushort", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "ushort", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "ushort", ["Op2BaseType"] = "ushort", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_uint", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "uint", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "uint", ["Op2BaseType"] = "uint", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), - ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_ulong", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "ulong", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "ulong", ["Op2BaseType"] = "ulong", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_float", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_double", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_sbyte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_short", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_int", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_long", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_byte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_ushort", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_uint", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_ulong", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), }; @@ -2913,18 +2913,18 @@ string outputDirectory = args[2]; string testListFileName = args[3]; -// ProcessInputs("AdvSimd", AdvSimdInputs); -// ProcessInputs("AdvSimd.Arm64", AdvSimd_Arm64Inputs); -// ProcessInputs("Aes", AesInputs); -// ProcessInputs("ArmBase", ArmBaseInputs); -// ProcessInputs("ArmBase.Arm64", ArmBase_Arm64Inputs); -// ProcessInputs("Crc32", Crc32Inputs); -// ProcessInputs("Crc32.Arm64", Crc32_Arm64Inputs); -// ProcessInputs("Dp", DpInputs); -// ProcessInputs("Rdm", RdmInputs); -// ProcessInputs("Rdm.Arm64", Rdm_Arm64Inputs); -// ProcessInputs("Sha1", Sha1Inputs); -// ProcessInputs("Sha256", Sha256Inputs); +ProcessInputs("AdvSimd", AdvSimdInputs); +ProcessInputs("AdvSimd.Arm64", AdvSimd_Arm64Inputs); +ProcessInputs("Aes", AesInputs); +ProcessInputs("ArmBase", ArmBaseInputs); +ProcessInputs("ArmBase.Arm64", ArmBase_Arm64Inputs); +ProcessInputs("Crc32", Crc32Inputs); +ProcessInputs("Crc32.Arm64", Crc32_Arm64Inputs); +ProcessInputs("Dp", DpInputs); +ProcessInputs("Rdm", RdmInputs); +ProcessInputs("Rdm.Arm64", Rdm_Arm64Inputs); +ProcessInputs("Sha1", Sha1Inputs); +ProcessInputs("Sha256", Sha256Inputs); ProcessInputs("Sve", SveInputs); void ProcessInputs(string groupName, (string templateFileName, Dictionary templateData)[] inputs) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template index a1ce9bc8cdc2a5..a8320f60ded514 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template @@ -122,7 +122,7 @@ namespace JIT.HardwareIntrinsics.Arm TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); //TODO-SVE: Once register allocation exists for predicates, move loadMask into DataTable - {Op1VectorType}<{Op1BaseType}> loadMask = Sve.TrueMask(SveMaskPattern.All); + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); var result = {Isa}.{Method}( loadMask, @@ -137,7 +137,7 @@ namespace JIT.HardwareIntrinsics.Arm { TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); - {Op1VectorType}<{Op1BaseType}> loadMask = Sve.TrueMask(SveMaskPattern.All); + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof(Vector<{Op2BaseType}>), typeof({Op2BaseType}*) }) .Invoke(null, new object[] { From fe091289ea05d6cef59393f906fe2051385cd078 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 21 Feb 2024 11:46:43 +0000 Subject: [PATCH 06/14] Remove commented code --- .../Arm/Shared/SveLoadMaskedUnOpTest.template | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template index a8320f60ded514..09aaf2f442e136 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template @@ -165,17 +165,6 @@ namespace JIT.HardwareIntrinsics.Arm } } - // private void ValidateResult({Op2VectorType}<{Op2BaseType}> firstOp, void* result, [CallerMemberName] string method = "") - // { - // {Op2BaseType}[] inArray = new {Op2BaseType}[Op2ElementCount]; - // {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; - - // Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray[0]), firstOp); - // Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - - // ValidateResult(inArray, outArray, method); - // } - private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "") { {Op2BaseType}[] inArray = new {Op2BaseType}[Op2ElementCount]; From 941db03b28e5a451bc0297ed86596723915bc583 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 21 Feb 2024 17:47:47 +0000 Subject: [PATCH 07/14] Explain SveMaskPattern --- .../System/Runtime/Intrinsics/Arm/Enums.cs | 95 ++++++++++++++++--- 1 file changed, 82 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs index 5aafb6d2c5a6a3..868300bf14acaa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs @@ -3,24 +3,93 @@ namespace System.Runtime.Intrinsics.Arm { + // Used to specify or limit the number of elements used within an method. + // Matches the field "pattern" within the Arm Architecture Reference Manual public enum SveMaskPattern : byte { + /// + /// POW2 + /// LargestPowerOf2 = 0, // The largest power of 2. - VectorCount1 = 1, // 1 element. - VectorCount2 = 2, // 2 elements. - VectorCount3 = 3, // 3 elements. - VectorCount4 = 4, // 4 elements. - VectorCount5 = 5, // 5 elements. - VectorCount6 = 6, // 6 elements. - VectorCount7 = 7, // 7 elements. - VectorCount8 = 8, // 8 elements. - VectorCount16 = 9, // 16 elements. - VectorCount32 = 10, // 32 elements. - VectorCount64 = 11, // 64 elements. - VectorCount128 = 12, // 128 elements. - VectorCount256 = 13, // 256 elements. + + /// + /// VL1 + /// + VectorCount1 = 1, // Exactly 1 element. + + /// + /// VL2 + /// + VectorCount2 = 2, // Exactly 2 elements. + + /// + /// VL3 + /// + VectorCount3 = 3, // Exactly 3 elements. + + /// + /// VL4 + /// + VectorCount4 = 4, // Exactly 4 elements. + + /// + /// VL5 + /// + VectorCount5 = 5, // Exactly 5 elements. + + /// + /// VL6 + /// + VectorCount6 = 6, // Exactly 6 elements. + + /// + /// VL7 + /// + VectorCount7 = 7, // Exactly 7 elements. + + /// + /// VL8 + /// + VectorCount8 = 8, // Exactly 8 elements. + + /// + /// VL16 + /// + VectorCount16 = 9, // Exactly 16 elements. + + /// + /// VL32 + /// + VectorCount32 = 10, // Exactly 32 elements. + + /// + /// VL64 + /// + VectorCount64 = 11, // Exactly 64 elements. + + /// + /// VL128 + /// + VectorCount128 = 12, // Exactly 128 elements. + + /// + /// VL256 + /// + VectorCount256 = 13, // Exactly 256 elements. + + /// + /// MUL4 + /// LargestMultipleOf4 = 29, // The largest multiple of 4. + + /// + /// MUL3 + /// LargestMultipleOf3 = 30, // The largest multiple of 3. + + /// + /// ALL + /// All = 31 // All available (implicitly a multiple of two). } } From 5dc72346bf88ca369b9de93d56aa97098d4bc6ea Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 28 Feb 2024 14:25:51 +0000 Subject: [PATCH 08/14] ARM64-SVE: Implement IF_SVE_BV_2A --- src/coreclr/jit/codegenarm64test.cpp | 40 +++++++++++ src/coreclr/jit/emitarm64.cpp | 102 ++++++++++++++++++++++++--- src/coreclr/jit/emitarm64.h | 6 ++ 3 files changed, 137 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 3844d96a7fa077..71d03d62ad6e40 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5365,6 +5365,46 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q, INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 .Q, .Q, .Q + // IF_SVE_BV_2A + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0, + INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V5, REG_P15, 27, + INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V31, REG_P0, -128, + INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P5, 127, + INS_OPTS_SCALABLE_B); // MOV ., /Z, #{, } + + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0, + INS_OPTS_SCALABLE_H); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V23, REG_P12, 10, + INS_OPTS_SCALABLE_S); // MOV ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V4, REG_P0, -128, + INS_OPTS_SCALABLE_D); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V19, REG_P15, 127, + INS_OPTS_SCALABLE_H); // MOV ., /Z, #{, } + + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 256, + INS_OPTS_SCALABLE_S); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 3072, + INS_OPTS_SCALABLE_D); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -3072, + INS_OPTS_SCALABLE_H); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -32768, + INS_OPTS_SCALABLE_S); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_P0, REG_V0, 32512, + INS_OPTS_SCALABLE_D); // MOV ., /Z, #{, } + + // IF_SVE_BV_2A_A + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P12, 5, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_PREDICATE_MERGE); // CPY ., /M, #{, } + + // IF_SVE_BV_2A_J + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, 5632, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV ., /M, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, -5632, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV ., /M, #{, } + // IF_SVE_BZ_3A theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // TBL ., {.}, . diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 0182532a1b1472..ae08a330557a2e 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1127,6 +1127,18 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm4From1(emitGetInsSC(id))); break; + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + assert(insOptsScalableStandard(id->idInsOpt())); // xx + // Size specifier must be able to fit left-shifted immediate + assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift()); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isValidSimm8(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements elemsize = id->idOpSize(); assert(insOptsScalableStandard(id->idInsOpt())); @@ -9182,16 +9194,18 @@ void emitter::emitIns_R_R_I(instruction ins, insOpts opt /* = INS_OPTS_NONE */, insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - bool isLdSt = false; - bool isLdrStr = false; - bool isSIMD = false; - bool isAddSub = false; - bool setFlags = false; - unsigned scale = 0; - bool unscaledOp = false; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + bool isLdSt = false; + bool isLdrStr = false; + bool isSIMD = false; + bool isAddSub = false; + bool setFlags = false; + unsigned scale = 0; + bool unscaledOp = false; + bool optionalShift = false; + bool hasShift = false; /* Figure out the encoding format of the instruction */ switch (ins) @@ -9774,6 +9788,31 @@ void emitter::emitIns_R_R_I(instruction ins, } break; + case INS_sve_mov: + case INS_sve_cpy: + optionalShift = true; + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); // DDDDD + assert(isPredicateRegister(reg2)); // GGGG + if (!isValidSimm8(imm)) + { + assert(isValidSimm8_MultipleOf256(imm)); + assert(insOptsScalableAtLeastHalf(opt)); + hasShift = true; + imm = imm / 256; + } + if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE) + { + fmt = IF_SVE_BV_2A_J; + } + else + { + fmt = IF_SVE_BV_2A; + } + // MOV is an alias for CPY, and is always the preferred disassembly. + ins = INS_sve_mov; + break; + case INS_sve_sqrshrn: case INS_sve_sqrshrun: case INS_sve_uqrshrn: @@ -10100,7 +10139,18 @@ void emitter::emitIns_R_R_I(instruction ins, assert(fmt != IF_NONE); - instrDesc* id = emitNewInstrSC(attr, imm); + instrDesc* id; + + if (!optionalShift) + { + id = emitNewInstrSC(attr, imm); + } + else + { + // Instructions with optional shifts (MOV, DUP, etc.) need larger instrDesc to store state + id = emitNewInstrCns(attr, imm); + id->idOptionalShift(hasShift); + } id->idIns(ins); id->idInsFmt(fmt); @@ -23612,6 +23662,19 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_19_to_16(id->idReg2()); // gggg + code |= insEncodeImm8_12_to_5(imm); // iiiiiiii + code |= (id->idOptionalShift() ? 0x2000 : 0); // h + code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD @@ -28901,6 +28964,16 @@ void emitter::emitDispInsHelp( emitDispImm(imm, false); break; + // ., /Z, #{, } + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg + emitDispImmOptsLSL(emitGetInsSC(id), id->idOptionalShift(), 8); // iiiiiiii, h + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -31337,6 +31410,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 07b8690826fc31..158564028b00b0 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -860,6 +860,12 @@ static bool isValidSimm8(ssize_t value) return (-0x80 <= value) && (value <= 0x7F); }; +// Returns true if 'value' is a legal signed multiple of 256 immediate 8 bit encoding (such as for MOV). +static bool isValidSimm8_MultipleOf256(ssize_t value) +{ + return (-0x8000 <= value) && (value <= 0x7f00) && (value % 256 == 0); +}; + // Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN). static bool isValidUimm12(ssize_t value) { From 5a2e84e10d35e06d15fa691ba52abdef2166b406 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 23 Feb 2024 13:20:13 +0000 Subject: [PATCH 09/14] Create vector to/from mask nodes in intrinsic generation --- src/coreclr/jit/compiler.h | 5 ++ src/coreclr/jit/hwintrinsic.cpp | 49 ++++++++--- src/coreclr/jit/hwintrinsic.h | 2 +- src/coreclr/jit/hwintrinsicarm64.cpp | 91 ++++++++++++++++++++- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 47 ++++++++--- src/coreclr/jit/hwintrinsiclistarm64sve.h | 22 +++++ src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsra.h | 8 +- src/coreclr/jit/lsraarm64.cpp | 40 +++------ src/coreclr/jit/targetarm64.h | 4 + src/coreclr/jit/typelist.h | 4 +- src/coreclr/jit/vartype.h | 2 +- 12 files changed, 216 insertions(+), 60 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 25607e21438d6c..deebcb3a4dcb90 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4543,6 +4543,11 @@ class Compiler NamedIntrinsic intrinsic, GenTree* immOp, bool mustExpand, int immLowerBound, int immUpperBound); GenTree* addRangeCheckForHWIntrinsic(GenTree* immOp, int immLowerBound, int immUpperBound); +#if defined(TARGET_ARM64) + GenTree* convertHWIntrinsicToMask(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* convertHWIntrinsicFromMask(GenTreeHWIntrinsic* node, var_types type); +#endif + #endif // FEATURE_HW_INTRINSICS GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd, CORINFO_SIG_INFO* sig, diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index f771a9ec978e2d..14c262524da2d8 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1356,6 +1356,15 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, compFloatingPointUsed = true; } + var_types nodeRetType = retType; +#if defined(TARGET_ARM64) + if (HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)) + { + // Ensure the result is generated to a mask. + nodeRetType = TYP_MASK; + } +#endif // defined(TARGET_ARM64) + // table-driven importer of simple intrinsics if (impIsTableDrivenHWIntrinsic(intrinsic, category)) { @@ -1392,7 +1401,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case 0: { assert(!isScalar); - retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(nodeRetType, intrinsic, simdBaseJitType, simdSize); break; } @@ -1410,8 +1419,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } } - retNode = isScalar ? gtNewScalarHWIntrinsicNode(retType, op1, intrinsic) - : gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); + retNode = isScalar ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, intrinsic) + : gtNewSimdHWIntrinsicNode(nodeRetType, op1, intrinsic, simdBaseJitType, simdSize); #if defined(TARGET_XARCH) switch (intrinsic) @@ -1462,8 +1471,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound); op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); - retNode = isScalar ? gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic) - : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + retNode = isScalar + ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, intrinsic) + : gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, intrinsic, simdBaseJitType, simdSize); #ifdef TARGET_XARCH if ((intrinsic == NI_SSE42_Crc32) || (intrinsic == NI_SSE42_X64_Crc32)) @@ -1543,9 +1553,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand, immLowerBound, immUpperBound); } - retNode = isScalar - ? gtNewScalarHWIntrinsicNode(retType, op1, op2, op3, intrinsic) - : gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + retNode = isScalar ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic) + : gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, + simdSize); #ifdef TARGET_XARCH if ((intrinsic == NI_AVX2_GatherVector128) || (intrinsic == NI_AVX2_GatherVector256)) @@ -1566,7 +1576,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); assert(!isScalar); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize); break; } @@ -1576,8 +1587,26 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } else { - retNode = impSpecialIntrinsic(intrinsic, clsHnd, method, sig, simdBaseJitType, retType, simdSize); + retNode = impSpecialIntrinsic(intrinsic, clsHnd, method, sig, simdBaseJitType, nodeRetType, simdSize); + } + +#if defined(TARGET_ARM64) + if (HWIntrinsicInfo::IsMaskedOperation(intrinsic)) + { + // Op1 input is a vector. HWInstrinsic requires a mask, so convert to a mask. + assert(numArgs > 0); + GenTree* op1 = retNode->AsHWIntrinsic()->Op(1); + op1 = convertHWIntrinsicToMask(retType, op1, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->Op(1) = op1; + } + + if (retType != nodeRetType) + { + // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. + assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); + retNode = convertHWIntrinsicFromMask(retNode->AsHWIntrinsic(), retType); } +#endif // defined(TARGET_ARM64) if ((retNode != nullptr) && retNode->OperIs(GT_HWINTRINSIC)) { diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 3bd05aa5ad2f96..f432ec9fe3e8d2 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -930,7 +930,7 @@ struct HWIntrinsic final InitializeBaseType(node); } - bool IsTableDriven() const + bool codeGenIsTableDriven() const { // TODO-Arm64-Cleanup - make more categories to the table-driven framework bool isTableDrivenCategory = category != HW_Category_Helper; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index ddada225c81f89..b85cdc1bcd0e8b 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -290,8 +290,8 @@ void HWIntrinsicInfo::lookupImmBounds( case NI_Sve_CreateTrueMaskUInt16: case NI_Sve_CreateTrueMaskUInt32: case NI_Sve_CreateTrueMaskUInt64: - immLowerBound = (int) SVE_PATTERN_POW2; - immUpperBound = (int) SVE_PATTERN_ALL; + immLowerBound = (int)SVE_PATTERN_POW2; + immUpperBound = (int)SVE_PATTERN_ALL; break; default: @@ -2193,6 +2193,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } + default: { return nullptr; @@ -2202,4 +2203,90 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return retNode; } +//------------------------------------------------------------------------ +// convertHWIntrinsicFromMask: Convert a HW instrinsic vector node to a mask +// +// Arguments: +// node -- The node to convert +// simdBaseJitType -- the base jit type of the converted node +// simdSize -- the simd size of the converted node +// +// Return Value: +// The node converted to the a mask type +// +GenTree* Compiler::convertHWIntrinsicToMask(var_types type, + GenTree* node, + CorInfoType simdBaseJitType, + unsigned simdSize) +{ + // ConvertVectorToMask uses cmpne which requires an embedded mask. + // TODO-SVE: Refactor this out once full embedded masking is adding. + NamedIntrinsic maskName; + switch (simdBaseJitType) + { + case CORINFO_TYPE_UBYTE: + maskName = NI_Sve_CreateTrueMaskAllByte; + break; + + case CORINFO_TYPE_DOUBLE: + maskName = NI_Sve_CreateTrueMaskAllDouble; + break; + + case CORINFO_TYPE_SHORT: + maskName = NI_Sve_CreateTrueMaskAllInt16; + break; + + case CORINFO_TYPE_INT: + maskName = NI_Sve_CreateTrueMaskAllInt32; + break; + + case CORINFO_TYPE_LONG: + maskName = NI_Sve_CreateTrueMaskAllInt64; + break; + + case CORINFO_TYPE_BYTE: + maskName = NI_Sve_CreateTrueMaskAllSByte; + break; + + case CORINFO_TYPE_FLOAT: + maskName = NI_Sve_CreateTrueMaskAllSingle; + break; + + case CORINFO_TYPE_USHORT: + maskName = NI_Sve_CreateTrueMaskAllUInt16; + break; + + case CORINFO_TYPE_UINT: + maskName = NI_Sve_CreateTrueMaskAllUInt32; + break; + + case CORINFO_TYPE_ULONG: + maskName = NI_Sve_CreateTrueMaskAllUInt64; + break; + + default: + unreached(); + } + GenTree* embeddedMask = gtNewSimdHWIntrinsicNode(TYP_MASK, maskName, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(TYP_MASK, embeddedMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType, + simdSize); +} + +//------------------------------------------------------------------------ +// convertHWIntrinsicFromMask: Convert a HW instrinsic mask node to a vector +// +// Arguments: +// node -- The node to convert +// type -- The type of the node to convert to +// +// Return Value: +// The node converted to the given type +// +GenTree* Compiler::convertHWIntrinsicFromMask(GenTreeHWIntrinsic* node, var_types type) +{ + assert(node->TypeGet() == TYP_MASK); + return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, node->GetSimdBaseJitType(), + node->GetSimdSize()); +} + #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 2a08629b489778..24bf3167866691 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -268,7 +268,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) else if (HWIntrinsicInfo::IsScalable(intrin.id)) { emitSize = EA_SCALABLE; - opt = emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)); + opt = emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)); } else { @@ -281,7 +281,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) genConsumeMultiOpOperands(node); - if (intrin.IsTableDriven()) + if (intrin.codeGenIsTableDriven()) { const instruction ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType); assert(ins != INS_invalid); @@ -376,7 +376,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { emitShift(intrin.op2, op1Reg); } - } else if (intrin.category == HW_Category_EnumPattern) { @@ -385,15 +384,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrin.numOperands) { case 1: + { + HWIntrinsicImmOpHelper helper(this, intrin.op1, node); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { - HWIntrinsicImmOpHelper helper(this, intrin.op1, node); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - const insSvePattern pattern = (insSvePattern) helper.ImmValue(); - GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, pattern); - } - }; - break; + const insSvePattern pattern = (insSvePattern)helper.ImmValue(); + GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, pattern); + } + }; + break; default: unreached(); @@ -1281,6 +1280,32 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg); break; + case NI_Sve_ConvertMaskToVector: + // PMOV would be ideal here, but it is in SVE2.1. + // Instead, use a predicated move: MOV ., /Z, #1 + GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 1, opt); + break; + + case NI_Sve_ConvertVectorToMask: + // PMOV would be ideal here, but it is in SVE2.1. + // Instead, use a compare: CMPNE ., /Z, ., #0 + GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt); + break; + + case NI_Sve_CreateTrueMaskAllByte: + case NI_Sve_CreateTrueMaskAllDouble: + case NI_Sve_CreateTrueMaskAllInt16: + case NI_Sve_CreateTrueMaskAllInt32: + case NI_Sve_CreateTrueMaskAllInt64: + case NI_Sve_CreateTrueMaskAllSByte: + case NI_Sve_CreateTrueMaskAllSingle: + case NI_Sve_CreateTrueMaskAllUInt16: + case NI_Sve_CreateTrueMaskAllUInt32: + case NI_Sve_CreateTrueMaskAllUInt64: + // Must use the pattern variant, as the non-pattern varient is SVE2.1. + GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL); + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 93b7425dc1b29d..22ad049c5c7c76 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -30,6 +30,28 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_MaskedOperation) + + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// Special intrinsics that are generated during importing or lowering + +HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, true, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_MaskedOperation) +HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) + +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllByte, -1, 0, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllDouble, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllInt16, -1, 0, false, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllInt32, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllInt64, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllSByte, -1, 0, false, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllSingle, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllUInt16, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllUInt32, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllUInt64, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) + #endif // FEATURE_HW_INTRINSIC #undef HARDWARE_INTRINSIC diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index e411de81ed80e3..e422f8f050bcd6 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -788,7 +788,7 @@ LinearScan::LinearScan(Compiler* theCompiler) availableFloatRegs = RBM_ALLFLOAT; availableDoubleRegs = RBM_ALLDOUBLE; -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) availableMaskRegs = RBM_ALLMASK; #endif diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index c0e0f5d2fdbd34..63def732a26650 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -51,12 +51,12 @@ RegisterType regType(T type) { return IntRegisterType; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD) else if (varTypeUsesMaskReg(type)) { return MaskRegisterType; } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // (TARGET_XARCH || TARGET_ARM64) && FEATURE_SIMD else { assert(varTypeUsesFloatReg(type)); @@ -1662,12 +1662,12 @@ class LinearScan : public LinearScanInterface PhasedVar availableIntRegs; PhasedVar availableFloatRegs; PhasedVar availableDoubleRegs; -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) PhasedVar availableMaskRegs; #endif PhasedVar* availableRegs[TYP_COUNT]; -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) #define allAvailableRegs (availableIntRegs | availableFloatRegs | availableMaskRegs) #else #define allAvailableRegs (availableIntRegs | availableFloatRegs) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index f64450aa11808b..0aba0b95f92868 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1329,8 +1329,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou const HWIntrinsic intrin(intrinsicTree); - int srcCount = 0; - int dstCount = 0; + int srcCount = 0; + int dstCount = 0; regMaskTP dstCandidates = RBM_NONE; if (HWIntrinsicInfo::IsMultiReg(intrin.id)) @@ -1547,17 +1547,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) { - regMaskTP srcCandidates = RBM_NONE; + regMaskTP predMask = RBM_ALLMASK; switch (intrin.id) { - case NI_Sve_LoadVector: - srcCandidates = RBM_LOWMASK; + case NI_Sve_ConvertVectorToMask: // Uses INS_sve_cmpne + case NI_Sve_LoadVector: // TODO-SVE: are we sure? + predMask = RBM_LOWMASK; + break; + + case NI_Sve_ConvertMaskToVector: // Uses INS_sve_pmov break; default: - noway_assert(!"Not a supported masked operation"); + noway_assert(!"Not a supported predicated result SVE operation"); } - srcCount += BuildOperandUses(intrin.op1, srcCandidates); + srcCount += BuildOperandUses(intrin.op1, predMask); } else if (intrinsicTree->OperIsMemoryLoadOrStore()) { @@ -1759,28 +1763,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou return srcCount; } - else if (HWIntrinsicInfo::ReturnsPerElementMask(intrin.id)) - { - switch (intrin.id) - { - case NI_Sve_CreateTrueMaskByte: - case NI_Sve_CreateTrueMaskDouble: - case NI_Sve_CreateTrueMaskInt16: - case NI_Sve_CreateTrueMaskInt32: - case NI_Sve_CreateTrueMaskInt64: - case NI_Sve_CreateTrueMaskSByte: - case NI_Sve_CreateTrueMaskSingle: - case NI_Sve_CreateTrueMaskUInt16: - case NI_Sve_CreateTrueMaskUInt32: - case NI_Sve_CreateTrueMaskUInt64: - dstCandidates = RBM_ALLMASK; - break; - - default: - noway_assert(!"Not a supported ReturnsPerElementMask operation"); - } - } - else if (intrin.op2 != nullptr) { // RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index a8e90095674f53..45d160146c2995 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -144,6 +144,10 @@ #define RBM_HIGHMASK (RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15) #define RBM_ALLMASK (RBM_LOWMASK | RBM_HIGHMASK) + // TODO-SVE: Fix when adding predicate register allocation + #define RBM_MSK_CALLEE_SAVED (0) + #define RBM_MSK_CALLEE_TRASH (0) + // ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S): // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): // On entry: diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h index 8b8da6db011f5b..1a9a8c4072f6bf 100644 --- a/src/coreclr/jit/typelist.h +++ b/src/coreclr/jit/typelist.h @@ -63,8 +63,10 @@ DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, available #if defined(TARGET_XARCH) DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) DEF_TP(SIMD64 ,"simd64" , TYP_SIMD64, 64,64, 64, 16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) -DEF_TP(MASK ,"mask" , TYP_MASK, 8, 8, 8, 2, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S) #endif // TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +DEF_TP(MASK ,"mask" , TYP_MASK, 8, 8, 8, 2, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S) +#endif // TARGET_XARCH || TARGET_ARM64 #endif // FEATURE_SIMD DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, 0, 0, 0, 0, 0, VTR_INT, availableIntRegs, RBM_INT_CALLEE_SAVED, RBM_INT_CALLEE_TRASH, VTF_ANY) diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index 27dd5b3329574f..ed57a76b6e7ad8 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -321,7 +321,7 @@ inline bool varTypeUsesMaskReg(T vt) // However, we only have one type that uses VTR_MASK today // and so its quite a bit cheaper to just check that directly -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) +#if defined(FEATURE_SIMD) && (defined(TARGET_XARCH) || defined(TARGET_ARM64)) assert((TypeGet(vt) == TYP_MASK) || (varTypeRegister[TypeGet(vt)] != VTR_MASK)); return TypeGet(vt) == TYP_MASK; #else From 310812f4a9045c6b9f0e106505900946b35f5661 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 4 Mar 2024 17:16:59 +0000 Subject: [PATCH 10/14] Add HW_Flag_LowMaskedOperation --- src/coreclr/jit/hwintrinsic.h | 12 +++++++++++- src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++-- src/coreclr/jit/lsraarm64.cpp | 15 +-------------- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index f432ec9fe3e8d2..cac041eb83ea6d 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -188,6 +188,9 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic uses a mask in arg1 to select elements present in the result HW_Flag_MaskedOperation = 0x20000, + // The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low register. + HW_Flag_LowMaskedOperation = 0x40000, + #else #error Unsupported platform #endif @@ -869,8 +872,15 @@ struct HWIntrinsicInfo static bool IsMaskedOperation(NamedIntrinsic id) { const HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_MaskedOperation) != 0; + return ((flags & HW_Flag_MaskedOperation) != 0) || IsLowMaskedOperation(id); + } + + static bool IsLowMaskedOperation(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_LowMaskedOperation) != 0; } + #endif // TARGET_ARM64 static bool HasSpecialSideEffect(NamedIntrinsic id) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 22ad049c5c7c76..567315fb2860dc 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -28,7 +28,7 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_MaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_LowMaskedOperation) @@ -39,7 +39,7 @@ HARDWARE_INTRINSIC(Sve, LoadVector, // Special intrinsics that are generated during importing or lowering HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, true, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_MaskedOperation) -HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllByte, -1, 0, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllDouble, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 0aba0b95f92868..1096d7f11701c5 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1547,20 +1547,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) { - regMaskTP predMask = RBM_ALLMASK; - switch (intrin.id) - { - case NI_Sve_ConvertVectorToMask: // Uses INS_sve_cmpne - case NI_Sve_LoadVector: // TODO-SVE: are we sure? - predMask = RBM_LOWMASK; - break; - - case NI_Sve_ConvertMaskToVector: // Uses INS_sve_pmov - break; - - default: - noway_assert(!"Not a supported predicated result SVE operation"); - } + regMaskTP predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK; srcCount += BuildOperandUses(intrin.op1, predMask); } else if (intrinsicTree->OperIsMemoryLoadOrStore()) From 8fdd3813de1a2971accfe9b1a3b0486b4d1407f7 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 5 Mar 2024 09:51:51 +0000 Subject: [PATCH 11/14] Revert "ARM64-SVE: Implement IF_SVE_BV_2A" --- src/coreclr/jit/codegenarm64test.cpp | 40 ----------- src/coreclr/jit/emitarm64.cpp | 102 +++------------------------ src/coreclr/jit/emitarm64.h | 6 -- 3 files changed, 11 insertions(+), 137 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 71d03d62ad6e40..3844d96a7fa077 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5365,46 +5365,6 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q, INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 .Q, .Q, .Q - // IF_SVE_BV_2A - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0, - INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V5, REG_P15, 27, - INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V31, REG_P0, -128, - INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P5, 127, - INS_OPTS_SCALABLE_B); // MOV ., /Z, #{, } - - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0, - INS_OPTS_SCALABLE_H); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V23, REG_P12, 10, - INS_OPTS_SCALABLE_S); // MOV ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V4, REG_P0, -128, - INS_OPTS_SCALABLE_D); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V19, REG_P15, 127, - INS_OPTS_SCALABLE_H); // MOV ., /Z, #{, } - - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 256, - INS_OPTS_SCALABLE_S); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 3072, - INS_OPTS_SCALABLE_D); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -3072, - INS_OPTS_SCALABLE_H); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -32768, - INS_OPTS_SCALABLE_S); // CPY ., /Z, #{, } - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_P0, REG_V0, 32512, - INS_OPTS_SCALABLE_D); // MOV ., /Z, #{, } - - // IF_SVE_BV_2A_A - theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P12, 5, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_PREDICATE_MERGE); // CPY ., /M, #{, } - - // IF_SVE_BV_2A_J - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, 5632, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV ., /M, #{, } - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, -5632, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV ., /M, #{, } - // IF_SVE_BZ_3A theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // TBL ., {.}, . diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ae08a330557a2e..0182532a1b1472 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1127,18 +1127,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm4From1(emitGetInsSC(id))); break; - case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - assert(insOptsScalableStandard(id->idInsOpt())); // xx - // Size specifier must be able to fit left-shifted immediate - assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift()); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // gggg - assert(isValidSimm8(emitGetInsSC(id))); // iiiiiiii - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - break; - case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements elemsize = id->idOpSize(); assert(insOptsScalableStandard(id->idInsOpt())); @@ -9194,18 +9182,16 @@ void emitter::emitIns_R_R_I(instruction ins, insOpts opt /* = INS_OPTS_NONE */, insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - bool isLdSt = false; - bool isLdrStr = false; - bool isSIMD = false; - bool isAddSub = false; - bool setFlags = false; - unsigned scale = 0; - bool unscaledOp = false; - bool optionalShift = false; - bool hasShift = false; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + bool isLdSt = false; + bool isLdrStr = false; + bool isSIMD = false; + bool isAddSub = false; + bool setFlags = false; + unsigned scale = 0; + bool unscaledOp = false; /* Figure out the encoding format of the instruction */ switch (ins) @@ -9788,31 +9774,6 @@ void emitter::emitIns_R_R_I(instruction ins, } break; - case INS_sve_mov: - case INS_sve_cpy: - optionalShift = true; - assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // DDDDD - assert(isPredicateRegister(reg2)); // GGGG - if (!isValidSimm8(imm)) - { - assert(isValidSimm8_MultipleOf256(imm)); - assert(insOptsScalableAtLeastHalf(opt)); - hasShift = true; - imm = imm / 256; - } - if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE) - { - fmt = IF_SVE_BV_2A_J; - } - else - { - fmt = IF_SVE_BV_2A; - } - // MOV is an alias for CPY, and is always the preferred disassembly. - ins = INS_sve_mov; - break; - case INS_sve_sqrshrn: case INS_sve_sqrshrun: case INS_sve_uqrshrn: @@ -10139,18 +10100,7 @@ void emitter::emitIns_R_R_I(instruction ins, assert(fmt != IF_NONE); - instrDesc* id; - - if (!optionalShift) - { - id = emitNewInstrSC(attr, imm); - } - else - { - // Instructions with optional shifts (MOV, DUP, etc.) need larger instrDesc to store state - id = emitNewInstrCns(attr, imm); - id->idOptionalShift(hasShift); - } + instrDesc* id = emitNewInstrSC(attr, imm); id->idIns(ins); id->idInsFmt(fmt); @@ -23662,19 +23612,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; - case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - imm = emitGetInsSC(id); - code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd - code |= insEncodeReg_P_19_to_16(id->idReg2()); // gggg - code |= insEncodeImm8_12_to_5(imm); // iiiiiiii - code |= (id->idOptionalShift() ? 0x2000 : 0); // h - code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx - dst += emitOutput_Instr(dst, code); - break; - case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD @@ -28964,16 +28901,6 @@ void emitter::emitDispInsHelp( emitDispImm(imm, false); break; - // ., /Z, #{, } - case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg - emitDispImmOptsLSL(emitGetInsSC(id), id->idOptionalShift(), 8); // iiiiiiii, h - break; - default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -31410,13 +31337,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; - case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 158564028b00b0..07b8690826fc31 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -860,12 +860,6 @@ static bool isValidSimm8(ssize_t value) return (-0x80 <= value) && (value <= 0x7F); }; -// Returns true if 'value' is a legal signed multiple of 256 immediate 8 bit encoding (such as for MOV). -static bool isValidSimm8_MultipleOf256(ssize_t value) -{ - return (-0x8000 <= value) && (value <= 0x7f00) && (value % 256 == 0); -}; - // Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN). static bool isValidUimm12(ssize_t value) { From afdae94dcad84f59bffbfa512e730b1873fd6f91 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Mar 2024 07:23:33 +0000 Subject: [PATCH 12/14] Use NI_Sve_CreateTrueMaskAll --- src/coreclr/jit/emitarm64.cpp | 1 + src/coreclr/jit/hwintrinsicarm64.cpp | 49 +-------------------- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 11 +---- src/coreclr/jit/hwintrinsiclistarm64sve.h | 11 +---- 4 files changed, 4 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 69b5752b610796..4967c8d43d348c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -24458,6 +24458,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) code |= insEncodeImm8_12_to_5(imm); // iiiiiiii code |= (id->idOptionalShift() ? 0x2000 : 0); // h code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); break; case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index b85cdc1bcd0e8b..5c7f796c61c909 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2220,54 +2220,7 @@ GenTree* Compiler::convertHWIntrinsicToMask(var_types type, unsigned simdSize) { // ConvertVectorToMask uses cmpne which requires an embedded mask. - // TODO-SVE: Refactor this out once full embedded masking is adding. - NamedIntrinsic maskName; - switch (simdBaseJitType) - { - case CORINFO_TYPE_UBYTE: - maskName = NI_Sve_CreateTrueMaskAllByte; - break; - - case CORINFO_TYPE_DOUBLE: - maskName = NI_Sve_CreateTrueMaskAllDouble; - break; - - case CORINFO_TYPE_SHORT: - maskName = NI_Sve_CreateTrueMaskAllInt16; - break; - - case CORINFO_TYPE_INT: - maskName = NI_Sve_CreateTrueMaskAllInt32; - break; - - case CORINFO_TYPE_LONG: - maskName = NI_Sve_CreateTrueMaskAllInt64; - break; - - case CORINFO_TYPE_BYTE: - maskName = NI_Sve_CreateTrueMaskAllSByte; - break; - - case CORINFO_TYPE_FLOAT: - maskName = NI_Sve_CreateTrueMaskAllSingle; - break; - - case CORINFO_TYPE_USHORT: - maskName = NI_Sve_CreateTrueMaskAllUInt16; - break; - - case CORINFO_TYPE_UINT: - maskName = NI_Sve_CreateTrueMaskAllUInt32; - break; - - case CORINFO_TYPE_ULONG: - maskName = NI_Sve_CreateTrueMaskAllUInt64; - break; - - default: - unreached(); - } - GenTree* embeddedMask = gtNewSimdHWIntrinsicNode(TYP_MASK, maskName, simdBaseJitType, simdSize); + GenTree* embeddedMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize); return gtNewSimdHWIntrinsicNode(TYP_MASK, embeddedMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType, simdSize); } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 24bf3167866691..6418b72a8f3075 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1292,16 +1292,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt); break; - case NI_Sve_CreateTrueMaskAllByte: - case NI_Sve_CreateTrueMaskAllDouble: - case NI_Sve_CreateTrueMaskAllInt16: - case NI_Sve_CreateTrueMaskAllInt32: - case NI_Sve_CreateTrueMaskAllInt64: - case NI_Sve_CreateTrueMaskAllSByte: - case NI_Sve_CreateTrueMaskAllSingle: - case NI_Sve_CreateTrueMaskAllUInt16: - case NI_Sve_CreateTrueMaskAllUInt32: - case NI_Sve_CreateTrueMaskAllUInt64: + case NI_Sve_CreateTrueMaskAll: // Must use the pattern variant, as the non-pattern varient is SVE2.1. GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL); break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 567315fb2860dc..ac110c2a0e1b5b 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -41,16 +41,7 @@ HARDWARE_INTRINSIC(Sve, LoadVector, HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, true, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_MaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllByte, -1, 0, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllDouble, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllInt16, -1, 0, false, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllInt32, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllInt64, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllSByte, -1, 0, false, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllSingle, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllUInt16, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllUInt32, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAllUInt64, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, -1, false, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC From 6beb760b8cb2d3d8a4501c0cb71e5bb8735ab28a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Mar 2024 09:32:57 +0000 Subject: [PATCH 13/14] Mark API as experimental --- .../System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs | 1 + .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index fbd5ee65ca748f..1a047d4521a521 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -13,6 +13,7 @@ namespace System.Runtime.Intrinsics.Arm /// [CLSCompliant(false)] [System.Runtime.Versioning.RequiresPreviewFeaturesAttribute("Sve is in preview.")] + [Experimental(diagnosticId: "Sve")] #if SYSTEM_PRIVATE_CORELIB public #else diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 6ba2a2c67bc8a7..fc61329e001845 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -14,6 +14,7 @@ namespace System.Runtime.Intrinsics.Arm [Intrinsic] [CLSCompliant(false)] [System.Runtime.Versioning.RequiresPreviewFeaturesAttribute("Sve is in preview.")] + [Experimental(diagnosticId: "Sve")] public abstract class Sve : AdvSimd { internal Sve() { } From dae6d90737c1d710c271ee575f1d23ed1c17fb78 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Mar 2024 15:48:45 +0000 Subject: [PATCH 14/14] Revert "Mark API as experimental" This reverts commit 6beb760b8cb2d3d8a4501c0cb71e5bb8735ab28a. --- .../System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs | 1 - .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 1a047d4521a521..fbd5ee65ca748f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -13,7 +13,6 @@ namespace System.Runtime.Intrinsics.Arm /// [CLSCompliant(false)] [System.Runtime.Versioning.RequiresPreviewFeaturesAttribute("Sve is in preview.")] - [Experimental(diagnosticId: "Sve")] #if SYSTEM_PRIVATE_CORELIB public #else diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index fc61329e001845..6ba2a2c67bc8a7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -14,7 +14,6 @@ namespace System.Runtime.Intrinsics.Arm [Intrinsic] [CLSCompliant(false)] [System.Runtime.Versioning.RequiresPreviewFeaturesAttribute("Sve is in preview.")] - [Experimental(diagnosticId: "Sve")] public abstract class Sve : AdvSimd { internal Sve() { }