diff --git a/src/coreclr/src/jit/CMakeLists.txt b/src/coreclr/src/jit/CMakeLists.txt index b63e08ae2436d..9d1dbe0110dc6 100644 --- a/src/coreclr/src/jit/CMakeLists.txt +++ b/src/coreclr/src/jit/CMakeLists.txt @@ -186,6 +186,7 @@ if (CLR_CMAKE_TARGET_WIN32) unwind.h utils.h valuenum.h + valuenumfuncs.h valuenumtype.h varset.h vartype.h diff --git a/src/coreclr/src/jit/assertionprop.cpp b/src/coreclr/src/jit/assertionprop.cpp index f3f95ffe2e6d6..916921f36cd3f 100644 --- a/src/coreclr/src/jit/assertionprop.cpp +++ b/src/coreclr/src/jit/assertionprop.cpp @@ -5193,8 +5193,15 @@ void Compiler::optAssertionPropMain() } } - if (!optAssertionCount) + if (optAssertionCount == 0) { + // Zero out the bbAssertionIn values, as these can be referenced in RangeCheck::MergeAssertion + // and this is sharedstate with the CSE phase: bbCseIn + // + for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) + { + block->bbAssertionIn = BitVecOps::MakeEmpty(apTraits); + } return; } diff --git a/src/coreclr/src/jit/bitsetasshortlong.h b/src/coreclr/src/jit/bitsetasshortlong.h index c22eefcb6f0df..27a8c1d7349a5 100644 --- a/src/coreclr/src/jit/bitsetasshortlong.h +++ b/src/coreclr/src/jit/bitsetasshortlong.h @@ -412,12 +412,12 @@ class BitSetOpsOperIsMemoryLoad(); + } +#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { - GenTreeHWIntrinsic* hwIntrinsicNode = (const_cast(this))->AsHWIntrinsic(); - return hwIntrinsicNode->OperIsMemoryLoadOrStore(); + return AsHWIntrinsic()->OperIsMemoryLoadOrStore(); } #endif // FEATURE_HW_INTRINSICS default: @@ -18118,6 +18123,16 @@ bool GenTree::isCommutativeSIMDIntrinsic() return false; } } + +// Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, false otherwise +bool GenTreeSIMD::OperIsMemoryLoad() const +{ + if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + { + return true; + } + return false; +} #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS @@ -18327,7 +18342,7 @@ GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORI } // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise -bool GenTreeHWIntrinsic::OperIsMemoryLoad() +bool GenTreeHWIntrinsic::OperIsMemoryLoad() const { #ifdef TARGET_XARCH // Some xarch instructions have MemoryLoad sematics @@ -18369,7 +18384,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad() } // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, false otherwise -bool GenTreeHWIntrinsic::OperIsMemoryStore() +bool GenTreeHWIntrinsic::OperIsMemoryStore() const { #ifdef TARGET_XARCH // Some xarch instructions have MemoryStore sematics @@ -18404,7 +18419,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore() } // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise -bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() +bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() const { #ifdef TARGET_XARCH return OperIsMemoryLoad() || OperIsMemoryStore(); diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 755b5ee4c5d0f..5b548a6a74658 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -4536,6 +4536,9 @@ struct GenTreeSIMD : public GenTreeJitIntrinsic { } + bool OperIsMemoryLoad() const; // Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, + // false otherwise + #if DEBUGGABLE_GENTREE GenTreeSIMD() : GenTreeJitIntrinsic() { @@ -4584,12 +4587,12 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic // However there are HW Instrinsic instructions that have 3 or even 4 operands and this is // supported using a single op1 and using an ArgList for it: gtNewArgList(op1, op2, op3) - bool OperIsMemoryLoad(); // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, + bool OperIsMemoryLoad() const; // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, // false otherwise - bool OperIsMemoryStore(); // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, + bool OperIsMemoryStore() const; // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, // false otherwise - bool OperIsMemoryLoadOrStore(); // Returns true for the HW Instrinsic instructions that have MemoryLoad or - // MemoryStore semantics, false otherwise + bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Instrinsic instructions that have MemoryLoad or + // MemoryStore semantics, false otherwise #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 716d4904046d6..e85c93589150a 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -173,6 +173,69 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va return NO_CLASS_HANDLE; } +#ifdef FEATURE_HW_INTRINSICS +//------------------------------------------------------------------------ +// vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID): +// +// Arguments: +// hwIntrinsicID -- The id for the HW intrinsic +// +// Return Value: +// Returns true if this intrinsic requires value numbering to add an +// extra SimdType argument that encodes the resulting type. +// If we don't do this overloaded versions can return the same VN +// leading to incorrect CSE subsitutions. +// +/* static */ bool Compiler::vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID) +{ + int numArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicID); + + // HW Instrinsic's with -1 for numArgs have a varying number of args, so we currently + // give themm a unique value number them, and don't add an extra argument. + // + if (numArgs == -1) + { + return false; + } + + // We iterate over all of the different baseType's for this instrinsic in the HWIntrinsicInfo table + // We set diffInsCount to the number of instructions that can execute differently. + // + unsigned diffInsCount = 0; +#ifdef TARGET_XARCH + instruction lastIns = INS_invalid; +#endif + for (var_types baseType = TYP_BYTE; (baseType <= TYP_DOUBLE); baseType = (var_types)(baseType + 1)) + { + instruction curIns = HWIntrinsicInfo::lookupIns(hwIntrinsicID, baseType); + if (curIns != INS_invalid) + { +#ifdef TARGET_XARCH + if (curIns != lastIns) + { + diffInsCount++; + // remember the last valid instruction that we saw + lastIns = curIns; + } +#elif defined(TARGET_ARM64) + // On ARM64 we use the same instruction and specify an insOpt arrangement + // so we always consider the instruction operation to be different + // + diffInsCount++; +#endif // TARGET + if (diffInsCount >= 2) + { + // We can early exit the loop now + break; + } + } + } + + // If we see two (or more) different instructions we need the extra VNF_SimdType arg + return (diffInsCount >= 2); +} +#endif // FEATURE_HW_INTRINSICS + //------------------------------------------------------------------------ // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 42eda73b5d783..7bd8c15142743 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -483,10 +483,10 @@ HARDWARE_INTRINSIC(AVX2_ConvertToUInt32, "ConvertToUI HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int16, "ConvertToVector256Int16", AVX2, -1, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32, "ConvertToVector256Int32", AVX2, -1, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64, "ConvertToVector256Int64", AVX2, -1, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2_HorizontalAdd, "HorizontalAdd", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_invalid, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate, "HorizontalAddSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalSubtract, "HorizontalSubtract", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/src/jit/jitconfigvalues.h b/src/coreclr/src/jit/jitconfigvalues.h index 688008f69717d..1f91cfd1fbf29 100644 --- a/src/coreclr/src/jit/jitconfigvalues.h +++ b/src/coreclr/src/jit/jitconfigvalues.h @@ -136,6 +136,7 @@ CONFIG_INTEGER(ShouldInjectFault, W("InjectFault"), 0) CONFIG_INTEGER(StressCOMCall, W("StressCOMCall"), 0) CONFIG_INTEGER(TailcallStress, W("TailcallStress"), 0) CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, display each tree before/after morphing + CONFIG_METHODSET(JitBreak, W("JitBreak")) // Stops in the importer when compiling a specified method CONFIG_METHODSET(JitDebugBreak, W("JitDebugBreak")) CONFIG_METHODSET(JitDisasm, W("JitDisasm")) // Dumps disassembly for specified method @@ -273,6 +274,14 @@ CONFIG_INTEGER(EnableArm64Sve, W("EnableArm64Sve"), 1) // clang-format on +#ifdef FEATURE_SIMD +CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNumbering of SIMD nodes and HW Intrinsic + // nodes enabled + // If 1, then disable ValueNumbering of SIMD nodes + // If 2, then disable ValueNumbering of HW Intrinsic nodes + // If 3, disable both SIMD and HW Intrinsic nodes +#endif // FEATURE_SIMD + /// /// JIT /// diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 4a9a40541304c..efb4ed3433161 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -498,8 +498,22 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) /* Start the list with the first CSE candidate recorded */ - hashDsc->csdTreeList = newElem; - hashDsc->csdTreeLast = newElem; + hashDsc->csdTreeList = newElem; + hashDsc->csdTreeLast = newElem; + hashDsc->csdStructHnd = NO_CLASS_HANDLE; + + hashDsc->csdStructHndMismatch = false; + + if (varTypeIsStruct(tree->gtType)) + { + // When we have a GT_IND node with a SIMD type then we don't have a reliable + // struct handle and gtGetStructHandleIfPresent returns a guess that can be wrong + // + if ((hashDsc->csdTree->OperGet() != GT_IND) || !varTypeIsSIMD(tree)) + { + hashDsc->csdStructHnd = gtGetStructHandleIfPresent(hashDsc->csdTree); + } + } } noway_assert(hashDsc->csdTreeList); @@ -516,6 +530,38 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdTreeLast->tslNext = newElem; hashDsc->csdTreeLast = newElem; + if (varTypeIsStruct(newElem->tslTree->gtType)) + { + // When we have a GT_IND node with a SIMD type then we don't have a reliable + // struct handle and gtGetStructHandleIfPresent returns a guess that can be wrong + // + if ((newElem->tslTree->OperGet() != GT_IND) || !varTypeIsSIMD(newElem->tslTree)) + { + CORINFO_CLASS_HANDLE newElemStructHnd = gtGetStructHandleIfPresent(newElem->tslTree); + if (newElemStructHnd != NO_CLASS_HANDLE) + { + if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) + { + // The previous node(s) were GT_IND's and didn't carry the struct handle info + // The current node does have the struct handle info, so record it now + // + hashDsc->csdStructHnd = newElemStructHnd; + } + else if (newElemStructHnd != hashDsc->csdStructHnd) + { + hashDsc->csdStructHndMismatch = true; +#ifdef DEBUG + if (verbose) + { + printf("Abandoned - CSE candidate has mismatching struct handles!\n"); + printTreeID(newElem->tslTree); + } +#endif // DEBUG + } + } + } + } + optDoCSE = true; // Found a duplicate CSE tree /* Have we assigned a CSE index? */ @@ -2415,6 +2461,36 @@ class CSE_Heuristic extra_yes_cost *= 2; // full cost if we are being Conservative } } + +#ifdef FEATURE_SIMD + // SIMD types may cause a SIMD register to be spilled/restored in the prolog and epilog. + // + if (varTypeIsSIMD(candidate->Expr()->TypeGet())) + { + // We don't have complete information about when these extra spilled/restore will be needed. + // Instead we are conservative and assume that each SIMD CSE that is live across a call + // will cause an additional spill/restore in the prolog and epilog. + // + int spillSimdRegInProlog = 1; + + // If we have a SIMD32 that is live across a call we have even higher spill costs + // + if (candidate->Expr()->TypeGet() == TYP_SIMD32) + { + // Additionally for a simd32 CSE candidate we assume that and second spilled/restore will be needed. + // (to hold the upper half of the simd32 register that isn't preserved across the call) + // + spillSimdRegInProlog++; + + // We also increase the CSE use cost here to because we may have to generate instructions + // to move the upper half of the simd32 before and after a call. + // + cse_use_cost += 2; + } + + extra_yes_cost = (BB_UNITY_WEIGHT * spillSimdRegInProlog) * 3; + } +#endif // FEATURE_SIMD } // estimate the cost from lost codesize reduction if we do not perform the CSE @@ -2558,9 +2634,10 @@ class CSE_Heuristic var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet()); if (varTypeIsStruct(cseLclVarTyp)) { - // After call args have been morphed, we don't need a handle for SIMD types. - // They are only required where the size is not implicit in the type and/or there are GC refs. - CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(successfulCandidate->Expr()); + // Retrieve the struct handle that we recorded while bulding the list of CSE candidates. + // If all occurances were in GT_IND nodes it could still be NO_CLASS_HANDLE + // + CORINFO_CLASS_HANDLE structHnd = successfulCandidate->CseDsc()->csdStructHnd; assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT)); if (structHnd != NO_CLASS_HANDLE) { @@ -2974,13 +3051,19 @@ class CSE_Heuristic for (; (cnt > 0); cnt--, ptr++) { Compiler::CSEdsc* dsc = *ptr; + CSE_Candidate candidate(this, dsc); + if (dsc->defExcSetPromise == ValueNumStore::NoVN) { - JITDUMP("Abandoned CSE #%02u because we had defs with different Exc sets\n"); + JITDUMP("Abandoned CSE #%02u because we had defs with different Exc sets\n", candidate.CseIndex()); continue; } - CSE_Candidate candidate(this, dsc); + if (dsc->csdStructHndMismatch) + { + JITDUMP("Abandoned CSE #%02u because we had mismatching struct handles\n", candidate.CseIndex()); + continue; + } candidate.InitializeCounts(); @@ -3280,10 +3363,58 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) case GT_LE: case GT_GE: case GT_GT: - return true; // Also CSE these Comparison Operators + return true; // Allow the CSE of Comparison operators + +#ifdef FEATURE_SIMD + case GT_SIMD: + return true; // allow SIMD intrinsics to be CSE-ed + +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + { + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); + HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(hwIntrinsicNode->gtHWIntrinsicId); + + switch (category) + { + case HW_Category_SimpleSIMD: + case HW_Category_IMM: + case HW_Category_Scalar: + case HW_Category_SIMDScalar: + case HW_Category_Helper: + break; + + case HW_Category_MemoryLoad: + case HW_Category_MemoryStore: + case HW_Category_Special: + default: + return false; + } + + if (hwIntrinsicNode->OperIsMemoryStore()) + { + // NI_BMI2_MultiplyNoFlags, etc... + return false; + } + if (hwIntrinsicNode->OperIsMemoryLoad()) + { + // NI_AVX2_BroadcastScalarToVector128, NI_AVX2_GatherVector128, etc... + return false; + } + + return true; // allow Hardware Intrinsics to be CSE-ed + } + +#endif // FEATURE_HW_INTRINSICS case GT_INTRINSIC: - return true; // Intrinsics + return true; // allow Intrinsics to be CSE-ed + + case GT_OBJ: + return varTypeIsEnregisterable(type); // Allow enregisterable GT_OBJ's to be CSE-ed. (i.e. SIMD types) case GT_COMMA: return true; // Allow GT_COMMA nodes to be CSE-ed. diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 1d7b9bf7443a9..95f6739154feb 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1051,6 +1051,46 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in return nullptr; } +/* static */ bool Compiler::vnEncodesResultTypeForSIMDIntrinsic(SIMDIntrinsicID intrinsicId) +{ + switch (intrinsicId) + { + case SIMDIntrinsicInit: + case SIMDIntrinsicGetItem: + case SIMDIntrinsicAdd: + case SIMDIntrinsicSub: + case SIMDIntrinsicMul: + case SIMDIntrinsicDiv: + case SIMDIntrinsicSqrt: + case SIMDIntrinsicMin: + case SIMDIntrinsicMax: + case SIMDIntrinsicAbs: + case SIMDIntrinsicEqual: + case SIMDIntrinsicLessThan: + case SIMDIntrinsicLessThanOrEqual: + case SIMDIntrinsicGreaterThan: + case SIMDIntrinsicGreaterThanOrEqual: + case SIMDIntrinsicBitwiseAnd: + case SIMDIntrinsicBitwiseAndNot: + case SIMDIntrinsicBitwiseOr: + case SIMDIntrinsicBitwiseXor: + case SIMDIntrinsicDotProduct: + case SIMDIntrinsicCast: + case SIMDIntrinsicConvertToSingle: + case SIMDIntrinsicConvertToDouble: + case SIMDIntrinsicConvertToInt32: + case SIMDIntrinsicConvertToInt64: + case SIMDIntrinsicNarrow: + case SIMDIntrinsicWidenHi: + case SIMDIntrinsicWidenLo: + return true; + + default: + break; + } + return false; +} + // Pops and returns GenTree node from importer's type stack. // Normalizes TYP_STRUCT value in case of GT_CALL, GT_RET_EXPR and arg nodes. // diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 2509bb1912ec7..1e9c6dc7ddaae 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -342,6 +342,15 @@ VNFunc GetVNFuncForNode(GenTree* node) } break; +#ifdef FEATURE_SIMD + case GT_SIMD: + return VNFunc(VNF_SIMD_FIRST + node->AsSIMD()->gtSIMDIntrinsicID); +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + return VNFunc(VNF_HWI_FIRST + (node->AsHWIntrinsic()->gtHWIntrinsicId - NI_HW_INTRINSIC_START - 1)); +#endif // FEATURE_HW_INTRINSICS + case GT_CAST: // GT_CAST can overflow but it has special handling and it should not appear here. unreached(); @@ -1739,14 +1748,15 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) case TYP_BYREF: return VNForByrefCon(0); case TYP_STRUCT: + return VNForZeroMap(); // Recursion! + #ifdef FEATURE_SIMD - // TODO-CQ: Improve value numbering for SIMD types. case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: -#endif // FEATURE_SIMD - return VNForZeroMap(); // Recursion! + return VNForLongCon(0); +#endif // FEATURE_SIMD // These should be unreached. default: @@ -5009,14 +5019,23 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) printf("byrefVal"); break; case TYP_STRUCT: + printf("structVal(zero)"); + break; + #ifdef FEATURE_SIMD case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: + { + // Only the zero constant is currently allowed for SIMD types + // + INT64 val = ConstantValue(vn); + assert(val == 0); + printf(" 0"); + } + break; #endif // FEATURE_SIMD - printf("structVal"); - break; // These should be unreached. default: @@ -5054,6 +5073,12 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) case VNF_ValWithExc: vnDumpValWithExc(comp, &funcApp); break; +#ifdef FEATURE_SIMD + case VNF_SimdType: + vnDumpSimdType(comp, &funcApp); + break; +#endif // FEATURE_SIMD + default: printf("%s(", VNFuncName(funcApp.m_func)); for (unsigned i = 0; i < funcApp.m_arity; i++) @@ -5205,6 +5230,21 @@ void ValueNumStore::vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore) comp->vnPrint(newValVN, 0); printf("]"); } + +#ifdef FEATURE_SIMD +void ValueNumStore::vnDumpSimdType(Compiler* comp, VNFuncApp* simdType) +{ + assert(simdType->m_func == VNF_SimdType); // Preconditions. + assert(IsVNConstant(simdType->m_args[0])); + assert(IsVNConstant(simdType->m_args[1])); + + int simdSize = ConstantValue(simdType->m_args[0]); + var_types baseType = (var_types)ConstantValue(simdType->m_args[1]); + + printf("%s(simd%d, %s)", VNFuncName(simdType->m_func), simdSize, varTypeName(baseType)); +} +#endif // FEATURE_SIMD + #endif // DEBUG // Static fields, methods. @@ -5227,9 +5267,9 @@ UINT8* ValueNumStore::s_vnfOpAttribs = nullptr; void ValueNumStore::InitValueNumStoreStatics() { - // Make sure we've gotten constants right... - assert(unsigned(VNFOA_Arity) == (1 << VNFOA_ArityShift)); - assert(unsigned(VNFOA_AfterArity) == (unsigned(VNFOA_Arity) << VNFOA_ArityBits)); + // Make sure we have the constants right... + assert(unsigned(VNFOA_Arity1) == (1 << VNFOA_ArityShift)); + assert(VNFOA_ArityMask == (VNFOA_MaxArity << VNFOA_ArityShift)); s_vnfOpAttribs = &vnfOpAttribs[0]; for (unsigned i = 0; i < GT_COUNT; i++) @@ -5249,7 +5289,7 @@ void ValueNumStore::InitValueNumStoreStatics() { arity = 2; } - vnfOpAttribs[i] |= (arity << VNFOA_ArityShift); + vnfOpAttribs[i] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask); if (GenTree::OperIsCommutative(gtOper)) { @@ -5268,12 +5308,72 @@ void ValueNumStore::InitValueNumStoreStatics() vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \ if (sharedStatic) \ vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ + if (arity > 0) \ + vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask); \ vnfNum++; #include "valuenumfuncs.h" #undef ValueNumFuncDef + assert(vnfNum == VNF_COUNT); + +#define ValueNumFuncSetArity(vnfNum, arity) \ + vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ + vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask) /* set the new arity value */ + +#ifdef FEATURE_SIMD + + // SIMDIntrinsicInit has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_Init, 1); + // SIMDIntrinsicWidenHi has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_WidenHi, 1); + // SIMDIntrinsicWidenLo has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_WidenLo, 1); + + // Some SIMD intrinsic nodes have an extra VNF_SimdType arg + // + for (SIMDIntrinsicID id = SIMDIntrinsicID::SIMDIntrinsicNone; (id < SIMDIntrinsicID::SIMDIntrinsicInvalid); + id = (SIMDIntrinsicID)(id + 1)) + { + bool encodeResultType = Compiler::vnEncodesResultTypeForSIMDIntrinsic(id); + + if (encodeResultType) + { + // These SIMDIntrinsic's have an extra VNF_SimdType arg. + // + VNFunc func = VNFunc(VNF_SIMD_FIRST + id); + unsigned oldArity = VNFuncArity(func); + unsigned newArity = oldArity + 1; + + ValueNumFuncSetArity(func, newArity); + } + } + +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + + for (NamedIntrinsic id = (NamedIntrinsic)(NI_HW_INTRINSIC_START + 1); (id < NI_HW_INTRINSIC_END); + id = (NamedIntrinsic)(id + 1)) + { + bool encodeResultType = Compiler::vnEncodesResultTypeForHWIntrinsic(id); + + if (encodeResultType) + { + // These HW_Intrinsic's have an extra VNF_SimdType arg. + // + VNFunc func = VNFunc(VNF_HWI_FIRST + (id - NI_HW_INTRINSIC_START - 1)); + unsigned oldArity = VNFuncArity(func); + unsigned newArity = oldArity + 1; + + ValueNumFuncSetArity(func, newArity); + } + } + +#endif // FEATURE_HW_INTRINSICS + +#undef ValueNumFuncSetArity + for (unsigned i = 0; i < _countof(genTreeOpsIllegalAsVNFunc); i++) { vnfOpAttribs[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp; @@ -6313,6 +6413,24 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) } break; +#ifdef FEATURE_SIMD + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: + case TYP_SIMD32: + +#ifdef TARGET_64BIT + // Only the zero constant is currently allowed for SIMD types + // + assert(tree->AsIntConCommon()->LngValue() == 0); + tree->gtVNPair.SetBoth(vnStore->VNForLongCon(tree->AsIntConCommon()->LngValue())); +#else // 32BIT + assert(tree->AsIntConCommon()->IconValue() == 0); + tree->gtVNPair.SetBoth(vnStore->VNForIntCon(tree->AsIntConCommon()->IconValue())); +#endif + break; +#endif // FEATURE_SIMD + case TYP_FLOAT: tree->gtVNPair.SetBoth(vnStore->VNForFloatCon((float)tree->AsDblCon()->gtDconVal)); break; @@ -6680,40 +6798,46 @@ void Compiler::fgValueNumberTree(GenTree* tree) genTreeOps oper = tree->OperGet(); #ifdef FEATURE_SIMD - // TODO-CQ: For now TYP_SIMD values are not handled by value numbering to be amenable for CSE'ing. - if (oper == GT_SIMD) + if ((JitConfig.JitDisableSimdVN() & 1) == 1) { - tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); - return; + // This Jit Config forces the previous behavior of value numbering for SIMD nodes + if (oper == GT_SIMD) + { + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); + return; + } } -#endif +#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS - if (oper == GT_HWINTRINSIC) + if ((JitConfig.JitDisableSimdVN() & 2) == 2) { - // TODO-CQ: For now hardware intrinsics are not handled by value numbering to be amenable for CSE'ing. - tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); + // This Jit Config forces the previous behavior of value numbering for HW Intrinsic nodes + if (oper == GT_HWINTRINSIC) + { + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); - GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); - assert(hwIntrinsicNode != nullptr); + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); - // For safety/correctness we must mutate the global heap valuenumber - // for any HW intrinsic that performs a memory store operation - if (hwIntrinsicNode->OperIsMemoryStore()) - { - fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); - } + // For safety/correctness we must mutate the global heap valuenumber + // for any HW intrinsic that performs a memory store operation + if (hwIntrinsicNode->OperIsMemoryStore()) + { + fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); + } - return; + return; + } } #endif // FEATURE_HW_INTRINSICS var_types typ = tree->TypeGet(); if (GenTree::OperIsConst(oper)) { - // If this is a struct assignment, with a constant rhs, it is an initBlk, and it is not - // really useful to value number the constant. - if (!varTypeIsStruct(tree)) + // If this is a struct assignment, with a constant rhs, (i,.e. an initBlk), + // it is not useful to value number the constant. + if (tree->TypeGet() != TYP_STRUCT) { fgValueNumberTreeConst(tree); } @@ -7024,7 +7148,8 @@ void Compiler::fgValueNumberTree(GenTree* tree) unsigned memorySsaNum; #endif - if ((oper == GT_ASG) && !varTypeIsStruct(tree)) + // Allow assignments for all enregisterable types to be value numbered (SIMD types) + if ((oper == GT_ASG) && varTypeIsEnregisterable(tree)) { GenTree* lhs = tree->AsOp()->gtOp1; GenTree* rhs = tree->AsOp()->gtOp2; @@ -7032,7 +7157,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair rhsVNPair = rhs->gtVNPair; // Is the type being stored different from the type computed by the rhs? - if (rhs->TypeGet() != lhs->TypeGet()) + if ((rhs->TypeGet() != lhs->TypeGet()) && (lhs->OperGet() != GT_BLK)) { // This means that there is an implicit cast on the rhs value // @@ -7229,11 +7354,13 @@ void Compiler::fgValueNumberTree(GenTree* tree) noway_assert(!"Phi arg cannot be LHS."); break; - case GT_BLK: case GT_OBJ: - noway_assert(!"GT_BLK/GT_OBJ can not be LHS when !varTypeIsStruct(tree) is true!"); + noway_assert(!"GT_OBJ can not be LHS when (tree->TypeGet() != TYP_STRUCT)!"); break; + case GT_BLK: + __fallthrough; + case GT_IND: { bool isVolatile = (lhs->gtFlags & GTF_IND_VOLATILE) != 0; @@ -7594,7 +7721,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) } } // Other kinds of assignment: initblk and copyblk. - else if (oper == GT_ASG && varTypeIsStruct(tree)) + else if (oper == GT_ASG && (tree->TypeGet() == TYP_STRUCT)) { fgValueNumberBlockAssignment(tree); } @@ -7923,6 +8050,21 @@ void Compiler::fgValueNumberTree(GenTree* tree) { fgValueNumberIntrinsic(tree); } + +#ifdef FEATURE_SIMD + else if (tree->OperGet() == GT_SIMD) + { + fgValueNumberSimd(tree); + } +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + else if (tree->OperGet() == GT_HWINTRINSIC) + { + fgValueNumberHWIntrinsic(tree); + } +#endif // FEATURE_HW_INTRINSICS + else // Look up the VNFunc for the node { VNFunc vnf = GetVNFuncForNode(tree); @@ -7996,7 +8138,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair op2vnp; ValueNumPair op2Xvnp; vnStore->VNPUnpackExc(op2VNPair, &op2vnp, &op2Xvnp); - ValueNumPair excSet = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + ValueNumPair excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); ValueNum newVN = ValueNumStore::NoVN; @@ -8010,14 +8152,14 @@ void Compiler::fgValueNumberTree(GenTree* tree) if (newVN != ValueNumStore::NoVN) { // We don't care about differences between liberal and conservative for pointer values. - newVN = vnStore->VNWithExc(newVN, excSet.GetLiberal()); + newVN = vnStore->VNWithExc(newVN, excSetPair.GetLiberal()); tree->gtVNPair.SetBoth(newVN); } else { VNFunc vnf = GetVNFuncForNode(tree); ValueNumPair normalPair = vnStore->VNPairForFunc(tree->TypeGet(), vnf, op1vnp, op2vnp); - tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSet); + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); // For overflow checking operations the VNF_OverflowExc will be added below // by fgValueNumberAddExceptionSet } @@ -8145,10 +8287,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair vnpIndex = tree->AsBoundsChk()->gtIndex->gtVNPair; ValueNumPair vnpArrLen = tree->AsBoundsChk()->gtArrLen->gtVNPair; - // Construct the exception set for bounds check - ValueNumPair vnpExcSet = vnStore->VNPExcSetSingleton( - vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc, vnStore->VNPNormalPair(vnpIndex), - vnStore->VNPNormalPair(vnpArrLen))); + ValueNumPair vnpExcSet = ValueNumStore::VNPForEmptyExcSet(); // And collect the exceptions from Index and ArrLen vnpExcSet = vnStore->VNPUnionExcSet(vnpIndex, vnpExcSet); @@ -8157,6 +8296,9 @@ void Compiler::fgValueNumberTree(GenTree* tree) // A bounds check node has no value, but may throw exceptions. tree->gtVNPair = vnStore->VNPWithExc(vnStore->VNPForVoid(), vnpExcSet); + // next add the bounds check exception set for the current tree node + fgValueNumberAddExceptionSet(tree); + // Record non-constant value numbers that are used as the length argument to bounds checks, so that // assertion prop will know that comparisons against them are worth analyzing. ValueNum lengthVN = tree->AsBoundsChk()->gtArrLen->gtVNPair.GetConservative(); @@ -8276,6 +8418,242 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) } } +#ifdef FEATURE_SIMD +// Does value-numbering for a GT_SIMD node. +void Compiler::fgValueNumberSimd(GenTree* tree) +{ + assert(tree->OperGet() == GT_SIMD); + GenTreeSIMD* simdNode = tree->AsSIMD(); + assert(simdNode != nullptr); + ValueNumPair excSetPair; + ValueNumPair normalPair; + + // There are some SIMD operations that have zero args, i.e. NI_Vector128_Zero + if (tree->AsOp()->gtOp1 == nullptr) + { + excSetPair = ValueNumStore::VNPForEmptyExcSet(); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree)); + } + else if (tree->AsOp()->gtOp1->OperIs(GT_LIST)) + { + assert(tree->AsOp()->gtOp2 == nullptr); + + // We have a SIMD node in the GT_LIST form with 3 or more args + // For now we will generate a unique value number for this case. + + // Generate a unique VN + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + return; + } + else // SIMD unary or binary operator. + { + ValueNumPair resvnp = ValueNumPair(); + ValueNumPair op1vnp; + ValueNumPair op1Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + + if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + { + // rationalize rewrites this as an explicit load with op1 as the base address + + ValueNumPair op2vnp; + if (tree->AsOp()->gtOp2 == nullptr) + { + // a nullptr for op2 means that we have an impicit index of zero + op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); + + excSetPair = op1Xvnp; + } + else // We have an explicit index in op2 + { + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + } + + ValueNum addrVN = + vnStore->VNForFunc(TYP_BYREF, GetVNFuncForNode(tree), op1vnp.GetLiberal(), op2vnp.GetLiberal()); +#ifdef DEBUG + if (verbose) + { + printf("Treating GT_SIMD InitArray as a ByrefExposed load , addrVN is "); + vnPrint(addrVN, 0); + } +#endif // DEBUG + + // The address points into the heap, so it is an ByrefExposed load. + // + ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN); + tree->gtVNPair.SetLiberal(loadVN); + tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); + fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); + return; + } + + bool encodeResultType = vnEncodesResultTypeForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID); + + if (encodeResultType) + { + ValueNum vnSize = vnStore->VNForIntCon(simdNode->gtSIMDSize); + ValueNum vnBaseType = vnStore->VNForIntCon(INT32(simdNode->gtSIMDBaseType)); + ValueNum simdTypeVN = vnStore->VNForFunc(TYP_REF, VNF_SimdType, vnSize, vnBaseType); + resvnp.SetBoth(simdTypeVN); + +#ifdef DEBUG + if (verbose) + { + printf(" simdTypeVN is "); + vnPrint(simdTypeVN, 1); + printf("\n"); + } +#endif + } + + VNFunc simdFunc = GetVNFuncForNode(tree); + + if (tree->AsOp()->gtOp2 == nullptr) + { + // Unary SIMD nodes have a nullptr for op2. + excSetPair = op1Xvnp; + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, resvnp); + assert(vnStore->VNFuncArity(simdFunc) == 2); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp); + assert(vnStore->VNFuncArity(simdFunc) == 1); + } + } + else + { + ValueNumPair op2vnp; + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, op2vnp, resvnp); + assert(vnStore->VNFuncArity(simdFunc) == 3); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, op2vnp); + assert(vnStore->VNFuncArity(simdFunc) == 2); + } + } + } + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +// Does value-numbering for a GT_HWINTRINSIC node +void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) +{ + assert(tree->OperGet() == GT_HWINTRINSIC); + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); + + // For safety/correctness we must mutate the global heap valuenumber + // for any HW intrinsic that performs a memory store operation + if (hwIntrinsicNode->OperIsMemoryStore()) + { + fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); + } + + int lookupNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode->gtHWIntrinsicId); + VNFunc func = GetVNFuncForNode(tree); + unsigned fixedArity = vnStore->VNFuncArity(func); + + ValueNumPair excSetPair = ValueNumStore::VNPForEmptyExcSet(); + ValueNumPair normalPair; + + // There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector128_Zero + if (tree->AsOp()->gtOp1 == nullptr) + { + assert(fixedArity == 0); + + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func); + assert(lookupNumArgs == 0); + } + else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (lookupNumArgs == -1)) + { + // We have a HWINTRINSIC node in the GT_LIST form with 3 or more args + // Or the numArgs was specified as -1 in the numArgs column in "hwinstrinsiclistxarch.h" + // For now we will generate a unique value number for this case. + + // Generate unique VN + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + return; + } + else // HWINTRINSIC unary or binary operator. + { + ValueNumPair resvnp = ValueNumPair(); + bool encodeResultType = vnEncodesResultTypeForHWIntrinsic(hwIntrinsicNode->gtHWIntrinsicId); + + if (encodeResultType) + { + ValueNum vnSize = vnStore->VNForIntCon(hwIntrinsicNode->gtSIMDSize); + ValueNum vnBaseType = vnStore->VNForIntCon(INT32(hwIntrinsicNode->gtSIMDBaseType)); + ValueNum simdTypeVN = vnStore->VNForFunc(TYP_REF, VNF_SimdType, vnSize, vnBaseType); + resvnp.SetBoth(simdTypeVN); + +#ifdef DEBUG + if (verbose) + { + printf(" simdTypeVN is "); + vnPrint(simdTypeVN, 1); + printf("\n"); + } +#endif + } + ValueNumPair op1vnp; + ValueNumPair op1Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + + if (tree->AsOp()->gtOp2 == nullptr) + { + excSetPair = op1Xvnp; + + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, resvnp); + assert(vnStore->VNFuncArity(func) == 2); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp); + assert(vnStore->VNFuncArity(func) == 1); + } + } + else + { + ValueNumPair op2vnp; + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, op2vnp, resvnp); + assert(vnStore->VNFuncArity(func) == 3); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, op2vnp); + assert(vnStore->VNFuncArity(func) == 2); + } + } + } + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); +} +#endif // FEATURE_HW_INTRINSICS + void Compiler::fgValueNumberCastTree(GenTree* tree) { assert(tree->OperGet() == GT_CAST); @@ -9355,6 +9733,47 @@ void Compiler::fgValueNumberAddExceptionSetForOverflow(GenTree* tree) tree->gtVNPair = vnStore->VNPWithExc(vnpTreeNorm, newExcSet); } +//-------------------------------------------------------------------------------- +// fgValueNumberAddExceptionSetForBoundsCheck +// - Adds the exception set for the current tree node +// which is performing an bounds check operation +// +// Arguments: +// tree - The current GenTree node, +// It must be a node that performs a bounds check operation +// +// Return Value: +// - The tree's gtVNPair is updated to include the +// VNF_IndexOutOfRangeExc exception set. +// +void Compiler::fgValueNumberAddExceptionSetForBoundsCheck(GenTree* tree) +{ + GenTreeBoundsChk* node = tree->AsBoundsChk(); + assert(node != nullptr); + + ValueNumPair vnpIndex = node->gtIndex->gtVNPair; + ValueNumPair vnpArrLen = node->gtArrLen->gtVNPair; + + // Unpack, Norm,Exc for the tree's VN + // + ValueNumPair vnpTreeNorm; + ValueNumPair vnpTreeExc; + + vnStore->VNPUnpackExc(tree->gtVNPair, &vnpTreeNorm, &vnpTreeExc); + + // Construct the exception set for bounds check + ValueNumPair boundsChkExcSet = vnStore->VNPExcSetSingleton( + vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc, vnStore->VNPNormalPair(vnpIndex), + vnStore->VNPNormalPair(vnpArrLen))); + + // Combine the new Overflow exception with the original exception set of tree + ValueNumPair newExcSet = vnStore->VNPExcSetUnion(vnpTreeExc, boundsChkExcSet); + + // Update the VN for the tree it, the updated VN for tree + // now includes the IndexOutOfRange exception. + tree->gtVNPair = vnStore->VNPWithExc(vnpTreeNorm, newExcSet); +} + //-------------------------------------------------------------------------------- // fgValueNumberAddExceptionSetForCkFinite // - Adds the exception set for the current tree node @@ -9419,9 +9838,27 @@ void Compiler::fgValueNumberAddExceptionSet(GenTree* tree) case GT_ADD: // An Overflow checking ALU operation case GT_SUB: case GT_MUL: + assert(tree->gtOverflowEx()); fgValueNumberAddExceptionSetForOverflow(tree); break; + case GT_DIV: + case GT_UDIV: + case GT_MOD: + case GT_UMOD: + fgValueNumberAddExceptionSetForDivision(tree); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + case GT_ARR_BOUNDS_CHECK: + fgValueNumberAddExceptionSetForBoundsCheck(tree); + break; + case GT_LCLHEAP: // It is not necessary to model the StackOverflow exception for GT_LCLHEAP break; @@ -9461,17 +9898,16 @@ void Compiler::fgValueNumberAddExceptionSet(GenTree* tree) fgValueNumberAddExceptionSetForIndirection(tree, tree->AsArrOffs()->gtArrObj); break; - case GT_DIV: - case GT_UDIV: - case GT_MOD: - case GT_UMOD: - fgValueNumberAddExceptionSetForDivision(tree); - break; - case GT_CKFINITE: fgValueNumberAddExceptionSetForCkFinite(tree); break; +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + // ToDo: model the exceptions for Intrinsics + break; +#endif // FEATURE_HW_INTRINSICS + default: assert(!"Handle this oper in fgValueNumberAddExceptionSet"); break; diff --git a/src/coreclr/src/jit/valuenum.h b/src/coreclr/src/jit/valuenum.h index fdb6268892193..d4167a00c0747 100644 --- a/src/coreclr/src/jit/valuenum.h +++ b/src/coreclr/src/jit/valuenum.h @@ -134,8 +134,9 @@ class ValueNumStore { VNFOA_IllegalGenTreeOp = 0x1, // corresponds to a genTreeOps value that is not a legal VN func. VNFOA_Commutative = 0x2, // 1 iff the function is commutative. - VNFOA_Arity = 0x4, // Bits 2..3 encode the arity. - VNFOA_AfterArity = 0x20, // Makes it clear what value the next flag(s) after Arity should have. + VNFOA_Arity1 = 0x4, // Bits 2,3,4 encode the arity. + VNFOA_Arity2 = 0x8, // Bits 2,3,4 encode the arity. + VNFOA_Arity4 = 0x10, // Bits 2,3,4 encode the arity. VNFOA_KnownNonNull = 0x20, // 1 iff the result is known to be non-null. VNFOA_SharedStatic = 0x40, // 1 iff this VNF is represent one of the shared static jit helpers }; @@ -143,7 +144,7 @@ class ValueNumStore static const unsigned VNFOA_ArityShift = 2; static const unsigned VNFOA_ArityBits = 3; static const unsigned VNFOA_MaxArity = (1 << VNFOA_ArityBits) - 1; // Max arity we can represent. - static const unsigned VNFOA_ArityMask = VNFOA_AfterArity - VNFOA_Arity; + static const unsigned VNFOA_ArityMask = (VNFOA_Arity4 | VNFOA_Arity2 | VNFOA_Arity1); // These enum constants are used to encode the cast operation in the lowest bits by VNForCastOper enum VNFCastAttrib @@ -878,6 +879,12 @@ class ValueNumStore // Prints a representation of the set of exceptions on standard out. void vnDumpExcSeq(Compiler* comp, VNFuncApp* excSeq, bool isHead); +#ifdef FEATURE_SIMD + // Requires "simdType" to be a VNF_SimdType VNFuncApp. + // Prints a representation (comma-separated list of field names) on standard out. + void vnDumpSimdType(Compiler* comp, VNFuncApp* simdType); +#endif // FEATURE_SIMD + // Returns the string name of "vnf". static const char* VNFuncName(VNFunc vnf); // Used in the implementation of the above. diff --git a/src/coreclr/src/jit/valuenumfuncs.h b/src/coreclr/src/jit/valuenumfuncs.h index 7e96d430a2f07..00fa69083a89c 100644 --- a/src/coreclr/src/jit/valuenumfuncs.h +++ b/src/coreclr/src/jit/valuenumfuncs.h @@ -147,7 +147,7 @@ ValueNumFuncDef(LE_UN, 2, false, false, false) ValueNumFuncDef(GE_UN, 2, false, false, false) ValueNumFuncDef(GT_UN, 2, false, false, false) -// currently we won't constant fold the next six +// currently we don't constant fold the next six ValueNumFuncDef(ADD_OVF, 2, true, false, false) // overflow checking operations ValueNumFuncDef(SUB_OVF, 2, false, false, false) @@ -157,6 +157,33 @@ ValueNumFuncDef(ADD_UN_OVF, 2, true, false, false) // unsigned overflow checkin ValueNumFuncDef(SUB_UN_OVF, 2, false, false, false) ValueNumFuncDef(MUL_UN_OVF, 2, true, false, false) +#ifdef FEATURE_SIMD +ValueNumFuncDef(SimdType, 2, false, false, false) // A value number function to compose a SIMD type +#endif + +#define SIMD_INTRINSIC(m, i, id, n, r, argCount, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \ +ValueNumFuncDef(SIMD_##id, argCount, false, false, false) // All of the SIMD intrinsic (Consider isCommutativeSIMDIntrinsic) +#include "simdintrinsiclist.h" +#define VNF_SIMD_FIRST VNF_SIMD_None + +#if defined(TARGET_XARCH) +#define HARDWARE_INTRINSIC(id, name, isa, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ +ValueNumFuncDef(HWI_##id, argCount, false, false, false) // All of the HARDWARE_INTRINSICS for x86/x64 +#include "hwintrinsiclistxarch.h" +#define VNF_HWI_FIRST VNF_HWI_Vector128_As + +#elif defined (TARGET_ARM64) +#define HARDWARE_INTRINSIC(isa, name, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ +ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of the HARDWARE_INTRINSICS for arm64 +#include "hwintrinsiclistarm64.h" +#define VNF_HWI_FIRST VNF_HWI_Vector64_AsByte + +#elif defined (TARGET_ARM) +// No Hardware Intrinsics on ARM32 +#else +#error Unsupported platform +#endif + // clang-format on #undef ValueNumFuncDef