From 0c4b857d06ac5764498c48a1713632cf6539c523 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 29 Jan 2020 18:16:19 -0800 Subject: [PATCH 01/34] Added ValueNumbering support for GT_SIMD and GT_HWINTRINSIC tree nodes --- src/coreclr/src/jit/compiler.h | 13 ++ src/coreclr/src/jit/valuenum.cpp | 220 +++++++++++++++++++++++++--- src/coreclr/src/jit/valuenumfuncs.h | 21 +++ 3 files changed, 232 insertions(+), 22 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index d8aff229e8156..3fbe532d45403 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -4645,6 +4645,16 @@ class Compiler // Does value-numbering for an intrinsic tree. void fgValueNumberIntrinsic(GenTree* tree); +#ifdef FEATURE_SIMD + // Does value-numbering for a GT_SIMD tree + void fgValueNumberSimd(GenTree* tree); +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + // Does value-numbering for a GT_HWINTRINSIC tree + void fgValueNumberHWIntrinsic(GenTree* tree); +#endif // FEATURE_HW_INTRINSICS + // Does value-numbering for a call. We interpret some helper calls. void fgValueNumberCall(GenTreeCall* call); @@ -4669,6 +4679,9 @@ class Compiler // Adds the exception set for the current tree node which is performing a overflow checking operation void fgValueNumberAddExceptionSetForOverflow(GenTree* tree); + // Adds the exception set for the current tree node which is performing a bounds check operation + void fgValueNumberAddExceptionSetForBoundsCheck(GenTree* tree); + // Adds the exception set for the current tree node which is performing a ckfinite operation void fgValueNumberAddExceptionSetForCkFinite(GenTree* tree); diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 2509bb1912ec7..29feb7f1a7700 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -342,6 +342,15 @@ VNFunc GetVNFuncForNode(GenTree* node) } break; +#ifdef FEATURE_SIMD + case GT_SIMD: + return VNFunc(VNF_SIMD_FIRST + node->AsSIMD()->gtSIMDIntrinsicID); +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + return VNFunc(VNF_HWI_FIRST + (node->AsHWIntrinsic()->gtHWIntrinsicId - NI_HW_INTRINSIC_START - 1)); +#endif // FEATURE_HW_INTRINSICS + case GT_CAST: // GT_CAST can overflow but it has special handling and it should not appear here. unreached(); @@ -1739,14 +1748,15 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) case TYP_BYREF: return VNForByrefCon(0); case TYP_STRUCT: + return VNForZeroMap(); // Recursion! + #ifdef FEATURE_SIMD - // TODO-CQ: Improve value numbering for SIMD types. case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: -#endif // FEATURE_SIMD - return VNForZeroMap(); // Recursion! + return VNForLongCon(0); +#endif // FEATURE_SIMD // These should be unreached. default: @@ -1879,7 +1889,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V assert(arg0VN != NoVN && arg1VN != NoVN); assert(arg0VN == VNNormalValue(arg0VN)); // Arguments carry no exceptions. assert(arg1VN == VNNormalValue(arg1VN)); // Arguments carry no exceptions. - assert(VNFuncArity(func) == 2); + // assert(VNFuncArity(func) == 2); // or HWI_Shuffle assert(func != VNF_MapSelect); // Precondition: use the special function VNForMapSelect defined for that. ValueNum resultVN; @@ -5009,14 +5019,28 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) printf("byrefVal"); break; case TYP_STRUCT: + printf("structVal(zero)"); + break; + #ifdef FEATURE_SIMD case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: + { + INT64 val = ConstantValue(vn); + printf("SimdCns: "); + if ((val > -1000) && (val < 1000)) + { + printf(" %ld", val); + } + else + { + printf(" 0x%llx", val); + } + } + break; #endif // FEATURE_SIMD - printf("structVal"); - break; // These should be unreached. default: @@ -6313,6 +6337,15 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) } break; +#ifdef FEATURE_SIMD + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: + case TYP_SIMD32: + tree->gtVNPair.SetBoth(vnStore->VNForLongCon(INT64(tree->AsIntConCommon()->LngValue()))); + break; +#endif // FEATURE_SIMD + case TYP_FLOAT: tree->gtVNPair.SetBoth(vnStore->VNForFloatCon((float)tree->AsDblCon()->gtDconVal)); break; @@ -6679,6 +6712,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) { genTreeOps oper = tree->OperGet(); +#if 0 #ifdef FEATURE_SIMD // TODO-CQ: For now TYP_SIMD values are not handled by value numbering to be amenable for CSE'ing. if (oper == GT_SIMD) @@ -6707,13 +6741,14 @@ void Compiler::fgValueNumberTree(GenTree* tree) return; } #endif // FEATURE_HW_INTRINSICS +#endif // 0 var_types typ = tree->TypeGet(); if (GenTree::OperIsConst(oper)) { // If this is a struct assignment, with a constant rhs, it is an initBlk, and it is not // really useful to value number the constant. - if (!varTypeIsStruct(tree)) + if (tree->TypeGet() != TYP_STRUCT) { fgValueNumberTreeConst(tree); } @@ -7024,7 +7059,8 @@ void Compiler::fgValueNumberTree(GenTree* tree) unsigned memorySsaNum; #endif - if ((oper == GT_ASG) && !varTypeIsStruct(tree)) + // Allow SIMD types to be value numbered + if ((oper == GT_ASG) && (tree->TypeGet() != TYP_STRUCT)) { GenTree* lhs = tree->AsOp()->gtOp1; GenTree* rhs = tree->AsOp()->gtOp2; @@ -7923,6 +7959,18 @@ void Compiler::fgValueNumberTree(GenTree* tree) { fgValueNumberIntrinsic(tree); } +#ifdef FEATURE_SIMD + else if (tree->OperGet() == GT_SIMD) + { + fgValueNumberSimd(tree); + } +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + else if (tree->OperGet() == GT_HWINTRINSIC) + { + fgValueNumberHWIntrinsic(tree); + } +#endif // FEATURE_HW_INTRINSICS else // Look up the VNFunc for the node { VNFunc vnf = GetVNFuncForNode(tree); @@ -7996,7 +8044,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair op2vnp; ValueNumPair op2Xvnp; vnStore->VNPUnpackExc(op2VNPair, &op2vnp, &op2Xvnp); - ValueNumPair excSet = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + ValueNumPair excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); ValueNum newVN = ValueNumStore::NoVN; @@ -8010,14 +8058,14 @@ void Compiler::fgValueNumberTree(GenTree* tree) if (newVN != ValueNumStore::NoVN) { // We don't care about differences between liberal and conservative for pointer values. - newVN = vnStore->VNWithExc(newVN, excSet.GetLiberal()); + newVN = vnStore->VNWithExc(newVN, excSetPair.GetLiberal()); tree->gtVNPair.SetBoth(newVN); } else { VNFunc vnf = GetVNFuncForNode(tree); ValueNumPair normalPair = vnStore->VNPairForFunc(tree->TypeGet(), vnf, op1vnp, op2vnp); - tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSet); + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); // For overflow checking operations the VNF_OverflowExc will be added below // by fgValueNumberAddExceptionSet } @@ -8145,10 +8193,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair vnpIndex = tree->AsBoundsChk()->gtIndex->gtVNPair; ValueNumPair vnpArrLen = tree->AsBoundsChk()->gtArrLen->gtVNPair; - // Construct the exception set for bounds check - ValueNumPair vnpExcSet = vnStore->VNPExcSetSingleton( - vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc, vnStore->VNPNormalPair(vnpIndex), - vnStore->VNPNormalPair(vnpArrLen))); + ValueNumPair vnpExcSet = ValueNumStore::VNPForEmptyExcSet(); // And collect the exceptions from Index and ArrLen vnpExcSet = vnStore->VNPUnionExcSet(vnpIndex, vnpExcSet); @@ -8157,6 +8202,9 @@ void Compiler::fgValueNumberTree(GenTree* tree) // A bounds check node has no value, but may throw exceptions. tree->gtVNPair = vnStore->VNPWithExc(vnStore->VNPForVoid(), vnpExcSet); + // next add the bounds check exception set for the current tree node + fgValueNumberAddExceptionSet(tree); + // Record non-constant value numbers that are used as the length argument to bounds checks, so that // assertion prop will know that comparisons against them are worth analyzing. ValueNum lengthVN = tree->AsBoundsChk()->gtArrLen->gtVNPair.GetConservative(); @@ -8276,6 +8324,76 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) } } +#ifdef FEATURE_SIMD +// Does value-numbering for a GT_SIMD node. +void Compiler::fgValueNumberSimd(GenTree* tree) +{ + assert(tree->OperGet() == GT_SIMD); + assert(tree->AsOp()->gtOp1 != nullptr); + + // SIMD unary or binary operator. + + ValueNumPair op1vnp; + ValueNumPair op1Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + + ValueNumPair excSetPair; + ValueNumPair normalPair; + + if (tree->AsOp()->gtOp2 == nullptr) + { + // Unary SIMD nodes have a nullptr for op2. + excSetPair = op1Xvnp; + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp); + } + else + { + ValueNumPair op2vnp; + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + } + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +// Does value-numbering for a GT_HWINTRINSIC node +void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) +{ + assert(tree->OperGet() == GT_HWINTRINSIC); + assert(tree->AsOp()->gtOp1 != nullptr); + + // HWINTRINSIC unary or binary operator. + + ValueNumPair op1vnp; + ValueNumPair op1Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + + ValueNumPair excSetPair; + ValueNumPair normalPair; + + if (tree->AsOp()->gtOp2 == nullptr) + { + // Unary SIMD nodes have a nullptr for op2. + excSetPair = op1Xvnp; + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp); + } + else + { + ValueNumPair op2vnp; + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + } + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); +} +#endif // FEATURE_HW_INTRINSICS + void Compiler::fgValueNumberCastTree(GenTree* tree) { assert(tree->OperGet() == GT_CAST); @@ -9355,6 +9473,47 @@ void Compiler::fgValueNumberAddExceptionSetForOverflow(GenTree* tree) tree->gtVNPair = vnStore->VNPWithExc(vnpTreeNorm, newExcSet); } +//-------------------------------------------------------------------------------- +// fgValueNumberAddExceptionSetForBoundsCheck +// - Adds the exception set for the current tree node +// which is performing an bounds check operation +// +// Arguments: +// tree - The current GenTree node, +// It must be a node that performs a bounds check operation +// +// Return Value: +// - The tree's gtVNPair is updated to include the +// VNF_IndexOutOfRangeExc exception set. +// +void Compiler::fgValueNumberAddExceptionSetForBoundsCheck(GenTree* tree) +{ + GenTreeBoundsChk* node = tree->AsBoundsChk(); + assert(node != nullptr); + + ValueNumPair vnpIndex = node->gtIndex->gtVNPair; + ValueNumPair vnpArrLen = node->gtArrLen->gtVNPair; + + // Unpack, Norm,Exc for the tree's VN + // + ValueNumPair vnpTreeNorm; + ValueNumPair vnpTreeExc; + + vnStore->VNPUnpackExc(tree->gtVNPair, &vnpTreeNorm, &vnpTreeExc); + + // Construct the exception set for bounds check + ValueNumPair boundsChkExcSet = vnStore->VNPExcSetSingleton( + vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc, vnStore->VNPNormalPair(vnpIndex), + vnStore->VNPNormalPair(vnpArrLen))); + + // Combine the new Overflow exception with the original exception set of tree + ValueNumPair newExcSet = vnStore->VNPExcSetUnion(vnpTreeExc, boundsChkExcSet); + + // Update the VN for the tree it, the updated VN for tree + // now includes the IndexOutOfRange exception. + tree->gtVNPair = vnStore->VNPWithExc(vnpTreeNorm, newExcSet); +} + //-------------------------------------------------------------------------------- // fgValueNumberAddExceptionSetForCkFinite // - Adds the exception set for the current tree node @@ -9419,9 +9578,27 @@ void Compiler::fgValueNumberAddExceptionSet(GenTree* tree) case GT_ADD: // An Overflow checking ALU operation case GT_SUB: case GT_MUL: + assert(tree->gtOverflowEx()); fgValueNumberAddExceptionSetForOverflow(tree); break; + case GT_DIV: + case GT_UDIV: + case GT_MOD: + case GT_UMOD: + fgValueNumberAddExceptionSetForDivision(tree); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + case GT_ARR_BOUNDS_CHECK: + fgValueNumberAddExceptionSetForBoundsCheck(tree); + break; + case GT_LCLHEAP: // It is not necessary to model the StackOverflow exception for GT_LCLHEAP break; @@ -9461,17 +9638,16 @@ void Compiler::fgValueNumberAddExceptionSet(GenTree* tree) fgValueNumberAddExceptionSetForIndirection(tree, tree->AsArrOffs()->gtArrObj); break; - case GT_DIV: - case GT_UDIV: - case GT_MOD: - case GT_UMOD: - fgValueNumberAddExceptionSetForDivision(tree); - break; - case GT_CKFINITE: fgValueNumberAddExceptionSetForCkFinite(tree); break; +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + // ToDo: model the exceptions for Intrinsics + break; +#endif // FEATURE_HW_INTRINSICS + default: assert(!"Handle this oper in fgValueNumberAddExceptionSet"); break; diff --git a/src/coreclr/src/jit/valuenumfuncs.h b/src/coreclr/src/jit/valuenumfuncs.h index 7e96d430a2f07..e032eba13d0af 100644 --- a/src/coreclr/src/jit/valuenumfuncs.h +++ b/src/coreclr/src/jit/valuenumfuncs.h @@ -157,6 +157,27 @@ ValueNumFuncDef(ADD_UN_OVF, 2, true, false, false) // unsigned overflow checkin ValueNumFuncDef(SUB_UN_OVF, 2, false, false, false) ValueNumFuncDef(MUL_UN_OVF, 2, true, false, false) +#define SIMD_INTRINSIC(m, i, id, n, r, argCount, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \ +ValueNumFuncDef(SIMD_##id, argCount, false, false, false) // All of the SIMD intrinsic (Consider isCommutativeSIMDIntrinsic) +#include "simdintrinsiclist.h" +#define VNF_SIMD_FIRST VNF_SIMD_None + +#if defined(_TARGET_XARCH_) +#define HARDWARE_INTRINSIC(id, name, isa, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ +ValueNumFuncDef(HWI_##id, argCount, false, false, false) // All of the HARDWARE_INTRINSICS for x86/x64 +#include "hwintrinsiclistxarch.h" +#define VNF_HWI_FIRST VNF_HWI_Vector128_As + +#elif defined (_TARGET_ARM64_) +#define HARDWARE_INTRINSIC(isa, name, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ +ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of the HARDWARE_INTRINSICS form arm64 +#include "hwintrinsiclistarm64.h" +#define VNF_HWI_FIRST VNF_HWI_Vector64_AsByte + +#else +#error Unsupported platform +#endif + // clang-format on #undef ValueNumFuncDef From 367da1f5000fd03f356bb2f2b3789dceff61c5e0 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 30 Jan 2020 17:27:25 -0800 Subject: [PATCH 02/34] Allow SIMD and HW Intrinsics to be CSE candidates --- src/coreclr/src/jit/optcse.cpp | 19 ++++++++-- src/coreclr/src/jit/valuenum.cpp | 62 +++++++++++++++++++------------- 2 files changed, 54 insertions(+), 27 deletions(-) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 4a9a40541304c..24f1192c51d0a 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -3280,10 +3280,25 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) case GT_LE: case GT_GE: case GT_GT: - return true; // Also CSE these Comparison Operators + return true; // Allow the CSE of Comparison operators + +#ifdef FEATURE_SIMD + case GT_SIMD: + return true; // allow SIMD intrinsics to be CSE-ed + +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + return true; // allow Hardware Intrinsics to be CSE-ed + +#endif // FEATURE_HW_INTRINSICS case GT_INTRINSIC: - return true; // Intrinsics + return true; // allow Intrinsics to be CSE-ed + + case GT_OBJ: + return varTypeIsEnregisterable(type); // Allow enregisterable GT_OBJ's to be CSE-ed. (i.e. SIMD types) case GT_COMMA: return true; // Allow GT_COMMA nodes to be CSE-ed. diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 29feb7f1a7700..3ea00efb3999f 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -1889,8 +1889,8 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V assert(arg0VN != NoVN && arg1VN != NoVN); assert(arg0VN == VNNormalValue(arg0VN)); // Arguments carry no exceptions. assert(arg1VN == VNNormalValue(arg1VN)); // Arguments carry no exceptions. - // assert(VNFuncArity(func) == 2); // or HWI_Shuffle - assert(func != VNF_MapSelect); // Precondition: use the special function VNForMapSelect defined for that. + assert(VNFuncArity(func) == 2); // or HWI_Shuffle + assert(func != VNF_MapSelect); // Precondition: use the special function VNForMapSelect defined for that. ValueNum resultVN; @@ -6746,8 +6746,8 @@ void Compiler::fgValueNumberTree(GenTree* tree) var_types typ = tree->TypeGet(); if (GenTree::OperIsConst(oper)) { - // If this is a struct assignment, with a constant rhs, it is an initBlk, and it is not - // really useful to value number the constant. + // If this is a struct assignment, with a constant rhs, (i,.e. an initBlk), + // it is not useful to value number the constant. if (tree->TypeGet() != TYP_STRUCT) { fgValueNumberTreeConst(tree); @@ -7059,8 +7059,8 @@ void Compiler::fgValueNumberTree(GenTree* tree) unsigned memorySsaNum; #endif - // Allow SIMD types to be value numbered - if ((oper == GT_ASG) && (tree->TypeGet() != TYP_STRUCT)) + // Allow assignments for all enregisterable types to be value numbered (SIMD types) + if ((oper == GT_ASG) && varTypeIsEnregisterable(tree)) { GenTree* lhs = tree->AsOp()->gtOp1; GenTree* rhs = tree->AsOp()->gtOp2; @@ -7630,7 +7630,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) } } // Other kinds of assignment: initblk and copyblk. - else if (oper == GT_ASG && varTypeIsStruct(tree)) + else if (oper == GT_ASG && (tree->TypeGet() == TYP_STRUCT)) { fgValueNumberBlockAssignment(tree); } @@ -7959,18 +7959,21 @@ void Compiler::fgValueNumberTree(GenTree* tree) { fgValueNumberIntrinsic(tree); } + #ifdef FEATURE_SIMD else if (tree->OperGet() == GT_SIMD) { - fgValueNumberSimd(tree); + fgValueNumberSimd(tree); } #endif // FEATURE_SIMD + #ifdef FEATURE_HW_INTRINSICS else if (tree->OperGet() == GT_HWINTRINSIC) { fgValueNumberHWIntrinsic(tree); } #endif // FEATURE_HW_INTRINSICS + else // Look up the VNFunc for the node { VNFunc vnf = GetVNFuncForNode(tree); @@ -8364,31 +8367,40 @@ void Compiler::fgValueNumberSimd(GenTree* tree) void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) { assert(tree->OperGet() == GT_HWINTRINSIC); - assert(tree->AsOp()->gtOp1 != nullptr); - - // HWINTRINSIC unary or binary operator. - - ValueNumPair op1vnp; - ValueNumPair op1Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); ValueNumPair excSetPair; ValueNumPair normalPair; - if (tree->AsOp()->gtOp2 == nullptr) + // There are some HWINTRINSICS that have zero args, i.e. NI_Vector128_Zero + if (tree->AsOp()->gtOp1 == nullptr) { // Unary SIMD nodes have a nullptr for op2. - excSetPair = op1Xvnp; - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp); + excSetPair = ValueNumStore::VNPForEmptyExcSet(); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree)); } else { - ValueNumPair op2vnp; - ValueNumPair op2Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + // HWINTRINSIC unary or binary operator. - excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + ValueNumPair op1vnp; + ValueNumPair op1Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + + if (tree->AsOp()->gtOp2 == nullptr) + { + // Unary SIMD nodes have a nullptr for op2. + excSetPair = op1Xvnp; + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp); + } + else + { + ValueNumPair op2vnp; + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + } } tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); } @@ -9491,7 +9503,7 @@ void Compiler::fgValueNumberAddExceptionSetForBoundsCheck(GenTree* tree) GenTreeBoundsChk* node = tree->AsBoundsChk(); assert(node != nullptr); - ValueNumPair vnpIndex = node->gtIndex->gtVNPair; + ValueNumPair vnpIndex = node->gtIndex->gtVNPair; ValueNumPair vnpArrLen = node->gtArrLen->gtVNPair; // Unpack, Norm,Exc for the tree's VN @@ -9504,7 +9516,7 @@ void Compiler::fgValueNumberAddExceptionSetForBoundsCheck(GenTree* tree) // Construct the exception set for bounds check ValueNumPair boundsChkExcSet = vnStore->VNPExcSetSingleton( vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc, vnStore->VNPNormalPair(vnpIndex), - vnStore->VNPNormalPair(vnpArrLen))); + vnStore->VNPNormalPair(vnpArrLen))); // Combine the new Overflow exception with the original exception set of tree ValueNumPair newExcSet = vnStore->VNPExcSetUnion(vnpTreeExc, boundsChkExcSet); From 57eadfbb5f25dd82022650822c4bcb56a41e60a8 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 4 Feb 2020 14:05:38 -0800 Subject: [PATCH 03/34] Correctness fix for optAssertionPropMain - Zero out the bbAssertionIn values, as these can be referenced in RangeCheck::MergeAssertion and this is shared state with the CSE phase: bbCseIn --- src/coreclr/src/jit/assertionprop.cpp | 9 ++++++++- src/coreclr/src/jit/hwintrinsiclistxarch.h | 8 ++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/coreclr/src/jit/assertionprop.cpp b/src/coreclr/src/jit/assertionprop.cpp index f3f95ffe2e6d6..916921f36cd3f 100644 --- a/src/coreclr/src/jit/assertionprop.cpp +++ b/src/coreclr/src/jit/assertionprop.cpp @@ -5193,8 +5193,15 @@ void Compiler::optAssertionPropMain() } } - if (!optAssertionCount) + if (optAssertionCount == 0) { + // Zero out the bbAssertionIn values, as these can be referenced in RangeCheck::MergeAssertion + // and this is sharedstate with the CSE phase: bbCseIn + // + for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) + { + block->bbAssertionIn = BitVecOps::MakeEmpty(apTraits); + } return; } diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 42eda73b5d783..7bd8c15142743 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -483,10 +483,10 @@ HARDWARE_INTRINSIC(AVX2_ConvertToUInt32, "ConvertToUI HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int16, "ConvertToVector256Int16", AVX2, -1, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32, "ConvertToVector256Int32", AVX2, -1, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64, "ConvertToVector256Int64", AVX2, -1, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2_HorizontalAdd, "HorizontalAdd", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_invalid, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate, "HorizontalAddSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalSubtract, "HorizontalSubtract", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) From 6d2028c36db0792c6fbb2342abb1fe6b8492054d Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 4 Feb 2020 14:08:34 -0800 Subject: [PATCH 04/34] Improve the VNFOA_ArityMask --- src/coreclr/src/jit/valuenum.cpp | 121 ++++++++++++++++++++++--------- src/coreclr/src/jit/valuenum.h | 7 +- 2 files changed, 90 insertions(+), 38 deletions(-) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 3ea00efb3999f..635cc0bfc9c42 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5251,9 +5251,9 @@ UINT8* ValueNumStore::s_vnfOpAttribs = nullptr; void ValueNumStore::InitValueNumStoreStatics() { - // Make sure we've gotten constants right... - assert(unsigned(VNFOA_Arity) == (1 << VNFOA_ArityShift)); - assert(unsigned(VNFOA_AfterArity) == (unsigned(VNFOA_Arity) << VNFOA_ArityBits)); + // Make sure we have the constants right... + assert(unsigned(VNFOA_Arity1) == (1 << VNFOA_ArityShift)); + assert(VNFOA_ArityMask == (VNFOA_MaxArity << VNFOA_ArityShift)); s_vnfOpAttribs = &vnfOpAttribs[0]; for (unsigned i = 0; i < GT_COUNT; i++) @@ -5285,19 +5285,30 @@ void ValueNumStore::InitValueNumStoreStatics() int vnfNum = VNF_Boundary + 1; // The macro definition below will update this after using it. -#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \ - if (commute) \ - vnfOpAttribs[vnfNum] |= VNFOA_Commutative; \ - if (knownNonNull) \ - vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \ - if (sharedStatic) \ - vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ +#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \ + if (commute) \ + vnfOpAttribs[vnfNum] |= VNFOA_Commutative; \ + if (knownNonNull) \ + vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \ + if (sharedStatic) \ + vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \ + vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ vnfNum++; #include "valuenumfuncs.h" #undef ValueNumFuncDef + assert(vnfNum == VNF_COUNT); + +#define ValueNumFuncSetArity(vnfNum, arity) \ + vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ + vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); /* set the new arity value */ + + // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC it always has two operands. + ValueNumFuncSetArity(VNF_HWI_SSE2_Shuffle, 2) + +#undef ValueNumFuncSetArity + for (unsigned i = 0; i < _countof(genTreeOpsIllegalAsVNFunc); i++) { vnfOpAttribs[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp; @@ -8332,31 +8343,50 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) void Compiler::fgValueNumberSimd(GenTree* tree) { assert(tree->OperGet() == GT_SIMD); - assert(tree->AsOp()->gtOp1 != nullptr); - - // SIMD unary or binary operator. - - ValueNumPair op1vnp; - ValueNumPair op1Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + GenTreeSIMD* simdNode = tree->AsSIMD(); + assert(simdNode != nullptr); ValueNumPair excSetPair; ValueNumPair normalPair; - if (tree->AsOp()->gtOp2 == nullptr) + // There are some SIMD operations that have zero args, i.e. NI_Vector128_Zero + if (tree->AsOp()->gtOp1 == nullptr) { - // Unary SIMD nodes have a nullptr for op2. - excSetPair = op1Xvnp; - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp); + excSetPair = ValueNumStore::VNPForEmptyExcSet(); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree)); } - else + else if (tree->AsOp()->gtOp1->OperIs(GT_LIST)) { - ValueNumPair op2vnp; - ValueNumPair op2Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + assert(tree->AsOp()->gtOp2 == nullptr); + + // We have a SIMD node in the GT_LIST form with 3 or more args + // For now we will generate a unique value number for this case. - excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + // Generate unique VN + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + return; + } + else // SIMD unary or binary operator. + { + ValueNumPair op1vnp; + ValueNumPair op1Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + + if (tree->AsOp()->gtOp2 == nullptr) + { + // Unary SIMD nodes have a nullptr for op2. + excSetPair = op1Xvnp; + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp); + } + else + { + ValueNumPair op2vnp; + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + } } tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); } @@ -8367,39 +8397,60 @@ void Compiler::fgValueNumberSimd(GenTree* tree) void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) { assert(tree->OperGet() == GT_HWINTRINSIC); + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); + int lookupNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode); + VNFunc func = GetVNFuncForNode(tree); + unsigned fixedArity = vnStore->VNFuncArity(func); + // + // fixedArity is the numArgs column in "hwinstrinsiclistxarch.h" + // a -1 value is translated into an unsigned value of 7 ValueNumPair excSetPair; ValueNumPair normalPair; - // There are some HWINTRINSICS that have zero args, i.e. NI_Vector128_Zero + // There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector128_Zero if (tree->AsOp()->gtOp1 == nullptr) { - // Unary SIMD nodes have a nullptr for op2. + assert(fixedArity == 0); excSetPair = ValueNumStore::VNPForEmptyExcSet(); - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree)); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func); + assert(lookupNumArgs == 0); } - else + else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (fixedArity == 7)) { - // HWINTRINSIC unary or binary operator. + // We have a HWINTRINSIC node in the GT_LIST form with 3 or more args + // Or the numArgs waspecifdied as -1 in the numArgs column in "hwinstrinsiclistxarch.h" + // For now we will generate a unique value number for this case. + // Generate unique VN + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + return; + } + else // HWINTRINSIC unary or binary operator. + { ValueNumPair op1vnp; ValueNumPair op1Xvnp; vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); if (tree->AsOp()->gtOp2 == nullptr) { + assert(fixedArity == 1); // Unary SIMD nodes have a nullptr for op2. excSetPair = op1Xvnp; - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp); + assert(lookupNumArgs == 1); } else { + assert(fixedArity == 2); ValueNumPair op2vnp; ValueNumPair op2Xvnp; vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, op2vnp); + assert(lookupNumArgs == 2); } } tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); diff --git a/src/coreclr/src/jit/valuenum.h b/src/coreclr/src/jit/valuenum.h index fdb6268892193..411669912c127 100644 --- a/src/coreclr/src/jit/valuenum.h +++ b/src/coreclr/src/jit/valuenum.h @@ -134,8 +134,9 @@ class ValueNumStore { VNFOA_IllegalGenTreeOp = 0x1, // corresponds to a genTreeOps value that is not a legal VN func. VNFOA_Commutative = 0x2, // 1 iff the function is commutative. - VNFOA_Arity = 0x4, // Bits 2..3 encode the arity. - VNFOA_AfterArity = 0x20, // Makes it clear what value the next flag(s) after Arity should have. + VNFOA_Arity1 = 0x4, // Bits 2,3,4 encode the arity. + VNFOA_Arity2 = 0x8, // Bits 2,3,4 encode the arity. + VNFOA_Arity4 = 0x10, // Bits 2,3,4 encode the arity. VNFOA_KnownNonNull = 0x20, // 1 iff the result is known to be non-null. VNFOA_SharedStatic = 0x40, // 1 iff this VNF is represent one of the shared static jit helpers }; @@ -143,7 +144,7 @@ class ValueNumStore static const unsigned VNFOA_ArityShift = 2; static const unsigned VNFOA_ArityBits = 3; static const unsigned VNFOA_MaxArity = (1 << VNFOA_ArityBits) - 1; // Max arity we can represent. - static const unsigned VNFOA_ArityMask = VNFOA_AfterArity - VNFOA_Arity; + static const unsigned VNFOA_ArityMask = (VNFOA_Arity4 | VNFOA_Arity2| VNFOA_Arity1); // These enum constants are used to encode the cast operation in the lowest bits by VNForCastOper enum VNFCastAttrib From 0d0e4f82d53dcffc65714d4a9dd95e19ea12ea25 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 4 Feb 2020 18:26:31 -0800 Subject: [PATCH 05/34] Added check for simdNode->OperIsMemoryLoad()) to fgValueNumberSimd Added bool OperIsMemoryLoad() to GenTreeSIMD, returns true for SIMDIntrinsicInitArray Added valuenumfuncs.h to src/coreclr/src/jit/CMakeLists.txt --- src/coreclr/src/jit/CMakeLists.txt | 1 + src/coreclr/src/jit/gentree.cpp | 19 ++++++ src/coreclr/src/jit/gentree.h | 3 + src/coreclr/src/jit/optcse.cpp | 2 +- src/coreclr/src/jit/valuenum.cpp | 96 ++++++++++++++++++++++------- src/coreclr/src/jit/valuenum.h | 2 +- src/coreclr/src/jit/valuenumfuncs.h | 2 + 7 files changed, 101 insertions(+), 24 deletions(-) diff --git a/src/coreclr/src/jit/CMakeLists.txt b/src/coreclr/src/jit/CMakeLists.txt index b63e08ae2436d..9d1dbe0110dc6 100644 --- a/src/coreclr/src/jit/CMakeLists.txt +++ b/src/coreclr/src/jit/CMakeLists.txt @@ -186,6 +186,7 @@ if (CLR_CMAKE_TARGET_WIN32) unwind.h utils.h valuenum.h + valuenumfuncs.h valuenumtype.h varset.h vartype.h diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index a9aac9d6d4c22..173b5fc2341b9 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -5398,6 +5398,13 @@ bool GenTree::OperIsImplicitIndir() const case GT_ARR_ELEM: case GT_ARR_OFFSET: return true; +#ifdef FEATURE_SIMD + case GT_SIMD: + { + GenTreeSIMD* simdNode = (const_cast(this))->AsSIMD(); + return simdNode->OperIsMemoryLoad(); + } +#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { @@ -18326,6 +18333,18 @@ GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORI return node; } +// Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, false otherwise +bool GenTreeSIMD::OperIsMemoryLoad() +{ +#ifdef _TARGET_XARCH_ + if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + { + return true; + } +#endif // _TARGET_XARCH_ + return false; +} + // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeHWIntrinsic::OperIsMemoryLoad() { diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 755b5ee4c5d0f..4011361c295e9 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -4536,6 +4536,9 @@ struct GenTreeSIMD : public GenTreeJitIntrinsic { } + bool OperIsMemoryLoad(); // Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, + // false otherwise + #if DEBUGGABLE_GENTREE GenTreeSIMD() : GenTreeJitIntrinsic() { diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 24f1192c51d0a..2b37feeef5811 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -3295,7 +3295,7 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) #endif // FEATURE_HW_INTRINSICS case GT_INTRINSIC: - return true; // allow Intrinsics to be CSE-ed + return true; // allow Intrinsics to be CSE-ed case GT_OBJ: return varTypeIsEnregisterable(type); // Allow enregisterable GT_OBJ's to be CSE-ed. (i.e. SIMD types) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 635cc0bfc9c42..8d3d47d325aa1 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -1889,8 +1889,8 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V assert(arg0VN != NoVN && arg1VN != NoVN); assert(arg0VN == VNNormalValue(arg0VN)); // Arguments carry no exceptions. assert(arg1VN == VNNormalValue(arg1VN)); // Arguments carry no exceptions. - assert(VNFuncArity(func) == 2); // or HWI_Shuffle - assert(func != VNF_MapSelect); // Precondition: use the special function VNForMapSelect defined for that. + assert(VNFuncArity(func) == 2); + assert(func != VNF_MapSelect); // Precondition: use the special function VNForMapSelect defined for that. ValueNum resultVN; @@ -5285,14 +5285,14 @@ void ValueNumStore::InitValueNumStoreStatics() int vnfNum = VNF_Boundary + 1; // The macro definition below will update this after using it. -#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \ - if (commute) \ - vnfOpAttribs[vnfNum] |= VNFOA_Commutative; \ - if (knownNonNull) \ - vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \ - if (sharedStatic) \ - vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ +#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \ + if (commute) \ + vnfOpAttribs[vnfNum] |= VNFOA_Commutative; \ + if (knownNonNull) \ + vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \ + if (sharedStatic) \ + vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \ + vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ vnfNum++; #include "valuenumfuncs.h" @@ -5300,16 +5300,18 @@ void ValueNumStore::InitValueNumStoreStatics() assert(vnfNum == VNF_COUNT); -#define ValueNumFuncSetArity(vnfNum, arity) \ - vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); /* set the new arity value */ +#define ValueNumFuncSetArity(vnfNum, arity) \ + vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ + vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); /* set the new arity value */ - // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC it always has two operands. +#if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_) + // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC always has two operands. ValueNumFuncSetArity(VNF_HWI_SSE2_Shuffle, 2) +#endif #undef ValueNumFuncSetArity - for (unsigned i = 0; i < _countof(genTreeOpsIllegalAsVNFunc); i++) + for (unsigned i = 0; i < _countof(genTreeOpsIllegalAsVNFunc); i++) { vnfOpAttribs[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp; } @@ -6353,7 +6355,16 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: - tree->gtVNPair.SetBoth(vnStore->VNForLongCon(INT64(tree->AsIntConCommon()->LngValue()))); + +#ifdef _TARGET_64BIT_ + // Only zero SIMD constants are currently allowed + // + assert(tree->AsIntConCommon()->LngValue() == 0); + tree->gtVNPair.SetBoth(vnStore->VNForLongCon(tree->AsIntConCommon()->LngValue())); +#else // 32BIT + assert(tree->AsIntConCommon()->IconValue() == 0); + tree->gtVNPair.SetBoth(vnStore->VNForIntCon(tree->AsIntConCommon()->IconValue())); +#endif break; #endif // FEATURE_SIMD @@ -8345,7 +8356,6 @@ void Compiler::fgValueNumberSimd(GenTree* tree) assert(tree->OperGet() == GT_SIMD); GenTreeSIMD* simdNode = tree->AsSIMD(); assert(simdNode != nullptr); - ValueNumPair excSetPair; ValueNumPair normalPair; @@ -8362,7 +8372,7 @@ void Compiler::fgValueNumberSimd(GenTree* tree) // We have a SIMD node in the GT_LIST form with 3 or more args // For now we will generate a unique value number for this case. - // Generate unique VN + // Generate a unique VN tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); return; } @@ -8372,6 +8382,48 @@ void Compiler::fgValueNumberSimd(GenTree* tree) ValueNumPair op1Xvnp; vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); +#ifdef _TARGET_XARCH_ + if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + { + // rationalize rewrites this as an explicit load with op1 as the base address + + ValueNumPair op2vnp; + if (tree->AsOp()->gtOp2 == nullptr) + { + // a nullptr for op2 means that we have an impicit index of zero + op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); + + excSetPair = op1Xvnp; + } + else // We have an explicit index in op2 + { + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + } + + ValueNum addrVN = + vnStore->VNForFunc(TYP_BYREF, GetVNFuncForNode(tree), op1vnp.GetLiberal(), op2vnp.GetLiberal()); +#ifdef DEBUG + if (verbose) + { + printf("Treating GT_SIMD InitArray as a ByrefExposed load , addrVN is "); + vnPrint(addrVN, 0); + } +#endif // DEBUG + + // The address points into the heap, so it is an ByrefExposed load. + // + ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN); + tree->gtVNPair.SetLiberal(loadVN); + tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); + fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); + return; + } +#endif // _TARGET_XARCH_ + if (tree->AsOp()->gtOp2 == nullptr) { // Unary SIMD nodes have a nullptr for op2. @@ -8399,9 +8451,9 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) assert(tree->OperGet() == GT_HWINTRINSIC); GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); assert(hwIntrinsicNode != nullptr); - int lookupNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode); - VNFunc func = GetVNFuncForNode(tree); - unsigned fixedArity = vnStore->VNFuncArity(func); + int lookupNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode); + VNFunc func = GetVNFuncForNode(tree); + unsigned fixedArity = vnStore->VNFuncArity(func); // // fixedArity is the numArgs column in "hwinstrinsiclistxarch.h" // a -1 value is translated into an unsigned value of 7 @@ -8423,7 +8475,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) // Or the numArgs waspecifdied as -1 in the numArgs column in "hwinstrinsiclistxarch.h" // For now we will generate a unique value number for this case. - // Generate unique VN + // Generate unique VN tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); return; } diff --git a/src/coreclr/src/jit/valuenum.h b/src/coreclr/src/jit/valuenum.h index 411669912c127..2b0d239719b74 100644 --- a/src/coreclr/src/jit/valuenum.h +++ b/src/coreclr/src/jit/valuenum.h @@ -144,7 +144,7 @@ class ValueNumStore static const unsigned VNFOA_ArityShift = 2; static const unsigned VNFOA_ArityBits = 3; static const unsigned VNFOA_MaxArity = (1 << VNFOA_ArityBits) - 1; // Max arity we can represent. - static const unsigned VNFOA_ArityMask = (VNFOA_Arity4 | VNFOA_Arity2| VNFOA_Arity1); + static const unsigned VNFOA_ArityMask = (VNFOA_Arity4 | VNFOA_Arity2 | VNFOA_Arity1); // These enum constants are used to encode the cast operation in the lowest bits by VNForCastOper enum VNFCastAttrib diff --git a/src/coreclr/src/jit/valuenumfuncs.h b/src/coreclr/src/jit/valuenumfuncs.h index e032eba13d0af..0f9292b9447ea 100644 --- a/src/coreclr/src/jit/valuenumfuncs.h +++ b/src/coreclr/src/jit/valuenumfuncs.h @@ -174,6 +174,8 @@ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of t #include "hwintrinsiclistarm64.h" #define VNF_HWI_FIRST VNF_HWI_Vector64_AsByte +#elif defined (_TARGET_ARM_) +// No Hardware Intrinsics on ARM32 #else #error Unsupported platform #endif From 0232044c2d360ded65bfb18d456f5da0556be28b Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 5 Feb 2020 16:42:42 -0800 Subject: [PATCH 06/34] Update to use the new TARGET macros --- src/coreclr/src/jit/gentree.cpp | 4 ++-- src/coreclr/src/jit/valuenum.cpp | 10 +++++----- src/coreclr/src/jit/valuenumfuncs.h | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 173b5fc2341b9..13ca13612e12f 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -18336,12 +18336,12 @@ GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORI // Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeSIMD::OperIsMemoryLoad() { -#ifdef _TARGET_XARCH_ +#ifdef TARGET_XARCH if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) { return true; } -#endif // _TARGET_XARCH_ +#endif // TARGET_XARCH return false; } diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 8d3d47d325aa1..37397c524f1ea 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5304,7 +5304,7 @@ void ValueNumStore::InitValueNumStoreStatics() vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); /* set the new arity value */ -#if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_) +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC always has two operands. ValueNumFuncSetArity(VNF_HWI_SSE2_Shuffle, 2) #endif @@ -6356,8 +6356,8 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) case TYP_SIMD16: case TYP_SIMD32: -#ifdef _TARGET_64BIT_ - // Only zero SIMD constants are currently allowed +#ifdef TARGET_64BIT + // Only the zero constant is currently allowed for SIMD types // assert(tree->AsIntConCommon()->LngValue() == 0); tree->gtVNPair.SetBoth(vnStore->VNForLongCon(tree->AsIntConCommon()->LngValue())); @@ -8382,7 +8382,7 @@ void Compiler::fgValueNumberSimd(GenTree* tree) ValueNumPair op1Xvnp; vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); -#ifdef _TARGET_XARCH_ +#ifdef TARGET_XARCH if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitArray) { // rationalize rewrites this as an explicit load with op1 as the base address @@ -8422,7 +8422,7 @@ void Compiler::fgValueNumberSimd(GenTree* tree) fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); return; } -#endif // _TARGET_XARCH_ +#endif // TARGET_XARCH if (tree->AsOp()->gtOp2 == nullptr) { diff --git a/src/coreclr/src/jit/valuenumfuncs.h b/src/coreclr/src/jit/valuenumfuncs.h index 0f9292b9447ea..f72181a5b211a 100644 --- a/src/coreclr/src/jit/valuenumfuncs.h +++ b/src/coreclr/src/jit/valuenumfuncs.h @@ -162,19 +162,19 @@ ValueNumFuncDef(SIMD_##id, argCount, false, false, false) // All of the SIMD i #include "simdintrinsiclist.h" #define VNF_SIMD_FIRST VNF_SIMD_None -#if defined(_TARGET_XARCH_) +#if defined(TARGET_XARCH) #define HARDWARE_INTRINSIC(id, name, isa, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ ValueNumFuncDef(HWI_##id, argCount, false, false, false) // All of the HARDWARE_INTRINSICS for x86/x64 #include "hwintrinsiclistxarch.h" #define VNF_HWI_FIRST VNF_HWI_Vector128_As -#elif defined (_TARGET_ARM64_) +#elif defined (TARGET_ARM64) #define HARDWARE_INTRINSIC(isa, name, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of the HARDWARE_INTRINSICS form arm64 #include "hwintrinsiclistarm64.h" #define VNF_HWI_FIRST VNF_HWI_Vector64_AsByte -#elif defined (_TARGET_ARM_) +#elif defined (TARGET_ARM) // No Hardware Intrinsics on ARM32 #else #error Unsupported platform From d8b9b53efc5782a451d3836ce354da376872fdad Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 6 Feb 2020 12:00:58 -0800 Subject: [PATCH 07/34] Include node type when value numbering SIMDIntrinsicInit Mutate the gloabl heap when performing a HW_INTRINSIC memory store operation Printing of SIMD constants only support 0 --- src/coreclr/src/jit/valuenum.cpp | 99 ++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 38 deletions(-) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 37397c524f1ea..7b2d645e43f0d 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5028,16 +5028,11 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) case TYP_SIMD16: case TYP_SIMD32: { + // Only the zero constant is currently allowed for SIMD types + // INT64 val = ConstantValue(vn); - printf("SimdCns: "); - if ((val > -1000) && (val < 1000)) - { - printf(" %ld", val); - } - else - { - printf(" 0x%llx", val); - } + assert(val == 0); + printf(" 0"); } break; #endif // FEATURE_SIMD @@ -8383,45 +8378,65 @@ void Compiler::fgValueNumberSimd(GenTree* tree) vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); #ifdef TARGET_XARCH - if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + switch (simdNode->gtSIMDIntrinsicID) { - // rationalize rewrites this as an explicit load with op1 as the base address - - ValueNumPair op2vnp; - if (tree->AsOp()->gtOp2 == nullptr) + case SIMDIntrinsicInitArray: { - // a nullptr for op2 means that we have an impicit index of zero - op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); + // rationalize rewrites this as an explicit load with op1 as the base address - excSetPair = op1Xvnp; - } - else // We have an explicit index in op2 - { - ValueNumPair op2Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + ValueNumPair op2vnp; + if (tree->AsOp()->gtOp2 == nullptr) + { + // a nullptr for op2 means that we have an impicit index of zero + op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); - excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); - } + excSetPair = op1Xvnp; + } + else // We have an explicit index in op2 + { + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + } - ValueNum addrVN = - vnStore->VNForFunc(TYP_BYREF, GetVNFuncForNode(tree), op1vnp.GetLiberal(), op2vnp.GetLiberal()); + ValueNum addrVN = + vnStore->VNForFunc(TYP_BYREF, GetVNFuncForNode(tree), op1vnp.GetLiberal(), op2vnp.GetLiberal()); #ifdef DEBUG - if (verbose) + if (verbose) + { + printf("Treating GT_SIMD InitArray as a ByrefExposed load , addrVN is "); + vnPrint(addrVN, 0); + } +#endif // DEBUG + + // The address points into the heap, so it is an ByrefExposed load. + // + ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN); + tree->gtVNPair.SetLiberal(loadVN); + tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); + fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); + return; + } + + case SIMDIntrinsicInit: { - printf("Treating GT_SIMD InitArray as a ByrefExposed load , addrVN is "); - vnPrint(addrVN, 0); + // Also encode the resulting type as op2vnp + ValueNumPair op2vnp; + op2vnp.SetBoth(vnStore->VNForIntCon(INT32(tree->TypeGet()))); + + excSetPair = op1Xvnp; + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); + return; } -#endif // DEBUG - // The address points into the heap, so it is an ByrefExposed load. - // - ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN); - tree->gtVNPair.SetLiberal(loadVN); - tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); - tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); - fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); - return; + default: + // Handled below as a normal SIMD unary or binary node + break; } + #endif // TARGET_XARCH if (tree->AsOp()->gtOp2 == nullptr) @@ -8451,6 +8466,14 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) assert(tree->OperGet() == GT_HWINTRINSIC); GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); assert(hwIntrinsicNode != nullptr); + + // For safety/correctness we must mutate the global heap valuenumber + // for any HW intrinsic that performs a memory store operation + if (hwIntrinsicNode->OperIsMemoryStore()) + { + fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); + } + int lookupNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode); VNFunc func = GetVNFuncForNode(tree); unsigned fixedArity = vnStore->VNFuncArity(func); From e188b9b2a0d1159d783401956814bc3c967a70bf Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 7 Feb 2020 14:31:59 -0800 Subject: [PATCH 08/34] Disable CSE's for some special HW_INTRINSIC categories --- src/coreclr/src/jit/optcse.cpp | 31 ++++++++++++++++++++++++++++++ src/coreclr/src/jit/valuenum.cpp | 33 +------------------------------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 2b37feeef5811..72d4f58daa1ac 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -3290,6 +3290,37 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); + HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(hwIntrinsicNode->gtHWIntrinsicId); + + switch (category) + { + case HW_Category_SimpleSIMD: + case HW_Category_IMM: + case HW_Category_Scalar: + case HW_Category_SIMDScalar: + case HW_Category_Helper: + break; + + case HW_Category_MemoryLoad: + case HW_Category_MemoryStore: + case HW_Category_Special: + default: + return false; + } + + if (hwIntrinsicNode->OperIsMemoryStore()) + { + // NI_BMI2_MultiplyNoFlags, etc... + return false; + } + if (hwIntrinsicNode->OperIsMemoryLoad()) + { + // NI_AVX2_BroadcastScalarToVector128, NI_AVX2_GatherVector128, etc... + return false; + } + return true; // allow Hardware Intrinsics to be CSE-ed #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 7b2d645e43f0d..e2be215f7e3a3 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -6729,37 +6729,6 @@ void Compiler::fgValueNumberTree(GenTree* tree) { genTreeOps oper = tree->OperGet(); -#if 0 -#ifdef FEATURE_SIMD - // TODO-CQ: For now TYP_SIMD values are not handled by value numbering to be amenable for CSE'ing. - if (oper == GT_SIMD) - { - tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); - return; - } -#endif - -#ifdef FEATURE_HW_INTRINSICS - if (oper == GT_HWINTRINSIC) - { - // TODO-CQ: For now hardware intrinsics are not handled by value numbering to be amenable for CSE'ing. - tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); - - GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); - assert(hwIntrinsicNode != nullptr); - - // For safety/correctness we must mutate the global heap valuenumber - // for any HW intrinsic that performs a memory store operation - if (hwIntrinsicNode->OperIsMemoryStore()) - { - fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); - } - - return; - } -#endif // FEATURE_HW_INTRINSICS -#endif // 0 - var_types typ = tree->TypeGet(); if (GenTree::OperIsConst(oper)) { @@ -8495,7 +8464,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (fixedArity == 7)) { // We have a HWINTRINSIC node in the GT_LIST form with 3 or more args - // Or the numArgs waspecifdied as -1 in the numArgs column in "hwinstrinsiclistxarch.h" + // Or the numArgs was specified as -1 in the numArgs column in "hwinstrinsiclistxarch.h" // For now we will generate a unique value number for this case. // Generate unique VN From 506a544a06e45dfa60442ad92edcdd92e8a98c66 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 7 Feb 2020 17:22:51 -0800 Subject: [PATCH 09/34] Code review feedback --- src/coreclr/src/jit/gentree.cpp | 36 ++++++++++++++++----------------- src/coreclr/src/jit/gentree.h | 12 +++++------ src/coreclr/src/jit/optcse.cpp | 2 ++ 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 13ca13612e12f..7b1a66ba678ee 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -5401,15 +5401,13 @@ bool GenTree::OperIsImplicitIndir() const #ifdef FEATURE_SIMD case GT_SIMD: { - GenTreeSIMD* simdNode = (const_cast(this))->AsSIMD(); - return simdNode->OperIsMemoryLoad(); + return AsSIMD()->OperIsMemoryLoad(); } #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { - GenTreeHWIntrinsic* hwIntrinsicNode = (const_cast(this))->AsHWIntrinsic(); - return hwIntrinsicNode->OperIsMemoryLoadOrStore(); + return AsHWIntrinsic()->OperIsMemoryLoadOrStore(); } #endif // FEATURE_HW_INTRINSICS default: @@ -18125,6 +18123,18 @@ bool GenTree::isCommutativeSIMDIntrinsic() return false; } } + +// Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, false otherwise +bool GenTreeSIMD::OperIsMemoryLoad() const +{ +#ifdef TARGET_XARCH + if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + { + return true; + } +#endif // TARGET_XARCH + return false; +} #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS @@ -18333,20 +18343,8 @@ GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORI return node; } -// Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, false otherwise -bool GenTreeSIMD::OperIsMemoryLoad() -{ -#ifdef TARGET_XARCH - if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) - { - return true; - } -#endif // TARGET_XARCH - return false; -} - // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise -bool GenTreeHWIntrinsic::OperIsMemoryLoad() +bool GenTreeHWIntrinsic::OperIsMemoryLoad() const { #ifdef TARGET_XARCH // Some xarch instructions have MemoryLoad sematics @@ -18388,7 +18386,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad() } // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, false otherwise -bool GenTreeHWIntrinsic::OperIsMemoryStore() +bool GenTreeHWIntrinsic::OperIsMemoryStore() const { #ifdef TARGET_XARCH // Some xarch instructions have MemoryStore sematics @@ -18423,7 +18421,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore() } // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise -bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() +bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() const { #ifdef TARGET_XARCH return OperIsMemoryLoad() || OperIsMemoryStore(); diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 4011361c295e9..5b548a6a74658 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -4536,8 +4536,8 @@ struct GenTreeSIMD : public GenTreeJitIntrinsic { } - bool OperIsMemoryLoad(); // Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, - // false otherwise + bool OperIsMemoryLoad() const; // Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, + // false otherwise #if DEBUGGABLE_GENTREE GenTreeSIMD() : GenTreeJitIntrinsic() @@ -4587,12 +4587,12 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic // However there are HW Instrinsic instructions that have 3 or even 4 operands and this is // supported using a single op1 and using an ArgList for it: gtNewArgList(op1, op2, op3) - bool OperIsMemoryLoad(); // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, + bool OperIsMemoryLoad() const; // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, // false otherwise - bool OperIsMemoryStore(); // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, + bool OperIsMemoryStore() const; // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, // false otherwise - bool OperIsMemoryLoadOrStore(); // Returns true for the HW Instrinsic instructions that have MemoryLoad or - // MemoryStore semantics, false otherwise + bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Instrinsic instructions that have MemoryLoad or + // MemoryStore semantics, false otherwise #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 72d4f58daa1ac..20174652f5437 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -3290,6 +3290,7 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: + { GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); assert(hwIntrinsicNode != nullptr); HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(hwIntrinsicNode->gtHWIntrinsicId); @@ -3322,6 +3323,7 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) } return true; // allow Hardware Intrinsics to be CSE-ed + } #endif // FEATURE_HW_INTRINSICS From 9c62167214b6e0c36f42b6ec45162095c8f02d04 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 11 Feb 2020 18:20:27 -0800 Subject: [PATCH 10/34] Record csdStructHnd; // The class handle, currently needed to create a SIMD LclVar in PerformCSE --- src/coreclr/src/jit/compiler.h | 2 ++ src/coreclr/src/jit/optcse.cpp | 26 +++++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 3fbe532d45403..878eb00711205 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -6176,6 +6176,8 @@ class Compiler treeStmtLst* csdTreeList; // list of matching tree nodes: head treeStmtLst* csdTreeLast; // list of matching tree nodes: tail + CORINFO_CLASS_HANDLE csdStructHnd; // The class handle, currently needed to create a SIMD LclVar in PerformCSE + ValueNum defExcSetPromise; // The exception set that is now required for all defs of this CSE. // This will be set to NoVN if we decide to abandon this CSE diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 20174652f5437..d80c354bd0357 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -498,8 +498,9 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) /* Start the list with the first CSE candidate recorded */ - hashDsc->csdTreeList = newElem; - hashDsc->csdTreeLast = newElem; + hashDsc->csdTreeList = newElem; + hashDsc->csdTreeLast = newElem; + hashDsc->csdStructHnd = gtGetStructHandleIfPresent(hashDsc->csdTree); } noway_assert(hashDsc->csdTreeList); @@ -516,6 +517,20 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdTreeLast->tslNext = newElem; hashDsc->csdTreeLast = newElem; + CORINFO_CLASS_HANDLE newElemStructHnd = gtGetStructHandleIfPresent(newElem->tslTree); + if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) + { + // The previous node(s) were GT_IND's and didn't carry the struct handle info + // The current node does hanve the struct handle info, so record it now + // + hashDsc->csdStructHnd = newElemStructHnd; + } + else + { + // Otherwise we should have a matching struct handle + assert(hashDsc->csdStructHnd == newElemStructHnd); + } + optDoCSE = true; // Found a duplicate CSE tree /* Have we assigned a CSE index? */ @@ -2558,9 +2573,10 @@ class CSE_Heuristic var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet()); if (varTypeIsStruct(cseLclVarTyp)) { - // After call args have been morphed, we don't need a handle for SIMD types. - // They are only required where the size is not implicit in the type and/or there are GC refs. - CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(successfulCandidate->Expr()); + // Retrieve the struct handle that we recorded while bulding the list of CSE candidates. + // If all occurances were in GT_IND nodes it could still be NO_CLASS_HANDLE + // + CORINFO_CLASS_HANDLE structHnd = successfulCandidate->CseDsc()->csdStructHnd; assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT)); if (structHnd != NO_CLASS_HANDLE) { From d9dda0a5a904e3e6f7b81623c13b6fa121afd33b Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 12 Feb 2020 11:26:43 -0800 Subject: [PATCH 11/34] Avoid calling gtGetStructHandleIfPresent to set csdStructHnd when we have a GT_IND node --- src/coreclr/src/jit/optcse.cpp | 38 +++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index d80c354bd0357..904329e7e5943 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -500,7 +500,15 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdTreeList = newElem; hashDsc->csdTreeLast = newElem; - hashDsc->csdStructHnd = gtGetStructHandleIfPresent(hashDsc->csdTree); + hashDsc->csdStructHnd = NO_CLASS_HANDLE; + + // When we have a GT_IND node we don't have a reliable struct handle + // and gtGetStructHandleIfPresent will return a guess that can be wrong + // + if (hashDsc->csdTree->OperGet() != GT_IND) + { + hashDsc->csdStructHnd = gtGetStructHandleIfPresent(hashDsc->csdTree); + } } noway_assert(hashDsc->csdTreeList); @@ -517,18 +525,24 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdTreeLast->tslNext = newElem; hashDsc->csdTreeLast = newElem; - CORINFO_CLASS_HANDLE newElemStructHnd = gtGetStructHandleIfPresent(newElem->tslTree); - if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) - { - // The previous node(s) were GT_IND's and didn't carry the struct handle info - // The current node does hanve the struct handle info, so record it now - // - hashDsc->csdStructHnd = newElemStructHnd; - } - else + // When we have a GT_IND node we don't have a reliable struct handle + // and gtGetStructHandleIfPresent will return a guess that can be wrong + // + if (newElem->tslTree->OperGet() != GT_IND) { - // Otherwise we should have a matching struct handle - assert(hashDsc->csdStructHnd == newElemStructHnd); + CORINFO_CLASS_HANDLE newElemStructHnd = gtGetStructHandleIfPresent(newElem->tslTree); + if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) + { + // The previous node(s) were GT_IND's and didn't carry the struct handle info + // The current node does hanve the struct handle info, so record it now + // + hashDsc->csdStructHnd = newElemStructHnd; + } + else + { + // Otherwise we should have a matching struct handle + assert(hashDsc->csdStructHnd == newElemStructHnd); + } } optDoCSE = true; // Found a duplicate CSE tree From f6d7f6588c3d5630644e97558f8b2ec17bc8d35a Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 12 Feb 2020 12:29:37 -0800 Subject: [PATCH 12/34] Instead of asserting on a struct handle mismatch, we record it in csdStructHndMismatch and avoid making the candidate into a CSE --- src/coreclr/src/jit/compiler.h | 1 + src/coreclr/src/jit/optcse.cpp | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 878eb00711205..6ad2f38439773 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -6177,6 +6177,7 @@ class Compiler treeStmtLst* csdTreeLast; // list of matching tree nodes: tail CORINFO_CLASS_HANDLE csdStructHnd; // The class handle, currently needed to create a SIMD LclVar in PerformCSE + bool csdStructHndMismatch; ValueNum defExcSetPromise; // The exception set that is now required for all defs of this CSE. // This will be set to NoVN if we decide to abandon this CSE diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 904329e7e5943..cf548ddb62679 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -502,6 +502,8 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdTreeLast = newElem; hashDsc->csdStructHnd = NO_CLASS_HANDLE; + hashDsc->csdStructHndMismatch = false; + // When we have a GT_IND node we don't have a reliable struct handle // and gtGetStructHandleIfPresent will return a guess that can be wrong // @@ -540,8 +542,14 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) } else { - // Otherwise we should have a matching struct handle - assert(hashDsc->csdStructHnd == newElemStructHnd); + hashDsc->csdStructHndMismatch = true; +#ifdef DEBUG + if (verbose) + { + printf("Abandoned - CSE candidate has mismatching struct handles!\n"); + printTreeID(newElem->tslTree); + } +#endif // DEBUG } } @@ -3010,6 +3018,12 @@ class CSE_Heuristic continue; } + if (dsc->csdStructHndMismatch) + { + JITDUMP("Abandoned CSE #%02u because we had mismatching struct handles\n"); + continue; + } + CSE_Candidate candidate(this, dsc); candidate.InitializeCounts(); From 8e3cea8ccc531d242741f0c74f85ff59155151e0 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 12 Feb 2020 12:40:15 -0800 Subject: [PATCH 13/34] Fix the JITDUMP messages to print the CseIndex --- src/coreclr/src/jit/optcse.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index cf548ddb62679..d9f8a005a3d60 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -3012,20 +3012,20 @@ class CSE_Heuristic for (; (cnt > 0); cnt--, ptr++) { Compiler::CSEdsc* dsc = *ptr; + CSE_Candidate candidate(this, dsc); + if (dsc->defExcSetPromise == ValueNumStore::NoVN) { - JITDUMP("Abandoned CSE #%02u because we had defs with different Exc sets\n"); + JITDUMP("Abandoned CSE #%02u because we had defs with different Exc sets\n", candidate.CseIndex()); continue; } if (dsc->csdStructHndMismatch) { - JITDUMP("Abandoned CSE #%02u because we had mismatching struct handles\n"); + JITDUMP("Abandoned CSE #%02u because we had mismatching struct handles\n", candidate.CseIndex()); continue; } - CSE_Candidate candidate(this, dsc); - candidate.InitializeCounts(); if (candidate.UseCount() == 0) From 3769197ffa138a13de5e09beb2bf7d5139591dc4 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 12 Feb 2020 12:44:45 -0800 Subject: [PATCH 14/34] add check for (newElemStructHnd != NO_CLASS_HANDLE) --- src/coreclr/src/jit/optcse.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index d9f8a005a3d60..444619557ce94 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -533,23 +533,26 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) if (newElem->tslTree->OperGet() != GT_IND) { CORINFO_CLASS_HANDLE newElemStructHnd = gtGetStructHandleIfPresent(newElem->tslTree); - if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) + if (newElemStructHnd != NO_CLASS_HANDLE) { - // The previous node(s) were GT_IND's and didn't carry the struct handle info - // The current node does hanve the struct handle info, so record it now - // - hashDsc->csdStructHnd = newElemStructHnd; - } - else - { - hashDsc->csdStructHndMismatch = true; -#ifdef DEBUG - if (verbose) + if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) { - printf("Abandoned - CSE candidate has mismatching struct handles!\n"); - printTreeID(newElem->tslTree); + // The previous node(s) were GT_IND's and didn't carry the struct handle info + // The current node does hanve the struct handle info, so record it now + // + hashDsc->csdStructHnd = newElemStructHnd; } + else + { + hashDsc->csdStructHndMismatch = true; +#ifdef DEBUG + if (verbose) + { + printf("Abandoned - CSE candidate has mismatching struct handles!\n"); + printTreeID(newElem->tslTree); + } #endif // DEBUG + } } } @@ -3012,7 +3015,7 @@ class CSE_Heuristic for (; (cnt > 0); cnt--, ptr++) { Compiler::CSEdsc* dsc = *ptr; - CSE_Candidate candidate(this, dsc); + CSE_Candidate candidate(this, dsc); if (dsc->defExcSetPromise == ValueNumStore::NoVN) { From b9d0a58da70f5b69416fdc64bd4f47aaba70002e Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 12 Feb 2020 15:14:33 -0800 Subject: [PATCH 15/34] Fix check for (newElemStructHnd != hashDsc->csdStructHnd) --- src/coreclr/src/jit/optcse.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 444619557ce94..2f204b49e8c3b 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -538,11 +538,11 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) { // The previous node(s) were GT_IND's and didn't carry the struct handle info - // The current node does hanve the struct handle info, so record it now + // The current node does have the struct handle info, so record it now // hashDsc->csdStructHnd = newElemStructHnd; } - else + else if (newElemStructHnd != hashDsc->csdStructHnd) { hashDsc->csdStructHndMismatch = true; #ifdef DEBUG From 3c3f2fe4ee4956b49c4a6160981016ca71f55f2e Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 12 Feb 2020 17:31:45 -0800 Subject: [PATCH 16/34] Additional checks for SIMD struct types when setting csdStructHnd Added Mismatched Struct Handle assert in ConsiderCandidates --- src/coreclr/src/jit/optcse.cpp | 55 +++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 2f204b49e8c3b..9c5510b8e2472 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -504,12 +504,15 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdStructHndMismatch = false; - // When we have a GT_IND node we don't have a reliable struct handle - // and gtGetStructHandleIfPresent will return a guess that can be wrong - // - if (hashDsc->csdTree->OperGet() != GT_IND) + if (varTypeIsStruct(tree->gtType)) { - hashDsc->csdStructHnd = gtGetStructHandleIfPresent(hashDsc->csdTree); + // When we have a GT_IND node with a SIMD type then we don't have a reliable + // struct handle and gtGetStructHandleIfPresent returns a guess that can be wrong + // + if ((hashDsc->csdTree->OperGet() != GT_IND) || !varTypeIsSIMD(tree)) + { + hashDsc->csdStructHnd = gtGetStructHandleIfPresent(hashDsc->csdTree); + } } } @@ -527,31 +530,34 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdTreeLast->tslNext = newElem; hashDsc->csdTreeLast = newElem; - // When we have a GT_IND node we don't have a reliable struct handle - // and gtGetStructHandleIfPresent will return a guess that can be wrong - // - if (newElem->tslTree->OperGet() != GT_IND) + if (varTypeIsStruct(newElem->tslTree->gtType)) { - CORINFO_CLASS_HANDLE newElemStructHnd = gtGetStructHandleIfPresent(newElem->tslTree); - if (newElemStructHnd != NO_CLASS_HANDLE) + // When we have a GT_IND node with a SIMD type then we don't have a reliable + // struct handle and gtGetStructHandleIfPresent returns a guess that can be wrong + // + if ((newElem->tslTree->OperGet() != GT_IND) || !varTypeIsSIMD(newElem->tslTree)) { - if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) - { - // The previous node(s) were GT_IND's and didn't carry the struct handle info - // The current node does have the struct handle info, so record it now - // - hashDsc->csdStructHnd = newElemStructHnd; - } - else if (newElemStructHnd != hashDsc->csdStructHnd) + CORINFO_CLASS_HANDLE newElemStructHnd = gtGetStructHandleIfPresent(newElem->tslTree); + if (newElemStructHnd != NO_CLASS_HANDLE) { - hashDsc->csdStructHndMismatch = true; -#ifdef DEBUG - if (verbose) + if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) { - printf("Abandoned - CSE candidate has mismatching struct handles!\n"); - printTreeID(newElem->tslTree); + // The previous node(s) were GT_IND's and didn't carry the struct handle info + // The current node does have the struct handle info, so record it now + // + hashDsc->csdStructHnd = newElemStructHnd; } + else if (newElemStructHnd != hashDsc->csdStructHnd) + { + hashDsc->csdStructHndMismatch = true; +#ifdef DEBUG + if (verbose) + { + printf("Abandoned - CSE candidate has mismatching struct handles!\n"); + printTreeID(newElem->tslTree); + } #endif // DEBUG + } } } } @@ -3026,6 +3032,7 @@ class CSE_Heuristic if (dsc->csdStructHndMismatch) { JITDUMP("Abandoned CSE #%02u because we had mismatching struct handles\n", candidate.CseIndex()); + assert(!"Mismatched Struct Handle"); continue; } From 5b641a238c38849df6501e66ab0aeb6cff5ee0d8 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 14 Feb 2020 10:44:53 -0800 Subject: [PATCH 17/34] added extra value number argument VNF_SimdType for Most SIMD operations added VNF_SimdType // A value number function to compose a SIMD type added vnDumpSimdType --- src/coreclr/src/jit/valuenum.cpp | 138 +++++++++++++++++++++++++--- src/coreclr/src/jit/valuenum.h | 4 + src/coreclr/src/jit/valuenumfuncs.h | 2 + 3 files changed, 129 insertions(+), 15 deletions(-) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index e2be215f7e3a3..132a68e8d0c27 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5073,6 +5073,10 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) case VNF_ValWithExc: vnDumpValWithExc(comp, &funcApp); break; + case VNF_SimdType: + vnDumpSimdType(comp, &funcApp); + break; + default: printf("%s(", VNFuncName(funcApp.m_func)); for (unsigned i = 0; i < funcApp.m_arity; i++) @@ -5224,6 +5228,19 @@ void ValueNumStore::vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore) comp->vnPrint(newValVN, 0); printf("]"); } + +void ValueNumStore::vnDumpSimdType(Compiler* comp, VNFuncApp* simdType) +{ + assert(simdType->m_func == VNF_SimdType); // Preconditions. + assert(IsVNConstant(simdType->m_args[0])); + assert(IsVNConstant(simdType->m_args[1])); + + int simdSize = ConstantValue(simdType->m_args[0]); + var_types baseType = (var_types)ConstantValue(simdType->m_args[1]); + + printf("%s(simd%d, %s)", VNFuncName(simdType->m_func), simdSize, varTypeName(baseType)); +} + #endif // DEBUG // Static fields, methods. @@ -5296,17 +5313,49 @@ void ValueNumStore::InitValueNumStoreStatics() assert(vnfNum == VNF_COUNT); #define ValueNumFuncSetArity(vnfNum, arity) \ - vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); /* set the new arity value */ + vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ + vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift) /* set the new arity value */ + +#ifdef FEATURE_SIMD + // ValueNumbering encodes an extra result type argument for these + ValueNumFuncSetArity(VNF_SIMD_Init, 2); + ValueNumFuncSetArity(VNF_SIMD_GetItem, 3); + ValueNumFuncSetArity(VNF_SIMD_Add, 3); + ValueNumFuncSetArity(VNF_SIMD_Sub, 3); + ValueNumFuncSetArity(VNF_SIMD_Mul, 3); + ValueNumFuncSetArity(VNF_SIMD_Div, 3); + ValueNumFuncSetArity(VNF_SIMD_Sqrt, 2); + ValueNumFuncSetArity(VNF_SIMD_Min, 3); + ValueNumFuncSetArity(VNF_SIMD_Max, 3); + ValueNumFuncSetArity(VNF_SIMD_Abs, 2); + ValueNumFuncSetArity(VNF_SIMD_Equal, 3); + ValueNumFuncSetArity(VNF_SIMD_LessThan, 3); + ValueNumFuncSetArity(VNF_SIMD_LessThanOrEqual, 3); + ValueNumFuncSetArity(VNF_SIMD_GreaterThan, 3); + ValueNumFuncSetArity(VNF_SIMD_GreaterThanOrEqual, 3); + ValueNumFuncSetArity(VNF_SIMD_BitwiseAnd, 3); + ValueNumFuncSetArity(VNF_SIMD_BitwiseAndNot, 3); + ValueNumFuncSetArity(VNF_SIMD_BitwiseOr, 3); + ValueNumFuncSetArity(VNF_SIMD_BitwiseXor, 3); + ValueNumFuncSetArity(VNF_SIMD_DotProduct, 3); + ValueNumFuncSetArity(VNF_SIMD_Cast, 2); + ValueNumFuncSetArity(VNF_SIMD_ConvertToSingle, 2); + ValueNumFuncSetArity(VNF_SIMD_ConvertToDouble, 2); + ValueNumFuncSetArity(VNF_SIMD_ConvertToInt32, 2); + ValueNumFuncSetArity(VNF_SIMD_ConvertToInt64, 2); + ValueNumFuncSetArity(VNF_SIMD_Narrow, 3); + ValueNumFuncSetArity(VNF_SIMD_WidenHi, 2); + ValueNumFuncSetArity(VNF_SIMD_WidenLo, 2); +#endif #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC always has two operands. - ValueNumFuncSetArity(VNF_HWI_SSE2_Shuffle, 2) + ValueNumFuncSetArity(VNF_HWI_SSE2_Shuffle, 2); #endif #undef ValueNumFuncSetArity - for (unsigned i = 0; i < _countof(genTreeOpsIllegalAsVNFunc); i++) + for (unsigned i = 0; i < _countof(genTreeOpsIllegalAsVNFunc); i++) { vnfOpAttribs[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp; } @@ -8342,11 +8391,13 @@ void Compiler::fgValueNumberSimd(GenTree* tree) } else // SIMD unary or binary operator. { + ValueNumPair resvnp; ValueNumPair op1vnp; ValueNumPair op1Xvnp; vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); -#ifdef TARGET_XARCH + bool encodeResultType = false; + switch (simdNode->gtSIMDIntrinsicID) { case SIMDIntrinsicInitArray: @@ -8390,15 +8441,54 @@ void Compiler::fgValueNumberSimd(GenTree* tree) } case SIMDIntrinsicInit: + case SIMDIntrinsicGetItem: + case SIMDIntrinsicAdd: + case SIMDIntrinsicSub: + case SIMDIntrinsicMul: + case SIMDIntrinsicDiv: + case SIMDIntrinsicSqrt: + case SIMDIntrinsicMin: + case SIMDIntrinsicMax: + case SIMDIntrinsicAbs: + case SIMDIntrinsicEqual: + case SIMDIntrinsicLessThan: + case SIMDIntrinsicLessThanOrEqual: + case SIMDIntrinsicGreaterThan: + case SIMDIntrinsicGreaterThanOrEqual: + case SIMDIntrinsicBitwiseAnd: + case SIMDIntrinsicBitwiseAndNot: + case SIMDIntrinsicBitwiseOr: + case SIMDIntrinsicBitwiseXor: + case SIMDIntrinsicDotProduct: + case SIMDIntrinsicCast: + case SIMDIntrinsicConvertToSingle: + case SIMDIntrinsicConvertToDouble: + case SIMDIntrinsicConvertToInt32: + case SIMDIntrinsicConvertToInt64: + case SIMDIntrinsicNarrow: + case SIMDIntrinsicWidenHi: + case SIMDIntrinsicWidenLo: { - // Also encode the resulting type as op2vnp - ValueNumPair op2vnp; - op2vnp.SetBoth(vnStore->VNForIntCon(INT32(tree->TypeGet()))); + // Also encodes the resulting type + encodeResultType = true; - excSetPair = op1Xvnp; - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); - tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); - return; + ValueNum vnSize = vnStore->VNForIntCon(simdNode->gtSIMDSize); + ValueNum vnBaseType = vnStore->VNForIntCon(INT32(simdNode->gtSIMDBaseType)); + ValueNum simdTypeVN = vnStore->VNForFunc(TYP_REF, VNF_SimdType, vnSize, vnBaseType); + +#ifdef DEBUG + if (verbose) + { + printf(" simdTypeVN is "); + vnPrint(simdTypeVN, 2); + printf("\n"); + } +#endif + + resvnp.SetBoth(simdTypeVN); + + // Handled below as a normal SIMD unary or binary node with an extra resvnp + break; } default: @@ -8406,13 +8496,22 @@ void Compiler::fgValueNumberSimd(GenTree* tree) break; } -#endif // TARGET_XARCH + VNFunc simdFunc = GetVNFuncForNode(tree); if (tree->AsOp()->gtOp2 == nullptr) { // Unary SIMD nodes have a nullptr for op2. excSetPair = op1Xvnp; - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp); + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, resvnp); + assert(vnStore->VNFuncArity(simdFunc) == 2); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp); + assert(vnStore->VNFuncArity(simdFunc) == 1); + } } else { @@ -8421,7 +8520,16 @@ void Compiler::fgValueNumberSimd(GenTree* tree) vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree), op1vnp, op2vnp); + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, op2vnp, resvnp); + assert(vnStore->VNFuncArity(simdFunc) == 3); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, op2vnp); + assert(vnStore->VNFuncArity(simdFunc) == 2); + } } } tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); diff --git a/src/coreclr/src/jit/valuenum.h b/src/coreclr/src/jit/valuenum.h index 2b0d239719b74..9b194ae209ab8 100644 --- a/src/coreclr/src/jit/valuenum.h +++ b/src/coreclr/src/jit/valuenum.h @@ -879,6 +879,10 @@ class ValueNumStore // Prints a representation of the set of exceptions on standard out. void vnDumpExcSeq(Compiler* comp, VNFuncApp* excSeq, bool isHead); + // Requires "simdType" to be a VNF_SimdType VNFuncApp. + // Prints a representation (comma-separated list of field names) on standard out. + void vnDumpSimdType(Compiler* comp, VNFuncApp* simdType); + // Returns the string name of "vnf". static const char* VNFuncName(VNFunc vnf); // Used in the implementation of the above. diff --git a/src/coreclr/src/jit/valuenumfuncs.h b/src/coreclr/src/jit/valuenumfuncs.h index f72181a5b211a..64c41dc9d243c 100644 --- a/src/coreclr/src/jit/valuenumfuncs.h +++ b/src/coreclr/src/jit/valuenumfuncs.h @@ -142,6 +142,8 @@ ValueNumFuncDef(BoxNullable, 3, false, false, false) ValueNumFuncDef(StrCns, 2, false, true, false) ValueNumFuncDef(Unbox, 2, false, true, false) +ValueNumFuncDef(SimdType, 2, false, false, false) // A value number function to compose a SIMD type + ValueNumFuncDef(LT_UN, 2, false, false, false) // unsigned or unordered comparisons ValueNumFuncDef(LE_UN, 2, false, false, false) ValueNumFuncDef(GE_UN, 2, false, false, false) From a8ef3194d712878ca033bfcd34676c8e374697f9 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 14 Feb 2020 13:39:37 -0800 Subject: [PATCH 18/34] fix GenTreeSIMD::OperIsMemoryLoad for ARM64 Removed ismatched Struct Handle assert --- src/coreclr/src/jit/gentree.cpp | 2 -- src/coreclr/src/jit/optcse.cpp | 1 - 2 files changed, 3 deletions(-) diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 7b1a66ba678ee..5a8f56748bf2f 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -18127,12 +18127,10 @@ bool GenTree::isCommutativeSIMDIntrinsic() // Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeSIMD::OperIsMemoryLoad() const { -#ifdef TARGET_XARCH if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) { return true; } -#endif // TARGET_XARCH return false; } #endif // FEATURE_SIMD diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 9c5510b8e2472..6cecea4502fc5 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -3032,7 +3032,6 @@ class CSE_Heuristic if (dsc->csdStructHndMismatch) { JITDUMP("Abandoned CSE #%02u because we had mismatching struct handles\n", candidate.CseIndex()); - assert(!"Mismatched Struct Handle"); continue; } From 305a01780adbb5bd42c8024b4f7db2d6e0f197b7 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 14 Feb 2020 15:19:59 -0800 Subject: [PATCH 19/34] Fix the printing of BitSets on Linux, change the printf format specifier --- src/coreclr/src/jit/bitsetasshortlong.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/bitsetasshortlong.h b/src/coreclr/src/jit/bitsetasshortlong.h index c22eefcb6f0df..27a8c1d7349a5 100644 --- a/src/coreclr/src/jit/bitsetasshortlong.h +++ b/src/coreclr/src/jit/bitsetasshortlong.h @@ -412,12 +412,12 @@ class BitSetOps Date: Wed, 19 Feb 2020 18:41:23 -0800 Subject: [PATCH 20/34] Added bool methods vnEncodesResultTypeForSIMDIntrinsic and vnEncodesResultTypeForHWIntrinsic these return true when a SIMD or HW Instrinsic will use an extra arg to record the result type during value numbering Changes the ValueNumFuncDef to set the arity to zero when a -1 value is passed in Updated InitValueNumStoreStatics to fixup the arity of SIMD or HW Instrinsic that have an extra arg to record the result type Allow a type mismatchj when we have a GT_BLK as the lhs of an assignment, as it is used to zero out Simd structs --- src/coreclr/src/jit/compiler.h | 7 + src/coreclr/src/jit/hwintrinsic.cpp | 27 +++ src/coreclr/src/jit/simd.cpp | 40 +++++ src/coreclr/src/jit/valuenum.cpp | 267 ++++++++++++++-------------- src/coreclr/src/jit/valuenumfuncs.h | 2 +- 5 files changed, 209 insertions(+), 134 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 6ad2f38439773..f59a8a9ff2793 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -8178,6 +8178,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif // !FEATURE_SIMD } +#ifdef FEATURE_SIMD + static bool vnEncodesResultTypeForSIMDIntrinsic(SIMDIntrinsicID intrinsicId); +#endif // !FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + static bool vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID); +#endif // FEATURE_HW_INTRINSICS + private: // These routines need not be enclosed under FEATURE_SIMD since lvIsSIMDType() // is defined for both FEATURE_SIMD and !FEATURE_SIMD apropriately. The use diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 716d4904046d6..554a01d05a22e 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -173,6 +173,33 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va return NO_CLASS_HANDLE; } +#ifdef FEATURE_HW_INTRINSICS +/* static */ bool Compiler::vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID) +{ + int numArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicID); + + // Currently we only record sn extra Result Type arg when we have a unary or binary HW Intrinsic node + // + if ((numArgs < 1) || (numArgs > 2)) + { + return false; + } + + // We iterate over all of the instructions in the HWIntrinsicInfo table + // if we find more than one then we return true, otherwise false + // + unsigned insCount = 0; + for (var_types baseType = TYP_BYTE; (baseType <= TYP_DOUBLE); baseType = (var_types)(baseType + 1)) + { + if (HWIntrinsicInfo::lookupIns(hwIntrinsicID, baseType) != INS_invalid) + { + insCount++; + } + } + return (insCount > 1); +} +#endif // FEATURE_HW_INTRINSICS + //------------------------------------------------------------------------ // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 1d7b9bf7443a9..95f6739154feb 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1051,6 +1051,46 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in return nullptr; } +/* static */ bool Compiler::vnEncodesResultTypeForSIMDIntrinsic(SIMDIntrinsicID intrinsicId) +{ + switch (intrinsicId) + { + case SIMDIntrinsicInit: + case SIMDIntrinsicGetItem: + case SIMDIntrinsicAdd: + case SIMDIntrinsicSub: + case SIMDIntrinsicMul: + case SIMDIntrinsicDiv: + case SIMDIntrinsicSqrt: + case SIMDIntrinsicMin: + case SIMDIntrinsicMax: + case SIMDIntrinsicAbs: + case SIMDIntrinsicEqual: + case SIMDIntrinsicLessThan: + case SIMDIntrinsicLessThanOrEqual: + case SIMDIntrinsicGreaterThan: + case SIMDIntrinsicGreaterThanOrEqual: + case SIMDIntrinsicBitwiseAnd: + case SIMDIntrinsicBitwiseAndNot: + case SIMDIntrinsicBitwiseOr: + case SIMDIntrinsicBitwiseXor: + case SIMDIntrinsicDotProduct: + case SIMDIntrinsicCast: + case SIMDIntrinsicConvertToSingle: + case SIMDIntrinsicConvertToDouble: + case SIMDIntrinsicConvertToInt32: + case SIMDIntrinsicConvertToInt64: + case SIMDIntrinsicNarrow: + case SIMDIntrinsicWidenHi: + case SIMDIntrinsicWidenLo: + return true; + + default: + break; + } + return false; +} + // Pops and returns GenTree node from importer's type stack. // Normalizes TYP_STRUCT value in case of GT_CALL, GT_RET_EXPR and arg nodes. // diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 132a68e8d0c27..bd19bca7aab24 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5304,7 +5304,8 @@ void ValueNumStore::InitValueNumStoreStatics() vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \ if (sharedStatic) \ vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ + if (arity > 0) \ + vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ vnfNum++; #include "valuenumfuncs.h" @@ -5316,43 +5317,49 @@ void ValueNumStore::InitValueNumStoreStatics() vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift) /* set the new arity value */ -#ifdef FEATURE_SIMD - // ValueNumbering encodes an extra result type argument for these + for (SIMDIntrinsicID id = SIMDIntrinsicID::SIMDIntrinsicNone; (id < SIMDIntrinsicID::SIMDIntrinsicInvalid); + id = (SIMDIntrinsicID)(id + 1)) + { + bool encodeResultType = Compiler::vnEncodesResultTypeForSIMDIntrinsic(id); + + if (encodeResultType) + { + // These SIMDIntrinsic's have an extra VNF_SimdType arg. + // + VNFunc func = VNFunc(VNF_SIMD_FIRST + id); + unsigned oldArity = VNFuncArity(func); + unsigned newArity = oldArity + 1; + + ValueNumFuncSetArity(func, newArity); + } + } + // SIMDIntrinsicInit has an incorrect entry of 2 for numArgs and also vnEncodesResultTypeForSIMDIntrinsic returns + // true + // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_Init, 2); - ValueNumFuncSetArity(VNF_SIMD_GetItem, 3); - ValueNumFuncSetArity(VNF_SIMD_Add, 3); - ValueNumFuncSetArity(VNF_SIMD_Sub, 3); - ValueNumFuncSetArity(VNF_SIMD_Mul, 3); - ValueNumFuncSetArity(VNF_SIMD_Div, 3); - ValueNumFuncSetArity(VNF_SIMD_Sqrt, 2); - ValueNumFuncSetArity(VNF_SIMD_Min, 3); - ValueNumFuncSetArity(VNF_SIMD_Max, 3); - ValueNumFuncSetArity(VNF_SIMD_Abs, 2); - ValueNumFuncSetArity(VNF_SIMD_Equal, 3); - ValueNumFuncSetArity(VNF_SIMD_LessThan, 3); - ValueNumFuncSetArity(VNF_SIMD_LessThanOrEqual, 3); - ValueNumFuncSetArity(VNF_SIMD_GreaterThan, 3); - ValueNumFuncSetArity(VNF_SIMD_GreaterThanOrEqual, 3); - ValueNumFuncSetArity(VNF_SIMD_BitwiseAnd, 3); - ValueNumFuncSetArity(VNF_SIMD_BitwiseAndNot, 3); - ValueNumFuncSetArity(VNF_SIMD_BitwiseOr, 3); - ValueNumFuncSetArity(VNF_SIMD_BitwiseXor, 3); - ValueNumFuncSetArity(VNF_SIMD_DotProduct, 3); - ValueNumFuncSetArity(VNF_SIMD_Cast, 2); - ValueNumFuncSetArity(VNF_SIMD_ConvertToSingle, 2); - ValueNumFuncSetArity(VNF_SIMD_ConvertToDouble, 2); - ValueNumFuncSetArity(VNF_SIMD_ConvertToInt32, 2); - ValueNumFuncSetArity(VNF_SIMD_ConvertToInt64, 2); - ValueNumFuncSetArity(VNF_SIMD_Narrow, 3); - ValueNumFuncSetArity(VNF_SIMD_WidenHi, 2); - ValueNumFuncSetArity(VNF_SIMD_WidenLo, 2); -#endif #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC always has two operands. ValueNumFuncSetArity(VNF_HWI_SSE2_Shuffle, 2); #endif + for (NamedIntrinsic id = (NamedIntrinsic)(NI_HW_INTRINSIC_START + 1); (id < NI_HW_INTRINSIC_END); + id = (NamedIntrinsic)(id + 1)) + { + bool encodeResultType = Compiler::vnEncodesResultTypeForHWIntrinsic(id); + + if (encodeResultType) + { + // These HW_Intrinsic's have an extra VNF_SimdType arg. + // + VNFunc func = VNFunc(VNF_HWI_FIRST + (id - NI_HW_INTRINSIC_START - 1)); + unsigned oldArity = VNFuncArity(func); + unsigned newArity = oldArity + 1; + + ValueNumFuncSetArity(func, newArity); + } + } + #undef ValueNumFuncSetArity for (unsigned i = 0; i < _countof(genTreeOpsIllegalAsVNFunc); i++) @@ -7103,7 +7110,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair rhsVNPair = rhs->gtVNPair; // Is the type being stored different from the type computed by the rhs? - if (rhs->TypeGet() != lhs->TypeGet()) + if ((rhs->TypeGet() != lhs->TypeGet()) && (lhs->OperGet() != GT_BLK)) { // This means that there is an implicit cast on the rhs value // @@ -7300,11 +7307,13 @@ void Compiler::fgValueNumberTree(GenTree* tree) noway_assert(!"Phi arg cannot be LHS."); break; - case GT_BLK: case GT_OBJ: - noway_assert(!"GT_BLK/GT_OBJ can not be LHS when !varTypeIsStruct(tree) is true!"); + noway_assert(!"GT_OBJ can not be LHS when (tree->TypeGet() != TYP_STRUCT)!"); break; + case GT_BLK: + __fallthrough; + case GT_IND: { bool isVolatile = (lhs->gtFlags & GTF_IND_VOLATILE) != 0; @@ -8391,109 +8400,68 @@ void Compiler::fgValueNumberSimd(GenTree* tree) } else // SIMD unary or binary operator. { - ValueNumPair resvnp; + ValueNumPair resvnp = ValueNumPair(); ValueNumPair op1vnp; ValueNumPair op1Xvnp; vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); - bool encodeResultType = false; - - switch (simdNode->gtSIMDIntrinsicID) + if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitArray) { - case SIMDIntrinsicInitArray: - { - // rationalize rewrites this as an explicit load with op1 as the base address + // rationalize rewrites this as an explicit load with op1 as the base address - ValueNumPair op2vnp; - if (tree->AsOp()->gtOp2 == nullptr) - { - // a nullptr for op2 means that we have an impicit index of zero - op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); + ValueNumPair op2vnp; + if (tree->AsOp()->gtOp2 == nullptr) + { + // a nullptr for op2 means that we have an impicit index of zero + op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); - excSetPair = op1Xvnp; - } - else // We have an explicit index in op2 - { - ValueNumPair op2Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + excSetPair = op1Xvnp; + } + else // We have an explicit index in op2 + { + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); - excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); - } + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + } - ValueNum addrVN = - vnStore->VNForFunc(TYP_BYREF, GetVNFuncForNode(tree), op1vnp.GetLiberal(), op2vnp.GetLiberal()); + ValueNum addrVN = + vnStore->VNForFunc(TYP_BYREF, GetVNFuncForNode(tree), op1vnp.GetLiberal(), op2vnp.GetLiberal()); #ifdef DEBUG - if (verbose) - { - printf("Treating GT_SIMD InitArray as a ByrefExposed load , addrVN is "); - vnPrint(addrVN, 0); - } + if (verbose) + { + printf("Treating GT_SIMD InitArray as a ByrefExposed load , addrVN is "); + vnPrint(addrVN, 0); + } #endif // DEBUG - // The address points into the heap, so it is an ByrefExposed load. - // - ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN); - tree->gtVNPair.SetLiberal(loadVN); - tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); - tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); - fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); - return; - } - - case SIMDIntrinsicInit: - case SIMDIntrinsicGetItem: - case SIMDIntrinsicAdd: - case SIMDIntrinsicSub: - case SIMDIntrinsicMul: - case SIMDIntrinsicDiv: - case SIMDIntrinsicSqrt: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: - case SIMDIntrinsicAbs: - case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicGreaterThanOrEqual: - case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: - case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicDotProduct: - case SIMDIntrinsicCast: - case SIMDIntrinsicConvertToSingle: - case SIMDIntrinsicConvertToDouble: - case SIMDIntrinsicConvertToInt32: - case SIMDIntrinsicConvertToInt64: - case SIMDIntrinsicNarrow: - case SIMDIntrinsicWidenHi: - case SIMDIntrinsicWidenLo: - { - // Also encodes the resulting type - encodeResultType = true; - - ValueNum vnSize = vnStore->VNForIntCon(simdNode->gtSIMDSize); - ValueNum vnBaseType = vnStore->VNForIntCon(INT32(simdNode->gtSIMDBaseType)); - ValueNum simdTypeVN = vnStore->VNForFunc(TYP_REF, VNF_SimdType, vnSize, vnBaseType); + // The address points into the heap, so it is an ByrefExposed load. + // + ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN); + tree->gtVNPair.SetLiberal(loadVN); + tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); + fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); + return; + } -#ifdef DEBUG - if (verbose) - { - printf(" simdTypeVN is "); - vnPrint(simdTypeVN, 2); - printf("\n"); - } -#endif + bool encodeResultType = vnEncodesResultTypeForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID); - resvnp.SetBoth(simdTypeVN); + if (encodeResultType) + { + ValueNum vnSize = vnStore->VNForIntCon(simdNode->gtSIMDSize); + ValueNum vnBaseType = vnStore->VNForIntCon(INT32(simdNode->gtSIMDBaseType)); + ValueNum simdTypeVN = vnStore->VNForFunc(TYP_REF, VNF_SimdType, vnSize, vnBaseType); + resvnp.SetBoth(simdTypeVN); - // Handled below as a normal SIMD unary or binary node with an extra resvnp - break; +#ifdef DEBUG + if (verbose) + { + printf(" simdTypeVN is "); + vnPrint(simdTypeVN, 2); + printf("\n"); } - - default: - // Handled below as a normal SIMD unary or binary node - break; +#endif } VNFunc simdFunc = GetVNFuncForNode(tree); @@ -8555,21 +8523,21 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) VNFunc func = GetVNFuncForNode(tree); unsigned fixedArity = vnStore->VNFuncArity(func); // - // fixedArity is the numArgs column in "hwinstrinsiclistxarch.h" - // a -1 value is translated into an unsigned value of 7 + // when the numArgs column in "hwinstrinsiclistxarch.h" is -1 + // it gets translated into a value of 0 - ValueNumPair excSetPair; + ValueNumPair excSetPair = ValueNumStore::VNPForEmptyExcSet(); ValueNumPair normalPair; // There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector128_Zero if (tree->AsOp()->gtOp1 == nullptr) { assert(fixedArity == 0); - excSetPair = ValueNumStore::VNPForEmptyExcSet(); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func); assert(lookupNumArgs == 0); } - else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (fixedArity == 7)) + else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (fixedArity == 0)) { // We have a HWINTRINSIC node in the GT_LIST form with 3 or more args // Or the numArgs was specified as -1 in the numArgs column in "hwinstrinsiclistxarch.h" @@ -8581,28 +8549,61 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) } else // HWINTRINSIC unary or binary operator. { + ValueNumPair resvnp = ValueNumPair(); + bool encodeResultType = vnEncodesResultTypeForHWIntrinsic(hwIntrinsicNode->gtHWIntrinsicId); + + if (encodeResultType) + { + ValueNum vnSize = vnStore->VNForIntCon(hwIntrinsicNode->gtSIMDSize); + ValueNum vnBaseType = vnStore->VNForIntCon(INT32(hwIntrinsicNode->gtSIMDBaseType)); + ValueNum simdTypeVN = vnStore->VNForFunc(TYP_REF, VNF_SimdType, vnSize, vnBaseType); + resvnp.SetBoth(simdTypeVN); + +#ifdef DEBUG + if (verbose) + { + printf(" simdTypeVN is "); + vnPrint(simdTypeVN, 2); + printf("\n"); + } +#endif + } ValueNumPair op1vnp; ValueNumPair op1Xvnp; vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); if (tree->AsOp()->gtOp2 == nullptr) { - assert(fixedArity == 1); - // Unary SIMD nodes have a nullptr for op2. excSetPair = op1Xvnp; - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp); - assert(lookupNumArgs == 1); + + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, resvnp); + assert(vnStore->VNFuncArity(func) == 2); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp); + assert(vnStore->VNFuncArity(func) == 1); + } } else { - assert(fixedArity == 2); ValueNumPair op2vnp; ValueNumPair op2Xvnp; vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); - normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, op2vnp); - assert(lookupNumArgs == 2); + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, op2vnp, resvnp); + assert(vnStore->VNFuncArity(func) == 3); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, op2vnp); + assert(vnStore->VNFuncArity(func) == 2); + } } } tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); diff --git a/src/coreclr/src/jit/valuenumfuncs.h b/src/coreclr/src/jit/valuenumfuncs.h index 64c41dc9d243c..00040c75ef02d 100644 --- a/src/coreclr/src/jit/valuenumfuncs.h +++ b/src/coreclr/src/jit/valuenumfuncs.h @@ -172,7 +172,7 @@ ValueNumFuncDef(HWI_##id, argCount, false, false, false) // All of the HARDWAR #elif defined (TARGET_ARM64) #define HARDWARE_INTRINSIC(isa, name, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ -ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of the HARDWARE_INTRINSICS form arm64 +ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of the HARDWARE_INTRINSICS for arm64 #include "hwintrinsiclistarm64.h" #define VNF_HWI_FIRST VNF_HWI_Vector64_AsByte From 3c641ab4179f61fe9651871f2fb25b19d5df2f2f Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 20 Feb 2020 11:53:05 -0800 Subject: [PATCH 21/34] Fix for SIMD_WidenLo arg count --- src/coreclr/src/jit/valuenum.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index bd19bca7aab24..40f1e7df19841 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5333,10 +5333,13 @@ void ValueNumStore::InitValueNumStoreStatics() ValueNumFuncSetArity(func, newArity); } } - // SIMDIntrinsicInit has an incorrect entry of 2 for numArgs and also vnEncodesResultTypeForSIMDIntrinsic returns - // true + // SIMDIntrinsicInit has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns true // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_Init, 2); + // SIMDIntrinsicWidenLo has an incorrect entry of 2 for numArgsm also vnEncodesResultTypeForSIMDIntrinsic returns + // true + // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. + ValueNumFuncSetArity(VNF_SIMD_WidenLo, 2); #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC always has two operands. From c1aa65a2f4de9f4c83b0a89d59b80fbafb074fbb Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 20 Feb 2020 11:53:38 -0800 Subject: [PATCH 22/34] fix typo --- src/coreclr/src/jit/valuenum.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 40f1e7df19841..37d361bcc6abd 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5336,7 +5336,7 @@ void ValueNumStore::InitValueNumStoreStatics() // SIMDIntrinsicInit has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns true // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_Init, 2); - // SIMDIntrinsicWidenLo has an incorrect entry of 2 for numArgsm also vnEncodesResultTypeForSIMDIntrinsic returns + // SIMDIntrinsicWidenLo has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns // true // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_WidenLo, 2); From 55228a6ee3c7cb717208111f3fc4cfc15749b389 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 20 Feb 2020 12:24:46 -0800 Subject: [PATCH 23/34] Fix x86 build breaks Fix SIMD_WidenHi --- src/coreclr/src/jit/valuenum.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 37d361bcc6abd..5648bee7b49fc 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5317,6 +5317,7 @@ void ValueNumStore::InitValueNumStoreStatics() vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift) /* set the new arity value */ +#ifdef FEATURE_SIMD for (SIMDIntrinsicID id = SIMDIntrinsicID::SIMDIntrinsicNone; (id < SIMDIntrinsicID::SIMDIntrinsicInvalid); id = (SIMDIntrinsicID)(id + 1)) { @@ -5336,15 +5337,20 @@ void ValueNumStore::InitValueNumStoreStatics() // SIMDIntrinsicInit has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns true // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_Init, 2); + // SIMDIntrinsicWidenHi has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns + // true, so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. + ValueNumFuncSetArity(VNF_SIMD_WidenHi, 2); // SIMDIntrinsicWidenLo has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns - // true - // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. + // true, so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_WidenLo, 2); -#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +#ifdef TARGET_XARCH // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC always has two operands. ValueNumFuncSetArity(VNF_HWI_SSE2_Shuffle, 2); -#endif +#endif // TARGET_XARCH for (NamedIntrinsic id = (NamedIntrinsic)(NI_HW_INTRINSIC_START + 1); (id < NI_HW_INTRINSIC_END); id = (NamedIntrinsic)(id + 1)) @@ -5362,6 +5368,7 @@ void ValueNumStore::InitValueNumStoreStatics() ValueNumFuncSetArity(func, newArity); } } +#endif // FEATURE_HW_INTRINSICS #undef ValueNumFuncSetArity From 2bbd9821d27a2bf6976f261cf4a65065f39c46c1 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 20 Feb 2020 17:33:09 -0800 Subject: [PATCH 24/34] Added method header comment for vnEncodesResultTypeForHWIntrinsic Added & VNFOA_ArityMask when assigning to vnfOpAttribs[] --- src/coreclr/src/jit/hwintrinsic.cpp | 55 ++++++++++++++++++++++++----- src/coreclr/src/jit/valuenum.cpp | 29 ++++++--------- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 554a01d05a22e..35c5b5cd281b1 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -174,29 +174,66 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va } #ifdef FEATURE_HW_INTRINSICS +//------------------------------------------------------------------------ +// vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID): +// +// Arguments: +// hwIntrinsicID -- The id for the HW intrinsic +// +// Return Value: +// Returns true if this intrinsic requires value numbering to add an +// extra SimdType argument that encode the resuklting type +// If we don't do this overloaded versions can return the same VN +// leading to incorrect CSE subsitutions. +// /* static */ bool Compiler::vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID) { + // Currently only use the extra VNF_SimdType arg when we have a unary or + // binary HW Intrinsic node. + int numArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicID); - // Currently we only record sn extra Result Type arg when we have a unary or binary HW Intrinsic node - // - if ((numArgs < 1) || (numArgs > 2)) + // HW Instrinsic with -1 for numArgs have a varying number of args + // so we value number them, or add an extra argument. + if (numArgs == -1) { return false; } - // We iterate over all of the instructions in the HWIntrinsicInfo table - // if we find more than one then we return true, otherwise false + // We iterate over all of the different baseType's for this instrinsic in the HWIntrinsicInfo table + // insCount is set to 2 if we see two of more different instructions in the table. + // insCount is set to 1 if we see only one kind of instruction in the table. + // insCount is set to 0 if there are only invalid instructions in the table. // - unsigned insCount = 0; + unsigned insCount = 0; + instruction lastIns = INS_invalid; for (var_types baseType = TYP_BYTE; (baseType <= TYP_DOUBLE); baseType = (var_types)(baseType + 1)) { - if (HWIntrinsicInfo::lookupIns(hwIntrinsicID, baseType) != INS_invalid) + instruction curIns = HWIntrinsicInfo::lookupIns(hwIntrinsicID, baseType); + if (curIns != INS_invalid) { - insCount++; + if (curIns != lastIns) + { + insCount++; + if (insCount >= 2) + { + // We can early exit the loop now + break; + } + // remember the last valid instruction that we saw + lastIns = curIns; + } } } - return (insCount > 1); +#ifdef TARGET_XARCH + // If we see two (or more) different instructions we need the extra + return (insCount >= 2); +#elif defined(TARGET_ARM64) + // On ARM64 we use the same instruction and specify an insOpt arrangement + // so we return true even when we just see one instruction. + // + return (insCount >= 1); +#endif } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 5648bee7b49fc..cbad6a5710a5a 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5285,7 +5285,7 @@ void ValueNumStore::InitValueNumStoreStatics() { arity = 2; } - vnfOpAttribs[i] |= (arity << VNFOA_ArityShift); + vnfOpAttribs[i] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask); if (GenTree::OperIsCommutative(gtOper)) { @@ -5305,7 +5305,7 @@ void ValueNumStore::InitValueNumStoreStatics() if (sharedStatic) \ vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \ if (arity > 0) \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ + vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask); \ vnfNum++; #include "valuenumfuncs.h" @@ -5314,8 +5314,8 @@ void ValueNumStore::InitValueNumStoreStatics() assert(vnfNum == VNF_COUNT); #define ValueNumFuncSetArity(vnfNum, arity) \ - vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift) /* set the new arity value */ + vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ + vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask) /* set the new arity value */ #ifdef FEATURE_SIMD for (SIMDIntrinsicID id = SIMDIntrinsicID::SIMDIntrinsicNone; (id < SIMDIntrinsicID::SIMDIntrinsicInvalid); @@ -5334,23 +5334,19 @@ void ValueNumStore::InitValueNumStoreStatics() ValueNumFuncSetArity(func, newArity); } } - // SIMDIntrinsicInit has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns true + // SIMDIntrinsicInit has an entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns true // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_Init, 2); - // SIMDIntrinsicWidenHi has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns + // SIMDIntrinsicWidenHi has an entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns // true, so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_WidenHi, 2); - // SIMDIntrinsicWidenLo has an incorrect entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns + // SIMDIntrinsicWidenLo has an entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns // true, so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. ValueNumFuncSetArity(VNF_SIMD_WidenLo, 2); #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS -#ifdef TARGET_XARCH - // SSE2_Shuffle has a -1 entry for numArgs, but when used as a HWINSTRINSIC always has two operands. - ValueNumFuncSetArity(VNF_HWI_SSE2_Shuffle, 2); -#endif // TARGET_XARCH for (NamedIntrinsic id = (NamedIntrinsic)(NI_HW_INTRINSIC_START + 1); (id < NI_HW_INTRINSIC_END); id = (NamedIntrinsic)(id + 1)) @@ -8468,7 +8464,7 @@ void Compiler::fgValueNumberSimd(GenTree* tree) if (verbose) { printf(" simdTypeVN is "); - vnPrint(simdTypeVN, 2); + vnPrint(simdTypeVN, 1); printf("\n"); } #endif @@ -8529,12 +8525,9 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); } - int lookupNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode); + int lookupNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode->gtHWIntrinsicId); VNFunc func = GetVNFuncForNode(tree); unsigned fixedArity = vnStore->VNFuncArity(func); - // - // when the numArgs column in "hwinstrinsiclistxarch.h" is -1 - // it gets translated into a value of 0 ValueNumPair excSetPair = ValueNumStore::VNPForEmptyExcSet(); ValueNumPair normalPair; @@ -8547,7 +8540,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func); assert(lookupNumArgs == 0); } - else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (fixedArity == 0)) + else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (lookupNumArgs == -1)) { // We have a HWINTRINSIC node in the GT_LIST form with 3 or more args // Or the numArgs was specified as -1 in the numArgs column in "hwinstrinsiclistxarch.h" @@ -8573,7 +8566,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) if (verbose) { printf(" simdTypeVN is "); - vnPrint(simdTypeVN, 2); + vnPrint(simdTypeVN, 1); printf("\n"); } #endif From a0e6a8c41ff93fa98537ec93a7756008e5229f77 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 21 Feb 2020 10:31:30 -0800 Subject: [PATCH 25/34] Codereview feedback and some more comments --- src/coreclr/src/jit/compiler.h | 3 ++ src/coreclr/src/jit/hwintrinsic.cpp | 51 ++++++++++++++--------------- src/coreclr/src/jit/valuenum.cpp | 4 +++ src/coreclr/src/jit/valuenum.h | 2 ++ src/coreclr/src/jit/valuenumfuncs.h | 8 +++-- 5 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index f59a8a9ff2793..02f4c7c70f0c4 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -6176,6 +6176,9 @@ class Compiler treeStmtLst* csdTreeList; // list of matching tree nodes: head treeStmtLst* csdTreeLast; // list of matching tree nodes: tail + // ToDo: This can be removed when gtGetStructHandleIfPresent stops guessing + // and GT_IND nodes always have valid struct handle. + // CORINFO_CLASS_HANDLE csdStructHnd; // The class handle, currently needed to create a SIMD LclVar in PerformCSE bool csdStructHndMismatch; diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 35c5b5cd281b1..131aed8316656 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -182,58 +182,57 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va // // Return Value: // Returns true if this intrinsic requires value numbering to add an -// extra SimdType argument that encode the resuklting type +// extra SimdType argument that encodes the resulting type. // If we don't do this overloaded versions can return the same VN // leading to incorrect CSE subsitutions. // /* static */ bool Compiler::vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID) { - // Currently only use the extra VNF_SimdType arg when we have a unary or - // binary HW Intrinsic node. - int numArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicID); - // HW Instrinsic with -1 for numArgs have a varying number of args - // so we value number them, or add an extra argument. + // HW Instrinsic's with -1 for numArgs have a varying number of args, so we currently + // give themm a unque value number them, and don't add an extra argument. + // if (numArgs == -1) { return false; } // We iterate over all of the different baseType's for this instrinsic in the HWIntrinsicInfo table - // insCount is set to 2 if we see two of more different instructions in the table. - // insCount is set to 1 if we see only one kind of instruction in the table. - // insCount is set to 0 if there are only invalid instructions in the table. + // We set diffInsCount to the number of instructions that can execute differently. // - unsigned insCount = 0; - instruction lastIns = INS_invalid; + unsigned diffInsCount = 0; +#ifdef TARGET_XARCH + instruction lastIns = INS_invalid; +#endif for (var_types baseType = TYP_BYTE; (baseType <= TYP_DOUBLE); baseType = (var_types)(baseType + 1)) { instruction curIns = HWIntrinsicInfo::lookupIns(hwIntrinsicID, baseType); if (curIns != INS_invalid) { +#ifdef TARGET_XARCH if (curIns != lastIns) { - insCount++; - if (insCount >= 2) - { - // We can early exit the loop now - break; - } + diffInsCount++; // remember the last valid instruction that we saw lastIns = curIns; } +#elif defined(TARGET_ARM64) + // On ARM64 we use the same instruction and specify an insOpt arrangement + // so we always consider the instruction operation to be different + // + diffInsCount++; +#endif // TARGET + if (diffInsCount >= 2) + { + // We can early exit the loop now + break; + } } } -#ifdef TARGET_XARCH - // If we see two (or more) different instructions we need the extra - return (insCount >= 2); -#elif defined(TARGET_ARM64) - // On ARM64 we use the same instruction and specify an insOpt arrangement - // so we return true even when we just see one instruction. - // - return (insCount >= 1); -#endif + + // If we see two (or more) different instructions we need the extra VNF_SimdType arg + return (diffInsCount >= 2); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index cbad6a5710a5a..94f3ea4d44f9f 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5073,9 +5073,11 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) case VNF_ValWithExc: vnDumpValWithExc(comp, &funcApp); break; +#ifdef FEATURE_SIMD case VNF_SimdType: vnDumpSimdType(comp, &funcApp); break; +#endif // FEATURE_SIMD default: printf("%s(", VNFuncName(funcApp.m_func)); @@ -5229,6 +5231,7 @@ void ValueNumStore::vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore) printf("]"); } +#ifdef FEATURE_SIMD void ValueNumStore::vnDumpSimdType(Compiler* comp, VNFuncApp* simdType) { assert(simdType->m_func == VNF_SimdType); // Preconditions. @@ -5240,6 +5243,7 @@ void ValueNumStore::vnDumpSimdType(Compiler* comp, VNFuncApp* simdType) printf("%s(simd%d, %s)", VNFuncName(simdType->m_func), simdSize, varTypeName(baseType)); } +#endif // FEATURE_SIMD #endif // DEBUG diff --git a/src/coreclr/src/jit/valuenum.h b/src/coreclr/src/jit/valuenum.h index 9b194ae209ab8..d4167a00c0747 100644 --- a/src/coreclr/src/jit/valuenum.h +++ b/src/coreclr/src/jit/valuenum.h @@ -879,9 +879,11 @@ class ValueNumStore // Prints a representation of the set of exceptions on standard out. void vnDumpExcSeq(Compiler* comp, VNFuncApp* excSeq, bool isHead); +#ifdef FEATURE_SIMD // Requires "simdType" to be a VNF_SimdType VNFuncApp. // Prints a representation (comma-separated list of field names) on standard out. void vnDumpSimdType(Compiler* comp, VNFuncApp* simdType); +#endif // FEATURE_SIMD // Returns the string name of "vnf". static const char* VNFuncName(VNFunc vnf); diff --git a/src/coreclr/src/jit/valuenumfuncs.h b/src/coreclr/src/jit/valuenumfuncs.h index 00040c75ef02d..00fa69083a89c 100644 --- a/src/coreclr/src/jit/valuenumfuncs.h +++ b/src/coreclr/src/jit/valuenumfuncs.h @@ -142,14 +142,12 @@ ValueNumFuncDef(BoxNullable, 3, false, false, false) ValueNumFuncDef(StrCns, 2, false, true, false) ValueNumFuncDef(Unbox, 2, false, true, false) -ValueNumFuncDef(SimdType, 2, false, false, false) // A value number function to compose a SIMD type - ValueNumFuncDef(LT_UN, 2, false, false, false) // unsigned or unordered comparisons ValueNumFuncDef(LE_UN, 2, false, false, false) ValueNumFuncDef(GE_UN, 2, false, false, false) ValueNumFuncDef(GT_UN, 2, false, false, false) -// currently we won't constant fold the next six +// currently we don't constant fold the next six ValueNumFuncDef(ADD_OVF, 2, true, false, false) // overflow checking operations ValueNumFuncDef(SUB_OVF, 2, false, false, false) @@ -159,6 +157,10 @@ ValueNumFuncDef(ADD_UN_OVF, 2, true, false, false) // unsigned overflow checkin ValueNumFuncDef(SUB_UN_OVF, 2, false, false, false) ValueNumFuncDef(MUL_UN_OVF, 2, true, false, false) +#ifdef FEATURE_SIMD +ValueNumFuncDef(SimdType, 2, false, false, false) // A value number function to compose a SIMD type +#endif + #define SIMD_INTRINSIC(m, i, id, n, r, argCount, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \ ValueNumFuncDef(SIMD_##id, argCount, false, false, false) // All of the SIMD intrinsic (Consider isCommutativeSIMDIntrinsic) #include "simdintrinsiclist.h" From cbe5538352ee8269b15bdf24af436bb6857974b3 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 21 Feb 2020 10:33:47 -0800 Subject: [PATCH 26/34] fix typo --- src/coreclr/src/jit/hwintrinsic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 131aed8316656..e85c93589150a 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -191,7 +191,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va int numArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicID); // HW Instrinsic's with -1 for numArgs have a varying number of args, so we currently - // give themm a unque value number them, and don't add an extra argument. + // give themm a unique value number them, and don't add an extra argument. // if (numArgs == -1) { From 3e927bf6fb401aa659b7a50b1ef426dcd4ef9f9f Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 21 Feb 2020 13:55:27 -0800 Subject: [PATCH 27/34] Moved the code that sets the arg count for the three SIMD intrinsics --- src/coreclr/src/jit/valuenum.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 94f3ea4d44f9f..214743f2c6c93 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5322,6 +5322,16 @@ void ValueNumStore::InitValueNumStoreStatics() vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask) /* set the new arity value */ #ifdef FEATURE_SIMD + + // SIMDIntrinsicInit has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_Init, 1); + // SIMDIntrinsicWidenHi has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_WidenHi, 1); + // SIMDIntrinsicWidenLo has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_WidenLo, 1); + + // Some SIMD intrinsic nodes have an extra VNF_SimdType arg + // for (SIMDIntrinsicID id = SIMDIntrinsicID::SIMDIntrinsicNone; (id < SIMDIntrinsicID::SIMDIntrinsicInvalid); id = (SIMDIntrinsicID)(id + 1)) { @@ -5338,15 +5348,6 @@ void ValueNumStore::InitValueNumStoreStatics() ValueNumFuncSetArity(func, newArity); } } - // SIMDIntrinsicInit has an entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns true - // so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. - ValueNumFuncSetArity(VNF_SIMD_Init, 2); - // SIMDIntrinsicWidenHi has an entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns - // true, so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. - ValueNumFuncSetArity(VNF_SIMD_WidenHi, 2); - // SIMDIntrinsicWidenLo has an entry of 2 for numArgs, also vnEncodesResultTypeForSIMDIntrinsic returns - // true, so we have to fix the Arity here, so that it has one normal arg and one VNF_SimdType arg. - ValueNumFuncSetArity(VNF_SIMD_WidenLo, 2); #endif // FEATURE_SIMD @@ -5368,6 +5369,7 @@ void ValueNumStore::InitValueNumStoreStatics() ValueNumFuncSetArity(func, newArity); } } + #endif // FEATURE_HW_INTRINSICS #undef ValueNumFuncSetArity From b2b89865d2464a8edd90a34e9f234aca38045638 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Fri, 21 Feb 2020 17:07:50 -0800 Subject: [PATCH 28/34] clang-format --- src/coreclr/src/jit/valuenum.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 214743f2c6c93..22f3643ca3589 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -5323,11 +5323,11 @@ void ValueNumStore::InitValueNumStoreStatics() #ifdef FEATURE_SIMD - // SIMDIntrinsicInit has an entry of 2 for numArgs, but it only has one normal arg + // SIMDIntrinsicInit has an entry of 2 for numArgs, but it only has one normal arg ValueNumFuncSetArity(VNF_SIMD_Init, 1); // SIMDIntrinsicWidenHi has an entry of 2 for numArgs, but it only has one normal arg ValueNumFuncSetArity(VNF_SIMD_WidenHi, 1); - // SIMDIntrinsicWidenLo has an entry of 2 for numArgs, but it only has one normal arg + // SIMDIntrinsicWidenLo has an entry of 2 for numArgs, but it only has one normal arg ValueNumFuncSetArity(VNF_SIMD_WidenLo, 1); // Some SIMD intrinsic nodes have an extra VNF_SimdType arg From 7bd9303d9d7f4a5791e539b20f87cb222c75fa7d Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Mon, 24 Feb 2020 17:13:09 -0800 Subject: [PATCH 29/34] Adjust CSE for SIMD types that are live across a call --- src/coreclr/src/jit/optcse.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 6cecea4502fc5..1428b1058e095 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -2461,6 +2461,28 @@ class CSE_Heuristic extra_yes_cost *= 2; // full cost if we are being Conservative } } + +#ifdef FEATURE_SIMD + // SIMD types will cause a SIMD register to be spilled/restored + // + if (varTypeIsSIMD(candidate->Expr()->TypeGet())) + { + int spillSimdRegInProlog = 1; + + // If we have a SIMD32 that is live across a call we have even higher spill costs + // + if (candidate->Expr()->TypeGet() == TYP_SIMD32) + { + spillSimdRegInProlog++; // we likely need to spill an extra register to save the upper half + + // Increase the cse_use_cost since at use sites we have to generate code to spill/restore + // the upper half of the YMM register + cse_use_cost += 2; + } + + extra_yes_cost = (BB_UNITY_WEIGHT * spillSimdRegInProlog) * 3; + } +#endif // FEATURE_SIMD } // estimate the cost from lost codesize reduction if we do not perform the CSE From 169c24eeafb072a79ffbdb2d1d36960ebc0c17b4 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Mon, 24 Feb 2020 17:20:26 -0800 Subject: [PATCH 30/34] Proposed fix for #32085 --- src/coreclr/src/jit/emitxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index 22b800ecee4fa..b175b1415dfa6 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -2835,7 +2835,7 @@ void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, // Either not generating relocatable code, or addr must be an icon handle, or the // constant is zero (which we won't generate a relocation for). - assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0)); + assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0) || memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp)) { From 8d0ce825318032656d993a6b517a02f8cef0e16b Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 25 Feb 2020 11:10:33 -0800 Subject: [PATCH 31/34] Revert "Proposed fix for #32085" This reverts commit 169c24eeafb072a79ffbdb2d1d36960ebc0c17b4. --- src/coreclr/src/jit/emitxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index b175b1415dfa6..22b800ecee4fa 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -2835,7 +2835,7 @@ void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, // Either not generating relocatable code, or addr must be an icon handle, or the // constant is zero (which we won't generate a relocation for). - assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0) || memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); + assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0)); if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp)) { From 6ae894a5180f9e3bd41ca94d8fead98246ca65bb Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 25 Feb 2020 11:58:52 -0800 Subject: [PATCH 32/34] Added better comments for optcse SIMD caller saved register heuristics --- src/coreclr/src/jit/optcse.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 1428b1058e095..efb4ed3433161 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -2463,20 +2463,28 @@ class CSE_Heuristic } #ifdef FEATURE_SIMD - // SIMD types will cause a SIMD register to be spilled/restored + // SIMD types may cause a SIMD register to be spilled/restored in the prolog and epilog. // if (varTypeIsSIMD(candidate->Expr()->TypeGet())) { + // We don't have complete information about when these extra spilled/restore will be needed. + // Instead we are conservative and assume that each SIMD CSE that is live across a call + // will cause an additional spill/restore in the prolog and epilog. + // int spillSimdRegInProlog = 1; // If we have a SIMD32 that is live across a call we have even higher spill costs // if (candidate->Expr()->TypeGet() == TYP_SIMD32) { - spillSimdRegInProlog++; // we likely need to spill an extra register to save the upper half + // Additionally for a simd32 CSE candidate we assume that and second spilled/restore will be needed. + // (to hold the upper half of the simd32 register that isn't preserved across the call) + // + spillSimdRegInProlog++; - // Increase the cse_use_cost since at use sites we have to generate code to spill/restore - // the upper half of the YMM register + // We also increase the CSE use cost here to because we may have to generate instructions + // to move the upper half of the simd32 before and after a call. + // cse_use_cost += 2; } From b4eb40664cea887ab6aad73c430848048c5d1e3a Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 25 Feb 2020 17:55:36 -0800 Subject: [PATCH 33/34] Added CONFIG_INTEGER: JitDisableSimdVN, Default 0, ValueNumbering of SIMD nodes and HW Intrinsic nodes enabled If 1, then disable ValueNumbering of SIMD nodes If 2, then disable ValueNumbering of HW Intrinsic nodes If 3, disable both SIMD and HW Intrinsic nodes --- src/coreclr/src/jit/jitconfigvalues.h | 6 +++++ src/coreclr/src/jit/valuenum.cpp | 35 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/src/coreclr/src/jit/jitconfigvalues.h b/src/coreclr/src/jit/jitconfigvalues.h index 688008f69717d..9322ec116a872 100644 --- a/src/coreclr/src/jit/jitconfigvalues.h +++ b/src/coreclr/src/jit/jitconfigvalues.h @@ -136,6 +136,12 @@ CONFIG_INTEGER(ShouldInjectFault, W("InjectFault"), 0) CONFIG_INTEGER(StressCOMCall, W("StressCOMCall"), 0) CONFIG_INTEGER(TailcallStress, W("TailcallStress"), 0) CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, display each tree before/after morphing +CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNumbering of SIMD nodes and HW Intrinsic + // nodes enabled + // If 1, then disable ValueNumbering of SIMD nodes + // If 2, then disable ValueNumbering of HW Intrinsic nodes + // If 3, disable both SIMD and HW Intrinsic nodes + CONFIG_METHODSET(JitBreak, W("JitBreak")) // Stops in the importer when compiling a specified method CONFIG_METHODSET(JitDebugBreak, W("JitDebugBreak")) CONFIG_METHODSET(JitDisasm, W("JitDisasm")) // Dumps disassembly for specified method diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 22f3643ca3589..5d966a9120eee 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -6797,6 +6797,41 @@ void Compiler::fgValueNumberTree(GenTree* tree) { genTreeOps oper = tree->OperGet(); +#ifdef FEATURE_SIMD + if ((JitConfig.JitDisableSimdVN() & 1) == 1) + { + // This Jit Config forces the previous behavior of value numbering for SIMD nodes + if (oper == GT_SIMD) + { + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); + return; + } + } +#endif + +#ifdef FEATURE_HW_INTRINSICS + if ((JitConfig.JitDisableSimdVN() & 2) == 2) + { + // This Jit Config forces the previous behavior of value numbering for HW Intrinsic nodes + if (oper == GT_HWINTRINSIC) + { + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); + + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); + + // For safety/correctness we must mutate the global heap valuenumber + // for any HW intrinsic that performs a memory store operation + if (hwIntrinsicNode->OperIsMemoryStore()) + { + fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); + } + + return; + } + } +#endif // FEATURE_HW_INTRINSICS + var_types typ = tree->TypeGet(); if (GenTree::OperIsConst(oper)) { From 90bf7b3aad7346572918e352263bd990d16be1c4 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 25 Feb 2020 18:57:36 -0800 Subject: [PATCH 34/34] Moved JitDisableSimdVN from DEBUG to RETAIL --- src/coreclr/src/jit/jitconfigvalues.h | 13 ++++++++----- src/coreclr/src/jit/valuenum.cpp | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/jit/jitconfigvalues.h b/src/coreclr/src/jit/jitconfigvalues.h index 9322ec116a872..1f91cfd1fbf29 100644 --- a/src/coreclr/src/jit/jitconfigvalues.h +++ b/src/coreclr/src/jit/jitconfigvalues.h @@ -136,11 +136,6 @@ CONFIG_INTEGER(ShouldInjectFault, W("InjectFault"), 0) CONFIG_INTEGER(StressCOMCall, W("StressCOMCall"), 0) CONFIG_INTEGER(TailcallStress, W("TailcallStress"), 0) CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, display each tree before/after morphing -CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNumbering of SIMD nodes and HW Intrinsic - // nodes enabled - // If 1, then disable ValueNumbering of SIMD nodes - // If 2, then disable ValueNumbering of HW Intrinsic nodes - // If 3, disable both SIMD and HW Intrinsic nodes CONFIG_METHODSET(JitBreak, W("JitBreak")) // Stops in the importer when compiling a specified method CONFIG_METHODSET(JitDebugBreak, W("JitDebugBreak")) @@ -279,6 +274,14 @@ CONFIG_INTEGER(EnableArm64Sve, W("EnableArm64Sve"), 1) // clang-format on +#ifdef FEATURE_SIMD +CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNumbering of SIMD nodes and HW Intrinsic + // nodes enabled + // If 1, then disable ValueNumbering of SIMD nodes + // If 2, then disable ValueNumbering of HW Intrinsic nodes + // If 3, disable both SIMD and HW Intrinsic nodes +#endif // FEATURE_SIMD + /// /// JIT /// diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 5d966a9120eee..1e9c6dc7ddaae 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -6807,7 +6807,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) return; } } -#endif +#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS if ((JitConfig.JitDisableSimdVN() & 2) == 2)