Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a couple issues with Vector128.Get/WithElement #52985

Merged
13 commits merged into from
May 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8228,6 +8228,23 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

bool IsBaselineSimdIsaSupported()
{
#ifdef FEATURE_SIMD
#if defined(TARGET_XARCH)
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
#elif defined(TARGET_ARM64)
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

return compOpportunisticallyDependsOn(minimumIsa) && JitConfig.EnableHWIntrinsic();
#else
return false;
#endif
}

// Get highest available level for SIMD codegen
SIMDLevel getSIMDSupportLevel()
{
Expand Down
8 changes: 7 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6507,7 +6507,13 @@ GenTree* Compiler::gtNewLclvNode(unsigned lnum, var_types type DEBUGARG(IL_OFFSE
// should be able to remove this exception and handle the assignment mismatch in
// Lowering.
LclVarDsc* varDsc = lvaGetDesc(lnum);
assert((type == varDsc->lvType) ||

bool simd12ToSimd16Widening = false;
#if FEATURE_SIMD
// We can additionally have a SIMD12 that was widened to a SIMD16, generally as part of lowering
simd12ToSimd16Widening = (type == TYP_SIMD16) && (varDsc->lvType == TYP_SIMD12);
#endif
assert((type == varDsc->lvType) || simd12ToSimd16Widening ||
(lvaIsImplicitByRefLocal(lnum) && fgGlobalMorph && (varDsc->lvType == TYP_BYREF)) ||
((varDsc->lvType == TYP_STRUCT) && (genTypeSize(type) == varDsc->lvExactSize)));
}
Expand Down
7 changes: 6 additions & 1 deletion src/coreclr/jit/lir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,11 @@ void LIR::Use::ReplaceWith(Compiler* compiler, GenTree* replacement)
// lclNum - The local to use for temporary storage. If BAD_VAR_NUM (the
// default) is provided, this method will create and use a new
// local var.
// assign - On return, if non null, contains the created assignment node
//
// Return Value: The number of the local var used for temporary storage.
//
unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum)
unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum, GenTree** assign)
{
assert(IsInitialized());
assert(compiler != nullptr);
Expand Down Expand Up @@ -277,6 +278,10 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum)
JITDUMP("ReplaceWithLclVar created store :\n");
DISPNODE(store);

if (assign != nullptr)
{
*assign = store;
}
return lclNum;
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lir.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class LIR final
bool IsDummyUse() const;

void ReplaceWith(Compiler* compiler, GenTree* replacement);
unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM);
unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM, GenTree** assign = nullptr);
};

//------------------------------------------------------------------------
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,18 @@ class Lowering final : public Phase
GenTree* oldUseNode = use.Def();
if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM))
{
use.ReplaceWithLclVar(comp, tempNum);
GenTree* assign;
use.ReplaceWithLclVar(comp, tempNum, &assign);

GenTree* newUseNode = use.Def();
ContainCheckRange(oldUseNode->gtNext, newUseNode);

// We need to lower the LclVar and assignment since there may be certain
// types or scenarios, such as TYP_SIMD12, that need special handling
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

LowerNode(assign);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think it is a correct approach, this function is used during different phases, for example, during long decomposition, so calling LowerNode during it is incorrect.
ReplaceWithLclVar creates such IR:

currentNode
storeCurrentNodeToNewNode
NewNodeUse

and lowering works node by node, so after we are done with currentNode we should go to the next and lower it, why don't we?

See, for example, LowerSwitch

ReplaceWithLclVar(use);

we call ReplaceWithLclVar and it creates these new nodes, but then we return node->gtNext that is the created store:
GenTree* next = node->gtNext;
// Get rid of the GT_SWITCH(temp).
switchBBRange.Remove(node->AsOp()->gtOp1);
switchBBRange.Remove(node);
return next;

and it does their lowering.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The one used in long decomposition is the instance method exposed on LIR::Use which is distinct from this method that is explicit to lowering and which itself calls into the LIR::Use method

Copy link
Member Author

@tannergooding tannergooding May 24, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and lowering works node by node, so after we are done with currentNode we should go to the next and lower it, why don't we?

We should already have lowered all the previous nodes, we aren't removing the "current" node, and we aren't inserting a node after the "current" node, so the "next" node is already correct. We simply need to ensure new nodes created and inserted before this node are also lowered.

This is how the existing HWIntrinsic and SimdIntrinsics lowering has been handled and so the same technique is used here as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should already have lowered all the previous nodes, we aren't removing the "current" node,

Agree

and we aren't inserting a node after the "current" node, so the "next" node is already correct.

Don't understand, ReplaceWithLclVar creates new nodes and inserts them after the "current" node, what do you mean?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't understand, ReplaceWithLclVar creates new nodes and inserts them after the "current" node, what do you mean?

ReplaceWithLclVar creates new nodes and inserts them after the node it replaces. In this case, we are replacing an operand of the "current" node, which means it precedes the current node in the list and will already have been lowered.

LowerNode(newUseNode);

return newUseNode->AsLclVar();
}
return oldUseNode->AsLclVar();
Expand Down
91 changes: 65 additions & 26 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3007,15 +3007,16 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)

NamedIntrinsic resIntrinsic = NI_Illegal;

idx = comp->gtNewIconNode(imm8);
BlockRange().InsertBefore(node, idx);

switch (simdBaseType)
{
case TYP_LONG:
case TYP_ULONG:
{
op2 = idx;
idx = comp->gtNewIconNode(imm8);
BlockRange().InsertBefore(node, idx);

op1 = comp->gtNewArgList(op1, op3, idx);
op2 = nullptr;
resIntrinsic = NI_SSE41_X64_Insert;
break;
}
Expand All @@ -3033,7 +3034,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)

tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op3, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_FLOAT,
16);
BlockRange().InsertBefore(idx, tmp1);
BlockRange().InsertBefore(node, tmp1);
LowerNode(tmp1);

if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
Expand Down Expand Up @@ -3088,26 +3089,36 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
ssize_t controlBits1;
ssize_t controlBits2;

// The comments beside the control bits below are listed using the managed API operands
//
// In practice, for the first step the value being inserted (op3) is in tmp1
// while the other elements of the result (op1) are in tmp2. The result ends
// up containing the value being inserted and its immediate neighbor.
//
// The second step takes that result (which is in op1) plus the other elements
// from op2 (a clone of op1/tmp2 from the previous step) and combines them to
// create the final result.

switch (imm8)
{
case 1:
{
controlBits1 = 0;
controlBits2 = 226;
controlBits1 = 0; // 00 00 00 00; op1 = { X = op3, Y = op3, Z = op1.X, W = op1.X }
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
controlBits2 = 226; // 11 10 00 10; node = { X = op1.X, Y = op3, Z = op1.Z, W = op1.W }
break;
}

case 2:
{
controlBits1 = 48;
controlBits2 = 132;
controlBits1 = 15; // 00 00 11 11; op1 = { X = op1.W, Y = op1.W, Z = op3, W = op3 }
controlBits2 = 36; // 00 10 01 00; node = { X = op1.X, Y = op1.Y, Z = op3, W = op1.W }
break;
}

case 3:
{
controlBits1 = 32;
controlBits2 = 36;
controlBits1 = 10; // 00 00 10 10; op1 = { X = op1.Z, Y = op1.Z, Z = op3, W = op3 }
controlBits2 = 132; // 10 00 01 00; node = { X = op1.X, Y = op1.Y, Z = op1.Z, W = op3 }
break;
}

Expand All @@ -3118,19 +3129,24 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
idx = comp->gtNewIconNode(controlBits1);
BlockRange().InsertAfter(tmp2, idx);

if (imm8 == 1)
if (imm8 != 1)
{
std::swap(tmp1, tmp2);
}

op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp2, tmp1, idx, NI_SSE_Shuffle,
op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, idx, NI_SSE_Shuffle,
CORINFO_TYPE_FLOAT, 16);
BlockRange().InsertAfter(idx, op1);
LowerNode(op1);

idx = comp->gtNewIconNode(controlBits2);
BlockRange().InsertAfter(op1, idx);

if (imm8 != 1)
{
std::swap(op1, op2);
}

op1 = comp->gtNewArgList(op1, op2, idx);
op2 = nullptr;
resIntrinsic = NI_SSE_Shuffle;
Expand All @@ -3139,8 +3155,8 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
}
else
{
op3 = tmp1;
idx->AsIntCon()->SetIconValue(imm8 * 16);
imm8 = imm8 * 16;
op3 = tmp1;
FALLTHROUGH;
}
}
Expand All @@ -3150,6 +3166,9 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
case TYP_INT:
case TYP_UINT:
{
idx = comp->gtNewIconNode(imm8);
BlockRange().InsertBefore(node, idx);

op1 = comp->gtNewArgList(op1, op3, idx);
op2 = nullptr;
resIntrinsic = NI_SSE41_Insert;
Expand All @@ -3159,6 +3178,9 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
case TYP_SHORT:
case TYP_USHORT:
{
idx = comp->gtNewIconNode(imm8);
BlockRange().InsertBefore(node, idx);

op1 = comp->gtNewArgList(op1, op3, idx);
op2 = nullptr;
resIntrinsic = NI_SSE2_Insert;
Expand All @@ -3178,7 +3200,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)

tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op3, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_DOUBLE,
16);
BlockRange().InsertBefore(idx, tmp1);
BlockRange().InsertBefore(node, tmp1);
LowerNode(tmp1);

op2 = tmp1;
Expand Down Expand Up @@ -5474,8 +5496,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge

default:
{
// These intrinsics only expect 16 or 32-byte nodes for containment
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
{
// These intrinsics only expect 16 or 32-byte nodes for containment
break;
}

if (!comp->canUseVexEncoding())
{
Expand Down Expand Up @@ -5535,9 +5560,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
case NI_AVX2_ShuffleHigh:
case NI_AVX2_ShuffleLow:
{
// These intrinsics only expect 16 or 32-byte nodes for containment
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
assert(supportsSIMDScalarLoads == false);
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
{
// These intrinsics only expect 16 or 32-byte nodes for containment
break;
}
assert(!supportsSIMDScalarLoads);

supportsAlignedSIMDLoads = !comp->canUseVexEncoding() || !comp->opts.MinOpts();
supportsUnalignedSIMDLoads = comp->canUseVexEncoding();
Expand All @@ -5553,7 +5581,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
if (containingNode->GetSimdBaseType() == TYP_FLOAT)
{
assert(containingIntrinsicId == NI_SSE41_Insert);
assert(genTypeSize(node->TypeGet()) == 16);

if (genTypeSize(node->TypeGet()) != 16)
{
// These intrinsics only expect 16-byte nodes for containment
break;
}

// Sse41.Insert(V128<float>, V128<float>, byte) is a bit special
// in that it has different behavior depending on whether the
Expand Down Expand Up @@ -5620,8 +5653,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge

case NI_AVX_CompareScalar:
{
// These intrinsics only expect 16 or 32-byte nodes for containment
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
{
// These intrinsics only expect 16 or 32-byte nodes for containment
break;
}

assert(supportsAlignedSIMDLoads == false);
assert(supportsUnalignedSIMDLoads == false);
Expand Down Expand Up @@ -5700,8 +5736,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge

default:
{
// These intrinsics only expect 16 or 32-byte nodes for containment
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
{
// These intrinsics only expect 16 or 32-byte nodes for containment
break;
}

supportsSIMDScalarLoads = true;
supportsGeneralLoads = supportsSIMDScalarLoads;
Expand Down
12 changes: 8 additions & 4 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6009,11 +6009,14 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
// if this field belongs to simd struct, translate it to simd intrinsic.
if (mac == nullptr)
{
GenTree* newTree = fgMorphFieldToSimdGetElement(tree);
if (newTree != tree)
if (IsBaselineSimdIsaSupported())
{
newTree = fgMorphSmpOp(newTree);
return newTree;
GenTree* newTree = fgMorphFieldToSimdGetElement(tree);
if (newTree != tree)
{
newTree = fgMorphSmpOp(newTree);
return newTree;
}
}
}
else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
Expand Down Expand Up @@ -12238,6 +12241,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
op2 = tree->AsOp()->gtOp2;

#ifdef FEATURE_SIMD
if (IsBaselineSimdIsaSupported())
{
// We should check whether op2 should be assigned to a SIMD field or not.
// If it is, we should tranlate the tree to simd intrinsic.
Expand Down
10 changes: 1 addition & 9 deletions src/coreclr/jit/simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1901,15 +1901,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode,
return nullptr;
}

#if defined(TARGET_XARCH)
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
#elif defined(TARGET_ARM64)
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

if (!compOpportunisticallyDependsOn(minimumIsa) || !JitConfig.EnableHWIntrinsic())
if (!IsBaselineSimdIsaSupported())
{
// The user disabled support for the baseline ISA so
// don't emit any SIMD intrinsics as they all require
Expand Down
10 changes: 1 addition & 9 deletions src/coreclr/jit/simdashwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic,
return nullptr;
}

#if defined(TARGET_XARCH)
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
#elif defined(TARGET_ARM64)
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

if (!compOpportunisticallyDependsOn(minimumIsa) || !JitConfig.EnableHWIntrinsic())
if (!IsBaselineSimdIsaSupported())
{
// The user disabled support for the baseline ISA so
// don't emit any SIMD intrinsics as they all require
Expand Down
2 changes: 1 addition & 1 deletion src/tests/JIT/Directed/StructABI/structreturn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1332,7 +1332,7 @@ private static void TestReturnViaThrowing<T>() where T : struct
T value = vector[Vector<T>.Count];
System.Diagnostics.Debug.Assert(false);
}
catch (IndexOutOfRangeException)
catch (ArgumentOutOfRangeException)
{
return;
}
Expand Down
Loading