Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add xarch andn #64350

Merged
merged 5 commits into from
Feb 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,10 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr
case IF_RWR:
case IF_RRD:
case IF_RRW:
case IF_RWR_RRD_RRD:
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
case IF_RWR_RRD_MRD:
case IF_RWR_RRD_ARD:
case IF_RWR_RRD_SRD:
break;
default:
return false;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BA

// BMI1
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
Expand Down
49 changes: 1 addition & 48 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
case GT_AND:
case GT_OR:
case GT_XOR:
return LowerBinaryArithmeticCommon(node->AsOp());
return LowerBinaryArithmetic(node->AsOp());

case GT_MUL:
case GT_MULHI:
Expand Down Expand Up @@ -5122,53 +5122,6 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
return nullptr;
}

//------------------------------------------------------------------------
// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node.
//
// Recognizes opportunities for using target-independent "combined" nodes
// (currently AND_NOT on ARMArch). Performs containment checks.
//
// Arguments:
//    binOp - the arithmetic node to lower
//
// Returns:
//    The next node to lower.
//
GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* binOp)
{
    // TODO-CQ-XArch: support BMI1 "andn" in codegen and condition
    // this logic on the support for the instruction set on XArch.
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef TARGET_ARMARCH
    if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND))
    {
        GenTree* op1 = binOp->gtGetOp1();
        GenTree* op2 = binOp->gtGetOp2();

        GenTree* invertedOperand = nullptr;
        GenTree* otherOperand    = nullptr;

        // Look for AND(x, NOT(y)) in either operand order.
        if (op1->OperIs(GT_NOT))
        {
            invertedOperand = op1;
            otherOperand    = op2;
        }
        else if (op2->OperIs(GT_NOT))
        {
            invertedOperand = op2;
            otherOperand    = op1;
        }

        if (invertedOperand != nullptr)
        {
            // Fold the NOT into the AND, producing a single AND_NOT node,
            // and drop the now-unused NOT from the LIR range.
            binOp->gtOp1 = otherOperand;
            binOp->gtOp2 = invertedOperand->AsUnOp()->gtGetOp1();
            binOp->ChangeOper(GT_AND_NOT);
            BlockRange().Remove(invertedOperand);
        }
    }
#endif

    return LowerBinaryArithmetic(binOp);
}

//------------------------------------------------------------------------
// LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
//
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,6 @@ class Lowering final : public Phase
void LowerStoreIndir(GenTreeStoreInd* node);
GenTree* LowerAdd(GenTreeOp* node);
GenTree* LowerMul(GenTreeOp* mul);
GenTree* LowerBinaryArithmeticCommon(GenTreeOp* binOp);
GenTree* LowerBinaryArithmetic(GenTreeOp* binOp);
bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
GenTree* LowerConstIntDivOrMod(GenTree* node);
Expand Down Expand Up @@ -344,7 +343,8 @@ class Lowering final : public Phase
void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* binOp);
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode);
#elif defined(TARGET_ARM64)
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,30 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
//
GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
{
if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND))
Wraith2 marked this conversation as resolved.
Show resolved Hide resolved
{
GenTree* opNode = nullptr;
GenTree* notNode = nullptr;
if (binOp->gtGetOp1()->OperIs(GT_NOT))
{
notNode = binOp->gtGetOp1();
opNode = binOp->gtGetOp2();
}
else if (binOp->gtGetOp2()->OperIs(GT_NOT))
{
notNode = binOp->gtGetOp2();
opNode = binOp->gtGetOp1();
}

if (notNode != nullptr)
{
binOp->gtOp1 = opNode;
binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1();
binOp->ChangeOper(GT_AND_NOT);
BlockRange().Remove(notNode);
}
}

ContainCheckBinary(binOp);

return binOp->gtNext;
Expand Down
99 changes: 95 additions & 4 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
//------------------------------------------------------------------------
// LowerBinaryArithmetic: lowers the given binary arithmetic node.
//
// Recognizes opportunities for using target-independent "combined" nodes.
// Performs containment checks.
//
// Arguments:
// node - the arithmetic node to lower
//
Expand All @@ -173,10 +176,16 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
#ifdef FEATURE_HW_INTRINSICS
if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp))
{
GenTree* blsrNode = TryLowerAndOpToResetLowestSetBit(binOp);
if (blsrNode != nullptr)
GenTree* replacementNode = TryLowerAndOpToAndNot(binOp);
if (replacementNode != nullptr)
{
return replacementNode->gtNext;
}

replacementNode = TryLowerAndOpToResetLowestSetBit(binOp);
if (replacementNode != nullptr)
{
return blsrNode->gtNext;
return replacementNode->gtNext;
}
}
#endif
Expand Down Expand Up @@ -3726,14 +3735,16 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
}

//----------------------------------------------------------------------------------------------
// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit
// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1)) to HWIntrinsic::ResetLowestSetBit
//
// Arguments:
// andNode - GT_AND node of integral type
//
// Return Value:
// Returns the replacement node if one is created else nullptr indicating no replacement
//
// Notes:
// Performs containment checks on the replacement node if one is created
GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
{
assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));
Expand Down Expand Up @@ -3802,6 +3813,86 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
return blsrNode;
}

//----------------------------------------------------------------------------------------------
// Lowering::TryLowerAndOpToAndNot: Lowers a tree AND(X, NOT(Y)) to HWIntrinsic::AndNot
//
// Arguments:
//    andNode - GT_AND node of integral type
//
// Return Value:
//    Returns the replacement node if one is created else nullptr indicating no replacement
//
// Notes:
//    Performs containment checks on the replacement node if one is created
GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode)
{
    assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));

    // Find the NOT operand (if any); the pattern is symmetric, so check both sides.
    GenTree* opNode  = nullptr;
    GenTree* notNode = nullptr;
    if (andNode->gtGetOp1()->OperIs(GT_NOT))
    {
        notNode = andNode->gtGetOp1();
        opNode  = andNode->gtGetOp2();
    }
    else if (andNode->gtGetOp2()->OperIs(GT_NOT))
    {
        notNode = andNode->gtGetOp2();
        opNode  = andNode->gtGetOp1();
    }

    // Neither operand is a NOT: the pattern does not match.
    if (opNode == nullptr)
    {
        return nullptr;
    }

    // We want to avoid using "andn" when one of the operands is both a source and the destination and is also coming
    // from memory. In this scenario, we will get smaller and likely faster code by using the RMW encoding of `and`
    if (IsBinOpInRMWStoreInd(andNode))
    {
        return nullptr;
    }

    // Pick the intrinsic matching the operand width; bail if BMI1 is unavailable.
    NamedIntrinsic intrinsic;
    if (andNode->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64))
    {
        intrinsic = NamedIntrinsic::NI_BMI1_X64_AndNot;
    }
    else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))
    {
        intrinsic = NamedIntrinsic::NI_BMI1_AndNot;
    }
    else
    {
        return nullptr;
    }

    // An unused AND is not worth replacing (and we need the use to splice in the new node).
    LIR::Use use;
    if (!BlockRange().TryGetUse(andNode, &use))
    {
        return nullptr;
    }

    // note that parameter order for andn is ~y, x so these are purposefully reversed when creating the node
    GenTreeHWIntrinsic* andnNode =
        comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), notNode->AsUnOp()->gtGetOp1(), opNode, intrinsic);

    JITDUMP("Lower: optimize AND(X, NOT(Y))\n");
    DISPNODE(andNode);
    JITDUMP("to:\n");
    DISPNODE(andnNode);

    use.ReplaceWith(andnNode);

    // Splice in the intrinsic and drop the now-dead AND and NOT nodes.
    BlockRange().InsertBefore(andNode, andnNode);
    BlockRange().Remove(andNode);
    BlockRange().Remove(notNode);

    ContainCheckHWIntrinsic(andnNode);

    return andnNode;
}

#endif // FEATURE_HW_INTRINSICS

//----------------------------------------------------------------------------------------------
Expand Down