-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT ARM64-SVE: Add Sve.Abs() and Sve.Add() #100134
Changes from 5 commits
0d437e3
e9fa735
5acc122
15e893d
c22f458
508a52a
5825c20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1396,6 +1396,60 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, | |
GenTree* op3 = nullptr; | ||
GenTree* op4 = nullptr; | ||
|
||
switch (numArgs) | ||
{ | ||
case 4: | ||
op4 = getArgForHWIntrinsic(sigReader.GetOp4Type(), sigReader.op4ClsHnd); | ||
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, immLowerBound, immUpperBound); | ||
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd); | ||
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd); | ||
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); | ||
break; | ||
|
||
case 3: | ||
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there no case where a range check is needed for 3 arguments? Should there be an assert to validate that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That seems to be the case. The case for 3 args is checked further down and depending on the intrinsics, does the |
||
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd); | ||
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); | ||
break; | ||
|
||
case 2: | ||
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd); | ||
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound); | ||
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); | ||
break; | ||
|
||
case 1: | ||
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); | ||
break; | ||
|
||
default: | ||
break; | ||
} | ||
|
||
#if defined(TARGET_ARM64) | ||
// Embedded masks need inserting as op1. | ||
if (HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinsic)) | ||
{ | ||
numArgs++; | ||
assert(numArgs <= 4); | ||
switch (numArgs) | ||
{ | ||
case 4: | ||
op4 = op3; | ||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
FALLTHROUGH; | ||
case 3: | ||
op3 = op2; | ||
FALLTHROUGH; | ||
case 2: | ||
op2 = op1; | ||
FALLTHROUGH; | ||
default: | ||
break; | ||
} | ||
op1 = gtNewSimdEmbeddedMaskNode(simdBaseJitType, simdSize); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does this need to be done here? This seems like its just inserting the implicit Seemingly this could just be inserted as part of lowering instead so that it only impacts LSRA and codegen? |
||
} | ||
#endif | ||
|
||
switch (numArgs) | ||
{ | ||
case 0: | ||
|
@@ -1407,8 +1461,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, | |
|
||
case 1: | ||
{ | ||
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); | ||
|
||
if ((category == HW_Category_MemoryLoad) && op1->OperIs(GT_CAST)) | ||
{ | ||
// Although the API specifies a pointer, if what we have is a BYREF, that's what | ||
|
@@ -1467,10 +1519,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, | |
|
||
case 2: | ||
{ | ||
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd); | ||
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound); | ||
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); | ||
|
||
retNode = isScalar | ||
? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, intrinsic) | ||
: gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, intrinsic, simdBaseJitType, simdSize); | ||
|
@@ -1524,10 +1572,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, | |
|
||
case 3: | ||
{ | ||
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd); | ||
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd); | ||
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); | ||
|
||
#ifdef TARGET_ARM64 | ||
if (intrinsic == NI_AdvSimd_LoadAndInsertScalar) | ||
{ | ||
|
@@ -1569,12 +1613,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, | |
|
||
case 4: | ||
{ | ||
op4 = getArgForHWIntrinsic(sigReader.GetOp4Type(), sigReader.op4ClsHnd); | ||
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, immLowerBound, immUpperBound); | ||
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd); | ||
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd); | ||
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); | ||
|
||
assert(!isScalar); | ||
retNode = | ||
gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -398,6 +398,64 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) | |
unreached(); | ||
} | ||
} | ||
else if (isRMW) | ||
{ | ||
assert(!hasImmediateOperand); | ||
assert(!HWIntrinsicInfo::SupportsContainment(intrin.id)); | ||
|
||
// Move the RMW register out of the way and do not pass it to the emit. | ||
|
||
if (HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrin.id)) | ||
{ | ||
// op1Reg contains a mask, op2Reg contains the RMW register. | ||
|
||
if (targetReg != op2Reg) | ||
{ | ||
assert(targetReg != op3Reg); | ||
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg, /* canSkip */ true); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure if it is needed here, but we should start document the rules of using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's the same general So I'm not sure there's really any rules to document here, we certainly don't document it at all for x86 or x64 or any of the existing Arm64 RMW cases. |
||
} | ||
|
||
switch (intrin.numOperands) | ||
{ | ||
case 2: | ||
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); | ||
break; | ||
|
||
case 3: | ||
assert(targetReg != op3Reg); | ||
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt); | ||
break; | ||
|
||
default: | ||
unreached(); | ||
} | ||
} | ||
else | ||
{ | ||
// op1Reg contains the RMW register. | ||
|
||
if (targetReg != op1Reg) | ||
{ | ||
assert(targetReg != op2Reg); | ||
assert(targetReg != op3Reg); | ||
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); | ||
} | ||
|
||
switch (intrin.numOperands) | ||
{ | ||
case 2: | ||
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt); | ||
break; | ||
|
||
case 3: | ||
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need an assert that none of the input registers are mask registers? I'd guess the only RMW instructions with masks are ones that have at least 4 operands, so we shouldn't ever see that here. |
||
break; | ||
|
||
default: | ||
unreached(); | ||
} | ||
} | ||
} | ||
else | ||
{ | ||
assert(!hasImmediateOperand); | ||
|
@@ -416,35 +474,12 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) | |
{ | ||
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); | ||
} | ||
else if (isRMW) | ||
{ | ||
if (targetReg != op1Reg) | ||
{ | ||
assert(targetReg != op2Reg); | ||
|
||
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, | ||
/* canSkip */ true); | ||
} | ||
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt); | ||
} | ||
else | ||
{ | ||
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); | ||
} | ||
break; | ||
|
||
case 3: | ||
assert(isRMW); | ||
if (targetReg != op1Reg) | ||
{ | ||
assert(targetReg != op2Reg); | ||
assert(targetReg != op3Reg); | ||
|
||
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); | ||
} | ||
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt); | ||
break; | ||
|
||
default: | ||
unreached(); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,10 @@ | |
// SVE Intrinsics | ||
|
||
// Sve | ||
HARDWARE_INTRINSIC(Sve, Abs, -1, -1, false, {INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_fabs, INS_sve_fabs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need the |
||
|
||
HARDWARE_INTRINSIC(Sve, Add, -1, -1, false, {INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_fadd, INS_sve_fadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you elaborate a bit on what I'm looking at the architecture manual and don't see any limitations on the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The instruction only has 3bits for a predicate register so is limited to using predicate registers 0 to 7. This is quite a common pattern across Sve, hence using an common enum for it (I wouldn't do similar for the handful have only 2 bits) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I see. I think a flag makes sense then, but I'm not a huge fan of the name given how Maybe something more explicit like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does this one in particular need |
||
|
||
HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) | ||
HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) | ||
HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
wondering why is this change? It will return
4
here, right?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed. The changes for the mask load/stores were the required ones.
Reverted.