ARM64-SVE: Add MultiplyAddRotateComplexBySelectedScalar (#105002)
amanasifkhalid authored Jul 17, 2024
1 parent 27776e2 commit 1ba2974
Showing 15 changed files with 872 additions and 28 deletions.
6 changes: 3 additions & 3 deletions src/coreclr/jit/codegenarm64test.cpp
@@ -6592,11 +6592,11 @@ void CodeGen::genArm64EmitterUnitTestsSve()
// IF_SVE_GV_3A
theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0,
INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
- theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90,
+ theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 1,
INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
- theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180,
+ theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 2,
INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
- theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270,
+ theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 3,
INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>

// IF_SVE_GX_3A
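The test updates above stop passing the FCMLA rotation as degrees (90, 180, 270) and instead pass the 2-bit encoding (1, 2, 3) that the instruction field takes. A minimal standalone sketch of that mapping, assuming the same degrees-to-encoding rule that the removed emitEncodeRotationImm0_to_270 conversion in emitarm64sve.cpp (next file) implemented:

```cpp
#include <cassert>

// Hypothetical standalone equivalent of the degrees-to-encoding conversion the
// emitter used to perform: 0 -> 0b00, 90 -> 0b01, 180 -> 0b10, 270 -> 0b11.
static int EncodeRotation(int degrees)
{
    assert((degrees == 0) || (degrees == 90) || (degrees == 180) || (degrees == 270));
    return degrees / 90;
}

int main()
{
    assert(EncodeRotation(90) == 1);  // old test passed 90, new test passes 1
    assert(EncodeRotation(180) == 2); // old 180 -> new 2
    assert(EncodeRotation(270) == 3); // old 270 -> new 3
    return 0;
}
```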
13 changes: 6 additions & 7 deletions src/coreclr/jit/emitarm64sve.cpp
@@ -5825,14 +5825,13 @@ void emitter::emitInsSve_R_R_R_I_I(instruction ins,

case INS_sve_fcmla:
assert(opt == INS_OPTS_SCALABLE_S);
- assert(isVectorRegister(reg1)); // ddddd
- assert(isVectorRegister(reg2)); // nnnnn
- assert(isLowVectorRegister(reg3)); // mmmm
- assert(isValidUimm<1>(imm1)); // i
- assert(isValidRot(imm2)); // rr
+ assert(isVectorRegister(reg1)); // ddddd
+ assert(isVectorRegister(reg2)); // nnnnn
+ assert(isLowVectorRegister(reg3)); // mmmm
+ assert(isValidUimm<1>(imm1)); // i
+ assert(emitIsValidEncodedRotationImm0_to_270(imm2)); // rr

- // Convert imm2 from rotation value (0-270) to bitwise representation (0-3)
- imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2);
+ imm = (imm1 << 2) | imm2;
fmt = IF_SVE_GV_3A;
break;

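With this change the emitter no longer converts degrees itself: callers pass the already-encoded 2-bit rotation, and the emitter packs it with the 1-bit element index into one 3-bit immediate via `imm = (imm1 << 2) | imm2`. A minimal sketch of that packing, with the bit layout (index in bit 2, rotation in bits 1:0) read directly off the shift in the diff:

```cpp
#include <cassert>

// Pack the FCMLA (indexed) immediates the same way the emitter does above:
// imm = (index << 2) | rotation, with index in [0, 1] and rotation in [0, 3].
static int PackFcmlaImm(int index, int rotation)
{
    assert((index >= 0) && (index <= 1));       // i
    assert((rotation >= 0) && (rotation <= 3)); // rr
    return (index << 2) | rotation;
}

int main()
{
    assert(PackFcmlaImm(0, 0) == 0); // lowest packed value
    assert(PackFcmlaImm(1, 3) == 7); // highest packed value
    return 0;
}
```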
4 changes: 1 addition & 3 deletions src/coreclr/jit/hwintrinsic.cpp
@@ -963,9 +963,7 @@ static void ValidateHWIntrinsicInfo(CORINFO_InstructionSet isa, NamedIntrinsic n
if (info.numArgs != -1)
{
// We should only have an expected number of arguments
- #if defined(TARGET_ARM64)
- assert((info.numArgs >= 0) && (info.numArgs <= 4));
- #elif defined(TARGET_XARCH)
+ #if defined(TARGET_ARM64) || defined(TARGET_XARCH)
assert((info.numArgs >= 0) && (info.numArgs <= 5));
#else
unreached();
13 changes: 13 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
@@ -1085,6 +1085,14 @@ struct HWIntrinsicInfo
break;
}

case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
{
assert(sig->numArgs == 5);
*imm1Pos = 0;
*imm2Pos = 1;
break;
}

default:
{
assert(sig->numArgs > 0);
@@ -1105,6 +1113,7 @@ struct HWIntrinsic final
, op2(nullptr)
, op3(nullptr)
, op4(nullptr)
, op5(nullptr)
, numOperands(0)
, baseType(TYP_UNDEF)
{
@@ -1134,6 +1143,7 @@ struct HWIntrinsic final
GenTree* op2;
GenTree* op3;
GenTree* op4;
GenTree* op5;
size_t numOperands;
var_types baseType;

@@ -1144,6 +1154,9 @@

switch (numOperands)
{
case 5:
op5 = node->Op(5);
FALLTHROUGH;
case 4:
op4 = node->Op(4);
FALLTHROUGH;
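The header change threads a fifth operand (`op5`) through the `HWIntrinsic` wrapper and teaches `GetImmOpsPositions` that this intrinsic carries two immediates. Judging by how the importer consumes them (the fifth argument is range-checked against the rotation bounds, the fourth against the index bounds), the positions appear to be counted backwards from the last signature argument; a small sketch under that assumption:

```cpp
#include <cassert>
#include <cstddef>

// Hypothetical helper: translate a "position from the end" (as returned by
// GetImmOpsPositions) into a 1-based argument number of the 5-argument signature.
static size_t ImmPosToArgNumber(size_t numArgs, size_t posFromEnd)
{
    assert(posFromEnd < numArgs);
    return numArgs - posFromEnd;
}

int main()
{
    // imm1Pos = 0 -> fifth argument (the rotation selector, bounds 0..3)
    assert(ImmPosToArgNumber(5, 0) == 5);
    // imm2Pos = 1 -> fourth argument (the element index, bounds 0..1)
    assert(ImmPosToArgNumber(5, 1) == 4);
    return 0;
}
```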
66 changes: 66 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
@@ -514,6 +514,27 @@ void HWIntrinsicInfo::lookupImmBounds(
immUpperBound = 3;
break;

case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
// rotation comes after index in the intrinsic's signature,
// but flip the order here so we check the larger range first.
// This conforms to the existing logic in LinearScan::BuildHWIntrinsic
// when determining if we need an internal register for the jump table.
// This flipped ordering is reflected in HWIntrinsicInfo::GetImmOpsPositions.
if (immNumber == 1)
{
// Bounds for rotation
immLowerBound = 0;
immUpperBound = 3;
}
else
{
// Bounds for index
assert(immNumber == 2);
immLowerBound = 0;
immUpperBound = 1;
}
break;

case NI_Sve_TrigonometricMultiplyAddCoefficient:
immLowerBound = 0;
immUpperBound = 7;
@@ -3004,6 +3025,51 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
{
assert(sig->numArgs == 5);
assert(!isScalar);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3);
CORINFO_ARG_LIST_HANDLE arg5 = info.compCompHnd->getArgNext(arg4);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;

int imm1LowerBound, imm1UpperBound; // Range for rotation
int imm2LowerBound, imm2UpperBound; // Range for index
HWIntrinsicInfo::lookupImmBounds(intrinsic, simdSize, simdBaseType, 1, &imm1LowerBound, &imm1UpperBound);
HWIntrinsicInfo::lookupImmBounds(intrinsic, simdSize, simdBaseType, 2, &imm2LowerBound, &imm2UpperBound);

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg5, &argClass)));
GenTree* op5 = getArgForHWIntrinsic(argType, argClass);
assert(HWIntrinsicInfo::isImmOp(intrinsic, op5));
op5 = addRangeCheckIfNeeded(intrinsic, op5, mustExpand, imm1LowerBound, imm1UpperBound);

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass)));
op4 = getArgForHWIntrinsic(argType, argClass);
assert(HWIntrinsicInfo::isImmOp(intrinsic, op4));
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, imm2LowerBound, imm2UpperBound);

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
op3 = getArgForHWIntrinsic(argType, argClass);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = getArgForHWIntrinsic(argType, argClass);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);

SetOpLclRelatedToSIMDIntrinsic(op1);
SetOpLclRelatedToSIMDIntrinsic(op2);
SetOpLclRelatedToSIMDIntrinsic(op3);
SetOpLclRelatedToSIMDIntrinsic(op4);
SetOpLclRelatedToSIMDIntrinsic(op5);
retNode = new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(retType, getAllocator(CMK_ASTNode), intrinsic,
simdBaseJitType, simdSize, op1, op2, op3, op4, op5);
break;
}

default:
{
return nullptr;
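The importer pops the five arguments back to front and range-checks the two trailing immediates with the flipped bounds from `lookupImmBounds`: the rotation (fifth argument) against 0..3 and the element index (fourth argument) against 0..1. A minimal standalone check mirroring those bounds:

```cpp
#include <cassert>

// Mirror of the bounds the importer enforces for
// MultiplyAddRotateComplexBySelectedScalar on .S elements:
// index in [0, 1] and rotation in [0, 3].
static bool ImmsAreInRange(int index, int rotation)
{
    return (index >= 0) && (index <= 1) && (rotation >= 0) && (rotation <= 3);
}

int main()
{
    assert(ImmsAreInRange(1, 3));
    assert(!ImmsAreInRange(2, 0)); // index out of range
    assert(!ImmsAreInRange(0, 4)); // rotation out of range
    return 0;
}
```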
60 changes: 60 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -265,9 +265,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
regNumber op2Reg = REG_NA;
regNumber op3Reg = REG_NA;
regNumber op4Reg = REG_NA;
regNumber op5Reg = REG_NA;

switch (intrin.numOperands)
{
case 5:
assert(intrin.op5 != nullptr);
op5Reg = intrin.op5->GetRegNum();
FALLTHROUGH;

case 4:
assert(intrin.op4 != nullptr);
op4Reg = intrin.op4->GetRegNum();
@@ -2425,6 +2431,60 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
{
assert(isRMW);
assert(hasImmediateOperand);

if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
}

// If both immediates are constant, we don't need a jump table
if (intrin.op4->IsCnsIntOrI() && intrin.op5->IsCnsIntOrI())
{
assert(intrin.op4->isContainedIntOrIImmed() && intrin.op5->isContainedIntOrIImmed());
GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg,
intrin.op4->AsIntCon()->gtIconVal,
intrin.op5->AsIntCon()->gtIconVal, opt);
}
else
{
// Use the helper to generate a table. The table can only use a single lookup value, therefore
// the two immediates index (0 to 1, in op4Reg) and rotation (0 to 3, in op5Reg) must be
// combined to a single value (0 to 7)
assert(!intrin.op4->isContainedIntOrIImmed() && !intrin.op5->isContainedIntOrIImmed());
emitAttr scalarSize = emitActualTypeSize(node->GetSimdBaseType());

// Combine the two immediates into op4Reg
// Shift rotation left to be out of range of index
GetEmitter()->emitIns_R_R_I(INS_lsl, scalarSize, op5Reg, op5Reg, 1);
// Combine the two values by ORing
GetEmitter()->emitIns_R_R_R(INS_orr, scalarSize, op4Reg, op4Reg, op5Reg);

// Generate the table using the combined immediate
HWIntrinsicImmOpHelper helper(this, op4Reg, 0, 7, node);
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
// Extract index and rotation from the immediate
const int value = helper.ImmValue();
const ssize_t index = value & 1;
const ssize_t rotation = value >> 1;
GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg, index, rotation,
opt);
}

// Restore the original values in op4Reg and op5Reg
GetEmitter()->emitIns_R_R_I(INS_and, scalarSize, op4Reg, op4Reg, 1);
GetEmitter()->emitIns_R_R_I(INS_lsr, scalarSize, op5Reg, op5Reg, 1);
}

break;
}

default:
unreached();
}
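When either immediate is a variable, codegen reuses the single-value `HWIntrinsicImmOpHelper` jump table by folding both values into one key: the rotation is shifted left by one and ORed into the index register, producing a key in 0..7, and each generated case unpacks `index = key & 1` and `rotation = key >> 1`. A round-trip sketch of that packing:

```cpp
#include <cassert>

// Combine index (0..1) and rotation (0..3) into the single jump-table key
// built above with lsl/orr; each table case then unpacks it.
static int CombineKey(int index, int rotation)
{
    return (rotation << 1) | index; // rotation in bits 2:1, index in bit 0
}

int main()
{
    for (int index = 0; index <= 1; index++)
    {
        for (int rotation = 0; rotation <= 3; rotation++)
        {
            const int key = CombineKey(index, rotation);
            assert((key >= 0) && (key <= 7)); // table covers 0..7
            assert((key & 1) == index);       // unpacked inside each case
            assert((key >> 1) == rotation);
        }
    }
    return 0;
}
```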
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -191,6 +191,7 @@ HARDWARE_INTRINSIC(Sve, MinNumberAcross,
HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand)
HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplexBySelectedScalar, -1, 5, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation)
HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
12 changes: 12 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
@@ -3698,6 +3698,18 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}
break;

case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
assert(hasImmediateOperand);
assert(varTypeIsIntegral(intrin.op4));
assert(varTypeIsIntegral(intrin.op5));
// Can only avoid generating a table if both immediates are constant.
if (intrin.op4->IsCnsIntOrI() && intrin.op5->IsCnsIntOrI())
{
MakeSrcContained(node, intrin.op4);
MakeSrcContained(node, intrin.op5);
}
break;

default:
unreached();
}
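Lowering contains the two immediate operands only when both are integer constants; if either one is variable, neither is contained and codegen falls back to the jump table shown earlier. A minimal sketch of that decision, using a hypothetical stand-in for the `GenTree::IsCnsIntOrI` query:

```cpp
// Minimal sketch of the containment decision; Operand::IsConstInt is a
// hypothetical stand-in for GenTree::IsCnsIntOrI in the JIT.
struct Operand
{
    bool isConstInt;
    bool IsConstInt() const { return isConstInt; }
};

// True when FCMLA can be emitted directly with both immediates contained;
// false when the jump-table expansion is required.
static bool CanContainImmediates(const Operand& op4, const Operand& op5)
{
    return op4.IsConstInt() && op5.IsConstInt();
}

int main()
{
    Operand index = {true};
    Operand rotation = {false};
    return CanContainImmediates(index, rotation) ? 1 : 0; // 0: needs a jump table
}
```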
