Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM64-SVE: Remove HW_Flag_HasEnumOperand #105702 #106622

Merged
merged 1 commit into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 33 additions & 46 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,24 @@ enum HWIntrinsicFlag : unsigned int
// The intrinsic has no EVEX compatible form
HW_Flag_NoEvexSemantics = 0x100000,

// The intrinsic is an RMW intrinsic
HW_Flag_RmwIntrinsic = 0x200000,

// The intrinsic is a PermuteVar2x intrinsic
HW_Flag_PermuteVar2x = 0x400000,

// The intrinsic is an embedded broadcast compatible intrinsic
HW_Flag_EmbBroadcastCompatible = 0x800000,

// The intrinsic is an embedded rounding compatible intrinsic
HW_Flag_EmbRoundingCompatible = 0x1000000,

// The intrinsic is an embedded masking compatible intrinsic
HW_Flag_EmbMaskingCompatible = 0x2000000,

// The base type of this intrinsic needs to be normalized to int/uint unless it is long/ulong.
HW_Flag_NormalizeSmallTypeToInt = 0x4000000,

#elif defined(TARGET_ARM64)
// The intrinsic has an immediate operand
// - the value can be (and should be) encoded in a corresponding instruction when the operand value is constant
Expand Down Expand Up @@ -196,57 +214,32 @@ enum HWIntrinsicFlag : unsigned int
// The intrinsic uses a mask in arg1 to select elements present in the result, which is not present in the API call
HW_Flag_EmbeddedMaskedOperation = 0x100000,

// The intrinsic comes in both vector and scalar variants. During the import stage if the basetype is scalar,
// then the intrinsic should be switched to a scalar only version.
HW_Flag_HasScalarInputVariant = 0x200000,

// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register.
HW_Flag_LowVectorOperation = 0x400000,

// The intrinsic uses a mask in arg1 to select elements present in the result, which zeros inactive elements
// (instead of merging).
HW_Flag_ZeroingMaskedOperation = 0x800000,

#else
#error Unsupported platform
#endif

// The intrinsic has some barrier special side effect that should be tracked
HW_Flag_SpecialSideEffect_Barrier = 0x200000,
HW_Flag_SpecialSideEffect_Barrier = 0x8000000,

// The intrinsic has some other special side effect that should be tracked
HW_Flag_SpecialSideEffect_Other = 0x400000,
HW_Flag_SpecialSideEffect_Other = 0x10000000,

HW_Flag_SpecialSideEffectMask = (HW_Flag_SpecialSideEffect_Barrier | HW_Flag_SpecialSideEffect_Other),

// MaybeNoJmpTableIMM
// The immediate-operand intrinsic may not need a jump-table fallback when it is given a non-constant argument
HW_Flag_MaybeNoJmpTableIMM = 0x800000,

#if defined(TARGET_XARCH)
// The intrinsic is an RMW intrinsic
HW_Flag_RmwIntrinsic = 0x1000000,

// The intrinsic is a PermuteVar2x intrinsic
HW_Flag_PermuteVar2x = 0x2000000,

// The intrinsic is an embedded broadcast compatible intrinsic
HW_Flag_EmbBroadcastCompatible = 0x4000000,

// The intrinsic is an embedded rounding compatible intrinsic
HW_Flag_EmbRoundingCompatible = 0x8000000,

// The intrinsic is an embedded masking compatible intrinsic
HW_Flag_EmbMaskingCompatible = 0x10000000,

// The base type of this intrinsic needs to be normalized to int/uint unless it is long/ulong.
HW_Flag_NormalizeSmallTypeToInt = 0x20000000,
#elif defined(TARGET_ARM64)

// The intrinsic has an enum operand. Using this implies HW_Flag_HasImmediateOperand.
HW_Flag_HasEnumOperand = 0x1000000,

// The intrinsic comes in both vector and scalar variants. During the import stage if the basetype is scalar,
// then the intrinsic should be switched to a scalar only version.
HW_Flag_HasScalarInputVariant = 0x2000000,

// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register.
HW_Flag_LowVectorOperation = 0x4000000,

// The intrinsic uses a mask in arg1 to select elements present in the result, which zeros inactive elements
// (instead of merging).
HW_Flag_ZeroingMaskedOperation = 0x8000000,

#endif // TARGET_XARCH
HW_Flag_MaybeNoJmpTableIMM = 0x20000000,

// The intrinsic is a FusedMultiplyAdd intrinsic
HW_Flag_FmaIntrinsic = 0x40000000,
Expand Down Expand Up @@ -926,7 +919,7 @@ struct HWIntrinsicInfo
{
#if defined(TARGET_ARM64)
const HWIntrinsicFlag flags = lookupFlags(id);
return ((flags & HW_Flag_HasImmediateOperand) != 0) || HasEnumOperand(id);
return ((flags & HW_Flag_HasImmediateOperand) != 0);
#elif defined(TARGET_XARCH)
return lookupCategory(id) == HW_Category_IMM;
#else
Expand Down Expand Up @@ -983,12 +976,6 @@ struct HWIntrinsicInfo
return (flags & HW_Flag_ExplicitMaskedOperation) != 0;
}

// Returns true when the flags looked up for `id` have the
// HW_Flag_HasEnumOperand bit set, false otherwise.
static bool HasEnumOperand(NamedIntrinsic id)
{
    // Fetch the flag set recorded for this intrinsic.
    const HWIntrinsicFlag intrinsicFlags = lookupFlags(id);

    // Test the single enum-operand bit.
    const bool enumOperandBitSet = (intrinsicFlags & HW_Flag_HasEnumOperand) != 0;
    return enumOperandBitSet;
}

static bool HasScalarInputVariant(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
Expand Down
45 changes: 20 additions & 25 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,27 +468,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
emitShift(intrin.op2, op1Reg);
}
}
else if (HWIntrinsicInfo::HasEnumOperand(intrin.id))
{
assert(hasImmediateOperand);

switch (intrin.numOperands)
{
case 1:
{
HWIntrinsicImmOpHelper helper(this, intrin.op1, node);
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
const insSvePattern pattern = (insSvePattern)helper.ImmValue();
GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, pattern);
}
};
break;

default:
unreached();
}
}
else if (intrin.numOperands >= 2 && intrin.op2->IsEmbMaskOp())
{
// Handle case where op2 is operation that needs embedded mask
Expand Down Expand Up @@ -1043,18 +1022,33 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else
{
assert(!hasImmediateOperand);

switch (intrin.numOperands)
{
case 0:
assert(!hasImmediateOperand);
GetEmitter()->emitIns_R(ins, emitSize, targetReg, opt);
break;

case 1:
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
if (hasImmediateOperand)
{
assert(HWIntrinsicInfo::IsScalable(intrin.id));
HWIntrinsicImmOpHelper helper(this, intrin.op1, node);
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
const insSvePattern pattern = (insSvePattern)helper.ImmValue();
GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, pattern);
}
}
else
{
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
break;

case 2:
assert(!hasImmediateOperand);

// This handles optimizations for instructions that have
// an implicit 'zero' vector of what would be the second operand.
if (HWIntrinsicInfo::SupportsContainment(intrin.id) && intrin.op2->isContained() &&
Expand Down Expand Up @@ -1110,6 +1104,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;

case 3:
assert(!hasImmediateOperand);

if (isRMW)
{
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
Expand Down Expand Up @@ -1803,7 +1799,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Sve_PrefetchInt64:
{
assert(hasImmediateOperand);
assert(HWIntrinsicInfo::HasEnumOperand(intrin.id));
HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
Expand Down
Loading
Loading