Skip to content

Commit

Permalink
AVX10.1 API introduction in JIT (#101938)
Browse files Browse the repository at this point in the history
* Add AVX10v1 API surface

* Define HWINTRINSIC for AVX10v1, AVX10v1_V256 and AVX10v1_V512

* Setup template testing for AVX10v1 APIs

* Handle AVX10v1 APIs in JIT where equivalent AVX512* APIs are handled

* Merge Avx10v1 and Avx10v1.V256. Rename Avx10.cs to Avx10v1.cs

* Add Avx10v1 to relevant places

* Fix CI errors. Add missing API in Avx10v1.PlatformNotSupported and end the file with a newline character

* Changes to be made with latest changes on main. Make appropriate comments. Update tests in template testing for Avx10v1

* Lower AVX10v1 hwintrinsic in lowering and gentree.cpp for simdSize 32/16

* Fix failures on GNR for AVX10v1

* Disable template tests disabled for Avx512

* Distinguish between Avx10v1 and Avx10v1/512, Add appropriate comments and clean up code in lowerCast

* Remove duplicate code and rather use a single if condition

* Use bool instead of compIsa checks where possible

* remove duplication of code in shuffle

* resolve review comments. Make evex encoding checks clear to read and resolve a bug in gtNewSimdCvtNode

* Add FMA and Avx512F.X64 instructions to AVX10v1. Restructure code and compOpportunistic checks

* Combine compOpportunistic checks with Avx10 check using IsAvx10OrIsaSupportedOpportunistically

* Introduce a new internal ISA InstructionSet_EVEX and remove InstructionSet_AVX10v1_V256 to make space for the new ISA. Also change all the internal special intrinsic nodes for Avx512F on x86/x64 arch to evex nodes

* Addressing review comments. resolving errors introduced when merged with main

* fix formatting

* Reorder declaration of InstructionSet_EVEX to proper position. Run formatting and resolve errors introduced when merging with main
  • Loading branch information
khushal1996 authored Jun 9, 2024
1 parent fa1acc6 commit b5948bf
Show file tree
Hide file tree
Showing 115 changed files with 11,280 additions and 555 deletions.
164 changes: 84 additions & 80 deletions src/coreclr/inc/corinfoinstructionset.h

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 227e46fa-1be3-4770-b613-4a239e7c28aa */
0x227e46fa,
0x1be3,
0x4770,
{0xb6, 0x13, 0x4a, 0x23, 0x9e, 0x7c, 0x28, 0xaa}
constexpr GUID JITEEVersionIdentifier = { /* 6e0b439f-0d18-4836-a486-4962af0cc948 */
0x6e0b439f,
0x0d18,
0x4836,
{0xa4, 0x86, 0x49, 0x62, 0xaf, 0x0c, 0xc9, 0x48}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Rcpc2=42,
READYTORUN_INSTRUCTION_Sve=43,
READYTORUN_INSTRUCTION_Avx10v1=44,
READYTORUN_INSTRUCTION_Avx10v1_V256=45,
READYTORUN_INSTRUCTION_Avx10v1_V512=46,
READYTORUN_INSTRUCTION_EVEX=47,

};

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3184,8 +3184,8 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock,
return (simdBaseType == TYP_FLOAT) && vecCon->IsZero();
}

case NI_AVX512F_CompareEqualMask:
case NI_AVX512F_CompareNotEqualMask:
case NI_EVEX_CompareEqualMask:
case NI_EVEX_CompareNotEqualMask:
{
// We can optimize when the constant is zero, but only
// for non floating-point since +0.0 == -0.0
Expand Down
34 changes: 32 additions & 2 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1827,7 +1827,22 @@ void CodeGen::genGenerateMachineCode()
#if defined(TARGET_X86)
if (compiler->canUseEvexEncoding())
{
printf("X86 with AVX512");
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512))
{
printf("X86 with AVX10/512");
}
else
{
printf("X86 with AVX10/256");
}
}
else
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
printf("X86 with AVX512");
}
}
else if (compiler->canUseVexEncoding())
{
Expand All @@ -1840,7 +1855,22 @@ void CodeGen::genGenerateMachineCode()
#elif defined(TARGET_AMD64)
if (compiler->canUseEvexEncoding())
{
printf("X64 with AVX512");
// This is the TARGET_AMD64 branch of the banner, so every message must say "X64"
// (the pre-AVX10 banner here was "X64 with AVX512"); "X86" belongs to the TARGET_X86 branch only.
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
    // AVX10v1 is available: report whether the 512-bit vector length is also usable.
    if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512))
    {
        printf("X64 with AVX10/512");
    }
    else
    {
        printf("X64 with AVX10/256");
    }
}
else
{
    // EVEX encoding is usable without AVX10v1, so AVX512F is expected to be present.
    assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
    printf("X64 with AVX512");
}
}
else if (compiler->canUseVexEncoding())
{
Expand Down
37 changes: 24 additions & 13 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand All @@ -492,7 +492,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand Down Expand Up @@ -521,7 +521,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand All @@ -548,7 +548,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand Down Expand Up @@ -667,7 +667,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, EA_16BYTE, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand Down Expand Up @@ -5654,6 +5654,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
case NI_AVX512F_ExtractVector256:
case NI_AVX512DQ_ExtractVector128:
case NI_AVX512DQ_ExtractVector256:
case NI_AVX10v1_V512_ExtractVector128:
case NI_AVX10v1_V512_ExtractVector256:
{
// These intrinsics are "ins reg/mem, xmm, imm8"
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
Expand Down Expand Up @@ -5682,6 +5684,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
case NI_AVX512F_ConvertToVector256UInt32:
case NI_AVX512F_VL_ConvertToVector128UInt32:
case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
case NI_AVX10v1_ConvertToVector128UInt32:
case NI_AVX10v1_ConvertToVector128UInt32WithSaturation:
{
assert(!varTypeIsFloating(baseType));
FALLTHROUGH;
Expand Down Expand Up @@ -5719,6 +5723,16 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
case NI_AVX512BW_VL_ConvertToVector128SByte:
case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
case NI_AVX10v1_ConvertToVector128Byte:
case NI_AVX10v1_ConvertToVector128ByteWithSaturation:
case NI_AVX10v1_ConvertToVector128Int16:
case NI_AVX10v1_ConvertToVector128Int16WithSaturation:
case NI_AVX10v1_ConvertToVector128Int32:
case NI_AVX10v1_ConvertToVector128Int32WithSaturation:
case NI_AVX10v1_ConvertToVector128SByte:
case NI_AVX10v1_ConvertToVector128SByteWithSaturation:
case NI_AVX10v1_ConvertToVector128UInt16:
case NI_AVX10v1_ConvertToVector128UInt16WithSaturation:
{
// These intrinsics are "ins reg/mem, xmm"
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
Expand Down Expand Up @@ -7324,13 +7338,11 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
// Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
// here since they should have been lowered appropriately.
noway_assert(srcType != TYP_UINT);
assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || compiler->canUseEvexEncodingDebugOnly());

if ((srcType == TYP_ULONG) && varTypeIsFloating(dstType) &&
compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
if ((srcType == TYP_ULONG) && varTypeIsFloating(dstType) && compiler->canUseEvexEncoding())
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
genConsumeOperands(treeNode->AsOp());
instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
Expand Down Expand Up @@ -7458,13 +7470,12 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
// into a helper call by either front-end or lowering phase, unless we have AVX512F
// accelerated conversions.
assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
compiler->canUseEvexEncodingDebugOnly());

// If the dstType is TYP_UINT, we have 32-bits to encode the
// float number. Any of 33rd or above bits can be the sign bit.
// To achieve it we pretend as if we are converting it to a long.
if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) &&
!compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) && !compiler->canUseEvexEncoding())
{
dstType = TYP_LONG;
}
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2307,7 +2307,6 @@ void Compiler::compSetProcessor()
{
instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
}

// x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
// These have been shipped together historically and at the time of this writing
// there exists no hardware which doesn't support the entire feature set. To simplify
Expand Down
94 changes: 93 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -9554,6 +9554,14 @@ class Compiler
return opts.compSupportsISA.HasInstructionSet(isa);
}

// The following cases should be taken into consideration when using the APIs below:
// - InstructionSet_EVEX implies Avx10v1 -or- Avx512F+CD+DQ+BW+VL and can be used for 128-bit or 256-bit
//   EVEX encoded instructions in these instruction sets.
// - InstructionSet_Avx10v1_V512 should never be queried directly; it is covered by querying Avx512*.
// - InstructionSet_Avx512F (and same for BW, CD, DQ) is only queried for 512-bit EVEX encoded instructions.
// - InstructionSet_Avx10v1 is only queried for cases like 128-bit/256-bit instructions that wouldn't be in
//   F+CD+DQ+BW+VL (such as VBMI) and should appear with a corresponding query around AVX512*_VL
//   (i.e. AVX512_VBMI_VL).

#ifdef DEBUG
//------------------------------------------------------------------------
// IsBaselineVector512IsaSupportedDebugOnly - Does isa support exist for Vector512.
Expand All @@ -9567,6 +9575,42 @@ class Compiler
return compIsaSupportedDebugOnly(InstructionSet_AVX512F);
#else
return false;
#endif
}

//------------------------------------------------------------------------
// canUseEvexEncodingDebugOnly - Debug-only query for EVEX encoding support.
//
// Returns:
//    On xarch targets, the result of compIsaSupportedDebugOnly(InstructionSet_EVEX);
//    `false` on every other target.
//
bool canUseEvexEncodingDebugOnly() const
{
#if defined(TARGET_XARCH)
    const bool evexIsSupported = compIsaSupportedDebugOnly(InstructionSet_EVEX);
    return evexIsSupported;
#else  // !TARGET_XARCH
    return false;
#endif // TARGET_XARCH
}

//------------------------------------------------------------------------
// IsAvx10OrIsaSupportedDebugOnly - Answer the question: Is AVX10v1 or the given ISA supported.
//
// Arguments:
//    isa - the instruction set to check in addition to AVX10v1. It must not be one of
//          AVX512F/BW/CD/DQ or their _VL forms; for those, check EVEX encoding support instead.
//
// Returns:
//    `true` if AVX10v1 or the given ISA is supported, `false` if not.
//    Always `false` on non-xarch targets.
//
bool IsAvx10OrIsaSupportedDebugOnly(CORINFO_InstructionSet isa) const
{
#ifdef TARGET_XARCH
    // For the below cases, check for evex encoding should be used.
    // The inequalities must be AND-ed: an OR of `isa != X` against distinct values is
    // always true, so the assert could never fire.
    assert((isa != InstructionSet_AVX512F) && (isa != InstructionSet_AVX512F_VL) &&
           (isa != InstructionSet_AVX512BW) && (isa != InstructionSet_AVX512BW_VL) &&
           (isa != InstructionSet_AVX512CD) && (isa != InstructionSet_AVX512CD_VL) &&
           (isa != InstructionSet_AVX512DQ) && (isa != InstructionSet_AVX512DQ_VL));

    return (compIsaSupportedDebugOnly(InstructionSet_AVX10v1) || compIsaSupportedDebugOnly(isa));
#else
    return false;
#endif
}
#endif // DEBUG
Expand All @@ -9586,6 +9630,21 @@ class Compiler
#endif
}

//------------------------------------------------------------------------
// IsAvx10OrIsaSupportedOpportunistically - Opportunistic query for AVX10v1 or a given ISA.
//
// Arguments:
//    isa - the instruction set to fall back to when AVX10v1 is not opportunistically available.
//
// Returns:
//    On xarch targets, `true` if compOpportunisticallyDependsOn succeeds for AVX10v1 or,
//    failing that, for `isa`; `false` otherwise and on every other target.
//
bool IsAvx10OrIsaSupportedOpportunistically(CORINFO_InstructionSet isa) const
{
#if defined(TARGET_XARCH)
    // AVX10v1 is queried first; `isa` is only queried when that check fails.
    if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
    {
        return true;
    }

    return compOpportunisticallyDependsOn(isa);
#else  // !TARGET_XARCH
    return false;
#endif // TARGET_XARCH
}

bool canUseEmbeddedBroadcast() const
{
return JitConfig.EnableEmbeddedBroadcast();
Expand All @@ -9598,6 +9657,35 @@ class Compiler

#ifdef TARGET_XARCH
public:

//------------------------------------------------------------------------
// compIsEvexOpportunisticallySupported - Opportunistically check for the given AVX512 instruction set,
//                                        falling back to AVX10v1.
//
// Arguments:
//    isV512Supported      - [out] set to `true` when avx512InstructionSet is opportunistically
//                           supported, `false` otherwise.
//    avx512InstructionSet - the AVX512 instruction set to query (defaults to AVX512F); must be one of
//                           AVX512F/BW/CD/DQ/VBMI or their _VL forms.
//
// Returns:
//    `true` if either avx512InstructionSet or AVX10v1 is opportunistically supported.
//    AVX10v1 is only queried when the avx512InstructionSet check fails.
//
bool compIsEvexOpportunisticallySupported(bool&                  isV512Supported,
                                          CORINFO_InstructionSet avx512InstructionSet = InstructionSet_AVX512F)
{
    assert((avx512InstructionSet == InstructionSet_AVX512F) || (avx512InstructionSet == InstructionSet_AVX512F_VL) ||
           (avx512InstructionSet == InstructionSet_AVX512BW) || (avx512InstructionSet == InstructionSet_AVX512BW_VL) ||
           (avx512InstructionSet == InstructionSet_AVX512CD) || (avx512InstructionSet == InstructionSet_AVX512CD_VL) ||
           (avx512InstructionSet == InstructionSet_AVX512DQ) || (avx512InstructionSet == InstructionSet_AVX512DQ_VL) ||
           (avx512InstructionSet == InstructionSet_AVX512VBMI) ||
           (avx512InstructionSet == InstructionSet_AVX512VBMI_VL));

    // Record the AVX512 answer in the out parameter first; the short-circuit below then
    // consults AVX10v1 only when the AVX512 instruction set is unavailable.
    isV512Supported = compOpportunisticallyDependsOn(avx512InstructionSet);

    return isV512Supported || compOpportunisticallyDependsOn(InstructionSet_AVX10v1);
}

bool canUseVexEncoding() const
{
return compOpportunisticallyDependsOn(InstructionSet_AVX);
Expand All @@ -9611,7 +9699,7 @@ class Compiler
//
bool canUseEvexEncoding() const
{
return compOpportunisticallyDependsOn(InstructionSet_AVX512F);
return (compOpportunisticallyDependsOn(InstructionSet_EVEX));
}

private:
Expand Down Expand Up @@ -9641,6 +9729,10 @@ class Compiler

return true;
}
else if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
return true;
}
#endif // DEBUG

return false;
Expand Down
Loading

0 comments on commit b5948bf

Please sign in to comment.