Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable EVEX support by default #83648

Merged
merged 16 commits into from
Mar 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions eng/pipelines/common/templates/runtimes/run-test-job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -407,9 +407,11 @@ jobs:
- jitstress_isas_nohwintrinsic
- jitstress_isas_nohwintrinsic_nosimd
- jitstress_isas_nosimd
- jitstress_isas_x86_evex
- jitstress_isas_x86_noaes
- jitstress_isas_x86_noavx
- jitstress_isas_x86_noavx2
- jitstress_isas_x86_noavx512
- jitstress_isas_x86_nobmi1
- jitstress_isas_x86_nobmi2
- jitstress_isas_x86_nofma
Expand All @@ -427,6 +429,7 @@ jobs:
- jitstress_isas_1_x86_noaes
- jitstress_isas_1_x86_noavx
- jitstress_isas_1_x86_noavx2
- jitstress_isas_1_x86_noavx512
- jitstress_isas_1_x86_nobmi1
- jitstress_isas_1_x86_nobmi2
- jitstress_isas_1_x86_nofma
Expand All @@ -444,6 +447,7 @@ jobs:
- jitstress_isas_2_x86_noaes
- jitstress_isas_2_x86_noavx
- jitstress_isas_2_x86_noavx2
- jitstress_isas_2_x86_noavx512
- jitstress_isas_2_x86_nobmi1
- jitstress_isas_2_x86_nobmi2
- jitstress_isas_2_x86_nofma
Expand All @@ -460,8 +464,9 @@ jobs:
- jitstress_isas_2_x86_nossse3
${{ if in(parameters.testGroup, 'jitstress-isas-avx512') }}:
scenarios:
- jitstress_isas_avx512_forceevex
- jitstress_isas_avx512_forceevex_stresshighregs
- jitstress_isas_x86_evex
- jitstress_isas_x86_noavx512
- jitstressregs0x2000
${{ if in(parameters.testGroup, 'jitstressregs-x86') }}:
scenarios:
- jitstressregs1_x86_noavx
Expand All @@ -472,6 +477,7 @@ jobs:
- jitstressregs0x10_x86_noavx
- jitstressregs0x80_x86_noavx
- jitstressregs0x1000_x86_noavx
- jitstressregs0x2000_x86_noavx
${{ if in(parameters.testGroup, 'jitstressregs' ) }}:
scenarios:
- jitstressregs1
Expand All @@ -482,6 +488,7 @@ jobs:
- jitstressregs0x10
- jitstressregs0x80
- jitstressregs0x1000
- jitstressregs0x2000
${{ if in(parameters.testGroup, 'jitstress2-jitstressregs') }}:
scenarios:
- jitstress2_jitstressregs1
Expand All @@ -492,6 +499,7 @@ jobs:
- jitstress2_jitstressregs0x10
- jitstress2_jitstressregs0x80
- jitstress2_jitstressregs0x1000
- jitstress2_jitstressregs0x2000
${{ if in(parameters.testGroup, 'gcstress0x3-gcstress0xc') }}:
scenarios:
- gcstress0x3
Expand Down Expand Up @@ -520,6 +528,7 @@ jobs:
- jitstressregs0x10
- jitstressregs0x80
- jitstressregs0x1000
- jitstressregs0x2000
- jitminopts
- forcerelocs
- gcstress0xc
Expand Down
2 changes: 2 additions & 0 deletions eng/pipelines/libraries/run-test-job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ jobs:
- jitstressregs0x10
- jitstressregs0x80
- jitstressregs0x1000
- jitstressregs0x2000
${{ if in(parameters.coreclrTestGroup, 'jitstress2-jitstressregs') }}:
scenarios:
- jitstress2_jitstressregs1
Expand All @@ -171,6 +172,7 @@ jobs:
- jitstress2_jitstressregs0x10
- jitstress2_jitstressregs0x80
- jitstress2_jitstressregs0x1000
- jitstress2_jitstressregs0x2000
${{ if in(parameters.coreclrTestGroup, 'gcstress0x3-gcstress0xc') }}:
scenarios:
# Disable gcstress0x3 for now; it causes lots of test timeouts. Investigate this after
Expand Down
125 changes: 76 additions & 49 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2275,64 +2275,51 @@ void Compiler::compSetProcessor()
{
instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
}

if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX))
{
instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
}
// x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL and
// AVX512F/AVX512BW/AVX512CD/AVX512DQ/VX512VL have been shipped together historically.
// It is therefore unlikely that future CPUs only support "just one" and
// not worth the additional complexity in the JIT to support.
if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F) &&
instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) &&
instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ))
{
// Using JitStressEVEXEncoding flag will force instructions which would
// otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding
// This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus
// causing failure if not running on compatible hardware.

// We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't
// been set yet as that's what we're trying to set here
// x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
// These have been shipped together historically and at the time of this writing
// there exists no hardware which doesn't support the entire feature set. To simplify
// the overall JIT implementation, we currently require the entire set of ISAs to be
// supported and disable AVX512 support otherwise.
Comment on lines +2284 to +2288
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like the comment says, we want to support AVX512 only if all of AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL are supported.

These ISAs form the x86-64-v4 baseline, and no hardware has ever shipped without all of them.

Some examples of things we'd have to consider are that legacy-encoded xorps is SSE and VEX-encoded vxorps is AVX. However, EVEX-encoded xorps is AVX512DQ. Likewise the EVEX support for XMM/YMM based xorps is then AVX512DQ + AVX512VL (AVX512DQ_VL).

Supporting this "properly" would require us to add some fairly complex checks to import and likely LSRA to handle the difference and ensure that some suitable fallback is generated. We could write all the logic to support them, but without hardware existing that will be "needless" overhead and negatively impact throughput. So it's much easier to just write the JIT to disable AVX512 entirely if any of the "core" ISAs are unsupported.


bool enableAvx512 = false;
if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL) &&
instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL) &&
instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL))
{
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW));
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD));
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ));
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F));
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL));

#if defined(DEBUG)
if (JitConfig.JitForceEVEXEncoding())
{
enableAvx512 = true;
}
else if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL))
{
enableAvx512 = true;
}
#endif // DEBUG
instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
}
else
{
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL);

if (!enableAvx512)
{
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL);
#ifdef TARGET_AMD64
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64);
#endif // TARGET_AMD64
}
else
{
instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
}
}
#elif defined(TARGET_ARM64)
if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd))
Expand Down Expand Up @@ -3399,7 +3386,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT;
cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT;

if (DoJitStressEvexEncoding())
if (canUseEvexEncoding())
{
rbmAllFloat |= RBM_HIGHFLOAT;
rbmFltCalleeTrash |= RBM_HIGHFLOAT;
Expand Down Expand Up @@ -6028,6 +6015,46 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI);
}

if (JitConfig.EnableAVX512F() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F);
}
Comment on lines +6019 to +6022
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This and the below are what allow AVX512 to light up in the AltJit.


if (JitConfig.EnableAVX512F_VL() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F_VL);
}

if (JitConfig.EnableAVX512BW() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW);
}

if (JitConfig.EnableAVX512BW_VL() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW_VL);
}

if (JitConfig.EnableAVX512CD() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD);
}

if (JitConfig.EnableAVX512CD_VL() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD_VL);
}

if (JitConfig.EnableAVX512DQ() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ);
}

if (JitConfig.EnableAVX512DQ_VL() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ_VL);
}
#endif

// These calls are important and explicitly ordered to ensure that the flags are correct in
Expand Down
35 changes: 12 additions & 23 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -9170,7 +9170,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
//
bool IsBaselineVector512IsaSupportedDebugOnly() const
{
#ifdef TARGET_AMD64
#ifdef TARGET_XARCH
return (compIsaSupportedDebugOnly(InstructionSet_Vector512));
#else
return false;
Expand All @@ -9186,7 +9186,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
//
bool IsBaselineVector512IsaSupported() const
{
#ifdef TARGET_AMD64
#ifdef TARGET_XARCH
return (compExactlyDependsOn(InstructionSet_Vector512));
#else
return false;
Expand All @@ -9196,13 +9196,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#ifdef TARGET_XARCH
bool canUseVexEncoding() const
{
#ifdef DEBUG
if (JitConfig.JitForceEVEXEncoding())
{
return true;
}
#endif // DEBUG

return compOpportunisticallyDependsOn(InstructionSet_AVX);
}

Expand All @@ -9214,13 +9207,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
//
bool canUseEvexEncoding() const
{
#ifdef DEBUG
if (JitConfig.JitForceEVEXEncoding())
{
return true;
}
#endif // DEBUG

return compOpportunisticallyDependsOn(InstructionSet_AVX512F);
}

Expand All @@ -9235,16 +9221,19 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#ifdef DEBUG
// Using JitStressEVEXEncoding flag will force instructions which would
// otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding
// This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus
// causing failure if not running on compatible hardware.
// This requires AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL support

if (JitConfig.JitForceEVEXEncoding())
if (JitConfig.JitStressEvexEncoding() && IsBaselineVector512IsaSupported())
{
return true;
}
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW_VL));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD_VL));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL));

if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
{
return true;
}
#endif // DEBUG
Expand Down
11 changes: 8 additions & 3 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6591,7 +6591,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
coldCodeBlock = nullptr;

// This restricts the data alignment to: 4, 8, 16, 32 or 64 bytes
// Alignments greater than 32 would require VM support in ICorJitInfo::allocMem
// Alignments greater than 64 would require VM support in ICorJitInfo::allocMem
uint32_t dataAlignment = emitConsDsc.alignment;
assert((dataSection::MIN_DATA_ALIGN <= dataAlignment) && (dataAlignment <= dataSection::MAX_DATA_ALIGN) &&
isPow2(dataAlignment));
Expand Down Expand Up @@ -6710,11 +6710,16 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
{
assert(((size_t)codeBlock & 15) == 0);
}
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0)

if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN) != 0)
{
assert(((size_t)consBlock & 63) == 0);
}
else if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0)
{
assert(((size_t)consBlock & 31) == 0);
}
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) != 0)
else if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) != 0)
{
assert(((size_t)consBlock & 15) == 0);
}
Expand Down
Loading