Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add X86Serialize hardware intrinsic. #68677

Merged
merged 1 commit into from
May 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE3_4, W("EnableSSE3_4"), 1
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE41, W("EnableSSE41"), 1, "Allows SSE4.1+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE42, W("EnableSSE42"), 1, "Allows SSE4.2+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSSE3, W("EnableSSSE3"), 1, "Allows SSSE3+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableX86Serialize, W("EnableX86Serialize"), 1, "Allows X86Serialize+ hardware intrinsics to be disabled")
#elif defined(TARGET_ARM64)
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64AdvSimd, W("EnableArm64AdvSimd"), 1, "Allows Arm64 AdvSimd+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Aes, W("EnableArm64Aes"), 1, "Allows Arm64 Aes+ hardware intrinsics to be disabled")
Expand Down
94 changes: 58 additions & 36 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,24 +59,26 @@ enum CORINFO_InstructionSet
InstructionSet_Vector256=18,
InstructionSet_AVXVNNI=19,
InstructionSet_MOVBE=20,
InstructionSet_X86Base_X64=21,
InstructionSet_SSE_X64=22,
InstructionSet_SSE2_X64=23,
InstructionSet_SSE3_X64=24,
InstructionSet_SSSE3_X64=25,
InstructionSet_SSE41_X64=26,
InstructionSet_SSE42_X64=27,
InstructionSet_AVX_X64=28,
InstructionSet_AVX2_X64=29,
InstructionSet_AES_X64=30,
InstructionSet_BMI1_X64=31,
InstructionSet_BMI2_X64=32,
InstructionSet_FMA_X64=33,
InstructionSet_LZCNT_X64=34,
InstructionSet_PCLMULQDQ_X64=35,
InstructionSet_POPCNT_X64=36,
InstructionSet_AVXVNNI_X64=37,
InstructionSet_MOVBE_X64=38,
InstructionSet_X86Serialize=21,
InstructionSet_X86Base_X64=22,
InstructionSet_SSE_X64=23,
InstructionSet_SSE2_X64=24,
InstructionSet_SSE3_X64=25,
InstructionSet_SSSE3_X64=26,
InstructionSet_SSE41_X64=27,
InstructionSet_SSE42_X64=28,
InstructionSet_AVX_X64=29,
InstructionSet_AVX2_X64=30,
InstructionSet_AES_X64=31,
InstructionSet_BMI1_X64=32,
InstructionSet_BMI2_X64=33,
InstructionSet_FMA_X64=34,
InstructionSet_LZCNT_X64=35,
InstructionSet_PCLMULQDQ_X64=36,
InstructionSet_POPCNT_X64=37,
InstructionSet_AVXVNNI_X64=38,
InstructionSet_MOVBE_X64=39,
InstructionSet_X86Serialize_X64=40,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand All @@ -99,24 +101,26 @@ enum CORINFO_InstructionSet
InstructionSet_Vector256=18,
InstructionSet_AVXVNNI=19,
InstructionSet_MOVBE=20,
InstructionSet_X86Base_X64=21,
InstructionSet_SSE_X64=22,
InstructionSet_SSE2_X64=23,
InstructionSet_SSE3_X64=24,
InstructionSet_SSSE3_X64=25,
InstructionSet_SSE41_X64=26,
InstructionSet_SSE42_X64=27,
InstructionSet_AVX_X64=28,
InstructionSet_AVX2_X64=29,
InstructionSet_AES_X64=30,
InstructionSet_BMI1_X64=31,
InstructionSet_BMI2_X64=32,
InstructionSet_FMA_X64=33,
InstructionSet_LZCNT_X64=34,
InstructionSet_PCLMULQDQ_X64=35,
InstructionSet_POPCNT_X64=36,
InstructionSet_AVXVNNI_X64=37,
InstructionSet_MOVBE_X64=38,
InstructionSet_X86Serialize=21,
InstructionSet_X86Base_X64=22,
InstructionSet_SSE_X64=23,
InstructionSet_SSE2_X64=24,
InstructionSet_SSE3_X64=25,
InstructionSet_SSSE3_X64=26,
InstructionSet_SSE41_X64=27,
InstructionSet_SSE42_X64=28,
InstructionSet_AVX_X64=29,
InstructionSet_AVX2_X64=30,
InstructionSet_AES_X64=31,
InstructionSet_BMI1_X64=32,
InstructionSet_BMI2_X64=33,
InstructionSet_FMA_X64=34,
InstructionSet_LZCNT_X64=35,
InstructionSet_PCLMULQDQ_X64=36,
InstructionSet_POPCNT_X64=37,
InstructionSet_AVXVNNI_X64=38,
InstructionSet_MOVBE_X64=39,
InstructionSet_X86Serialize_X64=40,
#endif // TARGET_X86

};
Expand Down Expand Up @@ -218,6 +222,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_AVXVNNI_X64);
if (HasInstructionSet(InstructionSet_MOVBE))
AddInstructionSet(InstructionSet_MOVBE_X64);
if (HasInstructionSet(InstructionSet_X86Serialize))
AddInstructionSet(InstructionSet_X86Serialize_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
Expand Down Expand Up @@ -367,6 +373,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_MOVBE);
if (resultflags.HasInstructionSet(InstructionSet_MOVBE_X64) && !resultflags.HasInstructionSet(InstructionSet_MOVBE))
resultflags.RemoveInstructionSet(InstructionSet_MOVBE_X64);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
Expand Down Expand Up @@ -405,6 +415,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
resultflags.RemoveInstructionSet(InstructionSet_MOVBE);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
#endif // TARGET_AMD64
#ifdef TARGET_X86
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
Expand Down Expand Up @@ -445,6 +457,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
resultflags.RemoveInstructionSet(InstructionSet_MOVBE);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
#endif // TARGET_X86

} while (!oldflags.Equals(resultflags));
Expand Down Expand Up @@ -581,6 +595,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "MOVBE";
case InstructionSet_MOVBE_X64 :
return "MOVBE_X64";
case InstructionSet_X86Serialize :
return "X86Serialize";
case InstructionSet_X86Serialize_X64 :
return "X86Serialize_X64";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -623,6 +641,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVXVNNI";
case InstructionSet_MOVBE :
return "MOVBE";
case InstructionSet_X86Serialize :
return "X86Serialize";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -673,6 +693,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE;
case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand All @@ -693,6 +714,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE;
case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize;
#endif // TARGET_X86

default:
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* dfda4767-9618-4d6a-8105-a86034dc52eb */
0xdfda4767,
0x9618,
0x4d6a,
{0x81, 0x05, 0xa8, 0x60, 0x34, 0xdc, 0x52, 0xeb}
constexpr GUID JITEEVersionIdentifier = { /* f2a217c4-2a69-4308-99ce-8292c6763776 */
0xf2a217c4,
0x2a69,
0x4308,
{0x99, 0xce, 0x82, 0x92, 0xc6, 0x76, 0x37, 0x76}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_AvxVnni=25,
READYTORUN_INSTRUCTION_Rcpc=26,
READYTORUN_INSTRUCTION_Movbe=27,
READYTORUN_INSTRUCTION_X86Serialize=28,

};

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node);
void genPOPCNTIntrinsic(GenTreeHWIntrinsic* node);
void genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins);
void genX86SerializeIntrinsic(GenTreeHWIntrinsic* node);
template <typename HWIntrinsicSwitchCaseBody>
void genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsic,
regNumber nonConstImmReg,
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3737,6 +3737,8 @@ class Compiler
GenTree* impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impBMI1OrBMI2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);

GenTree* impSerializeIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
#endif // TARGET_XARCH
#endif // FEATURE_HW_INTRINSICS
GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -1386,6 +1386,7 @@ class emitter
#define PERFSCORE_THROUGHPUT_19C 19.0f // slower - 19 cycles
#define PERFSCORE_THROUGHPUT_25C 25.0f // slower - 25 cycles
#define PERFSCORE_THROUGHPUT_33C 33.0f // slower - 33 cycles
#define PERFSCORE_THROUGHPUT_50C 50.0f // slower - 50 cycles
#define PERFSCORE_THROUGHPUT_52C 52.0f // slower - 52 cycles
#define PERFSCORE_THROUGHPUT_57C 57.0f // slower - 57 cycles
#define PERFSCORE_THROUGHPUT_140C 140.0f // slower - 140 cycles
Expand Down
8 changes: 7 additions & 1 deletion src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2952,7 +2952,7 @@ void emitter::emitIns(instruction ins)
ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
// These instructions take zero operands
|| ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence ||
ins == INS_pause);
ins == INS_pause || ins == INS_serialize);

assert(assertCond);
}
Expand Down Expand Up @@ -16317,6 +16317,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
}
break;

case INS_serialize:
{
result.insThroughput = PERFSCORE_THROUGHPUT_50C;
break;
}

default:
// unhandled instruction insFmt combination
perfScoreUnhandledInstruction(id, &result);
Expand Down
34 changes: 34 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case InstructionSet_POPCNT_X64:
genPOPCNTIntrinsic(node);
break;
case InstructionSet_X86Serialize:
case InstructionSet_X86Serialize_X64:
genX86SerializeIntrinsic(node);
break;

default:
unreached();
break;
Expand Down Expand Up @@ -1957,4 +1962,33 @@ void CodeGen::genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins)
genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()), targetReg, op1);
}

//------------------------------------------------------------------------
// genX86SerializeIntrinsic: Emits the machine code for an X86Serialize
//                           hardware intrinsic node.
//
// Arguments:
//    node - The hardware intrinsic node to generate code for
//
// Notes:
//    NI_X86Serialize_Serialize is the only intrinsic handled here; it is
//    emitted as a bare, operand-less INS_serialize. Any other intrinsic id
//    reaching this function is treated as unreachable.
//
void CodeGen::genX86SerializeIntrinsic(GenTreeHWIntrinsic* node)
{
    const NamedIntrinsic id = node->GetHWIntrinsicId();

    // Operands are consumed before anything is emitted, as in the other
    // per-ISA codegen helpers.
    genConsumeMultiOpOperands(node);

    if (id == NI_X86Serialize_Serialize)
    {
        // The intrinsic carries no SIMD base type; it is a plain zero-operand instruction.
        assert(node->GetSimdBaseType() == TYP_UNKNOWN);
        GetEmitter()->emitIns(INS_serialize);
    }
    else
    {
        unreached();
    }

    genProduceReg(node);
}

#endif // FEATURE_HW_INTRINSICS
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,14 @@ HARDWARE_INTRINSIC(POPCNT, PopCount,
// POPCNT Intrinsics
HARDWARE_INTRINSIC(POPCNT_X64, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// X86Serialize Intrinsics
HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)


// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
Expand Down
36 changes: 36 additions & 0 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_PCLMULQDQ_X64;
case InstructionSet_POPCNT:
return InstructionSet_POPCNT_X64;
case InstructionSet_X86Serialize:
return InstructionSet_X86Serialize_X64;
default:
return InstructionSet_NONE;
}
Expand Down Expand Up @@ -159,6 +161,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
{
return InstructionSet_X86Base;
}
else if (strcmp(className, "X86Serialize") == 0)
{
return InstructionSet_X86Serialize;
}

return InstructionSet_ILLEGAL;
}
Expand Down Expand Up @@ -384,6 +390,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
case InstructionSet_Vector256:
case InstructionSet_X86Base:
case InstructionSet_X86Base_X64:
case InstructionSet_X86Serialize:
case InstructionSet_X86Serialize_X64:
{
return true;
}
Expand Down Expand Up @@ -506,6 +514,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case InstructionSet_BMI2:
case InstructionSet_BMI2_X64:
return impBMI1OrBMI2Intrinsic(intrinsic, method, sig);

case InstructionSet_X86Serialize:
case InstructionSet_X86Serialize_X64:
return impSerializeIntrinsic(intrinsic, method, sig);

default:
return nullptr;
}
Expand Down Expand Up @@ -2282,6 +2295,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
}

case NI_X86Base_Pause:
case NI_X86Serialize_Serialize:
{
assert(sig->numArgs == 0);
assert(JITtype2varType(sig->retType) == TYP_VOID);
Expand Down Expand Up @@ -2566,4 +2580,26 @@ GenTree* Compiler::impBMI1OrBMI2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METH
}
}

//------------------------------------------------------------------------
// impSerializeIntrinsic: Imports an X86Serialize hardware intrinsic call.
//
// Arguments:
//    intrinsic - The id of the intrinsic being imported
//    method    - The handle of the method being imported (not used here)
//    sig       - The signature of the intrinsic call
//
// Return Value:
//    A TYP_VOID scalar hardware intrinsic node for NI_X86Serialize_Serialize;
//    nullptr for any other intrinsic id.
//
GenTree* Compiler::impSerializeIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
    // Serialize is the only intrinsic this importer knows about.
    if (intrinsic != NI_X86Serialize_Serialize)
    {
        return nullptr;
    }

    // The managed signature is expected to be `void Serialize()`.
    assert(sig->numArgs == 0);
    assert(JITtype2varType(sig->retType) == TYP_VOID);

    return gtNewScalarHWIntrinsicNode(TYP_VOID, intrinsic);
}

#endif // FEATURE_HW_INTRINSICS
2 changes: 2 additions & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,8 @@ INST1(pause, "pause", IUM_RD, 0x0090F3,
INST1(lock, "lock", IUM_RD, 0x0000F0, INS_FLAGS_None )
INST1(leave, "leave", IUM_RD, 0x0000C9, INS_FLAGS_None )

INST1(serialize, "serialize", IUM_RD, 0x0fe801, INS_FLAGS_None )

INST1(neg, "neg", IUM_RW, 0x0018F6, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit )
INST1(not, "not", IUM_RW, 0x0010F6, INS_FLAGS_None | INS_FLAGS_Has_Wbit )

Expand Down
Loading