Skip to content

Commit

Permalink
Add BSF and BSR fallbacks for BitOperations methods (#34550)
Browse files Browse the repository at this point in the history
* implement X86Base, add BSF and BSR intrinsics
* add X86Base to Utf8String S.R.I. shim
* add BSF and BSR fallbacks in BitOperations
* add X86Base to zapinfo
* disable 'w' size bit for BSF and BSR
  • Loading branch information
saucecontrol authored Apr 29, 2020
1 parent 1a99589 commit d10a782
Show file tree
Hide file tree
Showing 23 changed files with 439 additions and 129 deletions.
10 changes: 5 additions & 5 deletions src/coreclr/src/inc/corinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,11 @@ TODO: Talk about initializing structures before use
#endif
#endif

SELECTANY const GUID JITEEVersionIdentifier = { /* bb6ea6c3-ce5a-4543-86b7-c9c88f9ec780 */
0xbb6ea6c3,
0xce5a,
0x4543,
{ 0x86, 0xb7, 0xc9, 0xc8, 0x8f, 0x9e, 0xc7, 0x80 }
SELECTANY const GUID JITEEVersionIdentifier = { /* 8b2226a2-ac30-4f5c-ae5c-926c792ecdb9 */
0x8b2226a2,
0xac30,
0x4f5c,
{ 0xae, 0x5c, 0x92, 0x6c, 0x79, 0x2e, 0xcd, 0xb9 }
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
122 changes: 72 additions & 50 deletions src/coreclr/src/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,58 +32,62 @@ enum CORINFO_InstructionSet
InstructionSet_Vector128=12,
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
InstructionSet_SSE=1,
InstructionSet_SSE2=2,
InstructionSet_SSE3=3,
InstructionSet_SSSE3=4,
InstructionSet_SSE41=5,
InstructionSet_SSE42=6,
InstructionSet_AVX=7,
InstructionSet_AVX2=8,
InstructionSet_AES=9,
InstructionSet_BMI1=10,
InstructionSet_BMI2=11,
InstructionSet_FMA=12,
InstructionSet_LZCNT=13,
InstructionSet_PCLMULQDQ=14,
InstructionSet_POPCNT=15,
InstructionSet_Vector128=16,
InstructionSet_Vector256=17,
InstructionSet_BMI1_X64=18,
InstructionSet_BMI2_X64=19,
InstructionSet_LZCNT_X64=20,
InstructionSet_POPCNT_X64=21,
InstructionSet_SSE_X64=22,
InstructionSet_SSE2_X64=23,
InstructionSet_SSE41_X64=24,
InstructionSet_SSE42_X64=25,
InstructionSet_X86Base=1,
InstructionSet_SSE=2,
InstructionSet_SSE2=3,
InstructionSet_SSE3=4,
InstructionSet_SSSE3=5,
InstructionSet_SSE41=6,
InstructionSet_SSE42=7,
InstructionSet_AVX=8,
InstructionSet_AVX2=9,
InstructionSet_AES=10,
InstructionSet_BMI1=11,
InstructionSet_BMI2=12,
InstructionSet_FMA=13,
InstructionSet_LZCNT=14,
InstructionSet_PCLMULQDQ=15,
InstructionSet_POPCNT=16,
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
InstructionSet_X86Base_X64=19,
InstructionSet_BMI1_X64=20,
InstructionSet_BMI2_X64=21,
InstructionSet_LZCNT_X64=22,
InstructionSet_POPCNT_X64=23,
InstructionSet_SSE_X64=24,
InstructionSet_SSE2_X64=25,
InstructionSet_SSE41_X64=26,
InstructionSet_SSE42_X64=27,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_SSE=1,
InstructionSet_SSE2=2,
InstructionSet_SSE3=3,
InstructionSet_SSSE3=4,
InstructionSet_SSE41=5,
InstructionSet_SSE42=6,
InstructionSet_AVX=7,
InstructionSet_AVX2=8,
InstructionSet_AES=9,
InstructionSet_BMI1=10,
InstructionSet_BMI2=11,
InstructionSet_FMA=12,
InstructionSet_LZCNT=13,
InstructionSet_PCLMULQDQ=14,
InstructionSet_POPCNT=15,
InstructionSet_Vector128=16,
InstructionSet_Vector256=17,
InstructionSet_BMI1_X64=18,
InstructionSet_BMI2_X64=19,
InstructionSet_LZCNT_X64=20,
InstructionSet_POPCNT_X64=21,
InstructionSet_SSE_X64=22,
InstructionSet_SSE2_X64=23,
InstructionSet_SSE41_X64=24,
InstructionSet_SSE42_X64=25,
InstructionSet_X86Base=1,
InstructionSet_SSE=2,
InstructionSet_SSE2=3,
InstructionSet_SSE3=4,
InstructionSet_SSSE3=5,
InstructionSet_SSE41=6,
InstructionSet_SSE42=7,
InstructionSet_AVX=8,
InstructionSet_AVX2=9,
InstructionSet_AES=10,
InstructionSet_BMI1=11,
InstructionSet_BMI2=12,
InstructionSet_FMA=13,
InstructionSet_LZCNT=14,
InstructionSet_PCLMULQDQ=15,
InstructionSet_POPCNT=16,
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
InstructionSet_X86Base_X64=19,
InstructionSet_BMI1_X64=20,
InstructionSet_BMI2_X64=21,
InstructionSet_LZCNT_X64=22,
InstructionSet_POPCNT_X64=23,
InstructionSet_SSE_X64=24,
InstructionSet_SSE2_X64=25,
InstructionSet_SSE41_X64=26,
InstructionSet_SSE42_X64=27,
#endif // TARGET_X86

};
Expand Down Expand Up @@ -139,6 +143,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_Crc32_Arm64);
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
if (HasInstructionSet(InstructionSet_X86Base))
AddInstructionSet(InstructionSet_X86Base_X64);
if (HasInstructionSet(InstructionSet_SSE))
AddInstructionSet(InstructionSet_SSE_X64);
if (HasInstructionSet(InstructionSet_SSE2))
Expand Down Expand Up @@ -204,6 +210,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_Sha256);
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64))
resultflags.RemoveInstructionSet(InstructionSet_X86Base);
if (resultflags.HasInstructionSet(InstructionSet_X86Base_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_X86Base_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_SSE_X64))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE))
Expand Down Expand Up @@ -236,6 +246,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT))
resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
resultflags.RemoveInstructionSet(InstructionSet_SSE2);
if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
Expand Down Expand Up @@ -264,6 +276,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
#endif // TARGET_AMD64
#ifdef TARGET_X86
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
resultflags.RemoveInstructionSet(InstructionSet_SSE2);
if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
Expand Down Expand Up @@ -332,6 +346,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector128";
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case InstructionSet_X86Base :
return "X86Base";
case InstructionSet_X86Base_X64 :
return "X86Base_X64";
case InstructionSet_SSE :
return "SSE";
case InstructionSet_SSE_X64 :
Expand Down Expand Up @@ -384,6 +402,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector256";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
return "X86Base";
case InstructionSet_SSE :
return "SSE";
case InstructionSet_SSE2 :
Expand Down Expand Up @@ -447,6 +467,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics;
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse: return InstructionSet_SSE;
case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_SSE2;
case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3;
Expand All @@ -464,6 +485,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse: return InstructionSet_SSE;
case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_SSE2;
case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3;
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Sha1=19,
READYTORUN_INSTRUCTION_Sha256=20,
READYTORUN_INSTRUCTION_Atomics=21,
READYTORUN_INSTRUCTION_X86Base=22,

};

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genHWIntrinsic_R_R_R_RM(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTree* op3);
void genBaseIntrinsic(GenTreeHWIntrinsic* node);
void genX86BaseIntrinsic(GenTreeHWIntrinsic* node);
void genSSEIntrinsic(GenTreeHWIntrinsic* node);
void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
void genSSE41Intrinsic(GenTreeHWIntrinsic* node);
Expand Down
9 changes: 4 additions & 5 deletions src/coreclr/src/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2161,7 +2161,7 @@ const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs)
void Compiler::compSetProcessor()
{
//
// NOTE: This function needs to be kept in sync with EEJitManager::SetCpuInfo() in vm\codemap.cpp
// NOTE: This function needs to be kept in sync with EEJitManager::SetCpuInfo() in vm\codeman.cpp
//

const JitFlags& jitFlags = *opts.jitFlags;
Expand Down Expand Up @@ -2195,13 +2195,14 @@ void Compiler::compSetProcessor()

#endif // TARGET_X86

// The VM will set the ISA flags depending on actual hardware support.
// We then select which ISAs to leave enabled based on the JIT config.
// The exception to this is the dummy Vector64/128/256 ISAs, which must be added explicitly.
CORINFO_InstructionSetFlags instructionSetFlags = jitFlags.GetInstructionSetFlags();
opts.compSupportsISA = 0;
opts.compSupportsISAReported = 0;

#ifdef TARGET_XARCH
bool avxSupported = false;

if (JitConfig.EnableHWIntrinsic())
{
// Dummy ISAs for simplifying the JIT code
Expand Down Expand Up @@ -2315,8 +2316,6 @@ void Compiler::compSetProcessor()
if (JitConfig.EnableHWIntrinsic())
{
// Dummy ISAs for simplifying the JIT code
instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase);
instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase_Arm64);
instructionSetFlags.AddInstructionSet(InstructionSet_Vector64);
instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
}
Expand Down
13 changes: 9 additions & 4 deletions src/coreclr/src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9448,7 +9448,8 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)

// Use the large version if this is not a byte. This trick will not
// work in case of SSE2 and AVX instructions.
if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins))
if ((size != EA_1BYTE) && (ins != INS_imul) && (ins != INS_bsf) && (ins != INS_bsr) && !IsSSEInstruction(ins) &&
!IsAVXInstruction(ins))
{
code++;
}
Expand Down Expand Up @@ -10214,8 +10215,9 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}

// Use the large version if this is not a byte
if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSEInstruction(ins) &&
!IsAVXInstruction(ins))
// TODO-XArch-Cleanup Can the need for the 'w' size bit be encoded in the instruction flags?
if ((size != EA_1BYTE) && (ins != INS_imul) && (ins != INS_bsf) && (ins != INS_bsr) && (!insIsCMOV(ins)) &&
!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
{
code |= 0x1;
}
Expand Down Expand Up @@ -11248,7 +11250,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
#endif // TARGET_AMD64
}
#ifdef FEATURE_HW_INTRINSICS
else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt))
else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) ||
(ins == INS_tzcnt))
{
code = insEncodeRMreg(ins, code);
if ((ins == INS_crc32) && (size > EA_1BYTE))
Expand Down Expand Up @@ -14826,6 +14829,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency += PERFSCORE_LATENCY_2C;
break;

case INS_bsf:
case INS_bsr:
case INS_pextrb:
case INS_pextrd:
case INS_pextrw:
Expand Down
38 changes: 38 additions & 0 deletions src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case InstructionSet_Vector256:
genBaseIntrinsic(node);
break;
case InstructionSet_X86Base:
case InstructionSet_X86Base_X64:
genX86BaseIntrinsic(node);
break;
case InstructionSet_SSE:
case InstructionSet_SSE_X64:
genSSEIntrinsic(node);
Expand Down Expand Up @@ -1249,6 +1253,40 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
genProduceReg(node);
}

//------------------------------------------------------------------------
// genX86BaseIntrinsic: Emits the code for a hardware intrinsic node that was
//                      dispatched here for the X86Base / X86Base_X64 ISAs
//
// Arguments:
//    node - The hardware intrinsic node to generate code for
//
// Notes:
//    Only the BitScanForward and BitScanReverse intrinsics (plus their X64
//    counterparts) are accepted. Any other intrinsic id hits unreached().
//
void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node)
{
    const NamedIntrinsic id = node->gtHWIntrinsicId;

    // Guard: everything that is not one of the four bit-scan ids is invalid here.
    const bool isBitScan = (id == NI_X86Base_BitScanForward) || (id == NI_X86Base_BitScanReverse) ||
                           (id == NI_X86Base_X64_BitScanForward) || (id == NI_X86Base_X64_BitScanReverse);
    if (!isBitScan)
    {
        unreached();
        return;
    }

    // Collect the inputs for the emitter call before the operands are consumed.
    GenTree* const    srcOp      = node->gtGetOp1();
    const regNumber   dstReg     = node->GetRegNum();
    const var_types   resultType = node->TypeGet();
    // The concrete instruction is chosen by the intrinsic table for this id/type pair.
    const instruction scanIns    = HWIntrinsicInfo::lookupIns(id, resultType);

    genConsumeOperands(node);
    // Single-source form: destination register plus one register-or-memory operand,
    // sized according to the node's result type.
    genHWIntrinsic_R_RM(node, scanIns, emitTypeSize(resultType), dstReg, srcOp);
    genProduceReg(node);
}

//------------------------------------------------------------------------
// genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node
//
Expand Down
Loading

0 comments on commit d10a782

Please sign in to comment.