Skip to content

Commit

Permalink
Adding support for X86Base.CpuId (#40167)
Browse files Browse the repository at this point in the history
* Adding support for X86Base.CpuId

* Rename getcpuid and getextcpuid to __cpuid and __cpuidex, respectively

* Removing xchg from the Unix x64 __cpuid implementation

* Add a comment as to why the X86/X86Base/CpuId test limits the checked vendors

* Apply suggestions from code review

Co-authored-by: Jan Kotas <[email protected]>

* Adding back a missing parentheses

* Fixing a typo in the isGenuineIntel check

* Avoid a conflict around cpuInfo

* Avoid an implicit cast when comparing the cpuidInfo

* Separate the __cpuidex qcall into coreclr and mono specific variants

* Add the partial modifier to the X86Base.PlatformNotSupported.cs file

Co-authored-by: Jan Kotas <[email protected]>
  • Loading branch information
tannergooding and jkotas authored Aug 5, 2020
1 parent 5c29e14 commit 96f178d
Show file tree
Hide file tree
Showing 27 changed files with 460 additions and 262 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@
<Compile Include="$(BclSourcesRoot)\System\Runtime\InteropServices\Marshal.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\InteropServices\MemoryMarshal.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\InteropServices\NativeLibrary.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\Intrinsics\X86\X86Base.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\Loader\AssemblyLoadContext.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\Versioning\CompatibilitySwitch.cs" />
<Compile Include="$(BclSourcesRoot)\System\RuntimeArgumentHandle.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Runtime.Intrinsics.X86
{
// CoreCLR-specific part of the partial X86Base class.
public abstract partial class X86Base
{
// Declared as a QCall (DllImport bound through RuntimeHelpers.QCall), so the implementation
// lives in the native runtime rather than in managed code.
// NOTE(review): presumably bound to the native SystemNative::X86BaseCpuId, which executes CPUID
// for leaf `functionId` / sub-leaf `subFunctionId` and writes four ints (EAX, EBX, ECX, EDX)
// through `cpuInfo` - confirm against the QCall table, and make sure every caller passes a
// buffer of at least 4 ints.
[DllImport(RuntimeHelpers.QCall)]
private static extern unsafe void __cpuidex(int* cpuInfo, int functionId, int subFunctionId);
}
}
10 changes: 9 additions & 1 deletion src/coreclr/src/classlibnative/bcltype/system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,9 +607,17 @@ BOOL QCALLTYPE SystemNative::WinRTSupported()

#endif // FEATURE_COMINTEROP

#if defined(TARGET_X86) || defined(TARGET_AMD64)

// QCall entry point that exposes the CPUID instruction to managed code.
//   cpuInfo       - receives the result; passed straight to __cpuidex, which fills four ints
//                   in the order EAX, EBX, ECX, EDX.
//   functionId    - CPUID leaf (the value placed in EAX).
//   subFunctionId - CPUID sub-leaf (the value placed in ECX).
// No validation is performed here: the arguments are forwarded unchanged.
void QCALLTYPE SystemNative::X86BaseCpuId(int cpuInfo[4], int functionId, int subFunctionId)
{
QCALL_CONTRACT;

BEGIN_QCALL;

// MSVC provides __cpuidex as an intrinsic; on Unix the runtime supplies its own definition.
__cpuidex(cpuInfo, functionId, subFunctionId);

END_QCALL;
}


#endif // defined(TARGET_X86) || defined(TARGET_AMD64)
4 changes: 4 additions & 0 deletions src/coreclr/src/classlibnative/bcltype/system.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ class SystemNative
// Return a method info for the method where the exception was thrown
static FCDECL1(ReflectMethodObject*, GetMethodFromStackTrace, ArrayBase* pStackTraceUNSAFE);

#if defined(TARGET_X86) || defined(TARGET_AMD64)
// QCall that runs CPUID for the given leaf (functionId) and sub-leaf (subFunctionId) and
// stores the resulting registers in cpuInfo. Only declared for x86/x64 targets.
static void QCALLTYPE X86BaseCpuId(int cpuInfo[4], int functionId, int subFunctionId);
#endif // defined(TARGET_X86) || defined(TARGET_AMD64)

private:
// Common processing code for FailFast
static void GenericFailFast(STRINGREF refMesgString, EXCEPTIONREF refExceptionForWatsonBucketing, UINT_PTR retAddress, UINT exitCode, STRINGREF errorSource);
Expand Down
45 changes: 0 additions & 45 deletions src/coreclr/src/vm/amd64/AsmHelpers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -667,27 +667,6 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT
NESTED_END ProfileTailcallNaked, _TEXT


;; extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);
;; Executes CPUID with EAX = arg and ECX = 0, and stores the resulting
;; EAX, EBX, ECX and EDX (in that order, 4 bytes each) into result[0..15].
;; EAX is left untouched after CPUID, so it is also the DWORD return value.
NESTED_ENTRY getcpuid, _TEXT

; rbx is overwritten by cpuid and rsi is used as the result pointer, so both are saved here
push_nonvol_reg rbx
push_nonvol_reg rsi
END_PROLOGUE

mov eax, ecx ; first arg
mov rsi, rdx ; second arg (result)
xor ecx, ecx ; clear ecx - needed for "Structured Extended Feature Flags"
cpuid
mov [rsi+ 0], eax ; result[0..3]   = EAX
mov [rsi+ 4], ebx ; result[4..7]   = EBX
mov [rsi+ 8], ecx ; result[8..11]  = ECX
mov [rsi+12], edx ; result[12..15] = EDX
pop rsi
pop rbx
ret
NESTED_END getcpuid, _TEXT


;; extern "C" DWORD __stdcall xmmYmmStateSupport();
LEAF_ENTRY xmmYmmStateSupport, _TEXT
mov ecx, 0 ; Specify xcr0
Expand All @@ -703,30 +682,6 @@ LEAF_ENTRY xmmYmmStateSupport, _TEXT
ret
LEAF_END xmmYmmStateSupport, _TEXT

; The following function uses the Deterministic Cache Parameter leaves to determine the cache
; hierarchy information on Prescott and later platforms.
; This function takes 3 arguments:
; Arg1 is an input to ECX. Used as the index that tells CPUID which cache level to return information on.
; Arg1 is already passed in ECX on the call to getextcpuid, so no explicit assignment is required.
; Arg2 is an input to EAX. For deterministic cache enumeration, we pass in 4H in arg2.
; Arg3 is a pointer to the return buffer; it receives EAX, EBX, ECX and EDX (in that order,
; 4 bytes each). EAX is left untouched after CPUID, so it is also the return value.
NESTED_ENTRY getextcpuid, _TEXT
; rbx is overwritten by cpuid and rsi is used as the result pointer, so both are saved here
push_nonvol_reg rbx
push_nonvol_reg rsi
END_PROLOGUE

mov eax, edx ; second arg (input to EAX)
mov rsi, r8 ; third arg (pointer to return dwbuffer)
cpuid
mov [rsi+ 0], eax ; buffer[0..3]   = EAX
mov [rsi+ 4], ebx ; buffer[4..7]   = EBX
mov [rsi+ 8], ecx ; buffer[8..11]  = ECX
mov [rsi+12], edx ; buffer[12..15] = EDX
pop rsi
pop rbx

ret
NESTED_END getextcpuid, _TEXT


; EXTERN_C void moveOWord(LPVOID* src, LPVOID* target);
; <NOTE>
Expand Down
39 changes: 15 additions & 24 deletions src/coreclr/src/vm/amd64/unixstubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,26 @@ extern "C"
PORTABILITY_ASSERT("Implement for PAL");
}

DWORD getcpuid(DWORD arg, unsigned char result[16])
void __cpuid(int cpuInfo[4], int function_id)
{
DWORD eax;
__asm(" xor %%ecx, %%ecx\n" \
" cpuid\n" \
" mov %%eax, 0(%[result])\n" \
" mov %%ebx, 4(%[result])\n" \
" mov %%ecx, 8(%[result])\n" \
" mov %%edx, 12(%[result])\n" \
: "=a"(eax) /*output in eax*/\
: "a"(arg), [result]"r"(result) /*inputs - arg in eax, result in any register*/\
: "rbx", "ecx", "edx", "memory" /* registers that are clobbered, *result is clobbered */
);
return eax;
// Based on the Clang implementation provided in cpuid.h:
// https://github.com/llvm/llvm-project/blob/master/clang/lib/Headers/cpuid.h

__asm(" cpuid\n" \
: "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \
: "0"(function_id)
);
}

DWORD getextcpuid(DWORD arg1, DWORD arg2, unsigned char result[16])
void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id)
{
DWORD eax;
// Based on the Clang implementation provided in cpuid.h:
// https://github.com/llvm/llvm-project/blob/master/clang/lib/Headers/cpuid.h

__asm(" cpuid\n" \
" mov %%eax, 0(%[result])\n" \
" mov %%ebx, 4(%[result])\n" \
" mov %%ecx, 8(%[result])\n" \
" mov %%edx, 12(%[result])\n" \
: "=a"(eax) /*output in eax*/\
: "c"(arg1), "a"(arg2), [result]"r"(result) /*inputs - arg1 in ecx, arg2 in eax, result in any register*/\
: "rbx", "edx", "memory" /* registers that are clobbered, *result is clobbered */
);
return eax;
: "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \
: "0"(function_id), "2"(subFunction_id)
);
}

DWORD xmmYmmStateSupport()
Expand Down
19 changes: 10 additions & 9 deletions src/coreclr/src/vm/cgensys.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,21 +95,22 @@ inline void GetSpecificCpuInfo(CORINFO_CPU * cpuInfo)
#endif // !TARGET_X86

#if (defined(TARGET_X86) || defined(TARGET_AMD64)) && !defined(CROSSGEN_COMPILE)
extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);
extern "C" DWORD __stdcall getextcpuid(DWORD arg1, DWORD arg2, unsigned char result[16]);
#ifdef TARGET_UNIX
// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures
// We define matching signatures for use on Unix platforms.

extern "C" void __stdcall __cpuid(int cpuInfo[4], int function_id);
extern "C" void __stdcall __cpuidex(int cpuInfo[4], int function_id, int subFunction_id);
#endif // TARGET_UNIX
extern "C" DWORD __stdcall xmmYmmStateSupport();
#endif

inline bool TargetHasAVXSupport()
{
#if (defined(TARGET_X86) || defined(TARGET_AMD64)) && !defined(CROSSGEN_COMPILE)
unsigned char buffer[16];
// All x86/AMD64 targets support cpuid.
(void) getcpuid(1, buffer);
// getcpuid executes cpuid with eax set to its first argument, and ecx cleared.
// It returns the resulting eax, ebx, ecx and edx (in that order) in buffer[].
// The AVX feature is ECX bit 28.
return ((buffer[11] & 0x10) != 0);
int cpuInfo[4];
__cpuid(cpuInfo, 0x00000001); // All x86/AMD64 targets support cpuid.
// __cpuid fills cpuInfo as { EAX, EBX, ECX, EDX }, so ECX is cpuInfo[2] (cpuInfo[3] is EDX).
return ((cpuInfo[2] & (1 << 28)) != 0); // The AVX feature is ECX bit 28.
#endif // (defined(TARGET_X86) || defined(TARGET_AMD64)) && !defined(CROSSGEN_COMPILE)
return false;
}
Expand Down
100 changes: 46 additions & 54 deletions src/coreclr/src/vm/codeman.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1308,115 +1308,110 @@ void EEJitManager::SetCpuInfo()

// We will set the following flags:
// CORJIT_FLAG_USE_SSE2 is required
// SSE - EDX bit 25 (buffer[15] & 0x02)
// SSE2 - EDX bit 26 (buffer[15] & 0x04)
// SSE - EDX bit 25
// SSE2 - EDX bit 26
// CORJIT_FLAG_USE_AES
// CORJIT_FLAG_USE_SSE2
// AES - ECX bit 25
// CORJIT_FLAG_USE_PCLMULQDQ
// CORJIT_FLAG_USE_SSE2
// PCLMULQDQ - ECX bit 1
// CORJIT_FLAG_USE_SSE3 if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSE2
// SSE3 - ECX bit 0 (buffer[8] & 0x01)
// SSE3 - ECX bit 0
// CORJIT_FLAG_USE_SSSE3 if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSE3
// SSSE3 - ECX bit 9 (buffer[9] & 0x02)
// SSSE3 - ECX bit 9
// CORJIT_FLAG_USE_SSE41 if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSSE3
// SSE4.1 - ECX bit 19 (buffer[10] & 0x08)
// SSE4.1 - ECX bit 19
// CORJIT_FLAG_USE_SSE42 if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSE41
// SSE4.2 - ECX bit 20 (buffer[10] & 0x10)
// SSE4.2 - ECX bit 20
// CORJIT_FLAG_USE_POPCNT if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSE42
// POPCNT - ECX bit 23 (buffer[10] & 0x80)
// POPCNT - ECX bit 23
// CORJIT_FLAG_USE_AVX if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1:
// CORJIT_FLAG_USE_SSE42
// OSXSAVE - ECX bit 27 (buffer[11] & 0x08)
// OSXSAVE - ECX bit 27
// AVX - ECX bit 28
// XGETBV - XCR0[2:1] 11b
// AVX - ECX bit 28 (buffer[11] & 0x10)
// CORJIT_FLAG_USE_FMA if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1:
// CORJIT_FLAG_USE_AVX
// FMA - ECX bit 12 (buffer[9] & 0x10)
// FMA - ECX bit 12
// CORJIT_FLAG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
// CORJIT_FLAG_USE_AVX
// AVX2 - EBX bit 5 (buffer[4] & 0x20)
// AVX2 - EBX bit 5
// CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future without
// CORJIT_FLAG_USE_AES
// CORJIT_FLAG_USE_SSE2
// AES - ECX bit 25 (buffer[11] & 0x01)
// CORJIT_FLAG_USE_PCLMULQDQ
// CORJIT_FLAG_USE_SSE2
// PCLMULQDQ - ECX bit 1 (buffer[8] & 0x01)
// CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
// BMI1 - EBX bit 3 (buffer[4] & 0x08)
// BMI1 - EBX bit 3
// CORJIT_FLAG_USE_BMI2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
// BMI2 - EBX bit 8 (buffer[5] & 0x01)
// BMI2 - EBX bit 8
// CORJIT_FLAG_USE_LZCNT if the following feature bits are set (input EAX of 80000001H)
// LZCNT - ECX bit 5 (buffer[8] & 0x20)
// LZCNT - ECX bit 5
// synchronously updating VM and JIT.

unsigned char buffer[16];
DWORD maxCpuId = getcpuid(0, buffer);
int cpuidInfo[4];

__cpuid(cpuidInfo, 0x00000000);
uint32_t maxCpuId = static_cast<uint32_t>(cpuidInfo[0]);

if (maxCpuId >= 1)
{
// getcpuid executes cpuid with eax set to its first argument, and ecx cleared.
// It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11],
// and edx in buffer[12-15].

(void) getcpuid(1, buffer);
__cpuid(cpuidInfo, 0x00000001);

// If SSE/SSE2 is not enabled, there is no point in checking the rest.
// SSE is bit 25 of EDX (buffer[15] & 0x02)
// SSE2 is bit 26 of EDX (buffer[15] & 0x04)

if ((buffer[15] & 0x06) == 0x06) // SSE & SSE2
if (((cpuidInfo[3] & (1 << 25)) != 0) && ((cpuidInfo[3] & (1 << 26)) != 0)) // SSE & SSE2
{
CPUCompileFlags.Set(InstructionSet_SSE);
CPUCompileFlags.Set(InstructionSet_SSE2);
if ((buffer[11] & 0x02) != 0) // AESNI

if ((cpuidInfo[2] & (1 << 25)) != 0) // AESNI
{
CPUCompileFlags.Set(InstructionSet_AES);
}

if ((buffer[8] & 0x02) != 0) // PCLMULQDQ
if ((cpuidInfo[2] & (1 << 1)) != 0) // PCLMULQDQ
{
CPUCompileFlags.Set(InstructionSet_PCLMULQDQ);
}

if ((buffer[8] & 0x01) != 0) // SSE3
if ((cpuidInfo[2] & (1 << 0)) != 0) // SSE3
{
CPUCompileFlags.Set(InstructionSet_SSE3);

if ((buffer[9] & 0x02) != 0) // SSSE3
if ((cpuidInfo[2] & (1 << 9)) != 0) // SSSE3
{
CPUCompileFlags.Set(InstructionSet_SSSE3);

if ((buffer[10] & 0x08) != 0) // SSE4.1
if ((cpuidInfo[2] & (1 << 19)) != 0) // SSE4.1
{
CPUCompileFlags.Set(InstructionSet_SSE41);

if ((buffer[10] & 0x10) != 0) // SSE4.2
if ((cpuidInfo[2] & (1 << 20)) != 0) // SSE4.2
{
CPUCompileFlags.Set(InstructionSet_SSE42);

if ((buffer[10] & 0x80) != 0) // POPCNT
if ((cpuidInfo[2] & (1 << 23)) != 0) // POPCNT
{
CPUCompileFlags.Set(InstructionSet_POPCNT);
}

if ((buffer[11] & 0x18) == 0x18) // AVX & OSXSAVE
if (((cpuidInfo[2] & (1 << 27)) != 0) && ((cpuidInfo[2] & (1 << 28)) != 0)) // OSXSAVE & AVX
{
if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1))
if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1)) // XGETBV == 11
{
CPUCompileFlags.Set(InstructionSet_AVX);

if ((buffer[9] & 0x10) != 0) // FMA
if ((cpuidInfo[2] & (1 << 12)) != 0) // FMA
{
CPUCompileFlags.Set(InstructionSet_FMA);
}

if (maxCpuId >= 0x07)
{
(void) getextcpuid(0, 0x07, buffer);
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);

if ((buffer[4] & 0x20) != 0) // AVX2
if ((cpuidInfo[1] & (1 << 5)) != 0) // AVX2
{
CPUCompileFlags.Set(InstructionSet_AVX2);
}
Expand All @@ -1443,31 +1438,28 @@ void EEJitManager::SetCpuInfo()

if (maxCpuId >= 0x07)
{
(void)getextcpuid(0, 0x07, buffer);
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);

if ((buffer[4] & 0x08) != 0) // BMI1
// CPUID leaf 0x07 / sub-leaf 0 reports BMI1 in EBX bit 3; EBX is cpuidInfo[1] (cpuidInfo[2] is ECX).
if ((cpuidInfo[1] & (1 << 3)) != 0) // BMI1
{
    CPUCompileFlags.Set(InstructionSet_BMI1);
}

if ((buffer[5] & 0x01) != 0) // BMI2
// CPUID leaf 0x07 / sub-leaf 0 reports BMI2 in EBX bit 8; EBX is cpuidInfo[1] (cpuidInfo[2] is ECX).
if ((cpuidInfo[1] & (1 << 8)) != 0) // BMI2
{
    CPUCompileFlags.Set(InstructionSet_BMI2);
}
}
}

DWORD maxCpuIdEx = getcpuid(0x80000000, buffer);
__cpuid(cpuidInfo, 0x80000000);
uint32_t maxCpuIdEx = static_cast<uint32_t>(cpuidInfo[0]);

if (maxCpuIdEx >= 0x80000001)
{
// getcpuid executes cpuid with eax set to its first argument, and ecx cleared.
// It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11],
// and edx in buffer[12-15].

(void) getcpuid(0x80000001, buffer);
__cpuid(cpuidInfo, 0x80000001);

if ((buffer[8] & 0x20) != 0) // LZCNT
// CPUID leaf 0x80000001 reports LZCNT in ECX bit 5; ECX is cpuidInfo[2] (cpuidInfo[3] is EDX).
if ((cpuidInfo[2] & (1 << 5)) != 0) // LZCNT
{
    CPUCompileFlags.Set(InstructionSet_LZCNT);
}
Expand Down
Loading

0 comments on commit 96f178d

Please sign in to comment.