Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for X86Base.CpuId #40167

Merged
merged 11 commits into from
Aug 5, 2020
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@
<Compile Include="$(BclSourcesRoot)\System\Runtime\InteropServices\Marshal.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\InteropServices\MemoryMarshal.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\InteropServices\NativeLibrary.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\Intrinsics\X86\X86Base.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\Loader\AssemblyLoadContext.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\Versioning\CompatibilitySwitch.cs" />
<Compile Include="$(BclSourcesRoot)\System\RuntimeArgumentHandle.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Runtime.Intrinsics.X86
{
// CoreCLR-specific portion of the X86Base partial class.
public abstract partial class X86Base
{
// Runtime QCall taking a CPUID function id (leaf) and sub-function id (sub-leaf).
// cpuInfo is a caller-supplied output buffer passed as a raw pointer.
// NOTE(review): presumably serviced by a native routine that writes four ints
// (EAX, EBX, ECX, EDX) into cpuInfo, so the buffer must hold at least four ints;
// the QCall binding and the managed caller are not visible here - confirm.
[DllImport(RuntimeHelpers.QCall)]
private static extern unsafe void __cpuidex(int* cpuInfo, int functionId, int subFunctionId);
}
}
10 changes: 9 additions & 1 deletion src/coreclr/src/classlibnative/bcltype/system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,9 +607,17 @@ BOOL QCALLTYPE SystemNative::WinRTSupported()

#endif // FEATURE_COMINTEROP

#if defined(TARGET_X86) || defined(TARGET_AMD64)

// QCall entry point that exposes the CPUID instruction to managed code.
//
// Executes CPUID for leaf `functionId` and sub-leaf `subFunctionId` by forwarding
// to __cpuidex, which stores the resulting EAX, EBX, ECX and EDX values in
// cpuInfo[0], cpuInfo[1], cpuInfo[2] and cpuInfo[3] respectively.
//
// cpuInfo must point to caller-owned storage for four ints; it is not validated here.
void QCALLTYPE SystemNative::X86BaseCpuId(int cpuInfo[4], int functionId, int subFunctionId)
{
    QCALL_CONTRACT;

    BEGIN_QCALL;

    __cpuidex(cpuInfo, functionId, subFunctionId);

    END_QCALL;
}


#endif // defined(TARGET_X86) || defined(TARGET_AMD64)
4 changes: 4 additions & 0 deletions src/coreclr/src/classlibnative/bcltype/system.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ class SystemNative
// Return a method info for the method where the exception was thrown
static FCDECL1(ReflectMethodObject*, GetMethodFromStackTrace, ArrayBase* pStackTraceUNSAFE);

#if defined(TARGET_X86) || defined(TARGET_AMD64)
static void QCALLTYPE X86BaseCpuId(int cpuInfo[4], int functionId, int subFunctionId);
#endif // defined(TARGET_X86) || defined(TARGET_AMD64)

private:
// Common processing code for FailFast
static void GenericFailFast(STRINGREF refMesgString, EXCEPTIONREF refExceptionForWatsonBucketing, UINT_PTR retAddress, UINT exitCode, STRINGREF errorSource);
Expand Down
45 changes: 0 additions & 45 deletions src/coreclr/src/vm/amd64/AsmHelpers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -667,27 +667,6 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT
NESTED_END ProfileTailcallNaked, _TEXT


;; extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);
;;
;; Executes CPUID with EAX = arg and ECX = 0, then stores EAX, EBX, ECX and EDX
;; (in that order) as four consecutive DWORDs starting at result.
;; EAX is not modified after the cpuid instruction, so the value left in EAX on
;; return is CPUID's EAX output.
NESTED_ENTRY getcpuid, _TEXT

; rbx is overwritten by cpuid and rsi is used as the result pointer below;
; both are saved here and restored before returning.
push_nonvol_reg rbx
push_nonvol_reg rsi
END_PROLOGUE

mov eax, ecx ; first arg
mov rsi, rdx ; second arg (result)
xor ecx, ecx ; clear ecx - needed for "Structured Extended Feature Flags"
cpuid
; Write the four output registers to result[0..15].
mov [rsi+ 0], eax
mov [rsi+ 4], ebx
mov [rsi+ 8], ecx
mov [rsi+12], edx
pop rsi
pop rbx
ret
NESTED_END getcpuid, _TEXT


;; extern "C" DWORD __stdcall xmmYmmStateSupport();
LEAF_ENTRY xmmYmmStateSupport, _TEXT
mov ecx, 0 ; Specify xcr0
Expand All @@ -703,30 +682,6 @@ LEAF_ENTRY xmmYmmStateSupport, _TEXT
ret
LEAF_END xmmYmmStateSupport, _TEXT

; The following function uses the Deterministic Cache Parameter leaves to determine the cache
; hierarchy information on Prescott and above platforms.
; This function takes 3 arguments:
; Arg1 is an input to ECX. Used as index to specify which cache level to return information on by CPUID.
; Arg1 is already passed in ECX on call to getextcpuid, so no explicit assignment is required;
; Arg2 is an input to EAX. For deterministic cache enumeration, we pass in 4H in arg2.
; Arg3 is a pointer to the return dwbuffer
; The four output registers EAX, EBX, ECX and EDX are written, in that order, as
; consecutive DWORDs starting at Arg3. EAX is not modified after cpuid, so it still
; holds CPUID's EAX output on return.
NESTED_ENTRY getextcpuid, _TEXT
; rbx is overwritten by cpuid and rsi holds the buffer pointer; both are restored before returning.
push_nonvol_reg rbx
push_nonvol_reg rsi
END_PROLOGUE

mov eax, edx ; second arg (input to EAX)
mov rsi, r8 ; third arg (pointer to return dwbuffer)
cpuid
; Store the results into the caller's buffer.
mov [rsi+ 0], eax
mov [rsi+ 4], ebx
mov [rsi+ 8], ecx
mov [rsi+12], edx
pop rsi
pop rbx

ret
NESTED_END getextcpuid, _TEXT


; EXTERN_C void moveOWord(LPVOID* src, LPVOID* target);
; <NOTE>
Expand Down
39 changes: 15 additions & 24 deletions src/coreclr/src/vm/amd64/unixstubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,26 @@ extern "C"
PORTABILITY_ASSERT("Implement for PAL");
}

// Unix stand-in for the MSVC __cpuid intrinsic.
//
// Executes CPUID for leaf `function_id` with ECX cleared and stores the resulting
// EAX, EBX, ECX and EDX values in cpuInfo[0], cpuInfo[1], cpuInfo[2] and cpuInfo[3].
// cpuInfo must point to storage for four ints.
void __cpuid(int cpuInfo[4], int function_id)
{
    // Based on the Clang implementation provided in cpuid.h:
    // https://github.com/llvm/llvm-project/blob/master/clang/lib/Headers/cpuid.h
    //
    // ECX is explicitly zeroed ("2"(0) ties the input to the ECX output operand).
    // The getcpuid helper this replaces cleared ECX before cpuid, and leaves that
    // take a sub-leaf (e.g. 0x07, "Structured Extended Feature Flags") would
    // otherwise see whatever value happened to be in ECX.

    __asm("  cpuid\n"
        : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3])
        : "0"(function_id), "2"(0)
    );
}

// Unix stand-in for the MSVC __cpuidex intrinsic.
//
// Executes CPUID with EAX = `function_id` (leaf) and ECX = `subFunction_id` (sub-leaf)
// and stores the resulting EAX, EBX, ECX and EDX values in cpuInfo[0], cpuInfo[1],
// cpuInfo[2] and cpuInfo[3]. cpuInfo must point to storage for four ints.
void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id)
{
    // Based on the Clang implementation provided in cpuid.h:
    // https://github.com/llvm/llvm-project/blob/master/clang/lib/Headers/cpuid.h
    //
    // "0" and "2" tie the inputs to the EAX and ECX output operands respectively.

    __asm("  cpuid\n"
        : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3])
        : "0"(function_id), "2"(subFunction_id)
    );
}

DWORD xmmYmmStateSupport()
Expand Down
19 changes: 10 additions & 9 deletions src/coreclr/src/vm/cgensys.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,21 +95,22 @@ inline void GetSpecificCpuInfo(CORINFO_CPU * cpuInfo)
#endif // !TARGET_X86

#if (defined(TARGET_X86) || defined(TARGET_AMD64)) && !defined(CROSSGEN_COMPILE)
extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);
extern "C" DWORD __stdcall getextcpuid(DWORD arg1, DWORD arg2, unsigned char result[16]);
#ifdef TARGET_UNIX
// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures
// We define matching signatures for use on Unix platforms.

extern "C" void __stdcall __cpuid(int cpuInfo[4], int function_id);
extern "C" void __stdcall __cpuidex(int cpuInfo[4], int function_id, int subFunction_id);
#endif // TARGET_UNIX
extern "C" DWORD __stdcall xmmYmmStateSupport();
#endif

// Reports whether the processor advertises the AVX feature flag.
//
// On x86/AMD64 (outside crossgen builds) this queries CPUID leaf 1 and tests
// ECX bit 28. On every other configuration it returns false.
// Only the CPUID feature bit is examined here.
inline bool TargetHasAVXSupport()
{
#if (defined(TARGET_X86) || defined(TARGET_AMD64)) && !defined(CROSSGEN_COMPILE)
    int cpuInfo[4];
    __cpuid(cpuInfo, 0x00000001);           // All x86/AMD64 targets support cpuid.

    // __cpuid stores EAX, EBX, ECX, EDX in cpuInfo[0..3], so ECX is cpuInfo[2].
    return ((cpuInfo[2] & (1 << 28)) != 0); // The AVX feature is ECX bit 28.
#endif // (defined(TARGET_X86) || defined(TARGET_AMD64)) && !defined(CROSSGEN_COMPILE)
    return false;
}
Expand Down
100 changes: 46 additions & 54 deletions src/coreclr/src/vm/codeman.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1308,115 +1308,110 @@ void EEJitManager::SetCpuInfo()

// We will set the following flags:
// CORJIT_FLAG_USE_SSE2 is required
// SSE - EDX bit 25 (buffer[15] & 0x02)
// SSE2 - EDX bit 26 (buffer[15] & 0x04)
// SSE - EDX bit 25
// SSE2 - EDX bit 26
// CORJIT_FLAG_USE_AES
// CORJIT_FLAG_USE_SSE2
// AES - ECX bit 25
// CORJIT_FLAG_USE_PCLMULQDQ
// CORJIT_FLAG_USE_SSE2
// PCLMULQDQ - ECX bit 1
// CORJIT_FLAG_USE_SSE3 if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSE2
// SSE3 - ECX bit 0 (buffer[8] & 0x01)
// SSE3 - ECX bit 0
// CORJIT_FLAG_USE_SSSE3 if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSE3
// SSSE3 - ECX bit 9 (buffer[9] & 0x02)
// SSSE3 - ECX bit 9
// CORJIT_FLAG_USE_SSE41 if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSSE3
// SSE4.1 - ECX bit 19 (buffer[10] & 0x08)
// SSE4.1 - ECX bit 19
// CORJIT_FLAG_USE_SSE42 if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSE41
// SSE4.2 - ECX bit 20 (buffer[10] & 0x10)
// SSE4.2 - ECX bit 20
// CORJIT_FLAG_USE_POPCNT if the following feature bits are set (input EAX of 1)
// CORJIT_FLAG_USE_SSE42
// POPCNT - ECX bit 23 (buffer[10] & 0x80)
// POPCNT - ECX bit 23
// CORJIT_FLAG_USE_AVX if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1:
// CORJIT_FLAG_USE_SSE42
// OSXSAVE - ECX bit 27 (buffer[11] & 0x08)
// OSXSAVE - ECX bit 27
// AVX - ECX bit 28
// XGETBV - XCR0[2:1] 11b
// AVX - ECX bit 28 (buffer[11] & 0x10)
// CORJIT_FLAG_USE_FMA if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1:
// CORJIT_FLAG_USE_AVX
// FMA - ECX bit 12 (buffer[9] & 0x10)
// FMA - ECX bit 12
// CORJIT_FLAG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
// CORJIT_FLAG_USE_AVX
// AVX2 - EBX bit 5 (buffer[4] & 0x20)
// AVX2 - EBX bit 5
// CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future without
// CORJIT_FLAG_USE_AES
// CORJIT_FLAG_USE_SSE2
// AES - ECX bit 25 (buffer[11] & 0x01)
// CORJIT_FLAG_USE_PCLMULQDQ
// CORJIT_FLAG_USE_SSE2
// PCLMULQDQ - ECX bit 1 (buffer[8] & 0x01)
// CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
// BMI1 - EBX bit 3 (buffer[4] & 0x08)
// BMI1 - EBX bit 3
// CORJIT_FLAG_USE_BMI2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
// BMI2 - EBX bit 8 (buffer[5] & 0x01)
// BMI2 - EBX bit 8
// CORJIT_FLAG_USE_LZCNT if the following feature bits are set (input EAX of 80000001H)
// LZCNT - ECX bit 5 (buffer[8] & 0x20)
// LZCNT - ECX bit 5
// synchronously updating VM and JIT.

unsigned char buffer[16];
DWORD maxCpuId = getcpuid(0, buffer);
int cpuidInfo[4];

__cpuid(cpuidInfo, 0x00000000);
uint32_t maxCpuId = static_cast<uint32_t>(cpuidInfo[0]);

if (maxCpuId >= 1)
{
// getcpuid executes cpuid with eax set to its first argument, and ecx cleared.
// It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11],
// and edx in buffer[12-15].

(void) getcpuid(1, buffer);
__cpuid(cpuidInfo, 0x00000001);

// If SSE/SSE2 is not enabled, there is no point in checking the rest.
// SSE is bit 25 of EDX (buffer[15] & 0x02)
// SSE2 is bit 26 of EDX (buffer[15] & 0x04)

if ((buffer[15] & 0x06) == 0x06) // SSE & SSE2
if (((cpuidInfo[3] & (1 << 25)) != 0) && ((cpuidInfo[3] & (1 << 26)) != 0)) // SSE & SSE2
{
CPUCompileFlags.Set(InstructionSet_SSE);
CPUCompileFlags.Set(InstructionSet_SSE2);
if ((buffer[11] & 0x02) != 0) // AESNI

if ((cpuidInfo[2] & (1 << 25)) != 0) // AESNI
{
CPUCompileFlags.Set(InstructionSet_AES);
}

if ((buffer[8] & 0x02) != 0) // PCLMULQDQ
if ((cpuidInfo[2] & (1 << 1)) != 0) // PCLMULQDQ
{
CPUCompileFlags.Set(InstructionSet_PCLMULQDQ);
}

if ((buffer[8] & 0x01) != 0) // SSE3
if ((cpuidInfo[2] & (1 << 0)) != 0) // SSE3
{
CPUCompileFlags.Set(InstructionSet_SSE3);

if ((buffer[9] & 0x02) != 0) // SSSE3
if ((cpuidInfo[2] & (1 << 9)) != 0) // SSSE3
{
CPUCompileFlags.Set(InstructionSet_SSSE3);

if ((buffer[10] & 0x08) != 0) // SSE4.1
if ((cpuidInfo[2] & (1 << 19)) != 0) // SSE4.1
{
CPUCompileFlags.Set(InstructionSet_SSE41);

if ((buffer[10] & 0x10) != 0) // SSE4.2
if ((cpuidInfo[2] & (1 << 20)) != 0) // SSE4.2
{
CPUCompileFlags.Set(InstructionSet_SSE42);

if ((buffer[10] & 0x80) != 0) // POPCNT
if ((cpuidInfo[2] & (1 << 23)) != 0) // POPCNT
{
CPUCompileFlags.Set(InstructionSet_POPCNT);
}

if ((buffer[11] & 0x18) == 0x18) // AVX & OSXSAVE
if (((cpuidInfo[2] & (1 << 27)) != 0) && ((cpuidInfo[2] & (1 << 28)) != 0)) // OSXSAVE & AVX
{
if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1))
if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1)) // XGETBV == 11
{
CPUCompileFlags.Set(InstructionSet_AVX);

if ((buffer[9] & 0x10) != 0) // FMA
if ((cpuidInfo[2] & (1 << 12)) != 0) // FMA
{
CPUCompileFlags.Set(InstructionSet_FMA);
}

if (maxCpuId >= 0x07)
{
(void) getextcpuid(0, 0x07, buffer);
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);

if ((buffer[4] & 0x20) != 0) // AVX2
if ((cpuidInfo[1] & (1 << 5)) != 0) // AVX2
{
CPUCompileFlags.Set(InstructionSet_AVX2);
}
Expand All @@ -1443,31 +1438,28 @@ void EEJitManager::SetCpuInfo()

if (maxCpuId >= 0x07)
{
(void)getextcpuid(0, 0x07, buffer);
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);

if ((buffer[4] & 0x08) != 0) // BMI1
if ((cpuidInfo[2] & (1 << 3)) != 0) // BMI1
{
CPUCompileFlags.Set(InstructionSet_BMI1);
}

if ((buffer[5] & 0x01) != 0) // BMI2
if ((cpuidInfo[2] & (1 << 8)) != 0) // BMI2
{
CPUCompileFlags.Set(InstructionSet_BMI2);
}
}
}

DWORD maxCpuIdEx = getcpuid(0x80000000, buffer);
__cpuid(cpuidInfo, 0x80000000);
uint32_t maxCpuIdEx = static_cast<uint32_t>(cpuidInfo[0]);

if (maxCpuIdEx >= 0x80000001)
{
// getcpuid executes cpuid with eax set to its first argument, and ecx cleared.
// It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11],
// and edx in buffer[12-15].

(void) getcpuid(0x80000001, buffer);
__cpuid(cpuidInfo, 0x80000001);

if ((buffer[8] & 0x20) != 0) // LZCNT
if ((cpuidInfo[3] & (1 << 5)) != 0) // LZCNT
{
CPUCompileFlags.Set(InstructionSet_LZCNT);
}
Expand Down
Loading