Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#19921 [Dup] LLC Core count calculations updated #20171

Merged
merged 5 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions onnxruntime/core/platform/windows/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "core/common/span_utils.h"
#include "core/platform/env.h"
#include "core/platform/scoped_resource.h"
#if defined(_M_X64) && !defined(_M_ARM64EC) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC)

Check warning on line 35 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4] Raw Output: onnxruntime/core/platform/windows/env.cc:35: Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
#include "core/platform/windows/hardware_core_enumerator.h"
#endif
#include <unsupported/Eigen/CXX11/ThreadPool>
Expand Down Expand Up @@ -252,7 +252,7 @@
}

// EIGEN_NO_CPUID is not defined in any C/C++ source code. It is a compile option.
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID)

Check warning on line 255 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4] Raw Output: onnxruntime/core/platform/windows/env.cc:255: Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
static constexpr std::array<int, 3> kVendorID_Intel = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
#endif
int WindowsEnv::DefaultNumCores() {
Expand All @@ -261,7 +261,7 @@

int WindowsEnv::GetNumPhysicalCpuCores() const {
// EIGEN_NO_CPUID is not defined in any C/C++ source code. It is a compile option.
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID)

Check warning on line 264 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4] Raw Output: onnxruntime/core/platform/windows/env.cc:264: Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
// The following code is a temporary fix for a perf problem on Intel's Meteor Lake CPUs. The Intel compute platform has
// a hybrid architecture that some CPU cores runs significant slower than the others. If we distribute our compute work
// evenly to all CPU cores, the slowest CPU core will drag the performance down. So, instead, we reduce the total number
Expand Down
14 changes: 8 additions & 6 deletions onnxruntime/core/platform/windows/hardware_core_enumerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@

struct CoreCounter {
uint32_t PhysicalCores = 0;
uint32_t SocDieCores = 0;
uint32_t LLCCores = 0;

Check warning on line 19 in onnxruntime/core/platform/windows/hardware_core_enumerator.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Redundant blank line at the end of a code block should be deleted. [whitespace/blank_line] [3] Raw Output: onnxruntime/core/platform/windows/hardware_core_enumerator.cc:19: Redundant blank line at the end of a code block should be deleted. [whitespace/blank_line] [3]
};

static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
Expand All @@ -42,7 +43,7 @@
return c;
}

static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
static CoreCounter GetCoreInfo() {
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

CoreCounter cores;
Expand Down Expand Up @@ -73,17 +74,18 @@

read += currentProcessorInfo->Size;
}
//Cores with L2 and LLC cache levels = # Physical Cores - # logical cores without LLC

Check warning on line 77 in onnxruntime/core/platform/windows/hardware_core_enumerator.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Should have a space between // and comment [whitespace/comments] [4] Raw Output: onnxruntime/core/platform/windows/hardware_core_enumerator.cc:77: Should have a space between // and comment [whitespace/comments] [4]
cores.LLCCores = cores.PhysicalCores - CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);

cores.SocDieCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
return cores;
}

uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
auto cores = GetNumberOPhysicalAndEngineeringCores();
// We want to use the number of physical cores, but exclude soc cores
return cores.PhysicalCores - cores.SocDieCores;
auto cores = GetCoreInfo();

return cores.LLCCores;
}

} // namespace onnxruntime
15 changes: 8 additions & 7 deletions winml/lib/Api/HardwareCoreEnumerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

struct CoreCounter {
uint32_t PhysicalCores = 0;
uint32_t Num2CacheCores = 0;
uint32_t LLCCores = 0;
};

static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
Expand Down Expand Up @@ -42,7 +42,7 @@
return c;
}

static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
static CoreCounter GetCoreInfo() {
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

CoreCounter cores;
Expand All @@ -64,6 +64,7 @@
cores.PhysicalCores++;
break;
case RelationCache:
//Cache level masks count Logicial processors

Check warning on line 67 in winml/lib/Api/HardwareCoreEnumerator.cpp

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Should have a space between // and comment [whitespace/comments] [4] Raw Output: winml/lib/Api/HardwareCoreEnumerator.cpp:67: Should have a space between // and comment [whitespace/comments] [4]
if (currentProcessorInfo->Cache.Level == 2) {
dwLevel2GroupMask |= currentProcessorInfo->Cache.GroupMask.Mask;
} else if (currentProcessorInfo->Cache.Level == 3) {
Expand All @@ -75,14 +76,15 @@
read += currentProcessorInfo->Size;
}

cores.Num2CacheCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
cores.LLCCores = cores.PhysicalCores - CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);

return cores;
}

uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
auto cores = GetNumberOPhysicalAndEngineeringCores();
auto cores = GetCoreInfo();

#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__)
const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
Expand All @@ -97,9 +99,8 @@
auto isHybrid = (regs_leaf7[3] & (1 << 15));

if (isIntel && isHybrid) {
// We want to use the number of physical cores, but exclude soc cores
// On Intel Hybrid processors, numSocCores == cores.Num2CacheCores
return cores.PhysicalCores - cores.Num2CacheCores;
// We want to use the number of physical cores, but exclude cores without an LLC
return cores.LLCCores;
}
#endif

Expand Down
Loading