Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LLC Core count calculations updated #19921

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions onnxruntime/core/platform/windows/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "core/common/span_utils.h"
#include "core/platform/env.h"
#include "core/platform/scoped_resource.h"
#if defined(_M_X64) && !defined(_M_ARM64EC) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC)

Check warning on line 35 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4] Raw Output: onnxruntime/core/platform/windows/env.cc:35: Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
#include "core/platform/windows/hardware_core_enumerator.h"
#endif
#include <unsupported/Eigen/CXX11/ThreadPool>
Expand Down Expand Up @@ -252,7 +252,7 @@
}

// EIGEN_NO_CPUID is not defined in any C/C++ source code. It is a compile option.
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID)

Check warning on line 255 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4] Raw Output: onnxruntime/core/platform/windows/env.cc:255: Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
static constexpr std::array<int, 3> kVendorID_Intel = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
#endif
int WindowsEnv::DefaultNumCores() {
Expand All @@ -261,7 +261,7 @@

int WindowsEnv::GetNumPhysicalCpuCores() const {
// EIGEN_NO_CPUID is not defined in any C/C++ source code. It is a compile option.
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID)

Check warning on line 264 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4] Raw Output: onnxruntime/core/platform/windows/env.cc:264: Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
// The following code is a temporary fix for a perf problem on Intel's Meteor Lake CPUs. The Intel compute platform has
// a hybrid architecture that some CPU cores runs significant slower than the others. If we distribute our compute work
// evenly to all CPU cores, the slowest CPU core will drag the performance down. So, instead, we reduce the total number
Expand Down
14 changes: 8 additions & 6 deletions onnxruntime/core/platform/windows/hardware_core_enumerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@

struct CoreCounter {
uint32_t PhysicalCores = 0;
uint32_t SocDieCores = 0;
uint32_t LLCCores = 0;

Check warning on line 19 in onnxruntime/core/platform/windows/hardware_core_enumerator.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Redundant blank line at the end of a code block should be deleted. [whitespace/blank_line] [3] Raw Output: onnxruntime/core/platform/windows/hardware_core_enumerator.cc:19: Redundant blank line at the end of a code block should be deleted. [whitespace/blank_line] [3]
};

static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
Expand All @@ -42,7 +43,7 @@
return c;
}

static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
static CoreCounter GetCoreInfo() {
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

CoreCounter cores;
Expand Down Expand Up @@ -73,17 +74,18 @@

read += currentProcessorInfo->Size;
}
//Cores with L2 and LLC cache levels = # Physical Cores - # logical cores without LLC

Check warning on line 77 in onnxruntime/core/platform/windows/hardware_core_enumerator.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Should have a space between // and comment [whitespace/comments] [4] Raw Output: onnxruntime/core/platform/windows/hardware_core_enumerator.cc:77: Should have a space between // and comment [whitespace/comments] [4]
cores.LLCCores = cores.PhysicalCores - CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);

cores.SocDieCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
return cores;
}

uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
auto cores = GetNumberOPhysicalAndEngineeringCores();
// We want to use the number of physical cores, but exclude soc cores
return cores.PhysicalCores - cores.SocDieCores;
auto cores = GetCoreInfo();

return cores.LLCCores;
}

} // namespace onnxruntime
3 changes: 1 addition & 2 deletions tools/ci_build/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -1533,8 +1533,7 @@ def generate_build_tree(
ldflags = ["/profile", "/DYNAMICBASE"]
# Address Sanitizer libs do not have a Qspectre version. So they two cannot be both enabled.
if not args.enable_address_sanitizer:
# Also enable a special perf patch that was made for Intel Meteor Lake mobile CPUs
cflags += ["/Qspectre", "/DONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This flag is default ON. It is enabled in every ONNX Runtime's official package. It was added because if in any case the code path doesn't work, at least we have a fallback. I can tell the customer to build the code without this flag.
Is there any benefit if we remove it?

cflags += ["/Qspectre"]
if config == "Release":
cflags += ["/O2", "/Ob2", "/DNDEBUG"]
elif config == "RelWithDebInfo":
Expand Down
15 changes: 8 additions & 7 deletions winml/lib/Api/HardwareCoreEnumerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

smk2007 marked this conversation as resolved.
Show resolved Hide resolved
struct CoreCounter {
uint32_t PhysicalCores = 0;
uint32_t Num2CacheCores = 0;
uint32_t LLCCores = 0;
};

static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
Expand Down Expand Up @@ -42,7 +42,7 @@
return c;
}

static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
static CoreCounter GetCoreInfo() {
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

CoreCounter cores;
Expand All @@ -64,6 +64,7 @@
cores.PhysicalCores++;
break;
case RelationCache:
//Cache level masks count Logicial processors

Check warning on line 67 in winml/lib/Api/HardwareCoreEnumerator.cpp

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Should have a space between // and comment [whitespace/comments] [4] Raw Output: winml/lib/Api/HardwareCoreEnumerator.cpp:67: Should have a space between // and comment [whitespace/comments] [4]
if (currentProcessorInfo->Cache.Level == 2) {
dwLevel2GroupMask |= currentProcessorInfo->Cache.GroupMask.Mask;
} else if (currentProcessorInfo->Cache.Level == 3) {
Expand All @@ -75,14 +76,15 @@
read += currentProcessorInfo->Size;
}

cores.Num2CacheCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
cores.LLCCores = cores.PhysicalCores - CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);

return cores;
}

uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
auto cores = GetNumberOPhysicalAndEngineeringCores();
auto cores = GetCoreInfo();

#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__)
const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
Expand All @@ -97,9 +99,8 @@
auto isHybrid = (regs_leaf7[3] & (1 << 15));

if (isIntel && isHybrid) {
// We want to use the number of physical cores, but exclude soc cores
// On Intel Hybrid processors, numSocCores == cores.Num2CacheCores
return cores.PhysicalCores - cores.Num2CacheCores;
// We want to use the number of physical cores, but exclude cores without an LLC
return cores.LLCCores;
}
#endif

Expand Down
Loading