Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix bug: getting current cpu core type #10630

Merged
merged 2 commits into from
Feb 25, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 1 addition & 23 deletions onnxruntime/core/mlas/lib/mlasi.h
Original file line number Diff line number Diff line change
Expand Up @@ -2032,27 +2032,5 @@ enum MlasCoreType { mlas_core_unknown = 0, mlas_core_little = 2, mlas_core_big =
* @return 2 current core is little core with narrow memory load (e.g. ARMv8 a53)
* 3 current core is big core with wider load (e.g. ARMv8 a72)
*/
// Reports whether the calling thread runs on a "little" or a "big" core.
//
// The answer is computed on the first call made by each thread and stored in
// a thread_local variable; later calls from the same thread return the stored
// value without querying the CPU again.
//
// Returns mlas_core_little when the detected uarch is Cortex-A53, Cortex-A55r0
// or Cortex-A55 (ARM64 Linux builds only); mlas_core_big in every other case.
MLAS_FORCEINLINE
int32_t
MlasGetCoreUArch()
{
    thread_local int32_t cached_core_type = mlas_core_unknown;

    // Fast path: this thread has already been classified.
    if (cached_core_type != mlas_core_unknown) {
        return cached_core_type;
    }

#if defined(MLAS_TARGET_ARM64) && defined(__linux__)
    const auto current_uarch = MLAS_CPUIDINFO::GetCPUIDInfo().GetCurrentUarch();
    const bool is_little_core = current_uarch == cpuinfo_uarch_cortex_a53 ||
                                current_uarch == cpuinfo_uarch_cortex_a55r0 ||
                                current_uarch == cpuinfo_uarch_cortex_a55;
    cached_core_type = is_little_core ? mlas_core_little : mlas_core_big;
#else
    // No per-core detection on this target; treat every core as big.
    cached_core_type = mlas_core_big;
#endif  // MLAS_TARGET_ARM64

    return cached_core_type;
}

// Returns the core type (little/big) of the CPU the calling thread is running
// on. Implemented in platform.cpp; builds other than ARM64 Linux always get
// mlas_core_big.
extern MlasCoreType MlasGetCoreType();
chenfucn marked this conversation as resolved.
Show resolved Hide resolved

64 changes: 64 additions & 0 deletions onnxruntime/core/mlas/lib/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ Module Name:

#include "mlasi.h"

#include <thread>
#include <mutex>

#if defined(MLAS_TARGET_POWER) && defined(__linux__)
#include <sys/auxv.h>
#endif
Expand All @@ -28,6 +31,12 @@ Module Name:
#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
#endif
#elif defined(__linux__)
#include <unistd.h>
#include <sys/syscall.h>
#if !defined(__NR_getcpu)
#include <asm-generic/unistd.h>
#endif

#include <sys/auxv.h>
#include <asm/hwcap.h>
// N.B. Support building with older versions of asm/hwcap.h that do not define
Expand Down Expand Up @@ -425,3 +434,58 @@ Return Value:
return MLAS_DEFAULT_PREFERRED_BUFFER_ALIGNMENT;
#endif
}

#if defined(MLAS_TARGET_ARM64) && defined(__linux__)
// Cache of detected core types used by MlasGetCoreType(), indexed by CPU id.
// Stays nullptr until the one-time setup allocates it.
static MlasCoreType* mlas_coretype_tbl = nullptr;
// Number of entries in mlas_coretype_tbl; 0 means the table is not available.
static uint32_t mlas_coretype_tbl_size = 0;
// Ensures the table setup in MlasGetCoreType() runs only once.
static std::once_flag mlas_init_coretype_tbl;
#endif

/**
 * @brief Classify the CPU core the calling thread is currently running on.
 *
 * On ARM64 Linux, a table with one slot per CPU (sized from
 * std::thread::hardware_concurrency()) is allocated once. Each call asks the
 * kernel for the current CPU id via the getcpu system call and returns the
 * slot for that id, filling it in on first use from the uarch reported by
 * MLAS_CPUIDINFO.
 *
 * @return mlas_core_little when the current core is a Cortex-A53, Cortex-A55r0
 *         or Cortex-A55; mlas_core_big otherwise. mlas_core_big is also the
 *         fallback whenever detection is unavailable: non ARM64 Linux builds,
 *         unknown CPU count, table allocation failure, getcpu failure, or a
 *         CPU id outside the table.
 */
MlasCoreType
MlasGetCoreType()
{
#if defined(MLAS_TARGET_ARM64) && defined(__linux__)
    std::call_once(mlas_init_coretype_tbl, []() {
        const uint32_t cpu_count = std::thread::hardware_concurrency();
        if (cpu_count == 0) {
            // CPU count is not computable; the size stays 0 and callers fall back.
            return;
        }

        auto* table = static_cast<MlasCoreType*>(malloc(sizeof(MlasCoreType) * cpu_count));
        if (table == nullptr) {
            // Allocation failed: keep the size at 0 so every caller takes the
            // default path instead of dereferencing a null table.
            return;
        }

        for (uint32_t i = 0; i < cpu_count; i++) {
            table[i] = mlas_core_unknown;
        }

        // Publish only after the table is fully initialized.
        mlas_coretype_tbl = table;
        mlas_coretype_tbl_size = cpu_count;
    });

    if (mlas_coretype_tbl_size == 0) {
        // functionality missing, return default
        return mlas_core_big;
    }

    unsigned cpu = 0;
    if (syscall(__NR_getcpu, &cpu, nullptr, nullptr) != 0) {
        // failed to detect current core id. give up
        return mlas_core_big;
    }

    if (cpu >= mlas_coretype_tbl_size) {
        // CPU id does not fit the table; do not index out of bounds.
        return mlas_core_big;
    }

    // NOTE(review): table slots are read and written without synchronization,
    // and the thread could presumably move to another CPU between getcpu and
    // the uarch query below -- confirm this is acceptable for callers.
    auto core_type = mlas_coretype_tbl[cpu];
    if (core_type == mlas_core_unknown) {
        const auto uarch = MLAS_CPUIDINFO::GetCPUIDInfo().GetCurrentUarch();
        if (uarch == cpuinfo_uarch_cortex_a53 || uarch == cpuinfo_uarch_cortex_a55r0 ||
            uarch == cpuinfo_uarch_cortex_a55) {
            core_type = mlas_core_little;
        } else {
            core_type = mlas_core_big;
        }
        mlas_coretype_tbl[cpu] = core_type;
    }
    return core_type;

#else
    return mlas_core_big;
#endif
}
4 changes: 2 additions & 2 deletions onnxruntime/core/mlas/lib/qgemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ MlasSymmQgemmBatch(
if (ThreadPool == nullptr) {
// So our caller handles threaded job partition.
// Call single threaded operation directly
auto uarch = MlasGetCoreUArch();
auto uarch = MlasGetCoreType();
MLAS_SYMM_QGEMM_OPERATION* operation =
uarch == mlas_core_little ? dispatch->LitOperation : dispatch->BigOperation;

Expand Down Expand Up @@ -260,7 +260,7 @@ MlasSymmQgemmBatch(
ThreadsPerGemm = ThreadCountM * ThreadCountN;

MlasTrySimpleParallel(ThreadPool, ThreadsPerGemm * BatchN, [&](ptrdiff_t tid) {
auto uarch = MlasGetCoreUArch();
auto uarch = MlasGetCoreType();
MLAS_SYMM_QGEMM_OPERATION* operation =
uarch == mlas_core_little ? dispatch->LitOperation : dispatch->BigOperation;

Expand Down