Skip to content

Commit

Permalink
fp16 detect
Browse files Browse the repository at this point in the history
  • Loading branch information
chenfucn committed Feb 6, 2023
1 parent ed874c1 commit a451ce9
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 9 deletions.
38 changes: 38 additions & 0 deletions onnxruntime/core/common/cpuid_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ void CPUIDInfo::ArmLinuxInit() {
if (pytorch_cpuinfo_init_) {
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
const uint32_t core_cnt = cpuinfo_get_cores_count();
core_uarchs_.resize(core_cnt, cpuinfo_uarch_unknown);
is_armv8_narrow_ld_.resize(core_cnt, false);
Expand All @@ -165,6 +166,7 @@ void CPUIDInfo::ArmLinuxInit() {
}
} else {
has_arm_neon_dot_ = ((getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0);
has_fp16_ |= has_arm_neon_dot_;
}
}

Expand Down Expand Up @@ -220,9 +222,45 @@ void CPUIDInfo::ArmWindowsInit() {
lastUarch = uarch;
}
}

switch (lastUarch) {
case cpuinfo_uarch_cortex_a55:
case cpuinfo_uarch_cortex_a55r0:
case cpuinfo_uarch_cortex_a76:
case cpuinfo_uarch_neoverse_n1:
case cpuinfo_uarch_cortex_a77:
case cpuinfo_uarch_exynos_m4:
case cpuinfo_uarch_exynos_m5:
has_fp16_ = true;
break;
default:
break;
}
if (!has_fp16_) {
/*
* Detecting fp16 support. Different cores should have the same instruction set.
* So we just check the first ID_AA64PFR0_EL1
* Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0100), Op2(0b000),
*/
uint64_t ID_AA64PFR0_EL1;
unsigned long valsize = sizeof(uint64_t);
auto retCode = ::RegGetValueA(
HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"CP 4020", RRF_RT_REG_QWORD, nullptr,
&ID_AA64PFR0_EL1, &valsize);
if (retCode == ERROR_SUCCESS) {
// AdvSIMD, bits [23:20]
auto advSimd = ID_AA64PFR0_EL1 >> 20;
if ((advSimd & 0xfULL) == 1) {
has_fp16_ = true;
}
}
}
#endif /* Application Family or OneCore Family */

has_arm_neon_dot_ = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0);
has_fp16_ |= has_arm_neon_dot_;
}

#endif /* (arm or arm64) and windows */
Expand Down
6 changes: 5 additions & 1 deletion onnxruntime/core/common/cpuid_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class CPUIDInfo {
// Read-only accessors for the cached CPU feature flags. Each accessor
// returns the value of the corresponding member flag.
bool HasAVX512f() const { return has_avx512f_; }
bool HasAVX512_BF16() const { return has_avx512_bf16_; }
bool HasAVX512Skylake() const { return has_avx512_skylake_; }
// F16C: fp16 conversion instructions. Defined exactly once; a second
// definition of this accessor would be a redefinition error.
bool HasF16C() const { return has_f16c_; }
bool HasSSE3() const { return has_sse3_; }
bool HasSSE4_1() const { return has_sse4_1_; }
bool IsHybrid() const { return is_hybrid_; }
Expand Down Expand Up @@ -85,6 +85,9 @@ class CPUIDInfo {
return is_armv8_narrow_ld_[coreIdx];
}

// Returns the cached has_fp16_ flag. The flag defaults to false and is
// only raised by the platform-specific ARM initialization code.
bool HasFp16VectorAcceleration() const { return has_fp16_; }

private:
CPUIDInfo() {
Expand Down Expand Up @@ -118,6 +121,7 @@ class CPUIDInfo {
std::vector<bool> is_armv8_narrow_ld_;

bool has_arm_neon_dot_{false};
bool has_fp16_{false};

#ifdef CPUIDINFO_ARCH_X86

Expand Down
9 changes: 1 addition & 8 deletions onnxruntime/core/mlas/lib/halfgemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,10 @@ Module Name:
/**
 * Reports whether fp16 accelerated kernels may be used on this machine.
 *
 * The result is taken from the runtime CPU feature detection exposed by
 * MLAS_CPUIDINFO instead of a compile-time guess. The previous
 * MLAS_NEON64_INTRINSICS stub returned unconditionally on both preprocessor
 * branches, so the runtime query below could never be reached and every
 * NEON64 build claimed fp16 support regardless of the actual CPU.
 *
 * @return true when the CPUID info reports fp16 vector acceleration.
 */
bool MLASCALL
MlasFp16AccelerationSupported()
{
    return MLAS_CPUIDINFO::GetCPUIDInfo().HasFp16VectorAcceleration();
}



void
MLASCALL
MlasHalfGemmBatch(
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/mlas/lib/mlasi.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class MLASCPUIDInfo
// ARM
// Returns the cached has_arm_neon_dot_ flag.
bool HasArmNeonDot() const { return has_arm_neon_dot_; }

// Returns the cached has_fp16_ flag.
bool HasFp16VectorAcceleration() const { return has_fp16_; }

// Always returns 0xFFFFFFFF; presumably a "core index unknown" sentinel for
// this lightweight implementation -- TODO confirm against callers.
uint32_t GetCurrentCoreIdx() const { return 0xFFFFFFFF; }

// Always returns -1; presumably signals that the micro-architecture is not
// tracked here -- TODO confirm against callers.
int32_t GetCurrentUarch() const { return -1; }
Expand All @@ -137,6 +139,7 @@ class MLASCPUIDInfo
MLASCPUIDInfo();

bool has_arm_neon_dot_{false};
bool has_fp16_{false};
};
using MLAS_CPUIDINFO = MLASCPUIDInfo;

Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/mlas/lib/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ Module Name:
MLASCPUIDInfo::MLASCPUIDInfo()
{
    // Query the OS once for the ARMv8.2 dot-product processor feature.
    const bool dot_product_available =
        IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0;

    has_arm_neon_dot_ = dot_product_available;

    // Raw hack: fp16 support is assumed whenever dot product is available.
    // A CPUIDInfo implementation is needed for more precise detection.
    has_fp16_ = dot_product_available;
}
#endif

Expand Down

0 comments on commit a451ce9

Please sign in to comment.