Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cfu fp16 #14538

Merged
merged 19 commits into from
Feb 15, 2023
Prev Previous commit
comments
chenfucn committed Feb 13, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 7925c91bab6200c50c6bfd6eaa795fcf676e35a1
2 changes: 1 addition & 1 deletion onnxruntime/core/mlas/inc/mlas.h
Original file line number Diff line number Diff line change
@@ -615,7 +615,7 @@ MlasGemm(
// Currently only supported in ARM64
//
#if defined(MLAS_TARGET_ARM64)
constexpr size_t MLAS_SYMM_QGEMM_BUF_OVERRUN = 15;
constexpr size_t MLAS_SYMM_QGEMM_BUF_OVERRUN = 30;
#else
constexpr size_t MLAS_SYMM_QGEMM_BUF_OVERRUN = 0;
#endif
2 changes: 1 addition & 1 deletion onnxruntime/core/mlas/lib/aarch64/HalfGemmKernelNeon.S
Original file line number Diff line number Diff line change
@@ -283,7 +283,7 @@ x7 |v5.h[0]..v5.h[3]| |v30.h[0]..v30.h[7] v31.h[0]..v31.h[7]| x4
.LM6N16OutterLoopNTail:
subs x1,x1,16 // N -= 16
ldr x8,[sp,#.LHGemmKernelFrame_B]
b.LO .LM6StoreRemainderN // remaining k < 16
b.LO .LM6StoreRemainderN // remaining N < 16

cbnz x19,.LM6N16SkipAccumulateOutput
ldp q0,q1,[x3]
2 changes: 1 addition & 1 deletion onnxruntime/core/mlas/lib/arm64/HalfGemmKernelNeon.asm
Original file line number Diff line number Diff line change
@@ -283,7 +283,7 @@ M6N16LoopK_Epilogue
M6N16OutterLoopNTail
subs x1,x1,16 // N -= 16
ldr x8,[sp,#HGemmKernelFrame_B]
b.LO M6StoreRemainderN // remaining k < 16
b.LO M6StoreRemainderN // remaining N < 16

cbnz x19,M6N16SkipAccumulateOutput
ldp q0,q1,[x3]
8 changes: 7 additions & 1 deletion onnxruntime/core/mlas/lib/platform.cpp
Original file line number Diff line number Diff line change
@@ -53,7 +53,13 @@ MLASCPUIDInfo::MLASCPUIDInfo()
#endif

#if defined(BUILD_MLAS_NO_ONNXRUNTIME)
// Constructor used under BUILD_MLAS_NO_ONNXRUNTIME: records whether the
// HWCAP_ASIMDDP bit is set in the AT_HWCAP auxiliary vector entry.
MLASCPUIDInfo::MLASCPUIDInfo()
{
    const auto hwcap_bits = getauxval(AT_HWCAP);
    has_arm_neon_dot_ = (hwcap_bits & HWCAP_ASIMDDP) != 0;
}
//
// Constructor used under BUILD_MLAS_NO_ONNXRUNTIME. Reads the AT_HWCAP
// auxiliary vector entry once and derives the dot-product and half-precision
// feature flags from their own capability bits.
//
MLASCPUIDInfo::MLASCPUIDInfo()
{
    // Linux arm64 hwcap bit for Advanced SIMD half-precision arithmetic
    // (HWCAP_ASIMDHP). Spelled out locally because older toolchain headers
    // may not provide the macro.
    constexpr unsigned long kHwcapAsimdHp = 1UL << 10;

    const unsigned long hwcap = getauxval(AT_HWCAP);

    has_arm_neon_dot_ = ((hwcap & HWCAP_ASIMDDP) != 0);

    // FP16 SIMD is advertised separately from dot-product; test its own bit
    // rather than assuming the two features always come together.
    has_fp16_ = ((hwcap & kHwcapAsimdHp) != 0);
}
#endif

#else