Skip to content

Commit

Permalink
fix loop error
Browse files Browse the repository at this point in the history
  • Loading branch information
fajin-corp committed Sep 24, 2024
1 parent 0f765ed commit 66323eb
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
4 changes: 2 additions & 2 deletions onnxruntime/core/mlas/lib/sqnbitgemm_kernel_neon_fp16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ MlasCastF16ToF32KernelNeon(const unsigned short* src, float* dest, size_t count)
}

// aligned src
for (; i + 4 < count; i += 4)
for (; i + 3 < count; i += 4)
{
float16x4_t fp16v4_0 = vreinterpret_f16_u16(vld1_u16(src + i));
float32x4_t fp32v4_0 = vcvt_f32_f16(fp16v4_0);
Expand Down Expand Up @@ -124,7 +124,7 @@ MlasCastF32ToF16KernelNeon(const float* src, unsigned short* dest, size_t count)
}

// aligned src
for (; i + 4 < count; i += 4)
for (; i + 3 < count; i += 4)
{
float32x4_t fp32v4_0 = vld1q_f32(src + i);
float16x4_t fp16v4_0 = vcvt_f16_f32(fp32v4_0);
Expand Down
8 changes: 7 additions & 1 deletion onnxruntime/test/mlas/unittest/test_sqnbitgemm_neon_fp16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,13 @@ class MlasNeonFp16CastTest : public MlasTestBase {

// Short test pass (overrides the base-class hook): invokes the FP16->FP32 and
// FP32->FP16 cast test helpers with a mix of large and small size arguments.
// NOTE(review): the argument is presumably the element count handed to the
// cast kernel -- confirm against the TestFp16ToFp32/TestFp32ToFp16 helpers.
void ExecuteShort(void) override {
// Large size, multiple of 4.
TestFp16ToFp32(1 << 16);
// Large size that is not a multiple of 4 (32763 == 4 * 8190 + 3).
TestFp32ToFp16((1 << 15) - 5);
// Small sizes below, equal to, and above 4; presumably chosen to exercise the
// boundary of a 4-elements-per-iteration loop and its remainder -- confirm.
TestFp16ToFp32(1);
TestFp16ToFp32(4);
TestFp16ToFp32(7);
// Same pattern for the opposite conversion direction.
TestFp32ToFp16(1 << 16);
TestFp32ToFp16(3);
TestFp32ToFp16(4);
TestFp32ToFp16(6);
}
};

Expand Down

0 comments on commit 66323eb

Please sign in to comment.