Skip to content

Commit

Permalink
Fix segfault on AVX2: rbp was overwritten by new code
Browse files Browse the repository at this point in the history
  • Loading branch information
r-devulap committed Sep 11, 2024
1 parent 9a0b45b commit 6743659
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions onnxruntime/core/mlas/lib/amd64/QgemmU8X8KernelAvx2.asm
Original file line number Diff line number Diff line change
Expand Up @@ -1074,7 +1074,7 @@ LSkipAccumulateOutputMasked8xNBlock1:
push_reg r12
push_reg r13
push_reg r14
alloc_stack (GemmU8X8KernelFrame.SavedR13)
alloc_stack (GemmU8X8KernelFrame.SavedR14)
save_xmm128 xmm6,GemmU8X8KernelFrame.SavedXmm6
save_xmm128 xmm7,GemmU8X8KernelFrame.SavedXmm7
save_xmm128 xmm8,GemmU8X8KernelFrame.SavedXmm8
Expand All @@ -1101,8 +1101,8 @@ LSkipAccumulateOutputMasked8xNBlock1:
mov r13,GemmU8X8KernelFrame.ZeroPointB[rsp]
vpcmpeqw ymm12,ymm12,ymm12 ; generate 256-bit word vector [0xFFFF]
vpsrlw ymm12,ymm12,15 ; generate 256-bit word vector [0x0001]
lea rbp,[r9*8]
lea r14,[rbp*2]
lea r14,[r9*8]
lea r14,[r14*2]
cmp DWORD PTR GemmU8X8KernelFrame.PreviousP1Home[rsp],0
je CheckCountM4OrMore ; U8S8 AVX2 kernel requires extra registers

Expand Down Expand Up @@ -1147,7 +1147,7 @@ ExitKernel:
movaps xmm13,GemmU8X8KernelFrame.SavedXmm13[rsp]
movaps xmm14,GemmU8X8KernelFrame.SavedXmm14[rsp]
movaps xmm15,GemmU8X8KernelFrame.SavedXmm15[rsp]
add rsp,(GemmU8X8KernelFrame.SavedR13)
add rsp,(GemmU8X8KernelFrame.SavedR14)

BEGIN_EPILOGUE

Expand Down

0 comments on commit 6743659

Please sign in to comment.