Skip to content

Commit

Permalink
cpu: x64: brgemm: fix zp_comp_b when bd_block >= 14 on avx2+
Browse files (browse the repository at this point in the history)
  • Loading branch information
xuxinzen authored and vpirogov committed Jun 30, 2023
1 parent 6d2e567 commit ec0b2ee
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/cpu/x64/brgemm/jit_brgemm_kernel.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -242,7 +242,12 @@ struct jit_brgemm_kernel_t : public jit_generator {
}
}

- Vmm vmm_tmp(int i) { return Vmm(i); }
+ Vmm vmm_tmp(int i) {
+ assert(i >= 0
+ && i < max_effective_vregs - brg.bd_block * brg.ld_block2);
+ return Vmm(i);
+ }
+
Vmm vmm_tail_mask() { return vmm_tmp(1); }
Vmm vmm_one_bytes() const noexcept { return Vmm(3); }
Vmm vmm_zp_a_shift() const noexcept { return Vmm(2); }
Expand Down Expand Up @@ -1222,7 +1227,7 @@ void jit_brgemm_kernel_t<isa, Wmm>::apply_compensation(
reg_aux_zp_comp_b, zp_comp_b_off, true);
uni_vpaddd(vmm, vmm, zp_comp_b_addr);
} else {
- const auto vmm_zp_comp_b = vmm_tmp(2);
+ const auto vmm_zp_comp_b = vmm_tmp(0);
uni_vpbroadcastd(vmm_zp_comp_b,
ptr[reg_aux_zp_comp_b + zp_comp_b_off]);
uni_vpaddd(vmm, vmm, vmm_zp_comp_b);
Expand Down

0 comments on commit ec0b2ee

Please sign in to comment.