Skip to content

Commit

Permalink
metal : fix mat-vec Q4_K kernel for QK_K == 64
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov committed Jan 2, 2024
1 parent a8b9bb4 commit dd59578
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions ggml-metal.metal
Original file line number | Diff line number | Diff line change
Expand Up @@ -3018,8 +3018,8 @@ void kernel_mul_mv_q4_K_f32_impl(
constant uint & r2,
constant uint & r3,
uint3 tgpig[[threadgroup_position_in_grid]],
- uint tiisg[[thread_index_in_simdgroup]],
- uint sgitg[[simdgroup_index_in_threadgroup]]) {
+ uint tiisg[[thread_index_in_simdgroup]],
+ uint sgitg[[simdgroup_index_in_threadgroup]]) {

const int ix = tiisg/4; // 0...7
const int it = tiisg%4; // 0...3
Expand All @@ -3028,7 +3028,7 @@ void kernel_mul_mv_q4_K_f32_impl(
const int r0 = tgpig.x;
const int r1 = tgpig.y;
const int im = tgpig.z;
- const int first_row = (r0 * N_SIMDGROUP + sgitg) * N_DST;
+ const int first_row = r0 * N_DST;
const int ib_row = first_row * nb;

const uint i12 = im%ne12;
Expand Down Expand Up @@ -3094,7 +3094,7 @@ void kernel_mul_mv_q4_K_f32_impl(
for (int row = 0; row < N_DST; ++row) {
all_sum = simd_sum(sumf[row]);
if (tiisg == 0) {
- dst[r1*ne0+ im*ne0*ne1 + first_row + row] = all_sum;
+ dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;
}
}
}
Expand Down

0 comments on commit dd59578

Please sign in to comment.