From a4539e1636d94f30bf1c968e33c9c4fc858a423d Mon Sep 17 00:00:00 2001 From: Slaren <2141330+slaren@users.noreply.github.com> Date: Sat, 8 Apr 2023 13:41:57 +0200 Subject: [PATCH] Use the work buffer instead to fix MSVC build --- ggml.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/ggml.c b/ggml.c index 3b32aff9b689b7..dd089b2a78439a 100644 --- a/ggml.c +++ b/ggml.c @@ -5350,6 +5350,8 @@ static void ggml_compute_forward_add_q_f32( const int ir0 = dr*ith; const int ir1 = MIN(ir0 + dr, nr); + float * wdata = (float*) params->wdata + ne00 * ith; + for (int ir = ir0; ir < ir1; ++ir) { // src0 indices const int i03 = ir/(ne02*ne01); @@ -5372,12 +5374,11 @@ static void ggml_compute_forward_add_q_f32( assert(ne00 % 32 == 0); // unquantize row from src0 to temp buffer - float tmp[ne00]; - dequantize_row_q(src0_row, tmp, ne00); + dequantize_row_q(src0_row, wdata, ne00); // add src1 - ggml_vec_acc_f32(ne00, tmp, src1_row); + ggml_vec_acc_f32(ne00, wdata, src1_row); // quantize row to dst - quantize_row_q(tmp, dst_row, ne00); + quantize_row_q(wdata, dst_row, ne00); } } @@ -9566,6 +9567,14 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) case GGML_OP_ADD: { node->n_tasks = n_threads; + + size_t cur = 0; + + if (node->src0->type == GGML_TYPE_Q4_0 || node->src0->type == GGML_TYPE_Q4_1) { + cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src0->ne[0] * n_threads; + } + + work_size = MAX(work_size, cur); } break; case GGML_OP_SUB: case GGML_OP_MUL: