From c9c0b09c5e64114eada1b6beb7f6db36331e0fac Mon Sep 17 00:00:00 2001
From: Kealan Barbieri
Date: Tue, 14 Nov 2023 17:25:32 -0800
Subject: [PATCH] gpu: jit: handle tails in zero_out

---
 src/gpu/jit/codegen/codegen.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gpu/jit/codegen/codegen.cpp b/src/gpu/jit/codegen/codegen.cpp
index be25c47fe21..dcc7f000513 100644
--- a/src/gpu/jit/codegen/codegen.cpp
+++ b/src/gpu/jit/codegen/codegen.cpp
@@ -511,9 +511,10 @@ class ir_to_ngen_t : public ir_visitor_t {
         int grf_size = ngen::GRF::bytes(hw);
         int step = 2 * grf_size;
         for (int i = 0; i < size; i += step) {
-            int exec_size = std::min(step, size - i) / type.size();
+            step = std::min(step, size - i);
+            step = utils::rnd_down_pow2(step);
+            int exec_size = step / type.size();
             auto sub_rd_mov = rd.format(i, to_ngen(type), exec_size).reg_data();
-            ir_assert(math::is_pow2(exec_size));
             host_->emov(exec_size, sub_rd_mov, ngen::Immediate(0.0f));
         }
     }