From ea4b71321ac23c070882b87798a5e3e5d126fe27 Mon Sep 17 00:00:00 2001 From: intellinjun <105184542+intellinjun@users.noreply.github.com> Date: Wed, 24 Jan 2024 13:36:41 +0800 Subject: [PATCH] enable qwen-1.8b (#72) Signed-off-by: intellinjun --- neural_speed/models/qwen/qwen.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/neural_speed/models/qwen/qwen.h b/neural_speed/models/qwen/qwen.h index 46a745630..ec2c65357 100644 --- a/neural_speed/models/qwen/qwen.h +++ b/neural_speed/models/qwen/qwen.h @@ -30,6 +30,8 @@ static const model_scratch qwen_mem_req(int n_layers) { return {2048ull * MB, 2048ull * MB, 4096ull * MB}; case 32: return {1024ull * MB, 1024ull * MB, 1608ull * MB}; + case 24: + return {512ull * MB, 512ull * MB, 1026ull * MB}; default: MODEL_ASSERT(false); }