From 62aa85df23ce3f5db353ce9a4bfb8cd88395c376 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Wed, 13 Mar 2024 15:23:08 +0800
Subject: [PATCH] Fix opt gptq double quant example config (#1667)

Signed-off-by: Tang, Kaihui
---
 .../language-modeling/quantization/llm/run_quant.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh
index ec2c35f6937..32d9fba51ef 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh
@@ -45,16 +45,16 @@ function run_tuning {
     if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
     elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
         extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
     elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.1 --gptq_actorder"
         extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
     elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then
         model_name_or_path="meta-llama/Llama-2-7b-hf"
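
For context, the ${extra_cmd} string assembled per topology above is forwarded to the example's Python entry point when run_tuning executes. Below is a minimal sketch of an equivalent direct invocation for the opt_125m_woq_gptq_int4_dq_bnb topology; the entry-point name run_clm_no_trainer.py and the --model/--quantize options are assumptions based on the usual layout of these examples, while the quantization flags are taken verbatim from the patch:

    # Sketch only: "run_clm_no_trainer.py", "--model", and "--quantize" are
    # assumed; the remaining flags come directly from the patched extra_cmd.
    python run_clm_no_trainer.py \
        --model facebook/opt-125m \
        --quantize \
        --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym \
        --woq_use_mse_search --gptq_use_max_length \
        --double_quant_type BNB_NF4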