From b99abae5d937380cf9df80c9050fce18bddfb72d Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 6 Aug 2024 16:02:03 +0800 Subject: [PATCH] Fix `opt_125m_woq_gptq_int4_dq_ggml` issue (#1965) Signed-off-by: Kaihui-intel --- .azure-pipelines/model-test-3x.yml | 2 +- .../language-modeling/quantization/weight_only/run_quant.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/model-test-3x.yml b/.azure-pipelines/model-test-3x.yml index facd89a0798..55320d9247c 100644 --- a/.azure-pipelines/model-test-3x.yml +++ b/.azure-pipelines/model-test-3x.yml @@ -10,7 +10,7 @@ pr: include: - neural_compressor/common - neural_compressor/torch - - examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm + - examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only - setup.py - requirements_pt.txt - .azure-pipelines/scripts/models diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh index 079a1d28406..a860712b697 100644 --- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh +++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh @@ -50,7 +50,7 @@ function run_tuning { extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then model_name_or_path="facebook/opt-125m" - extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.1 --gptq_actorder" + extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.8 --gptq_actorder" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then model_name_or_path="meta-llama/Llama-2-7b-hf"