From b99abae5d937380cf9df80c9050fce18bddfb72d Mon Sep 17 00:00:00 2001
From: Kaihui-intel <kaihui.tang@intel.com>
Date: Tue, 6 Aug 2024 16:02:03 +0800
Subject: [PATCH] Fix `opt_125m_woq_gptq_int4_dq_ggml`: raise gptq_percdamp to 0.8, point CI trigger at weight_only (#1965)

Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
---
 .azure-pipelines/model-test-3x.yml                              | 2 +-
 .../language-modeling/quantization/weight_only/run_quant.sh     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.azure-pipelines/model-test-3x.yml b/.azure-pipelines/model-test-3x.yml
index facd89a0798..55320d9247c 100644
--- a/.azure-pipelines/model-test-3x.yml
+++ b/.azure-pipelines/model-test-3x.yml
@@ -10,7 +10,7 @@ pr:
     include:
       - neural_compressor/common
       - neural_compressor/torch
-      - examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm
+      - examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only
       - setup.py
       - requirements_pt.txt
       - .azure-pipelines/scripts/models
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh
index 079a1d28406..a860712b697 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh
@@ -50,7 +50,7 @@ function run_tuning {
         extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
     elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then
         model_name_or_path="facebook/opt-125m"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.1 --gptq_actorder"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.8 --gptq_actorder"
         extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
     elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then
         model_name_or_path="meta-llama/Llama-2-7b-hf"