This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit b1adc05
fix SQ baichuan without
Signed-off-by: changwangss <[email protected]>
changwangss committed Jun 6, 2024
1 parent b1d3d3c commit b1adc05
Showing 5 changed files with 4 additions and 32 deletions.
@@ -5,7 +5,7 @@ protobuf
sentencepiece != 0.1.92
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.3.0+cpu
- transformers
+ transformers==4.38.1
intel_extension_for_pytorch==2.3.0
optimum-intel==1.16.1
bitsandbytes #baichuan
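
The net effect of the requirements change above is one pinned transformers release for the whole example, replacing the per-topology pip install overrides deleted in the scripts below. A minimal sanity check, assuming the updated requirements file has been installed:

    # Minimal check after `pip install -r requirements.txt`: every topology
    # now runs against the single pinned transformers version.
    import transformers

    assert transformers.__version__ == "4.38.1", transformers.__version__
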
@@ -119,14 +119,12 @@ function run_benchmark {
elif [ "${topology}" = "llama_7b" ]; then
model_name_or_path="meta-llama/Llama-2-7b-chat-hf"
script="run_generation_sq.py"
- pip install transformers==4.35.2
elif [ "${topology}" = "llama2_7b_gptq" ]; then
model_name_or_path="meta-llama/Llama-2-7b-hf"
script="run_generation_cpu_woq.py"
elif [ "${topology}" = "llama_13b" ]; then
model_name_or_path="meta-llama/Llama-2-13b-chat-hf"
script="run_generation_sq.py"
- pip install transformers==4.35.2
elif [ "${topology}" = "dolly_v2_3b" ]; then
model_name_or_path="/tf_dataset2/models/pytorch/dolly_v2_3b"
script="run_generation_sq.py"
@@ -137,47 +135,36 @@ function run_benchmark {
model_name_or_path="THUDM/chatglm3-6b"
script="run_generation_sq.py"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.35.2
elif [ "${topology}" = "chatglm2_6b" ]; then
model_name_or_path="THUDM/chatglm2-6b"
script="run_generation_sq.py"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.35.2
elif [ "${topology}" = "chatglm_6b" ]; then
model_name_or_path="THUDM/chatglm-6b"
script="run_generation_sq.py"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.33
elif [ "${topology}" = "falcon_7b" ]; then
model_name_or_path="tiiuae/falcon-7b-instruct"
script="run_generation_sq.py"
- pip install transformers==4.33
elif [ "${topology}" = "baichuan_7b" ]; then
model_name_or_path="baichuan-inc/Baichuan-7B"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.33
script="run_generation_sq.py"
elif [ "${topology}" = "baichuan_13b" ]; then
model_name_or_path="baichuan-inc/Baichuan-13B-Base"
extra_cmd=$extra_cmd" --trust_remote_code"
extra_cmd=$extra_cmd" --_commit_hash 14d5b0e204542744900f6fb52422c6d633bdcb00"
- pip install transformers==4.33
script="run_generation_sq.py"
elif [ "${topology}" = "baichuan2_7b" ]; then
model_name_or_path="baichuan-inc/Baichuan2-7B-Base"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.33
script="run_generation_sq.py"
elif [ "${topology}" = "baichuan2_13b" ]; then
model_name_or_path="baichuan-inc/Baichuan2-13B-Base"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.35.2
script="run_generation_sq.py"
elif [ "${topology}" = "qwen_7b" ]; then
model_name_or_path="Qwen/Qwen-7B"
extra_cmd=$extra_cmd" --trust_remote_code"
extra_cmd=$extra_cmd" --_commit_hash f7bc352f27bb1c02ee371a4576942a7d96c8bb97"
- pip install transformers==4.35.2
script="run_generation_sq.py"
elif [ "${topology}" = "mistral_7b" ]; then
model_name_or_path="Intel/neural-chat-7b-v3"
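
All of the deleted pip install lines in this benchmark script follow one pattern: each topology pinned its own transformers release. For reference, a throwaway sketch (not code from the repo) collecting the pins removed above, all of which now resolve to the single 4.38.1 pin in requirements:

    # Throwaway sketch: per-topology transformers pins deleted above.
    removed_pins = {
        "llama_7b": "4.35.2", "llama_13b": "4.35.2",
        "chatglm3_6b": "4.35.2", "chatglm2_6b": "4.35.2",
        "chatglm_6b": "4.33", "falcon_7b": "4.33",
        "baichuan_7b": "4.33", "baichuan_13b": "4.33",
        "baichuan2_7b": "4.33", "baichuan2_13b": "4.35.2",
        "qwen_7b": "4.35.2",
    }
    print(sorted(set(removed_pins.values())))  # ['4.33', '4.35.2']
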
@@ -133,15 +133,13 @@ function run_tuning {
model_name_or_path="/tf_dataset2/models/nlp_toolkit/llama-2-7b-chat/Llama-2-7b-chat-hf"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
- pip install transformers==4.35.2
script="run_generation_sq.py"
elif [ "${topology}" = "llama_13b" ]; then
alpha=0.8
model_name_or_path="meta-llama/Llama-2-13b-chat-hf"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
script="run_generation_sq.py"
- pip install transformers==4.35.2
elif [ "${topology}" = "dolly_v2_3b" ]; then
alpha=0.6
model_name_or_path="/tf_dataset2/models/pytorch/dolly_v2_3b"
@@ -161,29 +159,25 @@ function run_tuning {
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
script="run_generation_sq.py"
- pip install transformers==4.35.2
elif [ "${topology}" = "chatglm2_6b" ]; then
alpha=0.75
model_name_or_path="THUDM/chatglm2-6b"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
script="run_generation_sq.py"
- pip install transformers==4.35.2
elif [ "${topology}" = "chatglm_6b" ]; then
alpha=0.75
model_name_or_path="THUDM/chatglm-6b"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.33
script="run_generation_sq.py"
elif [ "${topology}" = "falcon_7b" ]; then
alpha=0.7
model_name_or_path="tiiuae/falcon-7b-instruct"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
- pip install transformers==4.33.3
script="run_generation_sq.py"
elif [ "${topology}" = "baichuan_7b" ]; then
alpha=0.85
@@ -192,23 +186,19 @@ function run_tuning {
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
script="run_generation_sq.py"
- pip install transformers==4.33
elif [ "${topology}" = "baichuan_13b" ]; then
alpha=0.85
model_name_or_path="baichuan-inc/Baichuan-13B-Base"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
extra_cmd=$extra_cmd" --_commit_hash 14d5b0e204542744900f6fb52422c6d633bdcb00"
- pip install transformers==4.33
script="run_generation_sq.py"
elif [ "${topology}" = "baichuan2_7b" ]; then
alpha=0.85
model_name_or_path="baichuan-inc/Baichuan2-7B-Base"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.33
script="run_generation_sq.py"
elif [ "${topology}" = "baichuan2_13b" ]; then
alpha=0.55
@@ -224,9 +214,7 @@ function run_tuning {
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
extra_cmd=$extra_cmd" --_commit_hash f7bc352f27bb1c02ee371a4576942a7d96c8bb97"
- pip install transformers==4.35.2
- script="run_generation_sq.py"
script="run_generation_sq.py"
elif [ "${topology}" = "mistral_7b" ]; then
alpha=0.8
model_name_or_path="Intel/neural-chat-7b-v3"
@@ -240,15 +228,13 @@ function run_tuning {
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.36.1
script="run_generation_sq.py"
elif [ "${topology}" = "phi_1_5b" ]; then
alpha=0.5
model_name_or_path="susnato/phi-1_5_dev"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code"
- pip install transformers==4.36.1
script="run_generation_sq.py"
elif [ "${topology}" = "llama2_7b_gptq" ]; then
model_name_or_path="meta-llama/Llama-2-7b-hf"
@@ -855,7 +855,7 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
logger.info(
"quantization_config.ipex_opt_llm set to True and ipex.optimize_transformers is used."
)
- logger.warning("The suggested transformers version is 4.35.2.")
+ logger.warning("The suggested transformers version is 4.38.1.")
else:
quantization_config.ipex_opt_llm = False
if quantization_config.ipex_opt_llm:
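
For context, the changed warning sits inside the branch that enables ipex.optimize_transformers. A hedged sketch of that gating pattern follows; the function name resolve_ipex_opt_llm, its arguments, and the surrounding control flow are assumptions for illustration, not the file's real signature:

    import logging

    logger = logging.getLogger(__name__)

    SUGGESTED_TRANSFORMERS = "4.38.1"  # matches the new pin in requirements

    def resolve_ipex_opt_llm(quantization_config, model_type, supported_types):
        # Sketch: enable the ipex.optimize_transformers path only for model
        # types known to support it; otherwise fall back, as in the else
        # branch shown in the diff above.
        if model_type in supported_types:
            quantization_config.ipex_opt_llm = True
            logger.info(
                "quantization_config.ipex_opt_llm set to True and "
                "ipex.optimize_transformers is used."
            )
            logger.warning(
                "The suggested transformers version is %s.",
                SUGGESTED_TRANSFORMERS,
            )
        else:
            quantization_config.ipex_opt_llm = False
        return quantization_config.ipex_opt_llm
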
@@ -313,8 +313,7 @@ def generate_dummy_past_key_values_for_opt_llm(config, input_bs, num_beams=1):
"imagegpt",
"llama",
"mistral",
"chatglm",
"baichuan"
"chatglm"
}

def get_example_inputs(model_config, batch_size=1, tokenizer=None, num_beams=4):
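
Dropping "baichuan" from this set means baichuan models no longer get the opt-llm dummy past-key-value layout and instead fall through to the generic path, which is the behavioral core of this fix. A small illustration; the set name, the helper, and any set contents beyond the members visible in the diff are hypothetical:

    # Post-commit membership as visible in the diff above (the real set may
    # contain more entries than the excerpt shows).
    OPT_LLM_MODEL_TYPES = {"imagegpt", "llama", "mistral", "chatglm"}

    def uses_opt_llm_dummy_kv(model_type: str) -> bool:
        # Membership decides which dummy past-key-value builder the
        # SmoothQuant export path uses; baichuan now takes the generic one.
        return model_type in OPT_LLM_MODEL_TYPES

    assert uses_opt_llm_dummy_kv("chatglm")
    assert not uses_opt_llm_dummy_kv("baichuan")
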
