From aa3a60cc4d817340a33459a8fbefce31e63b3807 Mon Sep 17 00:00:00 2001 From: wenhuach21 Date: Wed, 13 Nov 2024 08:49:03 +0800 Subject: [PATCH 1/5] fix eval device issue --- README.md | 64 +++++++++++++++--------------- auto_round/script/llm.py | 18 +++++---- auto_round/script/mllm.py | 4 +- examples/language-modeling/main.py | 5 +-- 4 files changed, 45 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index f5031df8..0e823178 100644 --- a/README.md +++ b/README.md @@ -291,39 +291,39 @@ Please note that an asterisk (*) indicates third-party quantized models, which m different recipe. We greatly appreciate their efforts and encourage more users to share their models, as we cannot release most of the models ourselves. - Model | Supported | -|----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| meta-llama/Meta-Llama-3.1-70B-Instruct | [recipe](https://huggingface.co/Intel/Meta-Llama-3.1-70B-Instruct-int4-inc) | + Model | Supported | +|----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| meta-llama/Meta-Llama-3.1-70B-Instruct | [recipe](https://huggingface.co/Intel/Meta-Llama-3.1-70B-Instruct-int4-inc) | | meta-llama/Meta-Llama-3.1-8B-Instruct | [model-kaitchup-autogptq-int4*](https://huggingface.co/kaitchup/Meta-Llama-3.1-8B-Instruct-autoround-gptq-4bit-asym), [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Meta-Llama-3.1-8B-Instruct-autoround-gptq-4bit-sym), [recipe](https://huggingface.co/Intel/Meta-Llama-3.1-8B-Instruct-int4-inc) | -| meta-llama/Meta-Llama-3.1-8B | [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Meta-Llama-3.1-8B-autoround-gptq-4bit-sym) | -| Qwen/Qwen-VL | [accuracy](./examples/multimodal-modeling/Qwen-VL/README.md), [recipe](./examples/multimodal-modeling/Qwen-VL/run_autoround.sh) -| Qwen/Qwen2-7B | [model-autoround-sym-int4](https://huggingface.co/Intel/Qwen2-7B-int4-inc), [model-autogptq-sym-int4](https://huggingface.co/Intel/Qwen2-7B-int4-inc) | -| THUDM/glm-4-9b-chat | [recipe](./docs/glm-4-9b-chat-recipe.md) | -| Qwen/Qwen2-57B-A14B-Instruct | [model-autoround-sym-int4](https://huggingface.co/Intel/Qwen2-57B-A14B-Instruct-int4-inc),[model-autogptq-sym-int4](https://huggingface.co/Intel/Qwen2-57B-A14B-Instruct-int4-inc) | -| 01-ai/Yi-1.5-9B | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/Yi-1.5-9B-4bit-gptq-autoround) | -| 01-ai/Yi-1.5-9B-Chat | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/Yi-1.5-9B-Chat-4bit-gptq-autoround) | -| Intel/neural-chat-7b-v3-3 | [model-autogptq-int4](https://huggingface.co/Intel/neural-chat-7b-v3-3-int4-inc) | -| Intel/neural-chat-7b-v3-1 | [model-autogptq-int4](https://huggingface.co/Intel/neural-chat-7b-v3-1-int4-inc) | -| TinyLlama-1.1B-intermediate | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/TinyLlama-1.1B-intermediate-step-1341k-3T-autoround-lm_head-symFalse) | -| mistralai/Mistral-7B-v0.1 | 
[model-autogptq-lmhead-int4](https://huggingface.co/Intel/Mistral-7B-v0.1-int4-inc-lmhead), [model-autogptq-int4](https://huggingface.co/Intel/Mistral-7B-v0.1-int4-inc) | -| google/gemma-2b | [model-autogptq-int4](https://huggingface.co/Intel/gemma-2b-int4-inc) | -| tiiuae/falcon-7b | [model-autogptq-int4-G64](https://huggingface.co/Intel/falcon-7b-int4-inc) | -| sapienzanlp/modello-italia-9b | [model-fbaldassarri-autogptq-int4*](https://huggingface.co/fbaldassarri/modello-italia-9b-autoround-w4g128-cpu) | -| microsoft/phi-2 | [model-autoround-sym-int4](https://huggingface.co/Intel/phi-2-int4-inc) [model-autogptq-sym-int4](https://huggingface.co/Intel/phi-2-int4-inc) | -| microsoft/Phi-3.5-mini-instruct | [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Phi-3.5-Mini-instruct-AutoRound-4bit) | -| microsoft/Phi-3-vision-128k-instruct | [recipe](./examples/multimodal-modeling/Phi-3-vision/run_autoround.sh) -| mistralai/Mistral-7B-Instruct-v0.2 | [accuracy](./docs/Mistral-7B-Instruct-v0.2-acc.md), [recipe](./examples/language-modeling/scripts/Mistral-7B-Instruct-v0.2.sh), [example](./examples/language-modeling/) | -| mistralai/Mixtral-8x7B-Instruct-v0.1 | [accuracy](./docs/Mixtral-8x7B-Instruct-v0.1-acc.md), [recipe](./examples/language-modeling/scripts/Mixtral-8x7B-Instruct-v0.1.sh), [example](./examples/language-modeling/) | -| mistralai/Mixtral-8x7B-v0.1 | [accuracy](./docs/Mixtral-8x7B-v0.1-acc.md), [recipe](./examples/language-modeling/scripts/Mixtral-8x7B-v0.1.sh), [example](./examples/language-modeling/) | -| meta-llama/Meta-Llama-3-8B-Instruct | [accuracy](./docs/Meta-Llama-3-8B-Instruct-acc.md), [recipe](./examples/language-modeling/scripts/Meta-Llama-3-8B-Instruct.sh), [example](./examples/language-modeling/) | -| google/gemma-7b | [accuracy](./docs/gemma-7b-acc.md), [recipe](./examples/language-modeling/scripts/gemma-7b.sh), [example](./examples/language-modeling/) | -| meta-llama/Llama-2-7b-chat-hf | [accuracy](./docs/Llama-2-7b-chat-hf-acc.md), [recipe](./examples/language-modeling/scripts/Llama-2-7b-chat-hf.sh), [example](./examples/language-modeling/) | -| Qwen/Qwen1.5-7B-Chat | [accuracy](./docs/Qwen1.5-7B-Chat-acc.md), [sym recipe](./examples/language-modeling/scripts/Qwen1.5-7B-Chat-sym.sh), [asym recipe ](./examples/language-modeling/scripts/Qwen1.5-7B-Chat-asym.sh), [example](./examples/language-modeling/) | -| baichuan-inc/Baichuan2-7B-Chat | [accuracy](./docs/baichuan2-7b-chat-acc.md), [recipe](./examples/language-modeling/scripts/baichuan2-7b-chat.sh), [example](./examples/language-modeling/) | -| 01-ai/Yi-6B-Chat | [accuracy](./docs/Yi-6B-Chat-acc.md), [recipe](./examples/language-modeling/scripts/Yi-6B-Chat.sh), [example](./examples/language-modeling/) | -| facebook/opt-2.7b | [accuracy](./docs/opt-2.7b-acc.md), [recipe](./examples/language-modeling/scripts/opt-2.7b.sh), [example](./examples/language-modeling/) | -| bigscience/bloom-3b | [accuracy](./docs/bloom-3B-acc.md), [recipe](./examples/language-modeling/scripts/bloom-3b.sh), [example](./examples/language-modeling/) | -| EleutherAI/gpt-j-6b | [accuracy](./docs/gpt-j-6B-acc.md), [recipe](./examples/language-modeling/scripts/gpt-j-6b.sh), [example](./examples/language-modeling/) | +| meta-llama/Meta-Llama-3.1-8B | [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Meta-Llama-3.1-8B-autoround-gptq-4bit-sym) | +| Qwen/Qwen-VL | [accuracy](./examples/multimodal-modeling/Qwen-VL/README.md), [recipe](./examples/multimodal-modeling/Qwen-VL/run_autoround.sh) +| Qwen/Qwen2-7B | 
[model-autoround-sym-int4](https://huggingface.co/Intel/Qwen2-7B-int4-inc), [model-autogptq-sym-int4](https://huggingface.co/Intel/Qwen2-7B-int4-inc) | +| THUDM/glm-4-9b-chat | [recipe](./docs/glm-4-9b-chat-recipe.md) | +| Qwen/Qwen2-57B-A14B-Instruct | [model-autoround-sym-int4](https://huggingface.co/Intel/Qwen2-57B-A14B-Instruct-int4-inc),[model-autogptq-sym-int4](https://huggingface.co/Intel/Qwen2-57B-A14B-Instruct-int4-inc) | +| 01-ai/Yi-1.5-9B | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/Yi-1.5-9B-4bit-gptq-autoround) | +| 01-ai/Yi-1.5-9B-Chat | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/Yi-1.5-9B-Chat-4bit-gptq-autoround) | +| Intel/neural-chat-7b-v3-3 | [model-autogptq-int4](https://huggingface.co/Intel/neural-chat-7b-v3-3-int4-inc) | +| Intel/neural-chat-7b-v3-1 | [model-autogptq-int4](https://huggingface.co/Intel/neural-chat-7b-v3-1-int4-inc) | +| TinyLlama-1.1B-intermediate | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/TinyLlama-1.1B-intermediate-step-1341k-3T-autoround-lm_head-symFalse) | +| mistralai/Mistral-7B-v0.1 | [model-autogptq-lmhead-int4](https://huggingface.co/Intel/Mistral-7B-v0.1-int4-inc-lmhead), [model-autogptq-int4](https://huggingface.co/Intel/Mistral-7B-v0.1-int4-inc) | +| google/gemma-2b | [model-autogptq-int4](https://huggingface.co/Intel/gemma-2b-int4-inc) | +| tiiuae/falcon-7b | [model-autogptq-int4-G64](https://huggingface.co/Intel/falcon-7b-int4-inc) | +| sapienzanlp/modello-italia-9b | [model-fbaldassarri-autogptq-int4*](https://huggingface.co/fbaldassarri/modello-italia-9b-autoround-w4g128-cpu) | +| microsoft/phi-2 | [model-autoround-sym-int4](https://huggingface.co/Intel/phi-2-int4-inc) [model-autogptq-sym-int4](https://huggingface.co/Intel/phi-2-int4-inc) | +| microsoft/Phi-3.5-mini-instruct | [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Phi-3.5-Mini-instruct-AutoRound-4bit) | +| microsoft/Phi-3-vision-128k-instruct | [recipe](./examples/multimodal-modeling/Phi-3-vision/run_autoround.sh) +| mistralai/Mistral-7B-Instruct-v0.2 | [accuracy](./docs/Mistral-7B-Instruct-v0.2-acc.md), [recipe](./examples/language-modeling/scripts/Mistral-7B-Instruct-v0.2.sh) | +| mistralai/Mixtral-8x7B-Instruct-v0.1 | [accuracy](./docs/Mixtral-8x7B-Instruct-v0.1-acc.md), [recipe](./examples/language-modeling/scripts/Mixtral-8x7B-Instruct-v0.1.sh) | +| mistralai/Mixtral-8x7B-v0.1 | [accuracy](./docs/Mixtral-8x7B-v0.1-acc.md), [recipe](./examples/language-modeling/scripts/Mixtral-8x7B-v0.1.sh) | +| meta-llama/Meta-Llama-3-8B-Instruct | [accuracy](./docs/Meta-Llama-3-8B-Instruct-acc.md), [recipe](./examples/language-modeling/scripts/Meta-Llama-3-8B-Instruct.sh) | +| google/gemma-7b | [accuracy](./docs/gemma-7b-acc.md), [recipe](./examples/language-modeling/scripts/gemma-7b.sh) | +| meta-llama/Llama-2-7b-chat-hf | [accuracy](./docs/Llama-2-7b-chat-hf-acc.md), [recipe](./examples/language-modeling/scripts/Llama-2-7b-chat-hf.sh) | +| Qwen/Qwen1.5-7B-Chat | [accuracy](./docs/Qwen1.5-7B-Chat-acc.md), [sym recipe](./examples/language-modeling/scripts/Qwen1.5-7B-Chat-sym.sh), [asym recipe ](./examples/language-modeling/scripts/Qwen1.5-7B-Chat-asym.sh) | +| baichuan-inc/Baichuan2-7B-Chat | [accuracy](./docs/baichuan2-7b-chat-acc.md), [recipe](./examples/language-modeling/scripts/baichuan2-7b-chat.sh) | +| 01-ai/Yi-6B-Chat | [accuracy](./docs/Yi-6B-Chat-acc.md), [recipe](./examples/language-modeling/scripts/Yi-6B-Chat.sh) | +| facebook/opt-2.7b | [accuracy](./docs/opt-2.7b-acc.md), 
[recipe](./examples/language-modeling/scripts/opt-2.7b.sh) | +| bigscience/bloom-3b | [accuracy](./docs/bloom-3B-acc.md), [recipe](./examples/language-modeling/scripts/bloom-3b.sh) | +| EleutherAI/gpt-j-6b | [accuracy](./docs/gpt-j-6B-acc.md), [recipe](./examples/language-modeling/scripts/gpt-j-6b.sh) | ## Integration diff --git a/auto_round/script/llm.py b/auto_round/script/llm.py index faf616eb..eb137b33 100644 --- a/auto_round/script/llm.py +++ b/auto_round/script/llm.py @@ -27,6 +27,8 @@ # limitations under the License. import argparse +from auto_round.utils import detect_device + class BasicArgumentParser(argparse.ArgumentParser): def __init__(self, *args, **kwargs): @@ -215,9 +217,8 @@ def tune(args): tasks = args.tasks if args.format is None: args.format = "auto_round" - supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:gptq", "auto_round:auto_gptq", - "auto_round:auto_gptq:marlin", "auto_round:gptq:marlin", "auto_round:auto_awq", - "auto_round:awq", "auto_gptq:marlin", "itrex", "iterx_xpu", "fake"] + supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:auto_gptq", "auto_round:auto_awq", + "auto_gptq:marlin", "itrex", "iterx_xpu", "fake"] formats = args.format.replace(' ', '').split(",") for format in formats: if format not in supported_formats: @@ -233,7 +234,7 @@ def tune(args): ##must set this before import torch import os - devices = args.device.replace(" ","").split(',') + devices = args.device.replace(" ", "").split(',') use_auto_mapping = False if all(s.isdigit() for s in devices): if "CUDA_VISIBLE_DEVICES" in os.environ: @@ -247,7 +248,7 @@ def tune(args): "Invalid '--device' value: It must be smaller than the number of available devices. " "For example, with CUDA_VISIBLE_DEVICES=4,5, " "--device 0,1 is valid, but --device 4,5 is not supported.") - visible_devices =','.join(pick_device) + visible_devices = ','.join(pick_device) os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices else: os.environ["CUDA_VISIBLE_DEVICES"] = args.device @@ -451,7 +452,7 @@ def tune(args): def eval(args): import os - devices = args.device.replace(" ","").split(',') + devices = args.device.replace(" ", "").split(',') parallelism = False if all(s.isdigit() for s in devices): @@ -466,7 +467,7 @@ def eval(args): "Invalid '--device' value: It must be smaller than the number of available devices. 
" "For example, with CUDA_VISIBLE_DEVICES=4,5, " "--device 0,1 is valid, but --device 4,5 is not supported.") - visible_devices =','.join(pick_device) + visible_devices = ','.join(pick_device) os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices else: os.environ["CUDA_VISIBLE_DEVICES"] = args.device @@ -474,7 +475,8 @@ def eval(args): devices = args.device.replace(" ", "").split(',') parallelism = True device_str = None - + else: + device_str = detect_device(args.device.replace(" ", "")) from auto_round.eval.evaluation import simple_evaluate diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index ca5d0351..021c3e2f 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -209,9 +209,7 @@ def setup_parser(): def tune(args): if args.format is None: args.format = "auto_round" - supported_formats = ["auto_round", "auto_round:gptq", "auto_round:auto_gptq", - "auto_round:auto_gptq:marlin", "auto_round:gptq:marlin", "auto_round:auto_awq", - "auto_round:awq"] + supported_formats = ["auto_round" "auto_round:auto_gptq", "auto_round:auto_awq"] if not args.quant_nontext_module: supported_formats.extend(["auto_gptq", "auto_gptq:marlin"]) diff --git a/examples/language-modeling/main.py b/examples/language-modeling/main.py index 50330715..4a3ca875 100644 --- a/examples/language-modeling/main.py +++ b/examples/language-modeling/main.py @@ -151,9 +151,8 @@ if args.format is None: args.format = "auto_round" - supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:gptq", "auto_round:auto_gptq", - "auto_round:auto_gptq:marlin", "auto_round:gptq:marlin", "auto_round:auto_awq", - "auto_round:awq", "auto_gptq:marlin", "itrex", "iterx_xpu", "fake"] + supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:auto_gptq","auto_round:auto_awq", + "auto_gptq:marlin", "itrex", "iterx_xpu", "fake"] formats = args.format.replace(' ', '').split(",") for format in formats: if format not in supported_formats: From 0976a5acd9f7b04ff356864ae758e081bbb04f8a Mon Sep 17 00:00:00 2001 From: wenhuach21 Date: Wed, 13 Nov 2024 08:50:58 +0800 Subject: [PATCH 2/5] fix --- auto_round/script/mllm.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index 021c3e2f..d3337468 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -51,9 +51,9 @@ def __init__(self, *args, **kwargs): help="whether to use asym quantization") self.add_argument("--dataset", type=str, default="liuhaotian/llava_conv_58k", - help="the dataset for quantization training." - " current support llava_conv_58k,llava_instruct_80k " - "It can be a custom one.") + help="the dataset for quantization training." + " current support llava_conv_58k,llava_instruct_80k " + "It can be a custom one.") self.add_argument("--lr", default=None, type=float, help="learning rate, if None, it will be set to 1.0/iters automatically") @@ -143,12 +143,12 @@ def __init__(self, *args, **kwargs): "Can be a dir path or multiple dir path with format as " "'image=path_to_image,video=path_to_video,audio=path_to_audio'" "By default, it will search in the relative path, " - "and if not find, will automatic download.") + "and if not find, will automatic download.") self.add_argument("--template", default=None, type=str, help="the template for building training dataset. 
It can be a custom one.") - - self.add_argument("--truncation", action="store_true", + + self.add_argument("--truncation", action="store_true", help="whether to truncate sequences at the maximum length.") ## ======================= VLM eval======================= @@ -209,7 +209,7 @@ def setup_parser(): def tune(args): if args.format is None: args.format = "auto_round" - supported_formats = ["auto_round" "auto_round:auto_gptq", "auto_round:auto_awq"] + supported_formats = ["auto_round", "auto_round:auto_gptq", "auto_round:auto_awq"] if not args.quant_nontext_module: supported_formats.extend(["auto_gptq", "auto_gptq:marlin"]) @@ -259,7 +259,7 @@ def tune(args): if "llava" in model_name: from llava.model.builder import load_pretrained_model # pylint: disable=E0401 tokenizer, model, image_processor, _ = load_pretrained_model(model_name, model_base=None, model_name=model_name, - torch_dtype=torch_dtype) + torch_dtype=torch_dtype) model_type = "llava" else: config = AutoConfig.from_pretrained(model_name, trust_remote_code=not args.disable_trust_remote_code) @@ -275,9 +275,9 @@ def tune(args): cls = MllamaForConditionalGeneration else: cls = AutoModelForCausalLM - + model = cls.from_pretrained( - model_name,trust_remote_code=not args.disable_trust_remote_code, torch_dtype=torch_dtype, + model_name, trust_remote_code=not args.disable_trust_remote_code, torch_dtype=torch_dtype, device_map="auto" if use_auto_mapping else None) if "cogvlm2" in model_name: model.config.model_type = "cogvlm2" @@ -327,10 +327,10 @@ def tune(args): if args.quant_lm_head and args.low_gpu_mem_usage: print(f"warning, low_gpu_mem_usage=False is strongly recommended if the whole model could be loaded to " f"gpu") - - autoround = round(model, tokenizer, image_processor=image_processor, dataset=args.dataset, + + autoround = round(model, tokenizer, image_processor=image_processor, dataset=args.dataset, extra_data_dir=args.extra_data_dir, bits=args.bits, group_size=args.group_size, - sym=not args.asym, batch_size=args.batch_size, seqlen=seqlen, nblocks=args.nblocks, + sym=not args.asym, batch_size=args.batch_size, seqlen=seqlen, nblocks=args.nblocks, iters=args.iters, lr=args.lr, minmax_lr=args.minmax_lr, amp=not args.disable_amp, enable_quanted_input=not args.disable_quanted_input, truncation=args.truncation, nsamples=args.nsamples, low_gpu_mem_usage=args.low_gpu_mem_usage, From 68ca657f80e31e9f39be65dd22862360db815636 Mon Sep 17 00:00:00 2001 From: wenhuach21 Date: Wed, 13 Nov 2024 10:24:06 +0800 Subject: [PATCH 3/5] fix preci issue --- auto_round/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/auto_round/utils.py b/auto_round/utils.py index 3be834f8..cafd2430 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -890,11 +890,11 @@ def torch_version_at_least(version_string): def check_hpu_compile_mode(): assert ( - os.getenv["PT_HPU_LAZY_MODE"] == "0" + os.getenv["PT_HPU_LAZY_MODE"] == "0" # pylint: disable==E1136 ), "Please set `PT_HPU_LAZY_MODE=0` to use HPU compile mode" # Note: this is a temporary solution, will be removed in the future assert ( - os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" + os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" # pylint: disable==E1136 ), "Please set `PT_ENABLE_INT64_SUPPORT=1` to use HPU compile mode" From c868ef08e25794c04e44307ec3f051d4a107a08f Mon Sep 17 00:00:00 2001 From: wenhuach21 Date: Wed, 13 Nov 2024 11:18:17 +0800 Subject: [PATCH 4/5] fix --- auto_round/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/auto_round/utils.py b/auto_round/utils.py index cafd2430..ccf6de27 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -890,11 +890,11 @@ def torch_version_at_least(version_string): def check_hpu_compile_mode(): assert ( - os.getenv["PT_HPU_LAZY_MODE"] == "0" # pylint: disable==E1136 + os.getenv["PT_HPU_LAZY_MODE"] == "0" # pylint: disable=E1136 ), "Please set `PT_HPU_LAZY_MODE=0` to use HPU compile mode" # Note: this is a temporary solution, will be removed in the future assert ( - os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" # pylint: disable==E1136 + os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" # pylint: disable=E1136 ), "Please set `PT_ENABLE_INT64_SUPPORT=1` to use HPU compile mode" From 052376e01e0ffb15a02bf58e8d3dda5e22bfaa91 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 13 Nov 2024 13:16:20 +0800 Subject: [PATCH 5/5] Update utils.py --- auto_round/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/auto_round/utils.py b/auto_round/utils.py index ccf6de27..4037a19b 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -890,11 +890,11 @@ def torch_version_at_least(version_string): def check_hpu_compile_mode(): assert ( - os.getenv["PT_HPU_LAZY_MODE"] == "0" # pylint: disable=E1136 + os.getenv("PT_HPU_LAZY_MODE") == "0" ), "Please set `PT_HPU_LAZY_MODE=0` to use HPU compile mode" # Note: this is a temporary solution, will be removed in the future assert ( - os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" # pylint: disable=E1136 + os.getenv("PT_ENABLE_INT64_SUPPORT") == "1" ), "Please set `PT_ENABLE_INT64_SUPPORT=1` to use HPU compile mode"
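
---

For readers following the "--device" handling that these patches adjust, the remapping done in tune() and eval() can be illustrated standalone. The sketch below is not part of the patch series: the function name, error message, and the assumption that "--device" carries comma-separated GPU indices are illustrative only; non-numeric values such as "cpu" or "hpu" are resolved elsewhere (e.g. via detect_device).

    import os

    def remap_visible_devices(device_arg: str) -> None:
        # Illustrative sketch (not from the patch): map "--device 0,1" onto
        # CUDA_VISIBLE_DEVICES the same way the tune()/eval() helpers do.
        devices = device_arg.replace(" ", "").split(",")
        if not all(d.isdigit() for d in devices):
            return  # non-numeric devices are handled by a separate code path
        current = os.environ.get("CUDA_VISIBLE_DEVICES")
        if current is None:
            # No pre-set mapping: expose the requested indices directly.
            os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(devices)
            return
        visible = current.split(",")
        picked = []
        for d in devices:
            if int(d) >= len(visible):
                raise ValueError(
                    "Invalid '--device' value: it must be smaller than the "
                    "number of available devices.")
            picked.append(visible[int(d)])
        # e.g. with CUDA_VISIBLE_DEVICES=4,5, "--device 0,1" selects "4,5",
        # while "--device 4,5" is rejected above.
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(picked)

    remap_visible_devices("0,1")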