fix bug of only_text_test check due to inference issue on cpu (#362)
n1ck-guo authored Dec 3, 2024
1 parent d080ea0 commit 3acb119
Showing 3 changed files with 21 additions and 10 deletions.
7 changes: 5 additions & 2 deletions auto_round/mllm/autoround_mllm.py
@@ -30,13 +30,15 @@
 from ..low_cpu_mem.utils import get_layers_before_block


-def _only_text_test(model, tokenizer):
+def _only_text_test(model, tokenizer, device):
     """Test whether the model can use text-only datasets."""
     try:
         text = ["only text", "test"]
         tokenizer.padding_side = 'left'
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
+        if device != model.device.type:
+            model = model.to(device)
         inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(model.device)
         model(**inputs)
         return True
@@ -155,7 +157,8 @@ def __init__(
         from ..calib_dataset import CALIB_DATASETS
         from .mllm_dataset import MLLM_DATASET
         if isinstance(dataset, str):
-            if quant_nontext_module or (dataset in CALIB_DATASETS.keys() and not _only_text_test(model, tokenizer)):
+            if quant_nontext_module or \
+                    (dataset in CALIB_DATASETS.keys() and not _only_text_test(model, tokenizer, device)):
                 if quant_nontext_module:
                     logger.warning("Text only dataset cannot be used for calibrating non-text modules, "
                                    "switching to liuhaotian/llava_conv_58k")
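For context, the patched probe reads as follows as a self-contained sketch. The except branch is truncated in the diff above, so its body here is an assumption (a try-based probe of this shape would report failure); the rest mirrors the hunk. The point of the fix is that the dummy forward pass now runs on the tuning device rather than on cpu, where some models hit the inference issue named in the commit title.

def _only_text_test(model, tokenizer, device):
    """Probe whether the model accepts a plain-text batch on the target device."""
    try:
        text = ["only text", "test"]
        tokenizer.padding_side = 'left'
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # the fix: move the model to the requested device before the dummy
        # forward pass, instead of always probing on whatever device (often
        # cpu) the model was loaded to
        if device != model.device.type:
            model = model.to(device)
        inputs = tokenizer(text, return_tensors="pt", padding=True,
                           truncation=True).to(model.device)
        model(**inputs)
        return True
    except Exception:  # assumed: the truncated branch reports failure
        return False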
12 changes: 8 additions & 4 deletions auto_round/script/llm.py
@@ -45,10 +45,12 @@ def __init__(self, *args, **kwargs):
self.add_argument("--eval_bs", default=None, type=int,
help="batch size in evaluation")

self.add_argument("--device", "--devices", default="auto", type=str,
help="the device to be used for tuning. The default is set to auto,"
"allowing for automatic detection."
"Currently, device settings support CPU, GPU, and HPU.")
self.add_argument("--device", "--devices", default="0", type=str,
help="the device to be used for tuning. "
"Currently, device settings support CPU, GPU, and HPU."
"The default is set to cuda:0,"
"allowing for automatic detection and switch to HPU or CPU."
"set --device 0,1,2 to use multiple cards.")

self.add_argument("--asym", action='store_true',
help="whether to use asym quantization")
@@ -268,6 +270,8 @@ def tune(args):
     devices = args.device.replace(" ", "").split(',')
     if len(devices) > 1:  ## for a 70B model on a single card, "auto" would offload some layers to cpu
         use_auto_mapping = True
+    elif args.device == "auto":
+        use_auto_mapping = True

     import re
     import torch
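Taken together, the two hunks in this file give the following device-selection rule. The sketch isolates it for reading; resolve_device_mapping is an illustrative name, not a function in the repo:

def resolve_device_mapping(device_arg: str):
    use_auto_mapping = False
    devices = device_arg.replace(" ", "").split(',')
    if len(devices) > 1:
        # multiple cards requested, e.g. --device 0,1,2: shard the model
        # across cards via auto mapping instead of offloading layers to cpu
        use_auto_mapping = True
    elif device_arg == "auto":
        # an explicit "auto" still opts in to automatic device mapping
        use_auto_mapping = True
    return devices, use_auto_mapping

# resolve_device_mapping("0,1,2") -> (["0", "1", "2"], True)
# resolve_device_mapping("0")     -> (["0"], False)
# resolve_device_mapping("auto")  -> (["auto"], True)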
12 changes: 8 additions & 4 deletions auto_round/script/mllm.py
@@ -44,10 +44,12 @@ def __init__(self, *args, **kwargs):
self.add_argument("--eval_bs", default=None, type=int,
help="batch size in evaluation")

self.add_argument("--device", "--devices", default="auto", type=str,
help="the device to be used for tuning. The default is set to auto,"
"allowing for automatic detection."
"Currently, device settings support CPU, GPU, and HPU.")
self.add_argument("--device", "--devices", default="0", type=str,
help="the device to be used for tuning. "
"Currently, device settings support CPU, GPU, and HPU."
"The default is set to cuda:0,"
"allowing for automatic detection and switch to HPU or CPU."
"set --device 0,1,2 to use multiple cards.")

self.add_argument("--asym", action='store_true',
help="whether to use asym quantization")
@@ -269,6 +271,8 @@ def tune(args):
args.device = ",".join(map(str, range(len(devices))))
devices = args.device.replace(" ", "").split(',')
use_auto_mapping = True
elif args.device == "auto":
use_auto_mapping == True

     device_str = detect_device(devices[0])

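One detail specific to this file: with several cards requested, args.device is rewritten to re-indexed ids before the first device is resolved. A worked example of that re-indexing, assuming (as the renumbering from 0 suggests) that the original ids have already been masked in via something like CUDA_VISIBLE_DEVICES:

devices = "4,5".replace(" ", "").split(',')          # ["4", "5"]
reindexed = ",".join(map(str, range(len(devices))))  # "0,1": visible cards renumbered from 0
devices = reindexed.replace(" ", "").split(',')      # ["0", "1"]
# device_str = detect_device(devices[0]) then maps "0" to a concrete backend
# device string; the exact value it returns is the helper's concern.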
