fix bug of only_text_test check due to inference issue on cpu (#362)
n1ck-guo authored Dec 3, 2024
1 parent d080ea0 commit 3acb119
Showing 3 changed files with 21 additions and 10 deletions.
7 changes: 5 additions & 2 deletions auto_round/mllm/autoround_mllm.py
@@ -30,13 +30,15 @@
 from ..low_cpu_mem.utils import get_layers_before_block


-def _only_text_test(model, tokenizer):
+def _only_text_test(model, tokenizer, device):
     """Test whether the model can use text-only datasets."""
     try:
         text = ["only text", "test"]
         tokenizer.padding_side = 'left'
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
+        if device != model.device.type:
+            model = model.to(device)
         inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(model.device)
         model(**inputs)
         return True
@@ -155,7 +157,8 @@ def __init__(
         from ..calib_dataset import CALIB_DATASETS
         from .mllm_dataset import MLLM_DATASET
         if isinstance(dataset, str):
-            if quant_nontext_module or (dataset in CALIB_DATASETS.keys() and not _only_text_test(model, tokenizer)):
+            if quant_nontext_module or \
+                    (dataset in CALIB_DATASETS.keys() and not _only_text_test(model, tokenizer, device)):
                 if quant_nontext_module:
                     logger.warning("Text only dataset cannot be used for calibrating non-text modules, "
                                    "switching to liuhaotian/llava_conv_58k")
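For context, the patched probe reads as follows as a self-contained sketch. The except branch is truncated in the diff above, so its body here is an assumption (a try-based probe of this shape would report failure); the rest mirrors the hunk. The point of the fix is that the dummy forward pass now runs on the tuning device rather than on cpu, where some models hit the inference issue named in the commit title.

def _only_text_test(model, tokenizer, device):
    """Probe whether the model accepts a plain-text batch on the target device."""
    try:
        text = ["only text", "test"]
        tokenizer.padding_side = 'left'
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # the fix: move the model to the requested device before the dummy
        # forward pass, instead of always probing on whatever device (often
        # cpu) the model was loaded to
        if device != model.device.type:
            model = model.to(device)
        inputs = tokenizer(text, return_tensors="pt", padding=True,
                           truncation=True).to(model.device)
        model(**inputs)
        return True
    except Exception:  # assumed: the truncated branch reports failure
        return False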
12 changes: 8 additions & 4 deletions auto_round/script/llm.py
@@ -45,10 +45,12 @@ def __init__(self, *args, **kwargs):
self.add_argument("--eval_bs", default=None, type=int,
help="batch size in evaluation")

self.add_argument("--device", "--devices", default="auto", type=str,
help="the device to be used for tuning. The default is set to auto,"
"allowing for automatic detection."
"Currently, device settings support CPU, GPU, and HPU.")
self.add_argument("--device", "--devices", default="0", type=str,
help="the device to be used for tuning. "
"Currently, device settings support CPU, GPU, and HPU."
"The default is set to cuda:0,"
"allowing for automatic detection and switch to HPU or CPU."
"set --device 0,1,2 to use multiple cards.")

self.add_argument("--asym", action='store_true',
help="whether to use asym quantization")
@@ -268,6 +270,8 @@ def tune(args):
     devices = args.device.replace(" ", "").split(',')
     if len(devices) > 1:  ## for a 70B model on a single card, "auto" would offload some layers to cpu
         use_auto_mapping = True
+    elif args.device == "auto":
+        use_auto_mapping = True

     import re
     import torch
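Taken together, the two hunks in this file give the following device-selection rule. The sketch isolates it for reading; resolve_device_mapping is an illustrative name, not a function in the repo:

def resolve_device_mapping(device_arg: str):
    use_auto_mapping = False
    devices = device_arg.replace(" ", "").split(',')
    if len(devices) > 1:
        # multiple cards requested, e.g. --device 0,1,2: shard the model
        # across cards via auto mapping instead of offloading layers to cpu
        use_auto_mapping = True
    elif device_arg == "auto":
        # an explicit "auto" still opts in to automatic device mapping
        use_auto_mapping = True
    return devices, use_auto_mapping

# resolve_device_mapping("0,1,2") -> (["0", "1", "2"], True)
# resolve_device_mapping("0")     -> (["0"], False)
# resolve_device_mapping("auto")  -> (["auto"], True)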
12 changes: 8 additions & 4 deletions auto_round/script/mllm.py
@@ -44,10 +44,12 @@ def __init__(self, *args, **kwargs):
self.add_argument("--eval_bs", default=None, type=int,
help="batch size in evaluation")

self.add_argument("--device", "--devices", default="auto", type=str,
help="the device to be used for tuning. The default is set to auto,"
"allowing for automatic detection."
"Currently, device settings support CPU, GPU, and HPU.")
self.add_argument("--device", "--devices", default="0", type=str,
help="the device to be used for tuning. "
"Currently, device settings support CPU, GPU, and HPU."
"The default is set to cuda:0,"
"allowing for automatic detection and switch to HPU or CPU."
"set --device 0,1,2 to use multiple cards.")

self.add_argument("--asym", action='store_true',
help="whether to use asym quantization")
@@ -269,6 +271,8 @@ def tune(args):
args.device = ",".join(map(str, range(len(devices))))
devices = args.device.replace(" ", "").split(',')
use_auto_mapping = True
elif args.device == "auto":
use_auto_mapping == True

     device_str = detect_device(devices[0])

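One detail specific to this file: with several cards requested, args.device is rewritten to re-indexed ids before the first device is resolved. A worked example of that re-indexing, assuming (as the renumbering from 0 suggests) that the original ids have already been masked in via something like CUDA_VISIBLE_DEVICES:

devices = "4,5".replace(" ", "").split(',')          # ["4", "5"]
reindexed = ",".join(map(str, range(len(devices))))  # "0,1": visible cards renumbered from 0
devices = reindexed.replace(" ", "").split(',')      # ["0", "1"]
# device_str = detect_device(devices[0]) then maps "0" to a concrete backend
# device string; the exact value it returns is the helper's concern.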
