From aa3a60cc4d817340a33459a8fbefce31e63b3807 Mon Sep 17 00:00:00 2001 From: wenhuach21 Date: Wed, 13 Nov 2024 08:49:03 +0800 Subject: [PATCH 1/5] fix eval device issue --- README.md | 64 +++++++++++++++--------------- auto_round/script/llm.py | 18 +++++---- auto_round/script/mllm.py | 4 +- examples/language-modeling/main.py | 5 +-- 4 files changed, 45 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index f5031df8..0e823178 100644 --- a/README.md +++ b/README.md @@ -291,39 +291,39 @@ Please note that an asterisk (*) indicates third-party quantized models, which m different recipe. We greatly appreciate their efforts and encourage more users to share their models, as we cannot release most of the models ourselves. - Model | Supported | -|----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| meta-llama/Meta-Llama-3.1-70B-Instruct | [recipe](https://huggingface.co/Intel/Meta-Llama-3.1-70B-Instruct-int4-inc) | + Model | Supported | +|----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| meta-llama/Meta-Llama-3.1-70B-Instruct | [recipe](https://huggingface.co/Intel/Meta-Llama-3.1-70B-Instruct-int4-inc) | | meta-llama/Meta-Llama-3.1-8B-Instruct | [model-kaitchup-autogptq-int4*](https://huggingface.co/kaitchup/Meta-Llama-3.1-8B-Instruct-autoround-gptq-4bit-asym), [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Meta-Llama-3.1-8B-Instruct-autoround-gptq-4bit-sym), [recipe](https://huggingface.co/Intel/Meta-Llama-3.1-8B-Instruct-int4-inc) | -| meta-llama/Meta-Llama-3.1-8B | [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Meta-Llama-3.1-8B-autoround-gptq-4bit-sym) | -| Qwen/Qwen-VL | [accuracy](./examples/multimodal-modeling/Qwen-VL/README.md), [recipe](./examples/multimodal-modeling/Qwen-VL/run_autoround.sh) -| Qwen/Qwen2-7B | [model-autoround-sym-int4](https://huggingface.co/Intel/Qwen2-7B-int4-inc), [model-autogptq-sym-int4](https://huggingface.co/Intel/Qwen2-7B-int4-inc) | -| THUDM/glm-4-9b-chat | [recipe](./docs/glm-4-9b-chat-recipe.md) | -| Qwen/Qwen2-57B-A14B-Instruct | [model-autoround-sym-int4](https://huggingface.co/Intel/Qwen2-57B-A14B-Instruct-int4-inc),[model-autogptq-sym-int4](https://huggingface.co/Intel/Qwen2-57B-A14B-Instruct-int4-inc) | -| 01-ai/Yi-1.5-9B | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/Yi-1.5-9B-4bit-gptq-autoround) | -| 01-ai/Yi-1.5-9B-Chat | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/Yi-1.5-9B-Chat-4bit-gptq-autoround) | -| Intel/neural-chat-7b-v3-3 | [model-autogptq-int4](https://huggingface.co/Intel/neural-chat-7b-v3-3-int4-inc) | -| Intel/neural-chat-7b-v3-1 | [model-autogptq-int4](https://huggingface.co/Intel/neural-chat-7b-v3-1-int4-inc) | -| TinyLlama-1.1B-intermediate | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/TinyLlama-1.1B-intermediate-step-1341k-3T-autoround-lm_head-symFalse) | -| mistralai/Mistral-7B-v0.1 | 
[model-autogptq-lmhead-int4](https://huggingface.co/Intel/Mistral-7B-v0.1-int4-inc-lmhead), [model-autogptq-int4](https://huggingface.co/Intel/Mistral-7B-v0.1-int4-inc) | -| google/gemma-2b | [model-autogptq-int4](https://huggingface.co/Intel/gemma-2b-int4-inc) | -| tiiuae/falcon-7b | [model-autogptq-int4-G64](https://huggingface.co/Intel/falcon-7b-int4-inc) | -| sapienzanlp/modello-italia-9b | [model-fbaldassarri-autogptq-int4*](https://huggingface.co/fbaldassarri/modello-italia-9b-autoround-w4g128-cpu) | -| microsoft/phi-2 | [model-autoround-sym-int4](https://huggingface.co/Intel/phi-2-int4-inc) [model-autogptq-sym-int4](https://huggingface.co/Intel/phi-2-int4-inc) | -| microsoft/Phi-3.5-mini-instruct | [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Phi-3.5-Mini-instruct-AutoRound-4bit) | -| microsoft/Phi-3-vision-128k-instruct | [recipe](./examples/multimodal-modeling/Phi-3-vision/run_autoround.sh) -| mistralai/Mistral-7B-Instruct-v0.2 | [accuracy](./docs/Mistral-7B-Instruct-v0.2-acc.md), [recipe](./examples/language-modeling/scripts/Mistral-7B-Instruct-v0.2.sh), [example](./examples/language-modeling/) | -| mistralai/Mixtral-8x7B-Instruct-v0.1 | [accuracy](./docs/Mixtral-8x7B-Instruct-v0.1-acc.md), [recipe](./examples/language-modeling/scripts/Mixtral-8x7B-Instruct-v0.1.sh), [example](./examples/language-modeling/) | -| mistralai/Mixtral-8x7B-v0.1 | [accuracy](./docs/Mixtral-8x7B-v0.1-acc.md), [recipe](./examples/language-modeling/scripts/Mixtral-8x7B-v0.1.sh), [example](./examples/language-modeling/) | -| meta-llama/Meta-Llama-3-8B-Instruct | [accuracy](./docs/Meta-Llama-3-8B-Instruct-acc.md), [recipe](./examples/language-modeling/scripts/Meta-Llama-3-8B-Instruct.sh), [example](./examples/language-modeling/) | -| google/gemma-7b | [accuracy](./docs/gemma-7b-acc.md), [recipe](./examples/language-modeling/scripts/gemma-7b.sh), [example](./examples/language-modeling/) | -| meta-llama/Llama-2-7b-chat-hf | [accuracy](./docs/Llama-2-7b-chat-hf-acc.md), [recipe](./examples/language-modeling/scripts/Llama-2-7b-chat-hf.sh), [example](./examples/language-modeling/) | -| Qwen/Qwen1.5-7B-Chat | [accuracy](./docs/Qwen1.5-7B-Chat-acc.md), [sym recipe](./examples/language-modeling/scripts/Qwen1.5-7B-Chat-sym.sh), [asym recipe ](./examples/language-modeling/scripts/Qwen1.5-7B-Chat-asym.sh), [example](./examples/language-modeling/) | -| baichuan-inc/Baichuan2-7B-Chat | [accuracy](./docs/baichuan2-7b-chat-acc.md), [recipe](./examples/language-modeling/scripts/baichuan2-7b-chat.sh), [example](./examples/language-modeling/) | -| 01-ai/Yi-6B-Chat | [accuracy](./docs/Yi-6B-Chat-acc.md), [recipe](./examples/language-modeling/scripts/Yi-6B-Chat.sh), [example](./examples/language-modeling/) | -| facebook/opt-2.7b | [accuracy](./docs/opt-2.7b-acc.md), [recipe](./examples/language-modeling/scripts/opt-2.7b.sh), [example](./examples/language-modeling/) | -| bigscience/bloom-3b | [accuracy](./docs/bloom-3B-acc.md), [recipe](./examples/language-modeling/scripts/bloom-3b.sh), [example](./examples/language-modeling/) | -| EleutherAI/gpt-j-6b | [accuracy](./docs/gpt-j-6B-acc.md), [recipe](./examples/language-modeling/scripts/gpt-j-6b.sh), [example](./examples/language-modeling/) | +| meta-llama/Meta-Llama-3.1-8B | [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Meta-Llama-3.1-8B-autoround-gptq-4bit-sym) | +| Qwen/Qwen-VL | [accuracy](./examples/multimodal-modeling/Qwen-VL/README.md), [recipe](./examples/multimodal-modeling/Qwen-VL/run_autoround.sh) +| Qwen/Qwen2-7B | 
[model-autoround-sym-int4](https://huggingface.co/Intel/Qwen2-7B-int4-inc), [model-autogptq-sym-int4](https://huggingface.co/Intel/Qwen2-7B-int4-inc) | +| THUDM/glm-4-9b-chat | [recipe](./docs/glm-4-9b-chat-recipe.md) | +| Qwen/Qwen2-57B-A14B-Instruct | [model-autoround-sym-int4](https://huggingface.co/Intel/Qwen2-57B-A14B-Instruct-int4-inc),[model-autogptq-sym-int4](https://huggingface.co/Intel/Qwen2-57B-A14B-Instruct-int4-inc) | +| 01-ai/Yi-1.5-9B | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/Yi-1.5-9B-4bit-gptq-autoround) | +| 01-ai/Yi-1.5-9B-Chat | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/Yi-1.5-9B-Chat-4bit-gptq-autoround) | +| Intel/neural-chat-7b-v3-3 | [model-autogptq-int4](https://huggingface.co/Intel/neural-chat-7b-v3-3-int4-inc) | +| Intel/neural-chat-7b-v3-1 | [model-autogptq-int4](https://huggingface.co/Intel/neural-chat-7b-v3-1-int4-inc) | +| TinyLlama-1.1B-intermediate | [model-LnL-AI-autogptq-int4*](https://huggingface.co/LnL-AI/TinyLlama-1.1B-intermediate-step-1341k-3T-autoround-lm_head-symFalse) | +| mistralai/Mistral-7B-v0.1 | [model-autogptq-lmhead-int4](https://huggingface.co/Intel/Mistral-7B-v0.1-int4-inc-lmhead), [model-autogptq-int4](https://huggingface.co/Intel/Mistral-7B-v0.1-int4-inc) | +| google/gemma-2b | [model-autogptq-int4](https://huggingface.co/Intel/gemma-2b-int4-inc) | +| tiiuae/falcon-7b | [model-autogptq-int4-G64](https://huggingface.co/Intel/falcon-7b-int4-inc) | +| sapienzanlp/modello-italia-9b | [model-fbaldassarri-autogptq-int4*](https://huggingface.co/fbaldassarri/modello-italia-9b-autoround-w4g128-cpu) | +| microsoft/phi-2 | [model-autoround-sym-int4](https://huggingface.co/Intel/phi-2-int4-inc) [model-autogptq-sym-int4](https://huggingface.co/Intel/phi-2-int4-inc) | +| microsoft/Phi-3.5-mini-instruct | [model-kaitchup-autogptq-sym-int4*](https://huggingface.co/kaitchup/Phi-3.5-Mini-instruct-AutoRound-4bit) | +| microsoft/Phi-3-vision-128k-instruct | [recipe](./examples/multimodal-modeling/Phi-3-vision/run_autoround.sh) +| mistralai/Mistral-7B-Instruct-v0.2 | [accuracy](./docs/Mistral-7B-Instruct-v0.2-acc.md), [recipe](./examples/language-modeling/scripts/Mistral-7B-Instruct-v0.2.sh) | +| mistralai/Mixtral-8x7B-Instruct-v0.1 | [accuracy](./docs/Mixtral-8x7B-Instruct-v0.1-acc.md), [recipe](./examples/language-modeling/scripts/Mixtral-8x7B-Instruct-v0.1.sh) | +| mistralai/Mixtral-8x7B-v0.1 | [accuracy](./docs/Mixtral-8x7B-v0.1-acc.md), [recipe](./examples/language-modeling/scripts/Mixtral-8x7B-v0.1.sh) | +| meta-llama/Meta-Llama-3-8B-Instruct | [accuracy](./docs/Meta-Llama-3-8B-Instruct-acc.md), [recipe](./examples/language-modeling/scripts/Meta-Llama-3-8B-Instruct.sh) | +| google/gemma-7b | [accuracy](./docs/gemma-7b-acc.md), [recipe](./examples/language-modeling/scripts/gemma-7b.sh) | +| meta-llama/Llama-2-7b-chat-hf | [accuracy](./docs/Llama-2-7b-chat-hf-acc.md), [recipe](./examples/language-modeling/scripts/Llama-2-7b-chat-hf.sh) | +| Qwen/Qwen1.5-7B-Chat | [accuracy](./docs/Qwen1.5-7B-Chat-acc.md), [sym recipe](./examples/language-modeling/scripts/Qwen1.5-7B-Chat-sym.sh), [asym recipe ](./examples/language-modeling/scripts/Qwen1.5-7B-Chat-asym.sh) | +| baichuan-inc/Baichuan2-7B-Chat | [accuracy](./docs/baichuan2-7b-chat-acc.md), [recipe](./examples/language-modeling/scripts/baichuan2-7b-chat.sh) | +| 01-ai/Yi-6B-Chat | [accuracy](./docs/Yi-6B-Chat-acc.md), [recipe](./examples/language-modeling/scripts/Yi-6B-Chat.sh) | +| facebook/opt-2.7b | [accuracy](./docs/opt-2.7b-acc.md), 
[recipe](./examples/language-modeling/scripts/opt-2.7b.sh) | +| bigscience/bloom-3b | [accuracy](./docs/bloom-3B-acc.md), [recipe](./examples/language-modeling/scripts/bloom-3b.sh) | +| EleutherAI/gpt-j-6b | [accuracy](./docs/gpt-j-6B-acc.md), [recipe](./examples/language-modeling/scripts/gpt-j-6b.sh) | ## Integration diff --git a/auto_round/script/llm.py b/auto_round/script/llm.py index faf616eb..eb137b33 100644 --- a/auto_round/script/llm.py +++ b/auto_round/script/llm.py @@ -27,6 +27,8 @@ # limitations under the License. import argparse +from auto_round.utils import detect_device + class BasicArgumentParser(argparse.ArgumentParser): def __init__(self, *args, **kwargs): @@ -215,9 +217,8 @@ def tune(args): tasks = args.tasks if args.format is None: args.format = "auto_round" - supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:gptq", "auto_round:auto_gptq", - "auto_round:auto_gptq:marlin", "auto_round:gptq:marlin", "auto_round:auto_awq", - "auto_round:awq", "auto_gptq:marlin", "itrex", "iterx_xpu", "fake"] + supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:auto_gptq", "auto_round:auto_awq", + "auto_gptq:marlin", "itrex", "iterx_xpu", "fake"] formats = args.format.replace(' ', '').split(",") for format in formats: if format not in supported_formats: @@ -233,7 +234,7 @@ def tune(args): ##must set this before import torch import os - devices = args.device.replace(" ","").split(',') + devices = args.device.replace(" ", "").split(',') use_auto_mapping = False if all(s.isdigit() for s in devices): if "CUDA_VISIBLE_DEVICES" in os.environ: @@ -247,7 +248,7 @@ def tune(args): "Invalid '--device' value: It must be smaller than the number of available devices. " "For example, with CUDA_VISIBLE_DEVICES=4,5, " "--device 0,1 is valid, but --device 4,5 is not supported.") - visible_devices =','.join(pick_device) + visible_devices = ','.join(pick_device) os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices else: os.environ["CUDA_VISIBLE_DEVICES"] = args.device @@ -451,7 +452,7 @@ def tune(args): def eval(args): import os - devices = args.device.replace(" ","").split(',') + devices = args.device.replace(" ", "").split(',') parallelism = False if all(s.isdigit() for s in devices): @@ -466,7 +467,7 @@ def eval(args): "Invalid '--device' value: It must be smaller than the number of available devices. 
" "For example, with CUDA_VISIBLE_DEVICES=4,5, " "--device 0,1 is valid, but --device 4,5 is not supported.") - visible_devices =','.join(pick_device) + visible_devices = ','.join(pick_device) os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices else: os.environ["CUDA_VISIBLE_DEVICES"] = args.device @@ -474,7 +475,8 @@ def eval(args): devices = args.device.replace(" ", "").split(',') parallelism = True device_str = None - + else: + device_str = detect_device(args.device.replace(" ", "")) from auto_round.eval.evaluation import simple_evaluate diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index ca5d0351..021c3e2f 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -209,9 +209,7 @@ def setup_parser(): def tune(args): if args.format is None: args.format = "auto_round" - supported_formats = ["auto_round", "auto_round:gptq", "auto_round:auto_gptq", - "auto_round:auto_gptq:marlin", "auto_round:gptq:marlin", "auto_round:auto_awq", - "auto_round:awq"] + supported_formats = ["auto_round" "auto_round:auto_gptq", "auto_round:auto_awq"] if not args.quant_nontext_module: supported_formats.extend(["auto_gptq", "auto_gptq:marlin"]) diff --git a/examples/language-modeling/main.py b/examples/language-modeling/main.py index 50330715..4a3ca875 100644 --- a/examples/language-modeling/main.py +++ b/examples/language-modeling/main.py @@ -151,9 +151,8 @@ if args.format is None: args.format = "auto_round" - supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:gptq", "auto_round:auto_gptq", - "auto_round:auto_gptq:marlin", "auto_round:gptq:marlin", "auto_round:auto_awq", - "auto_round:awq", "auto_gptq:marlin", "itrex", "iterx_xpu", "fake"] + supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:auto_gptq","auto_round:auto_awq", + "auto_gptq:marlin", "itrex", "iterx_xpu", "fake"] formats = args.format.replace(' ', '').split(",") for format in formats: if format not in supported_formats: From 0976a5acd9f7b04ff356864ae758e081bbb04f8a Mon Sep 17 00:00:00 2001 From: wenhuach21 Date: Wed, 13 Nov 2024 08:50:58 +0800 Subject: [PATCH 2/5] fix --- auto_round/script/mllm.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index 021c3e2f..d3337468 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -51,9 +51,9 @@ def __init__(self, *args, **kwargs): help="whether to use asym quantization") self.add_argument("--dataset", type=str, default="liuhaotian/llava_conv_58k", - help="the dataset for quantization training." - " current support llava_conv_58k,llava_instruct_80k " - "It can be a custom one.") + help="the dataset for quantization training." + " current support llava_conv_58k,llava_instruct_80k " + "It can be a custom one.") self.add_argument("--lr", default=None, type=float, help="learning rate, if None, it will be set to 1.0/iters automatically") @@ -143,12 +143,12 @@ def __init__(self, *args, **kwargs): "Can be a dir path or multiple dir path with format as " "'image=path_to_image,video=path_to_video,audio=path_to_audio'" "By default, it will search in the relative path, " - "and if not find, will automatic download.") + "and if not find, will automatic download.") self.add_argument("--template", default=None, type=str, help="the template for building training dataset. 
It can be a custom one.") - - self.add_argument("--truncation", action="store_true", + + self.add_argument("--truncation", action="store_true", help="whether to truncate sequences at the maximum length.") ## ======================= VLM eval======================= @@ -209,7 +209,7 @@ def setup_parser(): def tune(args): if args.format is None: args.format = "auto_round" - supported_formats = ["auto_round" "auto_round:auto_gptq", "auto_round:auto_awq"] + supported_formats = ["auto_round", "auto_round:auto_gptq", "auto_round:auto_awq"] if not args.quant_nontext_module: supported_formats.extend(["auto_gptq", "auto_gptq:marlin"]) @@ -259,7 +259,7 @@ def tune(args): if "llava" in model_name: from llava.model.builder import load_pretrained_model # pylint: disable=E0401 tokenizer, model, image_processor, _ = load_pretrained_model(model_name, model_base=None, model_name=model_name, - torch_dtype=torch_dtype) + torch_dtype=torch_dtype) model_type = "llava" else: config = AutoConfig.from_pretrained(model_name, trust_remote_code=not args.disable_trust_remote_code) @@ -275,9 +275,9 @@ def tune(args): cls = MllamaForConditionalGeneration else: cls = AutoModelForCausalLM - + model = cls.from_pretrained( - model_name,trust_remote_code=not args.disable_trust_remote_code, torch_dtype=torch_dtype, + model_name, trust_remote_code=not args.disable_trust_remote_code, torch_dtype=torch_dtype, device_map="auto" if use_auto_mapping else None) if "cogvlm2" in model_name: model.config.model_type = "cogvlm2" @@ -327,10 +327,10 @@ def tune(args): if args.quant_lm_head and args.low_gpu_mem_usage: print(f"warning, low_gpu_mem_usage=False is strongly recommended if the whole model could be loaded to " f"gpu") - - autoround = round(model, tokenizer, image_processor=image_processor, dataset=args.dataset, + + autoround = round(model, tokenizer, image_processor=image_processor, dataset=args.dataset, extra_data_dir=args.extra_data_dir, bits=args.bits, group_size=args.group_size, - sym=not args.asym, batch_size=args.batch_size, seqlen=seqlen, nblocks=args.nblocks, + sym=not args.asym, batch_size=args.batch_size, seqlen=seqlen, nblocks=args.nblocks, iters=args.iters, lr=args.lr, minmax_lr=args.minmax_lr, amp=not args.disable_amp, enable_quanted_input=not args.disable_quanted_input, truncation=args.truncation, nsamples=args.nsamples, low_gpu_mem_usage=args.low_gpu_mem_usage, From 68ca657f80e31e9f39be65dd22862360db815636 Mon Sep 17 00:00:00 2001 From: wenhuach21 Date: Wed, 13 Nov 2024 10:24:06 +0800 Subject: [PATCH 3/5] fix preci issue --- auto_round/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/auto_round/utils.py b/auto_round/utils.py index 3be834f8..cafd2430 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -890,11 +890,11 @@ def torch_version_at_least(version_string): def check_hpu_compile_mode(): assert ( - os.getenv["PT_HPU_LAZY_MODE"] == "0" + os.getenv["PT_HPU_LAZY_MODE"] == "0" # pylint: disable==E1136 ), "Please set `PT_HPU_LAZY_MODE=0` to use HPU compile mode" # Note: this is a temporary solution, will be removed in the future assert ( - os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" + os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" # pylint: disable==E1136 ), "Please set `PT_ENABLE_INT64_SUPPORT=1` to use HPU compile mode" From c868ef08e25794c04e44307ec3f051d4a107a08f Mon Sep 17 00:00:00 2001 From: wenhuach21 Date: Wed, 13 Nov 2024 11:18:17 +0800 Subject: [PATCH 4/5] fix --- auto_round/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/auto_round/utils.py b/auto_round/utils.py index cafd2430..ccf6de27 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -890,11 +890,11 @@ def torch_version_at_least(version_string): def check_hpu_compile_mode(): assert ( - os.getenv["PT_HPU_LAZY_MODE"] == "0" # pylint: disable==E1136 + os.getenv["PT_HPU_LAZY_MODE"] == "0" # pylint: disable=E1136 ), "Please set `PT_HPU_LAZY_MODE=0` to use HPU compile mode" # Note: this is a temporary solution, will be removed in the future assert ( - os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" # pylint: disable==E1136 + os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" # pylint: disable=E1136 ), "Please set `PT_ENABLE_INT64_SUPPORT=1` to use HPU compile mode" From 052376e01e0ffb15a02bf58e8d3dda5e22bfaa91 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 13 Nov 2024 13:16:20 +0800 Subject: [PATCH 5/5] Update utils.py --- auto_round/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/auto_round/utils.py b/auto_round/utils.py index ccf6de27..4037a19b 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -890,11 +890,11 @@ def torch_version_at_least(version_string): def check_hpu_compile_mode(): assert ( - os.getenv["PT_HPU_LAZY_MODE"] == "0" # pylint: disable=E1136 + os.getenv("PT_HPU_LAZY_MODE") == "0" ), "Please set `PT_HPU_LAZY_MODE=0` to use HPU compile mode" # Note: this is a temporary solution, will be removed in the future assert ( - os.getenv["PT_ENABLE_INT64_SUPPORT"] == "1" # pylint: disable=E1136 + os.getenv("PT_ENABLE_INT64_SUPPORT") == "1" ), "Please set `PT_ENABLE_INT64_SUPPORT=1` to use HPU compile mode"
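
---

For readers following the "--device" handling that these patches adjust, the remapping done in tune() and eval() can be illustrated standalone. The sketch below is not part of the patch series: the function name, error message, and the assumption that "--device" carries comma-separated GPU indices are illustrative only; non-numeric values such as "cpu" or "hpu" are resolved elsewhere (e.g. via detect_device).

    import os

    def remap_visible_devices(device_arg: str) -> None:
        # Illustrative sketch (not from the patch): map "--device 0,1" onto
        # CUDA_VISIBLE_DEVICES the same way the tune()/eval() helpers do.
        devices = device_arg.replace(" ", "").split(",")
        if not all(d.isdigit() for d in devices):
            return  # non-numeric devices are handled by a separate code path
        current = os.environ.get("CUDA_VISIBLE_DEVICES")
        if current is None:
            # No pre-set mapping: expose the requested indices directly.
            os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(devices)
            return
        visible = current.split(",")
        picked = []
        for d in devices:
            if int(d) >= len(visible):
                raise ValueError(
                    "Invalid '--device' value: it must be smaller than the "
                    "number of available devices.")
            picked.append(visible[int(d)])
        # e.g. with CUDA_VISIBLE_DEVICES=4,5, "--device 0,1" selects "4,5",
        # while "--device 4,5" is rejected above.
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(picked)

    remap_visible_devices("0,1")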