replace error with warning for Intel CPU check #737

Merged 6 commits on Dec 4, 2024

This PR replaces a hard error with a warning in the IPEX/CPU backend check: select_quant_linear no longer raises a ValueError when the CPU lacks avx512_vnni, and instead logs a warning when the CPU vendor is not Intel. The remaining commits reorder imports across the quant-linear modules and add missing end-of-file newlines.

Changes from all commits
examples/quantization/transformers_usage.py (2 changes: 1 addition & 1 deletion)

@@ -10,4 +10,4 @@
 
 model = AutoModelForCausalLM.from_pretrained("./opt-125m-gptq", device_map="auto")
 
-print(tokenizer.decode(model.generate(**tokenizer("gptqmodel is", return_tensors="pt").to(model.device))[0]))
\ No newline at end of file
+print(tokenizer.decode(model.generate(**tokenizer("gptqmodel is", return_tensors="pt").to(model.device))[0]))
gptqmodel/nn_modules/qlinear/qlinear_bitblas.py (2 changes: 1 addition & 1 deletion)

@@ -11,8 +11,8 @@
 import torch.nn as nn
 from gptqmodel.nn_modules.qlinear import BaseQuantLinear
 
-from ...utils.logger import setup_logger
 from ...models._const import DEVICE
+from ...utils.logger import setup_logger
 
 logger = setup_logger()
gptqmodel/nn_modules/qlinear/qlinear_cuda.py (4 changes: 2 additions & 2 deletions)

@@ -1,7 +1,7 @@
 import torch
 from gptqmodel.nn_modules.qlinear import BaseQuantLinear
-from gptqmodel.utils.logger import setup_logger
 from gptqmodel.nn_modules.qlinear.qlinear_torch import TorchQuantLinear
+from gptqmodel.utils.logger import setup_logger
 
 from ...models._const import DEVICE
 
 logger = setup_logger()
gptqmodel/nn_modules/qlinear/qlinear_exllama.py (1 change: 1 addition & 0 deletions)

@@ -9,6 +9,7 @@
 import torch.nn.functional as F
 import transformers
 from gptqmodel.nn_modules.qlinear import BaseQuantLinear
+
 from ...models._const import DEVICE
 
 exllama_import_exception = None
gptqmodel/nn_modules/qlinear/qlinear_exllamav2.py (2 changes: 1 addition & 1 deletion)

@@ -6,8 +6,8 @@
 import torch.nn.functional as F
 from gptqmodel.nn_modules.qlinear import BaseQuantLinear
 
-from ...utils.logger import setup_logger
 from ...models._const import DEVICE
+from ...utils.logger import setup_logger
 
 exllama_v2_import_exception = None
 try:
gptqmodel/nn_modules/qlinear/qlinear_marlin.py (2 changes: 1 addition & 1 deletion)

@@ -5,9 +5,9 @@
 
 import numpy as np
 import torch
+from gptqmodel.nn_modules.qlinear import BaseQuantLinear
 from torch.nn.parameter import Parameter
 
-from gptqmodel.nn_modules.qlinear import BaseQuantLinear
 from ...models._const import DEVICE
 
 marlin_import_exception = None
gptqmodel/nn_modules/qlinear/qlinear_torch.py (2 changes: 1 addition & 1 deletion)

@@ -4,10 +4,10 @@
 import torch
 import torch.nn as nn
 import transformers
 
+from gptqmodel.models._const import DEVICE
 from gptqmodel.nn_modules.qlinear import BaseQuantLinear
 from gptqmodel.utils.logger import setup_logger
 
-from ...models._const import DEVICE
 
 logger = setup_logger()
gptqmodel/nn_modules/qlinear/qlinear_tritonv2.py (2 changes: 1 addition & 1 deletion)

@@ -6,10 +6,10 @@
 import transformers
 from packaging import version
 
+from ...models._const import DEVICE
 from ...utils.logger import setup_logger
 from ..triton_utils.mixin import TritonModuleMixin
 from . import BaseQuantLinear
-from ...models._const import DEVICE
 
 try:
     from triton import __version__ as triton_version
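All of the qlinear import changes above apply one ordering convention (an isort-style normalization is an assumption on our part; the PR does not name a tool): absolute imports sorted alphabetically, then relative imports ordered from deepest (...) to shallowest (.), with blank lines between groups. The resulting header of qlinear_tritonv2.py, for example, reads:

import transformers
from packaging import version

from ...models._const import DEVICE
from ...utils.logger import setup_logger
from ..triton_utils.mixin import TritonModuleMixin
from . import BaseQuantLinear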
gptqmodel/utils/importer.py (15 changes: 9 additions & 6 deletions)

@@ -1,18 +1,19 @@
 from collections import OrderedDict
-import torch
 from typing import Optional, Union
 
-from .backend import BACKEND
+import torch
+
 from ..nn_modules.qlinear.qlinear_bitblas import BitBLASQuantLinear
 from ..nn_modules.qlinear.qlinear_cuda import CudaQuantLinear
 from ..nn_modules.qlinear.qlinear_exllama import ExllamaQuantLinear
 from ..nn_modules.qlinear.qlinear_exllamav2 import ExllamaV2QuantLinear
 from ..nn_modules.qlinear.qlinear_ipex import IPEXQuantLinear
 from ..nn_modules.qlinear.qlinear_marlin import MarlinQuantLinear
-from ..nn_modules.qlinear.qlinear_tritonv2 import TRITON_AVAILABLE, TRITON_INSTALL_HINT, TritonV2QuantLinear
 from ..nn_modules.qlinear.qlinear_torch import TorchQuantLinear
+from ..nn_modules.qlinear.qlinear_tritonv2 import TRITON_AVAILABLE, TRITON_INSTALL_HINT, TritonV2QuantLinear
 from ..quantization import FORMAT
 from ..utils.logger import setup_logger
+from .backend import BACKEND
 
 logger = setup_logger()

@@ -125,10 +126,12 @@ def select_quant_linear(
         if hasattr(torch, "xpu") and torch.xpu.is_available():
             return IPEXQuantLinear
 
-        # Fallback to IPEX/CPU if cpu supports AVX512
+        # Fallback to IPEX/CPU
         from device_smi import Device
-        if "avx512_vnni" not in Device("cpu").features:
-            raise ValueError("IPEX/CPU requires minimum avx512_vnni support.")
+
+        cpu_vendor = Device("cpu").vendor
+        if cpu_vendor != "intel":
+            logger.warning(f"Intel/IPEX cpu kernel is only validated and optimized for Intel cpu. Running on non-Intel cpu is not guaranteed. Current cpu vendor: `{cpu_vendor}`.")
 
         return IPEXQuantLinear
     elif backend == BACKEND.TORCH:
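The substantive change of this PR is the second hunk above: the IPEX/CPU fallback no longer raises a ValueError on CPUs without avx512_vnni; it checks the vendor reported by device_smi, warns if it is not Intel, and still returns IPEXQuantLinear. A minimal standalone sketch of the new behavior (device_smi and its Device("cpu").vendor attribute are taken from the diff; the warn_if_non_intel_cpu wrapper name is ours, not part of gptqmodel's API):

import logging

from device_smi import Device

logger = logging.getLogger(__name__)

def warn_if_non_intel_cpu() -> None:
    # Old behavior: raise ValueError when "avx512_vnni" was missing from
    # Device("cpu").features, aborting kernel selection outright.
    # New behavior: warn on non-Intel vendors and keep going, so the
    # IPEX/CPU kernel remains usable (if unvalidated) on other CPUs.
    cpu_vendor = Device("cpu").vendor
    if cpu_vendor != "intel":
        logger.warning(
            "Intel/IPEX cpu kernel is only validated and optimized for Intel cpu. "
            "Running on non-Intel cpu is not guaranteed. "
            f"Current cpu vendor: `{cpu_vendor}`."
        )

warn_if_non_intel_cpu()  # on e.g. an AMD machine this logs a warning instead of crashing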
gptqmodel/utils/marlin.py (4 changes: 2 additions & 2 deletions)

@@ -4,11 +4,11 @@
 import torch
 from accelerate.utils import find_tied_parameters
 
-from .model import recurse_getattr, recurse_setattr
-from .progress import ProgressBar
 from ..nn_modules.qlinear.qlinear_marlin import MarlinQuantLinear, _get_perms, unpack_qzeros
 from ..quantization import FORMAT, QuantizeConfig
 from ..utils.logger import setup_logger
+from .model import recurse_getattr, recurse_setattr
+from .progress import ProgressBar
 
 logger = setup_logger()
gptqmodel/utils/model.py (11 changes: 5 additions & 6 deletions)

@@ -20,18 +20,17 @@
 from transformers import AutoConfig, PretrainedConfig
 from transformers.utils.hub import cached_file
 
-from .backend import BACKEND
-from .exllama import exllama_set_max_input_length
-from .importer import select_quant_linear
-from .logger import setup_logger
-from .progress import ProgressBar
 from ..models._const import CPU, EXLLAMA_DEFAULT_MAX_INPUT_LENGTH, EXPERT_INDEX_PLACEHOLDER, SUPPORTED_MODELS
 from ..nn_modules.qlinear import BaseQuantLinear
 from ..nn_modules.qlinear.qlinear_exllama import ExllamaQuantLinear
 from ..nn_modules.qlinear.qlinear_exllamav2 import ExllamaV2QuantLinear
-from ..nn_modules.qlinear.qlinear_marlin import MarlinQuantLinear
 from ..nn_modules.qlinear.qlinear_ipex import IPEXQuantLinear
+from ..nn_modules.qlinear.qlinear_marlin import MarlinQuantLinear
 from ..quantization import FORMAT, QuantizeConfig
+from .backend import BACKEND
+from .importer import select_quant_linear
+from .logger import setup_logger
+from .progress import ProgressBar
 
 logger = setup_logger()
tests/test_transformers_integration.py (3 changes: 2 additions & 1 deletion)

@@ -1,4 +1,5 @@
 import unittest
+
 from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
 
 
@@ -68,4 +69,4 @@ def test_quantize_ipex(self):
         self._test_quantize(device_map="cpu")
 
     def test_quantize_cuda(self):
-        self._test_quantize(device_map="cuda")
\ No newline at end of file
+        self._test_quantize(device_map="cuda")