
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Jul 12, 2024
1 parent 483c219 commit 35cb79d
Showing 3 changed files with 46 additions and 45 deletions.
4 changes: 2 additions & 2 deletions neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -581,7 +581,7 @@ def tmp(_, inp, out):
                     state_dict = torch.load(LWQ_WORKSPACE + f"/{self.get_full_layer_name(layer_name, block_idx)}.pt")
                     Q = state_dict["weight"].data
                     bias = state_dict["bias"] if "bias" in state_dict.keys() else None
-
+
                 else:
                     Q = sub_layers[layer_name].weight.data
                 if weight_config_this_layer["act_order"]:
@@ -614,7 +614,7 @@ def tmp(_, inp, out):

                 if not self.use_layer_wise:
                     bias = sub_layers[layer_name].bias
-
+
                 new_module = WeightOnlyLinear(
                     in_features,
                     out_features,
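For context on the layer-wise branch in this hunk: under use_layer_wise, each quantized layer is persisted as a small per-layer state dict and read back on demand, so only one layer's tensors need to live in memory at a time. The sketch below is illustrative only and not part of this commit; the workspace path and helper names here are assumptions, while the load pattern mirrors the diff lines above.

# Sketch (assumed scaffolding): per-layer checkpoint round trip.
import os

import torch

LWQ_WORKSPACE = "./lwq_workspace"  # assumption: any scratch directory works
os.makedirs(LWQ_WORKSPACE, exist_ok=True)

def save_layer(full_layer_name: str, weight: torch.Tensor, bias=None):
    # Persist one layer so its tensors can be freed from memory.
    state = {"weight": weight}
    if bias is not None:
        state["bias"] = bias
    torch.save(state, f"{LWQ_WORKSPACE}/{full_layer_name}.pt")

def load_layer(full_layer_name: str):
    # Mirrors the load in the hunk: weight is required, bias is optional.
    state_dict = torch.load(f"{LWQ_WORKSPACE}/{full_layer_name}.pt")
    Q = state_dict["weight"].data
    bias = state_dict["bias"] if "bias" in state_dict.keys() else None
    return Q, bias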
77 changes: 39 additions & 38 deletions neural_compressor/torch/algorithms/weight_only/modules.py
@@ -19,11 +19,11 @@
 # since the model classes inherit torch.nn.Module.
 import math

+import numba
 import numpy as np
 import torch
 from torch.autograd import Function
 from torch.nn import functional as F
-import numba

 from neural_compressor.torch.utils import accelerator, logger
@@ -301,11 +301,11 @@ def unpack_tensor_with_torch(self, packed_tensor):
             unpacked_tensor[:, index].copy_(tmp.type(target_dtype))
         accelerator.synchronize()
         return unpacked_tensor

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b4_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -319,11 +319,11 @@ def pack_array_with_numba_b4_c32(
                 | (raw_array[:, i * n_pack] & 0b1111)
             )
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b4_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -333,23 +333,20 @@ def pack_array_with_numba_b4_c16(
                 | (raw_array[:, i * n_pack] & 0b1111)
             )
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b4_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
-            packed_array[:, i] = (
-                ((raw_array[:, i * n_pack + 1] & 0b1111) << 4)
-                | (raw_array[:, i * n_pack] & 0b1111)
-            )
+            packed_array[:, i] = ((raw_array[:, i * n_pack + 1] & 0b1111) << 4) | (raw_array[:, i * n_pack] & 0b1111)
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b4_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -372,11 +369,10 @@ def pack_array_with_numba_b4_c64(
             )
         return packed_array

-
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b8_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -386,11 +382,11 @@ def pack_array_with_numba_b8_c32(
                 | (raw_array[:, i * n_pack] & 0b11111111)
             )
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b8_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -400,20 +396,20 @@ def pack_array_with_numba_b8_c16(
                 | (raw_array[:, i * n_pack] & 0b11111111)
             )
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b8_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
-            packed_array[:, i] = (raw_array[:, i * n_pack] & 0b11111111)
+            packed_array[:, i] = raw_array[:, i * n_pack] & 0b11111111
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b8_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -427,11 +423,11 @@ def pack_array_with_numba_b8_c64(
                 | (raw_array[:, i * n_pack] & 0b11111111)
             )
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b2_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -457,7 +453,7 @@ def pack_array_with_numba_b2_c32(
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b2_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -471,11 +467,11 @@ def pack_array_with_numba_b2_c16(
                 | (raw_array[:, i * n_pack] & 0b11)
             )
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b2_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -485,11 +481,11 @@ def pack_array_with_numba_b2_c8(
                 | (raw_array[:, i * n_pack] & 0b11)
             )
         return packed_array

     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b2_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -527,7 +523,7 @@ def pack_array_with_numba_b2_c64(
                 | (raw_array[:, i * n_pack] & 0b11)
             )
         return packed_array

     def pack_array_with_numba(
         self, raw_array: np.ndarray, n_pack: int, bits: int, compress_bits: int, compression_dtype=np.int32
     ) -> np.ndarray:
@@ -547,17 +543,18 @@ def pack_array_with_numba(
         new_in_features = (in_features + n_pack - 1) // n_pack
         packed_array = np.zeros((out_features, new_in_features), dtype=compression_dtype)
         raw_array = raw_array.astype(compression_dtype)

         pack_method_name = f"pack_array_with_numba_b{bits}_c{compress_bits}"
         pack_method = getattr(self, pack_method_name)
         return pack_method(raw_array, packed_array, n_pack, new_in_features)

     @staticmethod
     @numba.jit(nopython=True)
     def pack_array_with_numba_yi(
         raw_tensor: np.ndarray, n_pack: int, bits: int, compression_dtype=np.int32
     ) -> np.ndarray:
         """Packs the input tensor by combining elements into a specified bit-width format using NumPy.
+
         Args:
             raw_tensor (np.ndarray): The tensor to be packed. Shape: [out_features, in_features] or [1, in_features].
             n_pack (int): The number of elements to be packed together.
@@ -575,7 +572,7 @@ def pack_array_with_numba_yi(
         for i in range(new_in_features):
             packed_tensor[:, i] = (
                 (raw_tensor[:, i * n_pack + 7] << 28)
-                | (raw_tensor[:, i * n_pack + 6] << 24)
+                | (raw_tensor[:, i * n_pack + 6] << 24)
                 | (raw_tensor[:, i * n_pack + 5] << 20)
                 | (raw_tensor[:, i * n_pack + 4] << 16)
                 | (raw_tensor[:, i * n_pack + 3] << 12)
@@ -585,25 +582,29 @@ def pack_array_with_numba_yi(
             )

         return packed_tensor

     def pack_tensor_with_reshape(self, raw_tensor):
         raw_array = raw_tensor.cpu().numpy()
         target_len = np.ceil(raw_array.shape[1] / self.n_pack).astype(int)
         target_dtype = torch.tensor(0, dtype=self.compression_dtype).numpy().dtype
         reshaped = raw_array.reshape(-1, self.n_pack)
         packed_array = np.zeros(reshaped.shape[0], dtype=target_dtype)
         for i in range(self.n_pack):
-            packed_array |= (reshaped[:, i].astype(target_dtype) << (self.bits * i))
-
-        packed_tensor = torch.from_numpy(packed_array.reshape((raw_array.shape[0], target_len))).to(device=raw_tensor.device)
+            packed_array |= reshaped[:, i].astype(target_dtype) << (self.bits * i)
+
+        packed_tensor = torch.from_numpy(packed_array.reshape((raw_array.shape[0], target_len))).to(
+            device=raw_tensor.device
+        )
         return packed_tensor

     def pack_tensor_with_numpy(self, raw_tensor):
         if self.bits not in [2, 4, 8]:
             return self.pack_tensor_with_reshape(raw_tensor)
         compression_dtype = torch.tensor(0, dtype=self.compression_dtype).numpy().dtype
         # packed_array = self.pack_array_with_numba_yi(raw_tensor.cpu().numpy(), self.n_pack, self.bits, compression_dtype)
-        packed_array = self.pack_array_with_numba(raw_tensor.cpu().numpy(), self.n_pack, self.bits, self.compress_bits, compression_dtype)
+        packed_array = self.pack_array_with_numba(
+            raw_tensor.cpu().numpy(), self.n_pack, self.bits, self.compress_bits, compression_dtype
+        )
         return torch.from_numpy(packed_array).to(device=raw_tensor.device)

     def unpack_tensor_with_numpy(self, packed_tensor):
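A note on what the pack_array_with_numba_b{bits}_c{width} family above computes: each variant packs n_pack = width // bits low-bit codes into one integer lane, masking element j and shifting it left by bits * j; the bodies are unrolled by hand so numba.jit(nopython=True, parallel=True) can compile them. The sketch below re-expresses the b4/c32 case as a plain NumPy loop with a round-trip check; it is a readable stand-in under stated assumptions, not the committed kernel.

# Sketch only: loop form of the unrolled b4/c32 kernel, plus its inverse.
import numpy as np

def pack_b4_c32(raw_array: np.ndarray) -> np.ndarray:
    # Pack eight 4-bit codes per int32 lane; assumes in_features % 8 == 0.
    n_pack = 32 // 4
    out_features, in_features = raw_array.shape
    assert in_features % n_pack == 0
    new_in_features = in_features // n_pack
    packed = np.zeros((out_features, new_in_features), dtype=np.int32)
    raw = raw_array.astype(np.int32)
    for i in range(new_in_features):
        for j in range(n_pack):
            # Same masking and shifting the unrolled kernel performs.
            packed[:, i] |= (raw[:, i * n_pack + j] & 0b1111) << (4 * j)
    return packed

def unpack_b4_c32(packed: np.ndarray, in_features: int) -> np.ndarray:
    # Reverse the packing: shift each lane right and mask off 4 bits.
    n_pack = 32 // 4
    out = np.zeros((packed.shape[0], in_features), dtype=np.int32)
    for col in range(in_features):
        out[:, col] = (packed[:, col // n_pack] >> (4 * (col % n_pack))) & 0b1111
    return out

codes = np.random.randint(0, 16, size=(4, 16))  # unsigned 4-bit codes
assert np.array_equal(unpack_b4_c32(pack_b4_c32(codes), 16), codes)

pack_array_with_numba then picks the matching unrolled kernel by name, as the diff shows: getattr(self, f"pack_array_with_numba_b{bits}_c{compress_bits}").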
10 changes: 5 additions & 5 deletions neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -124,7 +124,7 @@ def convert(
             "double_quant_group_size": kwargs.get("double_quant_group_size", 256),
         }
         use_optimum_format = kwargs.get("use_optimum_format", True)
-
+
         if use_layer_wise:
             from neural_compressor.common.utils import DEFAULT_WORKSPACE
             from neural_compressor.torch.algorithms.layer_wise.utils import get_path, load_module, register_weight_hooks
@@ -135,10 +135,10 @@ def convert(
                 model_path = get_path(model_path)

             register_weight_hooks(model, model_path, device=device, clean_weight=True)
-
+
         for name, m in model.named_modules():
-            if not isinstance(m, supported_layers):
-
+            if not isinstance(m, supported_layers):
                 continue
             if name in weight_config:  # pragma: no cover
                 # initialize op configuration
@@ -185,7 +185,7 @@ def convert(
                 continue
             logger.debug(f"RTN quantized module:{name, m}")
             logger.debug(log_msg)
-
+
             if use_layer_wise:
                 load_module(model, name, model_path, device=device)

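For orientation, the rtn.py hunks exercise the layer-wise flow end to end: register hooks so each module's weights materialize just in time, quantize the module, then reload it. The sketch below only rearranges calls that appear verbatim in the diff (get_path, register_weight_hooks, load_module); the rest, including the torch.nn.Linear stand-in for supported_layers, is assumed scaffolding.

# Sketch under stated assumptions; requires neural-compressor installed.
import torch

from neural_compressor.torch.algorithms.layer_wise.utils import get_path, load_module, register_weight_hooks

def convert_layer_wise(model: torch.nn.Module, model_path: str, device="cpu"):
    model_path = get_path(model_path)  # resolve the on-disk checkpoint layout
    # Hooks stream each module's weight in on first use and free it afterwards.
    register_weight_hooks(model, model_path, device=device, clean_weight=True)
    for name, m in model.named_modules():
        if not isinstance(m, torch.nn.Linear):  # stand-in for supported_layers
            continue
        # ... quantize m in place, as convert() does above ...
        load_module(model, name, model_path, device=device)
    return model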
