From 35cb79d7e98f7e9bb028eb65d474d5e8231c820c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 12 Jul 2024 03:33:20 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../torch/algorithms/weight_only/gptq.py    |  4 +-
 .../torch/algorithms/weight_only/modules.py | 77 ++++++++++---------
 .../torch/algorithms/weight_only/rtn.py     | 10 +--
 3 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py
index 82b73c4213b..0b138fc80e2 100644
--- a/neural_compressor/torch/algorithms/weight_only/gptq.py
+++ b/neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -581,7 +581,7 @@ def tmp(_, inp, out):
                 state_dict = torch.load(LWQ_WORKSPACE + f"/{self.get_full_layer_name(layer_name, block_idx)}.pt")
                 Q = state_dict["weight"].data
                 bias = state_dict["bias"] if "bias" in state_dict.keys() else None
-
+
             else:
                 Q = sub_layers[layer_name].weight.data
             if weight_config_this_layer["act_order"]:
@@ -614,7 +614,7 @@ def tmp(_, inp, out):
 
             if not self.use_layer_wise:
                 bias = sub_layers[layer_name].bias
-
+
             new_module = WeightOnlyLinear(
                 in_features,
                 out_features,
diff --git a/neural_compressor/torch/algorithms/weight_only/modules.py b/neural_compressor/torch/algorithms/weight_only/modules.py
index 47d51560612..7b0aae9589b 100644
--- a/neural_compressor/torch/algorithms/weight_only/modules.py
+++ b/neural_compressor/torch/algorithms/weight_only/modules.py
@@ -19,11 +19,11 @@
 # since the model classes inherit torch.nn.Module.
 import math
 
+import numba
 import numpy as np
 import torch
 from torch.autograd import Function
 from torch.nn import functional as F
-import numba
 
 from neural_compressor.torch.utils import accelerator, logger
@@ -301,11 +301,11 @@ def unpack_tensor_with_torch(self, packed_tensor):
             unpacked_tensor[:, index].copy_(tmp.type(target_dtype))
         accelerator.synchronize()
         return unpacked_tensor
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b4_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -319,11 +319,11 @@ def pack_array_with_numba_b4_c32(
                 | (raw_array[:, i * n_pack] & 0b1111)
             )
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b4_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -333,23 +333,20 @@ def pack_array_with_numba_b4_c16(
                 | (raw_array[:, i * n_pack] & 0b1111)
             )
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b4_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
-            packed_array[:, i] = (
-                ((raw_array[:, i * n_pack + 1] & 0b1111) << 4)
-                | (raw_array[:, i * n_pack] & 0b1111)
-            )
+            packed_array[:, i] = ((raw_array[:, i * n_pack + 1] & 0b1111) << 4) | (raw_array[:, i * n_pack] & 0b1111)
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b4_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -372,11 +369,10 @@ def pack_array_with_numba_b4_c64(
             )
         return packed_array
 
-
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b8_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -386,11 +382,11 @@ def pack_array_with_numba_b8_c32(
                 | (raw_array[:, i * n_pack] & 0b11111111)
             )
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b8_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -400,20 +396,20 @@ def pack_array_with_numba_b8_c16(
                 | (raw_array[:, i * n_pack] & 0b11111111)
             )
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b8_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
-            packed_array[:, i] = (raw_array[:, i * n_pack] & 0b11111111)
+            packed_array[:, i] = raw_array[:, i * n_pack] & 0b11111111
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b8_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -427,11 +423,11 @@ def pack_array_with_numba_b8_c64(
                 | (raw_array[:, i * n_pack] & 0b11111111)
             )
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b2_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -457,7 +453,7 @@
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b2_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -471,11 +467,11 @@
                 | (raw_array[:, i * n_pack] & 0b11)
             )
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b2_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -485,11 +481,11 @@ def pack_array_with_numba_b2_c8(
                 | (raw_array[:, i * n_pack] & 0b11)
             )
         return packed_array
-
+
     @staticmethod
     @numba.jit(nopython=True, parallel=True)
     def pack_array_with_numba_b2_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
     ) -> np.ndarray:
         for i in range(new_in_features):
             packed_array[:, i] = (
@@ -527,7 +523,7 @@ def pack_array_with_numba_b2_c64(
                 | (raw_array[:, i * n_pack] & 0b11)
             )
         return packed_array
-
+
     def pack_array_with_numba(
         self, raw_array: np.ndarray, n_pack: int, bits: int, compress_bits: int, compression_dtype=np.int32
     ) -> np.ndarray:
@@ -547,17 +543,18 @@ def pack_array_with_numba(
         new_in_features = (in_features + n_pack - 1) // n_pack
         packed_array = np.zeros((out_features, new_in_features), dtype=compression_dtype)
         raw_array = raw_array.astype(compression_dtype)
-
+
         pack_method_name = f"pack_array_with_numba_b{bits}_c{compress_bits}"
         pack_method = getattr(self, pack_method_name)
         return pack_method(raw_array, packed_array, n_pack, new_in_features)
-
+
     @staticmethod
     @numba.jit(nopython=True)
     def pack_array_with_numba_yi(
         raw_tensor: np.ndarray, n_pack: int, bits: int, compression_dtype=np.int32
     ) -> np.ndarray:
         """Packs the input tensor by combining elements into a specified bit-width format using NumPy.
+
         Args:
             raw_tensor (np.ndarray): The tensor to be packed. Shape: [out_features, in_features] or [1, in_features].
             n_pack (int): The number of elements to be packed together.
@@ -575,7 +572,7 @@
         for i in range(new_in_features):
             packed_tensor[:, i] = (
                 (raw_tensor[:, i * n_pack + 7] << 28)
-                | (raw_tensor[:, i * n_pack + 6] << 24) 
+                | (raw_tensor[:, i * n_pack + 6] << 24)
                 | (raw_tensor[:, i * n_pack + 5] << 20)
                 | (raw_tensor[:, i * n_pack + 4] << 16)
                 | (raw_tensor[:, i * n_pack + 3] << 12)
@@ -585,7 +582,7 @@
             )
         return packed_tensor
-
+
     def pack_tensor_with_reshape(self, raw_tensor):
         raw_array = raw_tensor.cpu().numpy()
         target_len = np.ceil(raw_array.shape[1] / self.n_pack).astype(int)
@@ -593,9 +590,11 @@
         reshaped = raw_array.reshape(-1, self.n_pack)
         packed_array = np.zeros(reshaped.shape[0], dtype=target_dtype)
         for i in range(self.n_pack):
-            packed_array |= (reshaped[:, i].astype(target_dtype) << (self.bits * i))
-
-        packed_tensor = torch.from_numpy(packed_array.reshape((raw_array.shape[0], target_len))).to(device=raw_tensor.device)
+            packed_array |= reshaped[:, i].astype(target_dtype) << (self.bits * i)
+
+        packed_tensor = torch.from_numpy(packed_array.reshape((raw_array.shape[0], target_len))).to(
+            device=raw_tensor.device
+        )
         return packed_tensor
 
     def pack_tensor_with_numpy(self, raw_tensor):
@@ -603,7 +602,9 @@
             return self.pack_tensor_with_reshape(raw_tensor)
         compression_dtype = torch.tensor(0, dtype=self.compression_dtype).numpy().dtype
         # packed_array = self.pack_array_with_numba_yi(raw_tensor.cpu().numpy(), self.n_pack, self.bits, compression_dtype)
-        packed_array = self.pack_array_with_numba(raw_tensor.cpu().numpy(), self.n_pack, self.bits, self.compress_bits, compression_dtype)
+        packed_array = self.pack_array_with_numba(
+            raw_tensor.cpu().numpy(), self.n_pack, self.bits, self.compress_bits, compression_dtype
+        )
         return torch.from_numpy(packed_array).to(device=raw_tensor.device)
 
     def unpack_tensor_with_numpy(self, packed_tensor):
diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
index 6a1de840b2d..f0e90fe00e5 100644
--- a/neural_compressor/torch/algorithms/weight_only/rtn.py
+++ b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -124,7 +124,7 @@ def convert(
             "double_quant_group_size": kwargs.get("double_quant_group_size", 256),
         }
         use_optimum_format = kwargs.get("use_optimum_format", True)
-
+
         if use_layer_wise:
             from neural_compressor.common.utils import DEFAULT_WORKSPACE
             from neural_compressor.torch.algorithms.layer_wise.utils import get_path, load_module, register_weight_hooks
@@ -135,10 +135,10 @@ def convert(
 
             model_path = get_path(model_path)
             register_weight_hooks(model, model_path, device=device, clean_weight=True)
-
+
         for name, m in model.named_modules():
-
-            if not isinstance(m, supported_layers): 
+
+            if not isinstance(m, supported_layers):
                 continue
             if name in weight_config:  # pragma: no cover
                 # initialize op configuration
@@ -185,7 +185,7 @@ def convert(
                 continue
             logger.debug(f"RTN quantized module:{name, m}")
             logger.debug(log_msg)
-
+
             if use_layer_wise:
                 load_module(model, name, model_path, device=device)
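
Review note, not part of the patch: every pack_array_with_numba_b{bits}_c{width} kernel touched above uses the same shift-and-OR layout, and pack_array_with_numba dispatches to the right one by building the name f"pack_array_with_numba_b{bits}_c{compress_bits}" and looking it up with getattr. For checking the bit layout interactively, here is a minimal plain-NumPy sketch of the 4-bit/int32 case together with its inverse. It is a sketch only: the helper names pack_b4_c32/unpack_b4_c32 are hypothetical, not neural_compressor API, and it assumes unsigned values that already fit in 4 bits and an in_features divisible by 8.

    # Plain-NumPy sketch of the b4_c32 shift-and-OR packing (no numba).
    # Hypothetical helpers; assumes unsigned 4-bit values and in_features % 8 == 0.
    import numpy as np

    def pack_b4_c32(raw: np.ndarray) -> np.ndarray:
        """Pack eight 4-bit values into each int32 column, lowest nibble first."""
        out_features, in_features = raw.shape
        n_pack = 32 // 4  # eight 4-bit values per 32-bit container
        packed = np.zeros((out_features, in_features // n_pack), dtype=np.int32)
        raw = raw.astype(np.int32)
        for i in range(packed.shape[1]):
            for j in range(n_pack):  # j == 0 fills bits 0..3, j == 7 fills bits 28..31
                packed[:, i] |= (raw[:, i * n_pack + j] & 0b1111) << (4 * j)
        return packed

    def unpack_b4_c32(packed: np.ndarray) -> np.ndarray:
        """Inverse of pack_b4_c32 for unsigned 4-bit values."""
        n_pack = 8
        unpacked = np.zeros((packed.shape[0], packed.shape[1] * n_pack), dtype=np.int32)
        for j in range(n_pack):
            # Arithmetic shift is fine here: the mask keeps only the low 4 bits.
            unpacked[:, j::n_pack] = (packed >> (4 * j)) & 0b1111
        return unpacked

    raw = np.random.randint(0, 16, size=(4, 32))
    assert np.array_equal(unpack_b4_c32(pack_b4_c32(raw)), raw)

The b2/b8 variants differ only in the mask (0b11, 0b11111111) and shift stride, the c8/c16/c64 variants only in the container dtype, and pack_tensor_with_reshape reaches the same layout by reshaping to (-1, n_pack) and OR-ing one shifted column per iteration.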