Add Google style docstrings to HQQ files #2

Closed · wants to merge 1 commit into from
1 change: 1 addition & 0 deletions .azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt
@@ -15,3 +15,4 @@
/neural-compressor/neural_compressor/strategy
/neural-compressor/neural_compressor/training.py
/neural-compressor/neural_compressor/utils
/neural-compressor/neural_compressor/torch/algorithms/weight_only/hqq
47 changes: 47 additions & 0 deletions neural_compressor/torch/algorithms/weight_only/hqq/__init__.py
@@ -14,3 +14,50 @@

from .quantizer import HQQuantizer
from .config import HQQModuleConfig, QTensorConfig

class HQQuantizer:

Codacy Static Code Analysis, notice on line 18: Too few public methods (0/2)
Codacy Static Code Analysis, failure on line 18: class already defined line 15
"""
A class for quantizing models using the HQQ algorithm.

Attributes:
quant_config (ConfigMappingType): Configuration for quantization.

Methods:
prepare(model: torch.nn.Module, *args, **kwargs) -> Optional[torch.nn.Module]:
Prepares a given model for quantization.
convert(model: torch.nn.Module, *args, **kwargs) -> Optional[torch.nn.Module]:
Converts a prepared model to a quantized model.
save(model, path):
Saves the quantized model to the specified path.
"""

class HQQModuleConfig:

Codacy Static Code Analysis, notice on line 34: Too few public methods (0/2)
Codacy Static Code Analysis, failure on line 34: class already defined line 16
"""
Configuration for HQQ modules.

Attributes:
weight (QTensorConfig): Configuration for weight quantization.
scale (QTensorConfig): Configuration for scale quantization.
zero (QTensorConfig): Configuration for zero quantization.

Methods:
__repr__() -> str:
Returns a string representation of the HQQModuleConfig object.
"""

class QTensorConfig:

Codacy Static Code Analysis, notice on line 48: Too few public methods (0/2)
Codacy Static Code Analysis, failure on line 48: class already defined line 16
"""
Configuration for quantized tensors.

Attributes:
nbits (int): Number of bits for quantization.
channel_wise (bool): Whether to use channel-wise quantization.
group_size (int): Size of the quantization group.
optimize (bool): Whether to optimize the quantization.
round_zero (Optional[bool]): Whether to round zero.
pack (bool): Whether to pack the quantized tensor.

Methods:
__repr__() -> str:
Returns a string representation of the QTensorConfig object.
"""
126 changes: 123 additions & 3 deletions neural_compressor/torch/algorithms/weight_only/hqq/bitpack.py
@@ -30,27 +30,79 @@

# Bit packing logic. format: pack/unpack_nBits_target-<uint8 or int32>
class BitPack:
"""
A class for bit packing logic.

This class provides static methods for packing and unpacking tensors
with different bit-widths.

Methods:
pack_8bit_u8(W_q): Packs an 8-bit tensor to uint8.
unpack_8bit_u8(W_q): Unpacks an 8-bit tensor from uint8.
pack_4bit_u8(W_q): Packs a 4-bit tensor to uint8.
unpack_4bit_u8(W_q): Unpacks a 4-bit tensor from uint8.
pack_2bit_u8(W_q): Packs a 2-bit tensor to uint8.
unpack_2bit_u8(W_q): Unpacks a 2-bit tensor from uint8.
pack_3bit_32(W_q_in): Packs a 3-bit tensor to int32.
unpack_3bit_32(W_q): Unpacks a 3-bit tensor from int32.
"""

# 8-bit
################################################
@staticmethod
def pack_8bit_u8(W_q):
"""
Packs an 8-bit tensor to uint8.

Args:
W_q (torch.Tensor): The tensor to be packed.

Returns:
torch.Tensor: The packed tensor.
"""
return W_q.to(torch.uint8)

@staticmethod
def unpack_8bit_u8(W_q):
"""
Unpacks an 8-bit tensor from uint8.

Args:
W_q (torch.Tensor): The tensor to be unpacked.

Returns:
torch.Tensor: The unpacked tensor.
"""
return W_q

# 4-bit
################################################
@staticmethod
def pack_4bit_u8(W_q): # uint8 > uint8/2
"""
Packs a 4-bit tensor to uint8.
Review comment (Owner Author): Start after """

Args:
W_q (torch.Tensor): The tensor to be packed.

Returns:
torch.Tensor: The packed tensor.
"""
W_q = W_q.to(torch.uint8)
_step = int(len(W_q) / 2)
return (W_q[:_step] << 4) | W_q[_step:]

# A bit faster than the _cat version
@staticmethod
def unpack_4bit_u8(W_q): # uint8/2 > uint8
"""
Unpacks a 4-bit tensor from uint8.

Args:
W_q (torch.Tensor): The tensor to be unpacked.

Returns:
torch.Tensor: The unpacked tensor.
"""
_step = W_q.shape[0]
tmp = torch.empty([2 * _step, W_q.shape[1]], dtype=torch.uint8, device=W_q.device)
tmp[:_step] = (W_q & 0b11110000) >> 4
@@ -61,13 +113,30 @@ def unpack_4bit_u8(W_q): # uint8/2 > uint8
################################################
@staticmethod
def pack_2bit_u8(W_q): # uint8 > uint8/4
"""
Packs a 2-bit tensor to uint8.

Args:
W_q (torch.Tensor): The tensor to be packed.

Returns:
torch.Tensor: The packed tensor.
"""
W_q = W_q.to(torch.uint8)
_step = int(len(W_q) / 4)
return W_q[:_step] << 6 | W_q[_step : 2 * _step] << 4 | W_q[2 * _step : 3 * _step] << 2 | W_q[3 * _step :]

# A bit faster than the _cat version
@staticmethod
def unpack_2bit_u8(W_q):
"""
Unpacks a 2-bit tensor from uint8.

Args:
W_q (torch.Tensor): The tensor to be unpacked.

Returns:
torch.Tensor: The unpacked tensor.
"""
_step = W_q.shape[0]
tmp = torch.empty([4 * _step, W_q.shape[1]], dtype=torch.uint8, device=W_q.device)
tmp[:_step] = (W_q & 0b11000000) >> 6
@@ -80,6 +149,15 @@ def unpack_2bit_u8(W_q)
################################################
@staticmethod
def pack_3bit_32(W_q_in):
"""
Packs a 3-bit tensor to int32.

Args:
W_q_in (torch.Tensor): The tensor to be packed.

Returns:
torch.Tensor: The packed tensor.
"""
W_q = torch.zeros(
[int(10 * np.ceil(W_q_in.shape[0] / 10.0)), W_q_in.shape[1]], device=W_q_in.device, dtype=torch.int32
)
@@ -99,9 +177,17 @@ def pack_3bit_32(W_q_in):
)
return W_q

# A bit faster than _cat version
@staticmethod
def unpack_3bit_32(W_q):
"""
Unpacks a 3-bit tensor from int32.

Args:
W_q (torch.Tensor): The tensor to be unpacked.

Returns:
torch.Tensor: The unpacked tensor.
"""
_step = W_q.shape[0]
tmp = torch.empty([10 * _step, W_q.shape[1]], dtype=torch.uint8, device=W_q.device)
tmp[:_step] = (W_q & 0b00111000000000000000000000000000) >> 27
@@ -118,6 +204,22 @@ def unpack_3bit_32(W_q):


class Packer:
"""
A class for managing bit packing functions.

This class provides methods to get the appropriate packing and unpacking
functions based on the number of bits.

Attributes:
bit_to_packing (dict): A mapping from bit-width to packing format.
pack_fn_mapping (dict): A mapping from packing format to packing function.
unpack_fn_mapping (dict): A mapping from packing format to unpacking function.

Methods:
get_pack_fn(nbits): Returns the packing function for the given bit-width.
get_unpack_fn(nbits): Returns the unpacking function for the given bit-width.
"""

# TODO: Refine the packer
bit_to_packing = {8: "8bit_u8", 4: "4bit_u8", 3: "3bit_32", 2: "2bit_u8"}

@@ -137,8 +239,26 @@ class Packer:

@staticmethod
def get_pack_fn(nbits: int):
"""
Returns the packing function for the given bit-width.

Args:
nbits (int): The bit-width.

Returns:
function: The packing function.
"""
return Packer.pack_fn_mapping[Packer.bit_to_packing[nbits]]

@staticmethod
def get_unpack_fn(nbits: int):
"""
Returns the unpacking function for the given bit-width.

Args:
nbits (int): The bit-width.

Returns:
function: The unpacking function.
"""
return Packer.unpack_fn_mapping[Packer.bit_to_packing[nbits]]
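
As a quick illustration of the documented packing helpers, here is a round-trip sketch (not part of this diff) that uses Packer to resolve the 4-bit pack/unpack functions. The import path mirrors the file location above; the even row count is required because pack_4bit_u8 splits the tensor into two halves.

import torch
from neural_compressor.torch.algorithms.weight_only.hqq.bitpack import Packer

# Toy 4-bit weight tensor: values fit in 4 bits and the row count is even.
W_q = torch.randint(0, 16, (4, 8), dtype=torch.uint8)

pack_fn = Packer.get_pack_fn(4)      # resolves to BitPack.pack_4bit_u8
unpack_fn = Packer.get_unpack_fn(4)  # resolves to BitPack.unpack_4bit_u8

packed = pack_fn(W_q)                # shape (2, 8): two 4-bit values per uint8
restored = unpack_fn(packed)         # shape (4, 8) again

assert torch.equal(W_q, restored)    # lossless for values in [0, 15]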
25 changes: 25 additions & 0 deletions neural_compressor/torch/algorithms/weight_only/hqq/config.py
@@ -33,6 +33,12 @@


class HQQGlobalOptions:
"""
Global options for HQQ.

Attributes:
use_half (bool): Whether to use half precision.
"""
use_half = os.getenv("HQQ_NOT_USE_HALF", "0") == "0"


@@ -41,6 +47,17 @@ class HQQGlobalOptions:

@dataclass
class QTensorConfig:
"""
Configuration for quantized tensors.

Attributes:
nbits (int): Number of bits for quantization.
channel_wise (bool): Whether to use channel-wise quantization.
group_size (int): Size of the quantization group.
optimize (bool): Whether to optimize the quantization.
round_zero (Optional[bool]): Whether to round zero.
pack (bool): Whether to pack the quantized tensor.
"""
nbits: int
channel_wise: bool = True
group_size: int = 128
@@ -67,6 +84,14 @@ class HQQModuleConfig(
["weight", "scale", "zero"],
)
):
"""
Configuration for HQQ modules.

Attributes:
weight (QTensorConfig): Configuration for weight quantization.
scale (QTensorConfig): Configuration for scale quantization.
zero (QTensorConfig): Configuration for zero quantization.
"""
def __new__(
cls,
weight=default_weight_quant_config,
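
To round out the config.py changes, a small sketch (not part of this diff) of how the documented dataclass and named tuple compose. The group_size value here is arbitrary, and fields left unspecified fall back to the library defaults shown above.

from neural_compressor.torch.algorithms.weight_only.hqq.config import HQQModuleConfig, QTensorConfig

# 4-bit, group-wise weight quantization; scale and zero keep their default configs.
weight_cfg = QTensorConfig(nbits=4, channel_wise=True, group_size=64)
module_cfg = HQQModuleConfig(weight=weight_cfg)

print(module_cfg)  # __repr__ summarizes the weight/scale/zero configurations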