From 0c52e1243b78734e95fc348834303bc3c3cfe369 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 23 Jul 2024 09:59:17 +0800 Subject: [PATCH] Add docstring for WOQ&LayerWise (#1938) Signed-off-by: Kaihui-intel Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: xinhe --- .../scripts/codeScan/pydocstyle/scan_path.txt | 3 +- .../torch/algorithms/layer_wise/load.py | 5 +- .../algorithms/layer_wise/modified_pickle.py | 8 +- .../torch/algorithms/layer_wise/utils.py | 35 ++++ .../torch/algorithms/weight_only/__init__.py | 2 +- .../torch/algorithms/weight_only/autoround.py | 21 ++- .../torch/algorithms/weight_only/awq.py | 28 ++- .../torch/algorithms/weight_only/gptq.py | 101 ++++++++++- .../torch/algorithms/weight_only/modules.py | 102 ++++++++++- .../torch/algorithms/weight_only/rtn.py | 4 +- .../torch/algorithms/weight_only/save_load.py | 34 +++- .../torch/algorithms/weight_only/teq.py | 70 +++++--- .../torch/algorithms/weight_only/utility.py | 92 ++++++++-- .../torch/quantization/config.py | 164 +++++++++++++++++- 14 files changed, 599 insertions(+), 70 deletions(-) diff --git a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt b/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt index b5a69eaa938..1acfa95c75b 100644 --- a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt +++ b/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt @@ -20,4 +20,5 @@ /neural_compressor/torch/algorithms/pt2e_quant /neural_compressor/torch/export /neural_compressor/common -/neural_compressor/torch/algorithms/weight_only/hqq +/neural_compressor/torch/algorithms/weight_only +/neural_compressor/torch/algorithms/layer_wise \ No newline at end of file diff --git a/neural_compressor/torch/algorithms/layer_wise/load.py b/neural_compressor/torch/algorithms/layer_wise/load.py index a883bfe3848..a5176104b76 100644 --- a/neural_compressor/torch/algorithms/layer_wise/load.py +++ b/neural_compressor/torch/algorithms/layer_wise/load.py @@ -152,8 +152,7 @@ def load( # The first line of this docstring overrides the one Sphinx generates for the # documentation. We need it so that Sphinx doesn't leak `pickle`s path from # the build environment (e.g. ` None: + """Init the QDQLayer object.""" super().__init__() self.quant = torch.ao.quantization.QuantStub() self.module = module @@ -43,6 +46,7 @@ def __init__(self, module, input_scale=None) -> None: self.input_scale = input_scale def forward(self, X): + """Forward function.""" if self.input_scale is not None: X = torch.mul(X, self.input_scale) X = self.quant(X) @@ -220,6 +224,16 @@ def _get_path(pretrained_model_name_or_path): def load_value(model, param_name, path): + """Load the module value. + + Args: + model (torch.nn.module): torch model. + param_name (str): module name. + path (str): path to load state_dict per layer. + + Returns: + tensor: the module value. + """ if "lm_head" in param_name and getattr(model.config, "tie_word_embeddings", True): input_embeddings = model.get_input_embeddings() modules = get_named_children(model) @@ -235,6 +249,14 @@ def load_value(model, param_name, path): def load_module(model, module_name, path, device="cpu"): + """Load all named parameters of module. + + Args: + model (torch.nn.module): torch model. + module_name (str): module name. + path (str): path to load state_dict per layer. + device (str, optional): module device. Defaults to "cpu". + """ module = get_module(model, module_name) for n, p in module.named_parameters(): param_name = module_name + "." 
+ n @@ -243,6 +265,18 @@ def load_module(model, module_name, path, device="cpu"): def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_path=None): + """Register weight hooks for model. + + Args: + model (torch.nn.module): torch model. + path (str): path to load state_dict per layer. + device (str, optional): module device. Defaults to "cpu". + clean_weight (bool, optional): to clean model weight. Defaults to True. + saved_path (str, optional): path to save module weight. Defaults to None. + + Returns: + list: handlers. + """ if saved_path: os.makedirs(saved_path, exist_ok=True) @@ -280,6 +314,7 @@ def hook(module, input, output): def clean_module_weight(module): + """Clean module weight.""" if isinstance(module, QDQLayer): submodule = module.module else: diff --git a/neural_compressor/torch/algorithms/weight_only/__init__.py b/neural_compressor/torch/algorithms/weight_only/__init__.py index fc9ef0a5b3b..3ff6ec8b145 100644 --- a/neural_compressor/torch/algorithms/weight_only/__init__.py +++ b/neural_compressor/torch/algorithms/weight_only/__init__.py @@ -11,6 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +"""Weight-Only algorithms.""" from .save_load import save, load diff --git a/neural_compressor/torch/algorithms/weight_only/autoround.py b/neural_compressor/torch/algorithms/weight_only/autoround.py index 6f5a022cfee..9ff488573c0 100644 --- a/neural_compressor/torch/algorithms/weight_only/autoround.py +++ b/neural_compressor/torch/algorithms/weight_only/autoround.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +"""AutoRound quantization.""" import copy import json import time @@ -28,6 +28,8 @@ class AutoRoundQuantizer(Quantizer): + """AutoRound Quantizer.""" + def __init__( self, quant_config: dict = {}, @@ -94,11 +96,11 @@ def __init__( lr_scheduler: The learning rate scheduler to be used. dataset (str): The default dataset name (default is "NeelNanda/pile-10k"). enable_quanted_input (bool): Whether to use the output of the previous quantized block as - the input for the current block (default is True). + the input for the current block (default is True). enable_minmax_tuning (bool): Whether to enable weight min-max tuning (default is True). lr (float): The learning rate (default is None, will be set to 1.0/iters). minmax_lr (float): The learning rate for min-max tuning - (default is None, it will be set to lr automatically). + (default is None, it will be set to lr automatically). low_gpu_mem_usage (bool): Whether to use low GPU memory (default is True). iters (int): Number of iterations (default is 200). seqlen (int): Data length of the sequence for tuning (default is 2048). @@ -111,7 +113,7 @@ def __init__( dynamic_max_gap (int): The dynamic maximum gap (default is -1). data_type (str): The data type to be used (default is "int"). scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels - have different choices. + have different choices. multimodal(bool): Enable multimodal model quantization, (default is "False"). act_bits (int): Number of bits for activation quantization. Default is 32. act_group_size (int): Group size for activation quantization. Default is None. 
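
[Reviewer note] A minimal sketch of how the AutoRoundQuantizer documented above is driven end to end: prepare wraps the model to capture calibration inputs, batches from get_dataloader are pushed through the wrapper, and convert runs AutoRound. The example model name, the per-layer quant_config keys ("data_type", "bits", "group_size", "sym"), and the batch layout are illustrative assumptions, not part of this patch.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from neural_compressor.torch.algorithms.weight_only.autoround import AutoRoundQuantizer, get_dataloader

    model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")      # assumed example model
    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

    # Assumed per-layer config layout; keys mirror the weight-only options described above.
    weight_config = {
        "model.decoder.layers.0.self_attn.q_proj": {"data_type": "int", "bits": 4, "group_size": 128, "sym": False},
    }
    quantizer = AutoRoundQuantizer(quant_config=weight_config, iters=200, seqlen=2048)

    prepared = quantizer.prepare(model)                  # wraps the model to record calibration args/kwargs
    dataloader = get_dataloader(tokenizer, seqlen=2048)  # NeelNanda/pile-10k calibration set by default
    for batch in dataloader:
        prepared(batch["input_ids"])                     # batch layout is an assumption about the dataloader output
    q_model = quantizer.convert(prepared)                # runs AutoRound and returns the quantized model
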
@@ -153,6 +155,7 @@ def __init__( def prepare(self, model: torch.nn.Module, *args, **kwargs): """Prepares a given model for quantization. + Args: model (torch.nn.Module): The model to be prepared. @@ -163,6 +166,14 @@ def prepare(self, model: torch.nn.Module, *args, **kwargs): return prepare_model def convert(self, model: torch.nn.Module, *args, **kwargs): + """Convert the prepared model to a quantized model. + + Args: + model (torch.nn.Module): the prepared model + + Returns: + The quantized model. + """ dataloader = CapturedDataloader(model.args_list, model.kwargs_list) model = model.orig_model rounder = AutoRound( @@ -216,7 +227,7 @@ def get_dataloader(tokenizer, seqlen, dataset_name="NeelNanda/pile-10k", seed=42 split (str, optional): The data split to use. Defaults to None. seed (int, optional): The random seed for reproducibility. Defaults to 42. bs (int, optional): The batch size. Defaults to 4. - n_samples (int, optional): The total number of samples to include. Defaults to 512. + nsamples (int, optional): The total number of samples to include. Defaults to 128. Returns: DataLoader: The DataLoader for the calibrated dataset. diff --git a/neural_compressor/torch/algorithms/weight_only/awq.py b/neural_compressor/torch/algorithms/weight_only/awq.py index b8c4329de3b..63ae6b08564 100644 --- a/neural_compressor/torch/algorithms/weight_only/awq.py +++ b/neural_compressor/torch/algorithms/weight_only/awq.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +"""AWQ quantization.""" # Copied from neural_compressor/adaptor/torch_utils/awq.py import copy @@ -40,11 +40,16 @@ def _get_absorb_per_block(model, example_inputs, folding=False, weight_config={} """Get absorbed layer per block. Args: - model (torch.nn.Module): input model - example_inputs: example_inputs + model (torch.nn.Module): input model. + example_inputs (tensor/tuple/dict, optional): used to trace torch model. + folding (bool, optional): whether only allow update scale when it can be fold + to upper layer. Defaults to False. + weight_config (dict, optional): the quantization configuration. Defaults to {}. Returns: - block_absorb_dict: dict of absorbed layer per block. eg. {0, [[absorbed_1, xx], [xx]], ...} + block_absorb_dict: The dict of absorbed layer per block. eg. {0, [[absorbed_1, xx], [xx]], ...} + absorb_layer_dict: The layer dict that scale can be absorbed. The dict is the inverse of + block_absorb_dict for all blocks. """ block_absorb_dict = {} # record absorbed layer per block absorb_layer_dict = {} # record absorb layers for absorbed layers @@ -94,10 +99,12 @@ def _get_absorb_dict(model, absorb_layer_dict): Args: model (torch.nn.Module): input model - absorb_layer_dict (dict): The layer dict that scale can be absorbed, default is {}. + absorb_layer_dict (dict): The layer type dict that scale can be absorbed, default is {}. Returns: block_absorb_dict: dict of absorbed layer per block. eg. {0, [[absorbed_1, xx], [xx]], ...} + new_absorb_layer_dict: The layer dict that scale can be absorbed. The dict is the inverse of + block_absorb_dict for all blocks. """ block_absorb_dict = {} block_prefix, block_num = get_block_prefix(model) @@ -121,6 +128,15 @@ def _get_absorb_dict(model, absorb_layer_dict): @torch.no_grad() def _get_weight_scale(weight, q_group_size=-1): + """Get scale for weight. 
+ + Args: + weight (tensor): input weight + q_group_size (int, optional): how many elements share one scale/zp. Defaults to -1. + + Returns: + scale: the scale of input weight. + """ org_shape = weight.shape if q_group_size > 0: weight = weight.view(-1, q_group_size) @@ -526,6 +542,8 @@ def module_inference(self, model, inputs): class AWQQuantizer(Quantizer): + """AWQ Quantizer.""" + def __init__(self, quant_config: OrderedDict = {}, absorb_layer_dict: dict = {}): """Init an AWQQuantizer object. diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py index eae9f7c3a84..43bf5061bfa 100644 --- a/neural_compressor/torch/algorithms/weight_only/gptq.py +++ b/neural_compressor/torch/algorithms/weight_only/gptq.py @@ -14,6 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""GPTQ quantization.""" import gc import math @@ -181,7 +182,8 @@ def __init__( *args, **kwargs, ): - """ + """Init RAWGPTQuantizer. + Args: model: the fp32 model to quantize weight_config (dict, optional): contains all info required by GPTQ. Defaults to {}. For example, @@ -196,10 +198,13 @@ def __init__( } ... } + nsamples (int): the number of calibration data samples. + use_max_length (bool): set all sequence length to be same length. + max_seq_length (int): the same length of all sequence length. dataloader: an iterable containing calibration datasets, contains (inputs, targets) use_layer_wise (bool): Enables quantize model per layer. Defaults to False. model_path (str): Model path that is used to load state_dict per layer. - device: cpu or cuda + device (str): cpu or cuda. """ # model self.model = model @@ -246,6 +251,11 @@ def __init__( self.nsamples = nsamples def prepare_layer_wise(self, model_path): + """Prepare for layer-wise quantization, including registering hooks and setting up the model path. + + Args: + model_path (str): Model path that is used to load state_dict per layer. + """ import os from neural_compressor.torch.algorithms.layer_wise import LWQ_WORKSPACE, get_path, register_weight_hooks @@ -260,12 +270,20 @@ def prepare_layer_wise(self, model_path): ) def get_full_layer_name(self, sub_layer_name, block_idx): + """Get full layer name. + + Args: + sub_layer_name (str): sub layer name + block_idx (int): index of block + + Returns: + str: The full name of layer. + """ transformer_name = self.gptq_related_blocks["transformers_name"] return ".".join([transformer_name, str(block_idx), sub_layer_name]) def check_layer_config(self): """Copy arguments from weight_config to built-in attributes.""" - for layer_name, config in self.weight_config.items(): self.weight_config[layer_name]["dtype"] = config.get("dtype", self.dtype_default) self.weight_config[layer_name]["bits"] = config.get("bits", self.bits_default) @@ -314,6 +332,14 @@ def get_layer_config(self, layer_name): return config def track_hidden_states(self, data): + """Track hidden states. + + Args: + data (tensor/tuple/list): input data. + + Returns: + tensor. 
+ """ if isinstance(data, torch.Tensor): return data elif isinstance(data, tuple) or isinstance(data, list): @@ -382,6 +408,7 @@ def model_forward(model, *args, **kwargs): @torch.no_grad() def remove_prepare_for_calibration(self): + """Prepare for GPTQ quantization.""" # output inp data shape logger.info("All calibration data's shape =>") # check all hidden_states shape @@ -404,6 +431,15 @@ def remove_prepare_for_calibration(self): logger.info("GPTQ quantization prepared.") def gather_single_batch_from_dict(self, data_dict, idx): + """Gather single batch from a dict. + + Args: + data_dict (dict): data dict. + idx (int): index + + Returns: + dict: single batch. + """ # obtain a set of keyword input from cache single_batch = {} for k, v in data_dict.items(): @@ -411,6 +447,15 @@ def gather_single_batch_from_dict(self, data_dict, idx): return single_batch def gather_single_batch_from_list(self, data_list, idx): + """Gather single batch from a list. + + Args: + data_dict (dict): data list. + idx (int): index + + Returns: + list: single batch. + """ # obtain a set of keyword input from cache single_batch = [] for data_item in data_list: @@ -418,6 +463,11 @@ def gather_single_batch_from_list(self, data_list, idx): return single_batch def update_blockwise_hidden_states(self, outs): + """Update the blockwise hidden states. + + Args: + outs: the output of block. + """ if "hidden_states" in self.cache_key_arguments: self.cache_key_arguments["hidden_states"] = outs[:] else: @@ -645,12 +695,13 @@ def tmp(_, inp, out): class GPTQ: - """ - Please refer to: + """Please refer to the following. + GPTQ: Accurate Post-training Compression for Generative Pretrained Transformers (https://arxiv.org/abs/2210.17323) """ def __init__(self, layer, W, device="cpu"): + """Init GPTQ.""" self.layer = layer self.device = device # W = layer.weight.data.clone() @@ -666,6 +717,7 @@ def __init__(self, layer, W, device="cpu"): self.perm = None # act_order choice def add_batch(self, inp, out): + """Add inputs and outputs to gptq object.""" # if DEBUG: # self.inp1 = inp # self.out1 = out @@ -697,6 +749,22 @@ def add_batch(self, inp, out): self.H += inp.matmul(inp.t()) # H = X*X, which should be a sym matrix def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=False, static_groups=False): + """Run quantization. + + Args: + W (tensor): weight tensor. + block_size (int): Execute quantization per block, block shape = [C_out, block_size]. Default to 128. + percdamp (float): percdamp (float): Percentage of Hessian's diagonal values' average, which will be added + to Hessian's diagonal to increase numerical stability. Defaults to 0.01. + groupsize (int): Size of weight groups. Defaults to -1. + act_order (bool): Whether to sort Hessian's diagonal values to rearrange channel-wise quantization order. + Defaults to False. + static_groups (bool): Whether to calculate group wise quantization parameters in advance. This option + mitigate actorder's extra computational requirements. Default to False. 
+ + Returns: + scale, zero, Q + """ # W = self.layer.weight.data.clone() weight_shape, weight_dtype = W.shape, W.data.dtype if isinstance(self.layer, nn.Conv2d): @@ -824,6 +892,7 @@ def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=F return scale, zero, Q def free(self): + """Free memory.""" if DEBUG: self.inp1 = None self.out1 = None @@ -834,13 +903,17 @@ def free(self): class Quantizer(nn.Module): + """Quantizer.""" + def __init__(self, shape=1): + """Init Quantizer.""" super(Quantizer, self).__init__() self.maxq = 0 self.register_buffer("scale", torch.zeros(shape)) self.register_buffer("zero", torch.zeros(shape)) def configure(self, weight_config_this_layer, norm=2.4, grid=100, maxshrink=0.8, trits=False): + """Configure the quantizer.""" for k, v in weight_config_this_layer.items(): setattr(self, k, v) # self.maxq = torch.tensor(2**self.bits - 1) @@ -854,6 +927,7 @@ def configure(self, weight_config_this_layer, norm=2.4, grid=100, maxshrink=0.8, self.maxq = -1 def find_params(self, x, weight=False): + """Find scale and zero for weight.""" dev = x.device # NF4 FP4 if self.dtype != "int": @@ -991,6 +1065,11 @@ def quantize(self, x, scale, zero, maxq): return scale * (q - zero) def ready(self): + """Quantizer is ready. + + Returns: + bool: True or False. + """ return torch.all(self.scale != 0) @@ -998,8 +1077,10 @@ def ready(self): class GPTQuantizer(INCQuantizer): + """GPTQ Quantizer.""" + def __init__(self, quant_config={}): - """Init a RTNQuantizer object. + """Init a GPTQQuantizer object. Args: quant_config (OrderedDict, optional): quantization config for ops. Defaults to {}. @@ -1041,6 +1122,14 @@ def prepare( @torch.no_grad() def convert(self, model, *args, **kwargs): + """Convert the prepared model to a quantized model. + + Args: + model (torch.nn.Module): the prepared model + + Returns: + The quantized model. + """ self.gptq_quantizer.model = model self.gptq_quantizer.remove_prepare_for_calibration() diff --git a/neural_compressor/torch/algorithms/weight_only/modules.py b/neural_compressor/torch/algorithms/weight_only/modules.py index 503a469b0c7..969cf455559 100644 --- a/neural_compressor/torch/algorithms/weight_only/modules.py +++ b/neural_compressor/torch/algorithms/weight_only/modules.py @@ -31,7 +31,10 @@ class QDQLayer(torch.nn.Module): + """Quantized and dequantized layer.""" + def __init__(self, module, input_scale=None) -> None: + """Init the QDQLayer object.""" super().__init__() self.quant = torch.ao.quantization.QuantStub() self.module = module @@ -39,6 +42,7 @@ def __init__(self, module, input_scale=None) -> None: self.input_scale = input_scale def forward(self, X): + """Forward function.""" if self.input_scale is not None: X = torch.mul(X, self.input_scale) X = self.quant(X) @@ -48,6 +52,8 @@ def forward(self, X): class WeightOnlyLinear(torch.nn.Module): + """Weight Only Linear.""" + def __init__( self, in_features, @@ -64,6 +70,31 @@ def __init__( device="cpu", use_optimum_format=True, ): + """Init the WeightOnlyLinear object. + + Args: + in_features (int): input features. + out_features (int): out features. + dtype (str, optional): the data type of the quantized model. Defaults to "int". + bits (int, optional): number of bits for quantization. Defaults to 4. + group_size (int, optional): size of the quantization group. Defaults to 32. + zp (bool, optional): zero point. Defaults to False. + bias (bool, optional): module bias. Defaults to False. + scale_dtype (torch.Tensor, optional): the data type of quantization scale to be used. 
+ Defaults to torch.float32. + compression_dtype (torch.Tensor, optional): the target dtype after comoression. + Defaults to torch.int32. + compression_dim (int, optional): select from [0, 1], 0 is output channel, 1 is input channel. + Defaults to 1. + g_idx (bool, optional): for recording the channel order. + device (str, optional): choose device for compression. Defaults to cpu. + use_optimum_format (bool, optional): use the popular huggingface compression format. + 1: compression_dim: weight = 1, zeros = 0 and both are transposed. + 2: zeros -= 1 before compression. + 3: g_idx: use same number for one group instead of recording the channel order. + 4. parameter name changed, such as 'packed_weight' -> 'qweight'. + 5. zeros is always needed even for sym. + """ super().__init__() self.use_optimum_format = use_optimum_format self.dtype = dtype @@ -172,6 +203,7 @@ def __init__( self.g_idx = None def pack(self, int_weight, scale, zp, bias, g_idx=None): + """Pack int weight.""" if self.use_optimum_format: self.scales = self.scales.T.contiguous() self.qweight = self.qweight.T.contiguous() @@ -225,6 +257,7 @@ def pack(self, int_weight, scale, zp, bias, g_idx=None): self.qzeros = self.qzeros.T.contiguous() def recover(self): + """Recover fp32 weight from packed weight.""" logger.debug(f"Recovering {self} weight") scales = self.scales.T.contiguous() if self.use_optimum_format else self.scales qweight = self.qweight.T.contiguous() if self.use_optimum_format else self.qweight @@ -271,6 +304,14 @@ def recover(self): return fp32_weight def pack_tensor_with_torch(self, raw_tensor): + """Pack the tensor with torch. + + Args: + raw_tensor (tensor): raw tensor. + + Returns: + tensor: packed tensor. + """ target_len = math.ceil(raw_tensor.shape[1] / self.n_pack) packed_tensor = torch.zeros(raw_tensor.shape[0], target_len, dtype=self.compression_dtype).to(raw_tensor.device) mask = torch.tensor(2**self.bits - 1, dtype=self.compression_dtype).to(raw_tensor.device) @@ -286,6 +327,14 @@ def pack_tensor_with_torch(self, raw_tensor): return packed_tensor def unpack_tensor_with_torch(self, packed_tensor): + """Unpack the tensor with torch. + + Args: + packed_tensor (tensor): packed tensor. + + Returns: + tensor: unpacked tensor. 
+ """ target_dtype = torch.int8 if not hasattr(self, "qzeros") or "int" not in self.dtype else torch.uint8 target_len = packed_tensor.shape[1] * self.n_pack unpacked_tensor = torch.zeros(packed_tensor.shape[0], target_len, dtype=target_dtype).to(packed_tensor.device) @@ -307,6 +356,7 @@ def unpack_tensor_with_torch(self, packed_tensor): def pack_array_with_numba_b4_c32( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=4 and compress_bits=32.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 7] & 0b1111) << 28) @@ -325,6 +375,7 @@ def pack_array_with_numba_b4_c32( def pack_array_with_numba_b4_c16( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=4 and compress_bits=16.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 3] & 0b1111) << 12) @@ -339,6 +390,7 @@ def pack_array_with_numba_b4_c16( def pack_array_with_numba_b4_c8( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=4 and compress_bits=8.""" for i in range(new_in_features): packed_array[:, i] = ((raw_array[:, i * n_pack + 1] & 0b1111) << 4) | (raw_array[:, i * n_pack] & 0b1111) return packed_array @@ -348,6 +400,7 @@ def pack_array_with_numba_b4_c8( def pack_array_with_numba_b4_c64( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=4 and compress_bits=64.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 15] & 0b1111) << 60) @@ -374,6 +427,7 @@ def pack_array_with_numba_b4_c64( def pack_array_with_numba_b8_c32( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=8 and compress_bits=32.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 3] & 0b11111111) << 24) @@ -388,6 +442,7 @@ def pack_array_with_numba_b8_c32( def pack_array_with_numba_b8_c16( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=8 and compress_bits=16.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 3] & 0b11111111) << 24) @@ -402,6 +457,7 @@ def pack_array_with_numba_b8_c16( def pack_array_with_numba_b8_c8( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=8 and compress_bits=8.""" for i in range(new_in_features): packed_array[:, i] = raw_array[:, i * n_pack] & 0b11111111 return packed_array @@ -411,6 +467,7 @@ def pack_array_with_numba_b8_c8( def pack_array_with_numba_b8_c64( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=8 and compress_bits=64.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 7] & 0b11111111) << 56) @@ -429,6 +486,7 @@ def pack_array_with_numba_b8_c64( def pack_array_with_numba_b2_c32( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=2 and compress_bits=32.""" for i in range(new_in_features): packed_array[:, i] = ( 
((raw_array[:, i * n_pack + 15] & 0b11) << 30) @@ -455,6 +513,7 @@ def pack_array_with_numba_b2_c32( def pack_array_with_numba_b2_c16( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=2 and compress_bits=16.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 7] & 0b11) << 14) @@ -473,6 +532,7 @@ def pack_array_with_numba_b2_c16( def pack_array_with_numba_b2_c8( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=2 and compress_bits=8.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 3] & 0b11) << 6) @@ -487,6 +547,7 @@ def pack_array_with_numba_b2_c8( def pack_array_with_numba_b2_c64( raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int ) -> np.ndarray: + """Pack the array with numba when bits=2 and compress_bits=64.""" for i in range(new_in_features): packed_array[:, i] = ( ((raw_array[:, i * n_pack + 31] & 0b11) << 62) @@ -549,6 +610,7 @@ def pack_array_with_numba( return pack_method(raw_array, packed_array, n_pack, new_in_features) def pack_tensor_with_numpy_impl(self, raw_tensor): + """The implement of packing tensor with numpy.""" raw_array = raw_tensor.cpu().numpy() target_len = np.ceil(raw_array.shape[1] / self.n_pack).astype(int) target_dtype = torch.tensor(0, dtype=self.compression_dtype).numpy().dtype @@ -567,6 +629,7 @@ def pack_tensor_with_numpy_impl(self, raw_tensor): return packed_tensor def pack_tensor_with_numpy(self, raw_tensor): + """Pack the tensor with numpy.""" if self.bits not in [2, 4, 8]: return self.pack_tensor_with_numpy_impl(raw_tensor) compression_dtype = torch.tensor(0, dtype=self.compression_dtype).numpy().dtype @@ -576,6 +639,7 @@ def pack_tensor_with_numpy(self, raw_tensor): return torch.from_numpy(packed_array).to(device=raw_tensor.device) def unpack_tensor_with_numpy(self, packed_tensor): + """Unpack the packed tensor with numpy.""" packed_array = packed_tensor.cpu().numpy() target_dtype = np.int8 if not hasattr(self, "qzeros") or "int" not in self.dtype else np.uint8 target_len = packed_array.shape[1] * self.n_pack @@ -595,18 +659,21 @@ def unpack_tensor_with_numpy(self, packed_tensor): return unpacked_tensor def pack_tensor(self, raw_tensor): + """Pack tensor.""" if "cuda" in raw_tensor.device.type: return self.pack_tensor_with_torch(raw_tensor) else: return self.pack_tensor_with_numpy(raw_tensor) def unpack_tensor(self, packed_tensor): + """Unpack tensor.""" if "cuda" in packed_tensor.device.type: return self.unpack_tensor_with_torch(packed_tensor) else: return self.unpack_tensor_with_numpy(packed_tensor) def forward(self, input): + """Forward function.""" if not hasattr(self, "weight"): weight = self.recover() device = self.scales.device @@ -624,6 +691,11 @@ def forward(self, input): return F.linear(input, weight, self.bias) def extra_repr(self) -> str: + """Extract the configuration string. + + Returns: + str: the configuration string. + """ tmp_str = "in_features={}, out_features={}, bits={}, group_size={}, bias={}".format( self.in_features, self.out_features, @@ -657,7 +729,8 @@ def forward(ctx, inputs, num_bits=4, group_size=1024, scheme="asym"): @staticmethod def backward(ctx, grad_outputs): - """ + """Backward function. + Args: ctx: Pytorch convention. 
grad_output: A tensor of gradient of outputs @@ -672,11 +745,15 @@ class TEQLinearFakeQuant(torch.nn.Module): """Wrapper quantization linear.""" def __init__(self, orig_layer, alpha=None, num_bits=4, group_size=-1, scheme="asym"): - """A forward hook to linear module - :param orig_layer: the original module - :param alpha: trainable alpha/scale - :param num_bits: quantization level - :param group_size: for fine-grained quantization.""" + """A forward hook to linear module. + + Args: + orig_layer: the original module + alpha: trainable alpha/scale + num_bits: quantization level + group_size: for fine-grained quantization. + scheme: symmetric quantization or asymmetric quantization. + """ super(TEQLinearFakeQuant, self).__init__() self.orig_layer = orig_layer self.alpha = alpha @@ -686,6 +763,7 @@ def __init__(self, orig_layer, alpha=None, num_bits=4, group_size=-1, scheme="as self.scheme = scheme def forward(self, x): + """Forward function.""" alpha = torch.clip(self.alpha, 1e-5) shape_len = len(x.shape) - 1 shape = (1,) * shape_len + (-1,) @@ -700,9 +778,12 @@ class MulLinear(torch.nn.Module): """Linear wrapper to apply scale to input.""" def __init__(self, module, input_scale=None): - """A forward hook to save input max of a module - :param module: the linear module - :param input_scale: scale for input.""" + """A forward hook to save input max of a module. + + Args: + module: the linear module. + input_scale: scale for input. + """ super().__init__() if input_scale is None: input_scale = torch.empty(module.in_features) @@ -711,13 +792,16 @@ def __init__(self, module, input_scale=None): @property def weight(self): + """Property weight.""" return self.linear.weight @weight.setter def weight(self, weight): + """Property weight setter.""" self.linear.weight = weight def forward(self, X): + """Forward function.""" X = torch.mul(X, self.input_scale) X = self.linear(X) return X diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py index c04327a62f4..509674d01c6 100644 --- a/neural_compressor/torch/algorithms/weight_only/rtn.py +++ b/neural_compressor/torch/algorithms/weight_only/rtn.py @@ -17,7 +17,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +"""RTN quantization.""" import copy from collections import OrderedDict @@ -42,6 +42,8 @@ class RTNQuantizer(Quantizer): + """RTN Quantizer.""" + def __init__(self, quant_config: OrderedDict = {}): """Init a RTNQuantizer object. diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py index 4a6e6a0d488..b3e2d95523b 100644 --- a/neural_compressor/torch/algorithms/weight_only/save_load.py +++ b/neural_compressor/torch/algorithms/weight_only/save_load.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +"""WOQ save and load.""" # pylint:disable=import-error import copy @@ -26,6 +26,12 @@ def save(model, output_dir="./saved_results"): + """Save the quantized model and config to the output path. + + Args: + model (torch.nn.module): raw fp32 model or prepared model. + output_dir (str, optional): output path to save. 
+ """ os.makedirs(output_dir, exist_ok=True) qmodel_weight_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), WEIGHT_NAME) qconfig_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), QCONFIG_NAME) @@ -65,6 +71,7 @@ def load(model_name_or_path, original_model=None, format=LoadFormat.DEFAULT, dev kwargs (remaining dictionary of keyword arguments, optional): remaining dictionary of keyword arguments for loading huggingface models. will be passed to the huggingface model's `__init__` method, such as 'trust_remote_code', 'revision'. + Returns: torch.nn.Module: quantized model """ @@ -74,7 +81,10 @@ def load(model_name_or_path, original_model=None, format=LoadFormat.DEFAULT, dev class WOQModelLoader: + """WOQ Model Loader.""" + def __init__(self, model_name_or_path, original_model=None, format=LoadFormat.DEFAULT, device="cpu", **kwargs): + """Init the WOQModelLoader object.""" # TODO: When loading WOQ model, use different WeightOnlyLinear module according to device. self.model_name_or_path = model_name_or_path self.original_model = original_model @@ -85,6 +95,14 @@ def __init__(self, model_name_or_path, original_model=None, format=LoadFormat.DE self.loaded_state_dict_keys = {} def load_woq_model(self): + """Load quantized weight-only quantization model. + + Raises: + ValueError: `format` in load function can only be 'huggingface' or 'default'. + + Returns: + torch.nn.Module: quantized model + """ if self.format == LoadFormat.HUGGINGFACE: model = self.load_hf_format_woq_model() logger.info("Loading HuggingFace weight-only quantization model successfully.") @@ -119,6 +137,15 @@ def load_woq_model(self): return model def load_inc_format_woq_model(self, qmodel_weight_file_path, qconfig_file_path): + """Load INC weight-only quantized model in local. + + Args: + qmodel_weight_file_path (str): path to the quantized model. + qconfig_file_path (str): path to the quant config. + + Returns: + torch.nn.Module: quantized model + """ qweights = torch.load(qmodel_weight_file_path) self.loaded_state_dict_keys = qweights.keys() @@ -130,6 +157,11 @@ def load_inc_format_woq_model(self, qmodel_weight_file_path, qconfig_file_path): return model def load_hf_format_woq_model(self): + """Load HuggingFace weight-only quantized model. + + Returns: + torch.nn.Module: quantized model + """ # check required package from neural_compressor.torch.utils import is_package_available diff --git a/neural_compressor/torch/algorithms/weight_only/teq.py b/neural_compressor/torch/algorithms/weight_only/teq.py index 595a2e8479f..f97efcf4e99 100644 --- a/neural_compressor/torch/algorithms/weight_only/teq.py +++ b/neural_compressor/torch/algorithms/weight_only/teq.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# +"""TEQ quantization.""" from typing import Any, List @@ -39,10 +39,15 @@ class TrainableEquivalentTransformation: _PREPARE_ATTRS_PREFIX = "_prepare_" def __init__(self, model, weight_config={}, absorb_to_layer=None, folding=True, example_inputs=None): - """ - :param model: the model for quantization - :param weight_config (dict, optional): contains all info required by RTN. Defaults to {}. - :param example_inputs: inputs for trace + """Init the TrainableEquivalentTransformation object. + + Args: + model (torch.nn.module): the model for quantization + weight_config (dict, optional): contains all info required by RTN. Defaults to {}. 
+ absorb_to_layer (dict): The layer dict that scale can be absorbed. Default to None. + folding(bool): Allow insert mul before linear when the scale cannot be absorbed by last layer. + Default to True. + example_inputs: inputs for trace. Default to None. """ self.model = model self.weight_config = weight_config @@ -78,8 +83,11 @@ def _post_init(self): self._post_initialized = True def _get_device(self): - """Get the model device - :return:Model device.""" + """Get the model device. + + Returns: + str: Model device. + """ device = get_accelerator().current_device_name() return device @@ -88,10 +96,11 @@ def _get_dtype(self): return p.data.dtype def add_tuning_scale(self, sqrt_w_init=False): - """The main entry of smooth quant - to the paper for more details - :param sqrt_w_init: use sqrt weight to init.""" + """Add tuning scales. + Args: + sqrt_w_init: use sqrt weight to init. + """ if not self.absorb_to_layer: self.absorb_to_layer = self._detect_absorb_to_layer(self.model, self.folding, self.example_inputs) if not self._post_initialized: @@ -157,10 +166,13 @@ def add_tuning_scale(self, sqrt_w_init=False): @torch.no_grad() def _absorb_scales(self, layer, scale, layer_name=""): - """Absorb the scale to the layer at output channel - :param layer: The module - :param scale: The scale to be absorbed - :param layer_name: The layer name.""" + """Absorb the scale to the layer at output channel. + + Args: + layer: the module. + scale: the scale to be absorbed. + layer_name: the layer name. + """ # for insert mul if not self.folding: # pragma: no cover if isinstance(layer, MulLinear): @@ -226,10 +238,12 @@ def _absorb_scales(self, layer, scale, layer_name=""): @torch.no_grad() def _scale_layer_weight(self, layer, scale): ##input channel - """Scale the layer weights at input channel, depthwise conv output channel - :param layer_name: The layer name - :param scale: The scale to be multiplied - :return:""" + """Scale the layer weights at input channel, depthwise conv output channel. + + Args: + layer: the layer. + scale: the scale to be multiplied. + """ if layer.__class__.__name__ == "MulLinear": layer = layer.linear @@ -331,10 +345,11 @@ def quantize(self, **kwargs): self.model = model def save(self, save_scale_file="", save_state_dict_file=""): - """ - save alpha/scale or model weight - :param save_scale_file: save alpha/scale with torch.save - :param save_state_dict_file: save model state_dict + """Save alpha/scale or model weight. + + Args: + save_scale_file: path to save alpha/scale with torch.save. + save_state_dict_file: path to save model state_dict. """ if save_scale_file: # pragma: no cover torch.save(self.trained_alphas, save_scale_file) @@ -344,8 +359,10 @@ def save(self, save_scale_file="", save_state_dict_file=""): class TEQuantizer(Quantizer): + """TEQ Quantizer.""" def __init__(self, quant_config, folding, example_inputs, absorb_to_layer=None): + """Init the TEQuantizer object.""" super().__init__(quant_config=quant_config) self.folding = folding self.absorb_to_layer = absorb_to_layer @@ -363,6 +380,7 @@ def prepare(self, model, *args, **kwargs): Args: model: A float model to be quantized. + Returns: A prepared model. """ @@ -376,6 +394,14 @@ def prepare(self, model, *args, **kwargs): return float_model def convert(self, model, *args: Any, **kwargs: Any): + """Convert the prepared model to a quantized model. + + Args: + model (torch.nn.Module): the prepared model + + Returns: + The quantized model. 
+ """ for attr in self._quantizer._PREPARE_ATTRS: setattr(self._quantizer, attr, getattr(model, self._quantizer._PREPARE_ATTRS_PREFIX + attr, None)) self._quantizer.model = model diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py index 8f46b778ec5..255c2d6db2a 100644 --- a/neural_compressor/torch/algorithms/weight_only/utility.py +++ b/neural_compressor/torch/algorithms/weight_only/utility.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +"""Weight-Only utility.""" import torch from neural_compressor.torch.utils import accelerator, device_synchronize, logger @@ -527,6 +527,7 @@ def quant_weight_w_scale(weight, scale, zp=None, group_size=-1, dtype="int"): # AWQ Required, copy from neural_compressor/adaptor/torch_utils/smooth_quant.py def model_forward(model, dataloader, iters, device): + """The model forward function.""" try: cnt = 0 for idx, (input, label) in enumerate(dataloader): @@ -546,6 +547,7 @@ def model_forward(model, dataloader, iters, device): # copy from neural_compressor/adaptor/torch_utils/smooth_quant.py # TODO: potential bug, data type def forward_wrapper(model, input, device=torch.device("cpu")): + """The forward wrapper.""" try: model = model.to(device) input = move_input_to_device(input, device) @@ -566,6 +568,7 @@ def forward_wrapper(model, input, device=torch.device("cpu")): # copy from neural_compressor/adaptor/torch_utils/smooth_quant.py def move_input_to_device(input, device=torch.device("cpu")): + """Move input to the spevific device.""" if isinstance(input, dict) or isinstance(input, UserDict): tmp_input = {} for k, inp in input.items(): @@ -669,6 +672,7 @@ def get_absorb_layers(model, example_inputs, supported_layers=["Linear"], foldin # copy from neural_compressor/adaptor/torch_utils/smooth_quant.py def get_parent(node, all_parents=False): + """Get parent of node.""" if node.inputs() is None: return None elif len(list(node.inputs())) == 0: @@ -705,9 +709,10 @@ def get_module(model, key): # copy from neural_compressor/adaptor/torch_utils/smooth_quant.py class GraphTrace: - """""" + """GraphTrace.""" def __init__(self): + """Init the GraphTrace object.""" self.supported_torch_module_to_aten = { "Linear": "aten::linear", "Conv2d": "aten::_convolution", @@ -736,6 +741,15 @@ def __init__(self): ] ##TODO,support more norm def trace(self, model, dummy_input): + """Trace a torch model. + + Args: + model (torch.nn.module): model to be trace. + dummy_input : dummy input. + + Returns: + traced model. + """ traced_model = None optimize_numerics = False orig_device = str(next(model.parameters()).device) @@ -775,6 +789,15 @@ def trace(self, model, dummy_input): return traced_model def get_nodes(self, traced_model, op_types=["Linear"]): + """Get nodes from traced model. + + Args: + traced_model: traced model. + op_types (list, optional): . Defaults to ["Linear"]. + + Returns: + list: nodes. + """ if isinstance(op_types, str): op_types = [op_types] nodes = [] @@ -787,6 +810,14 @@ def get_nodes(self, traced_model, op_types=["Linear"]): return nodes def get_prev_absorb_layer(self, nodes): + """Get previous absorb layers. + + Args: + nodes (list): target nodes. 
+ + Returns: + list: previous absorb layer + """ prev_absorb_layer = [] for node in nodes: parent = get_parent(node) @@ -815,6 +846,14 @@ def get_prev_absorb_layer(self, nodes): return prev_absorb_layer def skip_op_absorb_helper(self, parent_node): + """Skip op absorption. + + Args: + parent_node : parent node. + + Returns: + bool: True or False. + """ for val_user in list(parent_node.outputs())[0].uses(): next_node = val_user.user if next_node.kind() == "aten::size": @@ -830,6 +869,14 @@ def skip_op_absorb_helper(self, parent_node): return True def mapping_torch_module_to_aten(self, op_types): + """Mapping torch module to aten. + + Args: + op_types : op types. + + Returns: + list: the mapping results. + """ res = [] for op in op_types: if op not in self.supported_torch_module_to_aten.keys(): @@ -840,11 +887,7 @@ def mapping_torch_module_to_aten(self, op_types): return res def _check_valid_conv(self, module): - """Remove group conv except depthwise conv - :param module: - - :return: - """ + """Remove group conv except depthwise conv.""" if not isinstance(module, torch.nn.Conv2d): return True if module.groups > 1: @@ -855,6 +898,17 @@ def _check_valid_conv(self, module): return True def get_absorb_to_layer(self, model, example_input, op_types, skip_unsupported_layers=True): + """Get absorbed layers of a model. + + Args: + model: torch model + example_input: used to trace torch model. + op_types: op types. + skip_unsupported_layers (bool, optional): unsupported layers to skip. Defaults to True. + + Returns: + absorb to layer, no absorb layers + """ traced_model = self.trace(model, example_input) if traced_model is None: return None, None @@ -883,6 +937,16 @@ def get_absorb_to_layer(self, model, example_input, op_types, skip_unsupported_l return absorb_to_layer, no_absorb_layers def remove_unsupported_layers(self, model, absorb_to_layer, no_absorb_layers): + """Remove unsupported layers from layers to be absorb. + + Args: + model : torch model. + absorb_to_layer (dict): layers to be absorb. + no_absorb_layers (dict): unsupported layers. + + Returns: + dict: the new layers to be absorb. + """ res = {} for key in absorb_to_layer.keys(): absorb_layer = get_module(model, key) @@ -931,6 +995,7 @@ def get_example_input(dataloader, i=1): Args: dataloader (object): calibration dataset. + Returns: example_inp (object). """ @@ -1044,10 +1109,15 @@ def get_module_input_output( total_values = defaultdict(defaultdict) def _save_input_output_hook(name, record_input=False, record_output=False): - """ - A forward hook to save input and output values of a module - param name: the module name - return: A hook function + """A forward hook to save input and output values of a module. + + Args: + name: the module name. + record_input (bool): to record input. + record_ouput (bool): to record output. + + Returns: + A hook function """ def _hook(module, inputs, outputs): diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 75e6460a53e..66f01a50c75 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -162,6 +162,8 @@ def __init__( double_quant_use_sym (bool): Indicates whether double_quant scale are symmetric. Default is True. double_quant_group_size (int): Size of double_quant groups. Default is 32. quant_lm_head (bool): Indicates whether quantize the lm_head layer in transformers。 Default is False. 
+ white_list (Optional[List[OP_NAME_OR_MODULE_TYPE]]): White list of operator names or module types. + Default is DEFAULT_WHITE_LIST. """ super().__init__(white_list=white_list) self.dtype = dtype @@ -184,6 +186,11 @@ def __init__( @classmethod def register_supported_configs(cls) -> List[OperatorConfig]: + """Register supported configurations for RTN. + + Returns: + List[OperatorConfig]: List of supported operator configurations. + """ supported_configs = [] linear_rtn_config = RTNConfig( dtype=[ @@ -220,6 +227,16 @@ def register_supported_configs(cls) -> List[OperatorConfig]: def to_config_mapping( self, config_list: List[BaseConfig] = None, model_info: List[Tuple[str, str]] = None ) -> OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]: + """Convert the configuration to a mapping. + + Args: + config_list (List[BaseConfig]): List of base configurations. Default is None. + model_info (List[Tuple[str, str]]): List of tuples containing the name and type of each module in the model. + Default is None. + + Returns: + OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]: The configuration mapping. + """ if not self.quant_lm_head: self.set_local( LM_HEAD_NAMES, RTNConfig(dtype="fp32", use_layer_wise=self.use_layer_wise, model_path=self.model_path) @@ -229,6 +246,14 @@ def to_config_mapping( @staticmethod def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: + """Get information about the model. + + Args: + model (torch.nn.Module): The model. + + Returns: + List[Tuple[str, Callable]]: List of tuples containing the name and type of each module in the model. + """ filter_result = [] for op_name, module in model.named_modules(): if isinstance(module, WOQ_WHITE_LIST): @@ -239,12 +264,22 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: @classmethod def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: + """Get the configuration set for tuning. + + Returns: + Union[None, "RTNConfig", List["RTNConfig"]]: The configuration set for tuning. + """ return RTNConfig( dtype=["int4", "nf4"], use_sym=[True, False], group_size=[32, 128], use_mse_search=[False, True] ) @classmethod def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "RTNConfig"]: + """Get the predefined configuration set. + + Returns: + Dict[torch_utils.ProcessorType, "RTNConfig"]: The configuration of RTN. + """ pre_defined_configs: Dict[torch_utils.ProcessorType, RTNConfig] = {} pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True) pre_defined_configs[torch_utils.ProcessorType.Server] = cls() @@ -252,11 +287,28 @@ def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "RTNConfig"]: def get_default_rtn_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig: + """Get the default configuration of RTN. + + Args: + processor_type (Optional[Union[str, torch_utils.ProcessorType]], optional): The user-specified processor type. + Defaults to None. + + Returns: + RTNConfig: _description_ + """ process_type = torch_utils.get_processor_type_from_user_config(processor_type) return RTNConfig.get_predefined_configs()[process_type] def get_default_double_quant_config(type="BNB_NF4"): + """Get the default configuration of double quant. + + Args: + type (str, optional): double quant type. Defaults to "BNB_NF4". + + Returns: + dict: double quant config. 
+ """ from neural_compressor.torch.utils.constants import DOUBLE_QUANT_CONFIGS assert type in DOUBLE_QUANT_CONFIGS, "Supported double quant configs: {}".format(list(DOUBLE_QUANT_CONFIGS.keys())) @@ -348,6 +400,8 @@ def __init__( static_groups (bool): Whether to calculate group wise quantization parameters in advance. This option mitigate actorder's extra computational requirements. Default is False. + white_list (Optional[List[OP_NAME_OR_MODULE_TYPE]]): White list of operator names or module types. + Default is DEFAULT_WHITE_LIST. """ assert not quant_lm_head, "GPTQ doesn't support lm_head quantization currently, it's coming soon!" super().__init__(white_list=white_list) @@ -375,6 +429,11 @@ def __init__( @classmethod def register_supported_configs(cls) -> List[OperatorConfig]: + """Register supported configurations for GPTQ. + + Returns: + List[OperatorConfig]: List of supported operator configurations. + """ supported_configs = [] # TODO(Yi) linear_gptq_config = GPTQConfig() @@ -385,6 +444,16 @@ def register_supported_configs(cls) -> List[OperatorConfig]: def to_config_mapping( self, config_list: List[BaseConfig] = None, model_info: List[Tuple[str, str]] = None ) -> OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]: + """Convert the configuration to a mapping. + + Args: + config_list (List[BaseConfig]): List of base configurations. Default is None. + model_info (List[Tuple[str, str]]): List of tuples containing the name and type of each module in the model. + Default is None. + + Returns: + OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]: The configuration mapping. + """ if not self.quant_lm_head: self.set_local( LM_HEAD_NAMES, GPTQConfig(dtype="fp32", use_layer_wise=self.use_layer_wise, model_path=self.model_path) @@ -394,6 +463,14 @@ def to_config_mapping( @staticmethod def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: + """Get information about the model. + + Args: + model (torch.nn.Module): The model. + + Returns: + List[Tuple[str, Callable]]: List of tuples containing the name and type of each module in the model. + """ filter_result = [] for op_name, module in model.named_modules(): if isinstance(module, WOQ_WHITE_LIST): @@ -404,6 +481,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: @classmethod def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig"]]: + """Get the configuration set for tuning. + + Returns: + Union[None, "GPTQConfig", List["GPTQConfig"]]: The configuration set for tuning. + """ # TODO fwk owner needs to update it. return GPTQConfig(act_order=[True, False], use_sym=[False, True]) @@ -505,6 +587,8 @@ def __init__( folding(bool): Allow insert mul before linear when the scale cannot be absorbed by last layer, default is False. absorb_layer_dict (dict): The layer dict that scale can be absorbed, default is {}. + white_list (Optional[List[OP_NAME_OR_MODULE_TYPE]]): White list of operator names or module types. + Default is DEFAULT_WHITE_LIST. """ super().__init__(white_list=white_list) self.dtype = dtype @@ -531,6 +615,11 @@ def __init__( @classmethod def register_supported_configs(cls) -> List[OperatorConfig]: + """Register supported configurations for AWQ. + + Returns: + List[OperatorConfig]: List of supported operator configurations. 
+ """ supported_configs = [] # TODO(Yi) linear_awq_config = AWQConfig() @@ -541,6 +630,16 @@ def register_supported_configs(cls) -> List[OperatorConfig]: def to_config_mapping( self, config_list: List[BaseConfig] = None, model_info: List[Tuple[str, str]] = None ) -> OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]: + """Convert the configuration to a mapping. + + Args: + config_list (List[BaseConfig]): List of base configurations. Default is None. + model_info (List[Tuple[str, str]]): List of tuples containing the name and type of each module in the model. + Default is None. + + Returns: + OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]: The configuration mapping. + """ if not self.quant_lm_head: self.set_local( LM_HEAD_NAMES, AWQConfig(dtype="fp32", use_layer_wise=self.use_layer_wise, model_path=self.model_path) @@ -550,6 +649,14 @@ def to_config_mapping( @staticmethod def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: + """Get information about the model. + + Args: + model (torch.nn.Module): The model. + + Returns: + List[Tuple[str, Callable]]: List of tuples containing the name and type of each module in the model. + """ filter_result = [] for op_name, module in model.named_modules(): if isinstance(module, WOQ_WHITE_LIST): @@ -560,6 +667,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: @classmethod def get_config_set_for_tuning(cls) -> Union[None, "AWQConfig", List["AWQConfig"]]: + """Get the configuration set for tuning. + + Returns: + Union[None, "AWQConfig", List["AWQConfig"]]: The configuration set for tuning. + """ # TODO fwk owner needs to update it. return AWQConfig(bits=[4, 6]) @@ -648,6 +760,8 @@ def __init__( absorb_to_layer (dict): The layer dict that scale can be absorbed, default is {}. folding(bool): Allow insert mul before linear when the scale cannot be absorbed by last layer, default is False. + white_list (Optional[List[OP_NAME_OR_MODULE_TYPE]]): White list of operator names or module types. + Default is DEFAULT_WHITE_LIST. """ super().__init__(white_list=white_list) self.dtype = dtype @@ -671,6 +785,11 @@ def __init__( @classmethod def register_supported_configs(cls) -> List[OperatorConfig]: + """Register supported configurations for TEQ. + + Returns: + List[OperatorConfig]: List of supported operator configurations. + """ supported_configs = [] # TODO(Yi) linear_teq_config = TEQConfig() @@ -681,6 +800,16 @@ def register_supported_configs(cls) -> List[OperatorConfig]: def to_config_mapping( self, config_list: List[BaseConfig] = None, model_info: List[Tuple[str, str]] = None ) -> OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]: + """Convert the configuration to a mapping. + + Args: + config_list (List[BaseConfig]): List of base configurations. Default is None. + model_info (List[Tuple[str, str]]): List of tuples containing the name and type of each module in the model. + Default is None. + + Returns: + OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]: The configuration mapping. + """ if not self.quant_lm_head: self.set_local(LM_HEAD_NAMES, TEQConfig(dtype="fp32")) config_mapping = super().to_config_mapping(config_list, model_info) @@ -688,6 +817,14 @@ def to_config_mapping( @staticmethod def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: + """Get information about the model. + + Args: + model (torch.nn.Module): The model. 
+ + Returns: + List[Tuple[str, Callable]]: List of tuples containing the name and type of each module in the model. + """ filter_result = [] for op_name, module in model.named_modules(): if isinstance(module, WOQ_WHITE_LIST): @@ -698,6 +835,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: @classmethod def get_config_set_for_tuning(cls) -> Union[None, "TEQConfig", List["TEQConfig"]]: + """Get the configuration set for tuning. + + Returns: + Union[None, "TEQConfig", List["TEQConfig"]]: The configuration set for tuning. + """ # TODO fwk owner needs to update it. return TEQConfig(bits=[4, 6]) @@ -805,9 +947,11 @@ def __init__( not_use_best_mse (bool): Whether to use mean squared error (default is False). dynamic_max_gap (int): The dynamic maximum gap (default is -1). scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels - have different choices. + have different choices. use_layer_wise (bool): Enables quantize model per layer. Defaults to False. multimodal(bool): Enable multimodal model quantization, (default is "False"). + white_list (Optional[List[OP_NAME_OR_MODULE_TYPE]]): White list of operator names or module types. + Default is DEFAULT_WHITE_LIST. """ super().__init__(white_list=white_list) self.dtype = dtype @@ -842,6 +986,11 @@ def __init__( @classmethod def register_supported_configs(cls) -> List[OperatorConfig]: + """Register supported configurations for AutoRound. + + Returns: + List[OperatorConfig]: List of supported operator configurations. + """ supported_configs = [] # TODO(Yi) linear_AUTOROUND_config = AutoRoundConfig() @@ -851,6 +1000,14 @@ def register_supported_configs(cls) -> List[OperatorConfig]: @staticmethod def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: + """Get information about the model. + + Args: + model (torch.nn.Module): The model. + + Returns: + List[Tuple[str, Callable]]: List of tuples containing the name and type of each module in the model. + """ filter_result = [] for op_name, module in model.named_modules(): if isinstance(module, WOQ_WHITE_LIST): @@ -861,6 +1018,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: @classmethod def get_config_set_for_tuning(cls) -> Union[None, "AutoRoundConfig", List["AutoRoundConfig"]]: + """Get the configuration set for tuning. + + Returns: + Union[None, "AutoRoundConfig", List["AutoRoundConfig"]]: The configuration set for tuning. + """ # TODO fwk owner needs to update it. return AutoRoundConfig(bits=[4, 6])
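
[Reviewer note] As a closing illustration of the APIs this patch documents, here is a minimal, hedged sketch of the typical weight-only flow: build an RTNConfig (using the options shown in RTNConfig.get_config_set_for_tuning), quantize, then persist and reload with the save/load pair from weight_only/save_load.py. The prepare/convert import path and the example model name are assumptions based on the package layout rather than something introduced by this patch.

    import torch
    from transformers import AutoModelForCausalLM
    from neural_compressor.torch.quantization import RTNConfig, prepare, convert   # assumed entry points
    from neural_compressor.torch.algorithms.weight_only.save_load import save, load

    fp32_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")          # assumed example model

    quant_config = RTNConfig(dtype="int4", use_sym=True, group_size=32)

    model = prepare(fp32_model, quant_config)   # RTN needs no calibration data
    model = convert(model)                      # supported Linear layers become WeightOnlyLinear modules (see modules.py)

    save(model, output_dir="./saved_results")   # writes the quantized weights and quant config under output_dir
    loaded = load("./saved_results", original_model=AutoModelForCausalLM.from_pretrained("facebook/opt-125m"))
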