add docstring for static quant and smooth quant #1936

Merged: 5 commits, Jul 19, 2024
4 changes: 3 additions & 1 deletion .azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt
@@ -15,7 +15,9 @@
/neural-compressor/neural_compressor/strategy
/neural-compressor/neural_compressor/training.py
/neural-compressor/neural_compressor/utils
+/neural-compressor/neural_compressor/torch/algorithms/static_quant
+/neural-compressor/neural_compressor/torch/algorithms/smooth_quant
/neural_compressor/torch/algorithms/pt2e_quant
/neural_compressor/torch/export
/neural_compressor/common
-/neural_compressor/torch/algorithms/weight_only/hqq
+/neural_compressor/torch/algorithms/weight_only/hqq
2 changes: 2 additions & 0 deletions neural_compressor/torch/algorithms/smooth_quant/__init__.py
@@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The SmoothQuant-related modules."""


from .utility import *
from .smooth_quant import SmoothQuantQuantizer
4 changes: 3 additions & 1 deletion neural_compressor/torch/algorithms/smooth_quant/save_load.py
@@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Save and load the quantized model."""


# pylint:disable=import-error
import torch
@@ -32,7 +34,7 @@ def recover_model_from_json(model, json_file_path, example_inputs):  # pragma: no cover
example_inputs (tuple or torch.Tensor or dict): example inputs that will be passed to the ipex function.

Returns:
-(object): quantized model
+model (object): quantized model
"""
from torch.ao.quantization.observer import MinMaxObserver

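To make the recover flow concrete, here is a minimal usage sketch. The stand-in model, the saved_results/qconfig.json path, and the input shape are illustrative assumptions, not taken from this PR:

import torch

from neural_compressor.torch.algorithms.smooth_quant.save_load import recover_model_from_json

# Stand-in fp32 model; in practice this is the original model architecture.
fp32_model = torch.nn.Sequential(torch.nn.Linear(64, 64))
# Example inputs must match what the ipex function will trace with.
example_inputs = torch.randn(4, 64)

# Rebuild the quantized model from the op-level config dumped at save time.
q_model = recover_model_from_json(fp32_model, "saved_results/qconfig.json", example_inputs)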
62 changes: 45 additions & 17 deletions neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py
@@ -14,6 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The quantizer using SmoothQuant path."""


import json
import os
@@ -49,6 +51,8 @@


class SmoothQuantQuantizer(Quantizer):
"""SmoothQuantQuantizer Class."""

def __init__(self, quant_config: OrderedDict = {}): # pragma: no cover
"""Init a SmoothQuantQuantizer object.

@@ -61,9 +65,9 @@ def prepare(self, model, example_inputs, inplace=True, *args, **kwargs):
"""Prepares a given model for quantization.

Args:
-model: A float model to be quantized.
-example_inputs: Used to trace torch model.
-inplace: Whether to carry out model transformations in-place. Defaults to True.
+model (torch.nn.Module): raw fp32 model or prepared model.
+example_inputs (tensor/tuple/dict): used to trace torch model.
+inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.

Returns:
A prepared model.
@@ -128,9 +132,9 @@ def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
"""Converts a prepared model to a quantized model.

Args:
-model: The prepared model to be converted.
-example_inputs: Used to trace torch model.
-inplace: Whether to carry out model transformations in-place. Defaults to True.
+model (QuantizationInterceptionModule): the prepared model to be converted.
+example_inputs (tensor/tuple/dict): used to trace torch model.
+inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.

Returns:
A quantized model.
@@ -153,14 +157,14 @@ def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
return model

def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args, **kwargs):
"""Execute the quantize process on the specified model.
"""Executes the quantize process on the specified model.

Args:
-model: a float model to be quantized.
-tune_cfg: quantization config for ops.
-run_fn: a calibration function for calibrating the model.
-example_inputs: used to trace torch model.
-inplace: whether to carry out model transformations in-place.
+model (torch.nn.Module): raw fp32 model or prepared model.
+tune_cfg (OrderedDict): quantization config for ops.
+run_fn (Callable): a calibration function for calibrating the model.
+example_inputs (tensor/tuple/dict): used to trace torch model.
+inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.

Returns:
A quantized model.
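Taken together, these APIs support a prepare/calibrate/convert flow like the sketch below. The toy model, calibration loop, and empty quant_config are illustrative assumptions (a real config maps each op to a quantization scheme, and intel-extension-for-pytorch must be installed):

import torch

from neural_compressor.torch.algorithms.smooth_quant import SmoothQuantQuantizer

fp32_model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())
example_inputs = torch.randn(4, 64)

quantizer = SmoothQuantQuantizer(quant_config={})  # empty config is illustrative only
prepared = quantizer.prepare(fp32_model, example_inputs)

# Calibration: feed representative data through the prepared model.
with torch.no_grad():
    for _ in range(8):
        prepared(torch.randn(4, 64))

q_model = quantizer.convert(prepared, example_inputs)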
@@ -255,6 +259,22 @@ def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args, **kwargs):
def qdq_quantize(
model, tune_cfg, run_fn, example_inputs, inplace, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, sq
):
"""Executes the smooth quantize process.

Args:
model (torch.nn.Module): raw fp32 model or prepared model.
tune_cfg (OrderedDict): quantization config for ops.
run_fn (Callable): a calibration function for calibrating the model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool): whether to carry out model transformations in-place.
cfgs (dict): configs loaded from ipex config path.
op_infos_from_cfgs (dict): dict containing configs that have been parsed for each op.
output_tensor_id_op_name (dict): dict containing op names corresponding to 'op_infos_from_cfgs'.
sq (TorchSmoothQuant): TorchSmoothQuant class containing sq infos.

Returns:
A quantized model.
"""
smoothquant_scale_info = sq.sq_scale_info
sq_minmax_init = True if tune_cfg.get("act_algo", "kl") == "minmax" else False

@@ -325,6 +345,14 @@ def qdq_quantize(


def _apply_pre_optimization(model, tune_cfg, sq, recover=False):
"""Retrieves sq info to absorb the scale to the layer at output channel.

Args:
model (QuantizationInterceptionModule): a prepared model.
tune_cfg (OrderedDict): quantization config for ops.
sq (TorchSmoothQuant): TorchSmoothQuant class containing sq infos.
recover (bool, optional): whether to recover the scale. Defaults to False.
"""
sq_max_info = {}
if sq.record_max_info:
sq_max_info = sq.max_value_info
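As an illustration of "absorbing the scale at the output channel" (and of the recover flag), here is a hypothetical helper; it sketches the idea and is not this module's implementation:

import torch

def scale_output_channels(linear: torch.nn.Linear, scale: torch.Tensor, recover: bool = False) -> None:
    # scale: one entry per output channel; recover=True undoes a prior absorption.
    s = 1.0 / scale if recover else scale
    with torch.no_grad():
        linear.weight.mul_(s.view(-1, 1))
        if linear.bias is not None:
            linear.bias.mul_(s)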
@@ -354,13 +382,13 @@ def _apply_pre_optimization(model, tune_cfg, sq, recover=False):


def _ipex_post_quant_process(model, example_inputs, use_bf16, inplace=False):
"""Convert to a jit model.
"""Converts to a jit model.

Args:
-model: a prepared model.
-example_inputs: used to trace torch model.
-use_bf16: whether to use bf16 for mixed precision.
-inplace: whether to carry out model transformations in-place.
+model (QuantizationInterceptionModule): a prepared model.
+example_inputs (tensor/tuple/dict): used to trace torch model.
+use_bf16 (bool): whether to use bf16 for mixed precision.
+inplace (bool, optional): whether to carry out model transformations in-place. Defaults to False.

Returns:
A converted jit model.
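For reference, the jit conversion this function performs typically looks like the following sketch; the autocast handling of use_bf16 is an assumption about the usual IPEX trace-and-freeze pattern, not this function's exact body:

import contextlib

import torch

def to_jit(model, example_inputs, use_bf16=False):
    model.eval()
    ctx = torch.autocast("cpu", dtype=torch.bfloat16) if use_bf16 else contextlib.nullcontext()
    with torch.no_grad(), ctx:
        traced = torch.jit.trace(model, example_inputs)
        traced = torch.jit.freeze(traced)  # fold constants and drop training-only logic
    return traced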