From 3c27b95f2e1fbf8e6b36ffebd33a51c51ee040e9 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 18 Jun 2024 14:06:32 +0800 Subject: [PATCH 1/5] dump op statistics Signed-off-by: Kaihui-intel --- .../torch/algorithms/weight_only/utility.py | 4 +- .../torch/quantization/algorithm_entry.py | 9 +- neural_compressor/torch/utils/utility.py | 97 +++++++++++++++++++ 3 files changed, 107 insertions(+), 3 deletions(-) diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py index ce13990c00f..02b20251461 100644 --- a/neural_compressor/torch/algorithms/weight_only/utility.py +++ b/neural_compressor/torch/algorithms/weight_only/utility.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math - import torch +import prettytable as pt from neural_compressor.torch.utils import accelerator, device_synchronize, logger @@ -1134,3 +1133,4 @@ def convert_dtype_str2torch(str_dtype): return torch.bfloat16 else: assert False, "Unsupported str dtype {} to torch dtype".format(str_dtype) + \ No newline at end of file diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index cf429c2118f..d068ccf9701 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -45,7 +45,7 @@ StaticQuantConfig, TEQConfig, ) -from neural_compressor.torch.utils import get_quantizer, is_ipex_imported, logger, postprocess_model, register_algo +from neural_compressor.torch.utils import get_quantizer, is_ipex_imported, logger, postprocess_model, register_algo, dump_model_op_stats from neural_compressor.torch.utils.constants import PT2E_DYNAMIC_QUANT, PT2E_STATIC_QUANT @@ -89,6 +89,8 @@ def rtn_entry( model.qconfig = configs_mapping model.save = MethodType(save, model) postprocess_model(model, mode, quantizer) + dump_model_op_stats(mode, configs_mapping) + return model @@ -141,6 +143,7 @@ def gptq_entry( model.qconfig = configs_mapping model.save = MethodType(save, model) postprocess_model(model, mode, quantizer) + dump_model_op_stats(mode, configs_mapping) return model @@ -361,6 +364,7 @@ def awq_quantize_entry( model.qconfig = configs_mapping model.save = MethodType(save, model) postprocess_model(model, mode, quantizer) + dump_model_op_stats(mode, configs_mapping) return model @@ -415,6 +419,7 @@ def teq_quantize_entry( model.qconfig = configs_mapping model.save = MethodType(save, model) postprocess_model(model, mode, quantizer) + dump_model_op_stats(mode, configs_mapping) return model @@ -491,6 +496,7 @@ def autoround_quantize_entry( model.qconfig = configs_mapping model.save = MethodType(save, model) postprocess_model(model, mode, quantizer) + dump_model_op_stats(mode, configs_mapping) return model @@ -511,6 +517,7 @@ def hqq_entry( quantizer = get_quantizer(model, quantizer_cls=HQQuantizer, quant_config=configs_mapping) model = quantizer.execute(model, mode=mode) postprocess_model(model, mode, quantizer) + dump_model_op_stats(mode, configs_mapping) return model diff --git a/neural_compressor/torch/utils/utility.py b/neural_compressor/torch/utils/utility.py index b7855d506e6..38bb5f343c2 100644 --- a/neural_compressor/torch/utils/utility.py +++ b/neural_compressor/torch/utils/utility.py @@ -16,6 +16,7 @@ from typing import Callable, Dict, List, Tuple, Union import torch +import prettytable as pt from typing_extensions import TypeAlias 
 from neural_compressor.common.utils import LazyImport, Mode, logger
@@ -163,3 +164,99 @@ def postprocess_model(model, mode, quantizer):
     elif mode == Mode.CONVERT or mode == Mode.QUANTIZE:
         if getattr(model, "quantizer", False):
             del model.quantizer
+
+class Statistics:  # pragma: no cover
+    """The statistics printer."""
+
+    def __init__(self, data, header, field_names, output_handle=logger.info):
+        """Init a Statistics object.
+
+        Args:
+            data: The statistics data
+            header: The table header
+            field_names: The field names
+            output_handle: The output logging method
+        """
+        self.field_names = field_names
+        self.header = header
+        self.data = data
+        self.output_handle = output_handle
+        self.tb = pt.PrettyTable(min_table_width=40)
+
+    def print_stat(self):
+        """Print the statistics."""
+        valid_field_names = []
+        for index, value in enumerate(self.field_names):
+            if index < 2:
+                valid_field_names.append(value)
+                continue
+
+            if any(i[index] for i in self.data):
+                valid_field_names.append(value)
+        self.tb.field_names = valid_field_names
+        for i in self.data:
+            tmp_data = []
+            for index, value in enumerate(i):
+                if self.field_names[index] in valid_field_names:
+                    tmp_data.append(value)
+            if any(tmp_data[1:]):
+                self.tb.add_row(tmp_data)
+        lines = self.tb.get_string().split("\n")
+        self.output_handle("|" + self.header.center(len(lines[0]) - 2, "*") + "|")
+        for i in lines:
+            self.output_handle(i)
+
+def dump_model_op_stats(mode, tune_cfg):
+    """This is a function to dump quantizable ops of model to user.
+
+    Args:
+        mode (Mode): the quantization mode
+        tune_cfg (dict): quantization config
+    Returns:
+        None
+    """
+    if mode == Mode.PREPARE:
+        return
+    res = {}
+    # collect all dtype info and build empty results with existing op_type
+    dtype_set = set()
+    for op, config in tune_cfg.items():
+        op_type = op[1]
+        config = config.to_dict()
+        if config["dtype"] != "fp32":
+            num_bits = config["bits"]
+            group_size = config["group_size"]
+            dtype_str = "A32W{}G{}".format(num_bits, group_size)
+            dtype_set.add(dtype_str)
+    dtype_set.add("FP32")
+    dtype_list = list(dtype_set)
+    dtype_list.sort()
+
+    for op, config in tune_cfg.items():
+        config = config.to_dict()
+        op_type = op[1]
+        if op_type not in res.keys():
+            res[op_type] = {dtype: 0 for dtype in dtype_list}
+
+    # fill in results with op_type and dtype
+    for op, config in tune_cfg.items():
+        config = config.to_dict()
+        op_type = op[1]
+        if config["dtype"] == "fp32":
+            res[op_type]["FP32"] += 1
+        else:
+            num_bits = config["bits"]
+            group_size = config["group_size"]
+            dtype_str = "A32W{}G{}".format(num_bits, group_size)
+            res[op_type][dtype_str] += 1
+
+    # update stats format for dump.
+ field_names = ["Op Type", "Total"] + field_names.extend(dtype_list) + output_data = [] + for op_type in res.keys(): + field_results = [op_type, sum(res[op_type].values())] + field_results.extend([res[op_type][dtype] for dtype in dtype_list]) + output_data.append(field_results) + + Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() \ No newline at end of file From 8e9e5a7aad221274aaef8e3427ad0eaa4e9a752c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 06:12:23 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../torch/algorithms/weight_only/utility.py | 3 +- .../torch/quantization/algorithm_entry.py | 11 +- neural_compressor/torch/utils/utility.py | 110 +++++++++--------- 3 files changed, 66 insertions(+), 58 deletions(-) diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py index 02b20251461..0a45e63a716 100644 --- a/neural_compressor/torch/algorithms/weight_only/utility.py +++ b/neural_compressor/torch/algorithms/weight_only/utility.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import torch import prettytable as pt +import torch from neural_compressor.torch.utils import accelerator, device_synchronize, logger @@ -1133,4 +1133,3 @@ def convert_dtype_str2torch(str_dtype): return torch.bfloat16 else: assert False, "Unsupported str dtype {} to torch dtype".format(str_dtype) - \ No newline at end of file diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index d068ccf9701..b815b07b5f8 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -45,7 +45,14 @@ StaticQuantConfig, TEQConfig, ) -from neural_compressor.torch.utils import get_quantizer, is_ipex_imported, logger, postprocess_model, register_algo, dump_model_op_stats +from neural_compressor.torch.utils import ( + dump_model_op_stats, + get_quantizer, + is_ipex_imported, + logger, + postprocess_model, + register_algo, +) from neural_compressor.torch.utils.constants import PT2E_DYNAMIC_QUANT, PT2E_STATIC_QUANT @@ -90,7 +97,7 @@ def rtn_entry( model.save = MethodType(save, model) postprocess_model(model, mode, quantizer) dump_model_op_stats(mode, configs_mapping) - + return model diff --git a/neural_compressor/torch/utils/utility.py b/neural_compressor/torch/utils/utility.py index 38bb5f343c2..975e98b3fda 100644 --- a/neural_compressor/torch/utils/utility.py +++ b/neural_compressor/torch/utils/utility.py @@ -15,8 +15,8 @@ from typing import Callable, Dict, List, Tuple, Union -import torch import prettytable as pt +import torch from typing_extensions import TypeAlias from neural_compressor.common.utils import LazyImport, Mode, logger @@ -165,6 +165,7 @@ def postprocess_model(model, mode, quantizer): if getattr(model, "quantizer", False): del model.quantizer + class Statistics: # pragma: no cover """The statistics printer.""" @@ -205,58 +206,59 @@ def print_stat(self): self.output_handle("|" + self.header.center(len(lines[0]) - 2, "*") + "|") for i in lines: self.output_handle(i) - + + def dump_model_op_stats(mode, tune_cfg): - """This is a function to dump quantizable ops of model to user. 
+ """This is a function to dump quantizable ops of model to user. - Args: - model (object): input model - tune_cfg (dict): quantization config - Returns: - None - """ - if mode == Mode.PREPARE: - return - res = {} - # collect all dtype info and build empty results with existing op_type - dtype_set = set() - for op, config in tune_cfg.items(): - op_type = op[1] - config = config.to_dict() - # import pdb; pdb.set_trace() - if not config["dtype"] == "fp32": - num_bits = config["bits"] - group_size = config["group_size"] - dtype_str = "A32W{}G{}".format(num_bits, group_size) - dtype_set.add(dtype_str) - dtype_set.add("FP32") - dtype_list = list(dtype_set) - dtype_list.sort() - - for op, config in tune_cfg.items(): - config = config.to_dict() - op_type = op[1] - if op_type not in res.keys(): - res[op_type] = {dtype: 0 for dtype in dtype_list} - - # fill in results with op_type and dtype - for op, config in tune_cfg.items(): - config = config.to_dict() - if config["dtype"] == "fp32": - res[op_type]["FP32"] += 1 - else: - num_bits = config["bits"] - group_size = config["group_size"] - dtype_str = "A32W{}G{}".format(num_bits, group_size) - res[op_type][dtype_str] += 1 - - # update stats format for dump. - field_names = ["Op Type", "Total"] - field_names.extend(dtype_list) - output_data = [] - for op_type in res.keys(): - field_results = [op_type, sum(res[op_type].values())] - field_results.extend([res[op_type][dtype] for dtype in dtype_list]) - output_data.append(field_results) - - Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() \ No newline at end of file + Args: + model (object): input model + tune_cfg (dict): quantization config + Returns: + None + """ + if mode == Mode.PREPARE: + return + res = {} + # collect all dtype info and build empty results with existing op_type + dtype_set = set() + for op, config in tune_cfg.items(): + op_type = op[1] + config = config.to_dict() + # import pdb; pdb.set_trace() + if not config["dtype"] == "fp32": + num_bits = config["bits"] + group_size = config["group_size"] + dtype_str = "A32W{}G{}".format(num_bits, group_size) + dtype_set.add(dtype_str) + dtype_set.add("FP32") + dtype_list = list(dtype_set) + dtype_list.sort() + + for op, config in tune_cfg.items(): + config = config.to_dict() + op_type = op[1] + if op_type not in res.keys(): + res[op_type] = {dtype: 0 for dtype in dtype_list} + + # fill in results with op_type and dtype + for op, config in tune_cfg.items(): + config = config.to_dict() + if config["dtype"] == "fp32": + res[op_type]["FP32"] += 1 + else: + num_bits = config["bits"] + group_size = config["group_size"] + dtype_str = "A32W{}G{}".format(num_bits, group_size) + res[op_type][dtype_str] += 1 + + # update stats format for dump. 
+ field_names = ["Op Type", "Total"] + field_names.extend(dtype_list) + output_data = [] + for op_type in res.keys(): + field_results = [op_type, sum(res[op_type].values())] + field_results.extend([res[op_type][dtype] for dtype in dtype_list]) + output_data.append(field_results) + + Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() From ad8073bef1dcfbdf8fd5d84956a66e125355f184 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 18 Jun 2024 14:54:03 +0800 Subject: [PATCH 3/5] add prettytable into requirements_pt Signed-off-by: Kaihui-intel --- neural_compressor/torch/algorithms/weight_only/utility.py | 1 - neural_compressor/torch/quantization/algorithm_entry.py | 1 - requirements_pt.txt | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py index 02b20251461..39a082833ea 100644 --- a/neural_compressor/torch/algorithms/weight_only/utility.py +++ b/neural_compressor/torch/algorithms/weight_only/utility.py @@ -13,7 +13,6 @@ # limitations under the License. import torch -import prettytable as pt from neural_compressor.torch.utils import accelerator, device_synchronize, logger diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index d068ccf9701..d939d3eb884 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -90,7 +90,6 @@ def rtn_entry( model.save = MethodType(save, model) postprocess_model(model, mode, quantizer) dump_model_op_stats(mode, configs_mapping) - return model diff --git a/requirements_pt.txt b/requirements_pt.txt index a164be3d24f..94667b64665 100644 --- a/requirements_pt.txt +++ b/requirements_pt.txt @@ -1,5 +1,6 @@ numpy < 2.0 peft==0.10.0 +prettytable psutil py-cpuinfo pydantic From f099c927296ce705202bad3b3a37bff5893a169f Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 18 Jun 2024 16:46:48 +0800 Subject: [PATCH 4/5] remove alias Signed-off-by: Kaihui-intel --- neural_compressor/torch/utils/utility.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/torch/utils/utility.py b/neural_compressor/torch/utils/utility.py index 975e98b3fda..212dbfe7bef 100644 --- a/neural_compressor/torch/utils/utility.py +++ b/neural_compressor/torch/utils/utility.py @@ -15,9 +15,9 @@ from typing import Callable, Dict, List, Tuple, Union -import prettytable as pt import torch from typing_extensions import TypeAlias +from prettytable import PrettyTable from neural_compressor.common.utils import LazyImport, Mode, logger @@ -182,7 +182,7 @@ def __init__(self, data, header, field_names, output_handle=logger.info): self.header = header self.data = data self.output_handle = output_handle - self.tb = pt.PrettyTable(min_table_width=40) + self.tb = PrettyTable(min_table_width=40) def print_stat(self): """Print the statistics.""" From 56ae0889ab6cd05c084f02fe056dd10ace715a87 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 08:50:01 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/torch/utils/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/torch/utils/utility.py b/neural_compressor/torch/utils/utility.py index 
212dbfe7bef..e1c869dca45 100644 --- a/neural_compressor/torch/utils/utility.py +++ b/neural_compressor/torch/utils/utility.py @@ -16,8 +16,8 @@ from typing import Callable, Dict, List, Tuple, Union import torch -from typing_extensions import TypeAlias from prettytable import PrettyTable +from typing_extensions import TypeAlias from neural_compressor.common.utils import LazyImport, Mode, logger
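
---

Note for reviewers: the sketch below exercises the statistics dump added by this series end to end against a hand-built tune_cfg instead of a real quantized model. The FakeConfig class and the op names are illustrative stand-ins (a real run passes the config objects mapped by configs_mapping, e.g. from RTN/GPTQ); only dump_model_op_stats, Mode, and the (op_name, op_type) keying come from the patches above, and the table in the comment approximates the logged output rather than reproducing captured logs.

# A minimal sketch, assuming the imports exported by this series.
from neural_compressor.common.utils import Mode
from neural_compressor.torch.utils import dump_model_op_stats


class FakeConfig:
    """Illustrative stand-in for a weight-only config: dump_model_op_stats
    only needs to_dict() to expose the "dtype", "bits", and "group_size" keys."""

    def __init__(self, dtype="int4", bits=4, group_size=32):
        self._cfg = {"dtype": dtype, "bits": bits, "group_size": group_size}

    def to_dict(self):
        return self._cfg


# tune_cfg maps (op_name, op_type) tuples to configs; op[1] is the op type.
tune_cfg = {
    ("model.decoder.q_proj", "Linear"): FakeConfig(),       # counted as A32W4G32
    ("model.decoder.k_proj", "Linear"): FakeConfig(),       # counted as A32W4G32
    ("model.lm_head", "Linear"): FakeConfig(dtype="fp32"),  # kept in FP32
}

# Prepare mode returns early and logs nothing; convert/quantize mode logs a
# banner plus a prettytable grid via logger.info, roughly:
# |********Mixed Precision Statistics********|
# +---------+-------+----------+------+
# | Op Type | Total | A32W4G32 | FP32 |
# +---------+-------+----------+------+
# |  Linear |   3   |    2     |  1   |
# +---------+-------+----------+------+
dump_model_op_stats(Mode.PREPARE, tune_cfg)  # no-op
dump_model_op_stats(Mode.CONVERT, tune_cfg)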