From 0bc3d5d8b19af0d2cf8157a1d77b4c375c6876aa Mon Sep 17 00:00:00 2001
From: Xiao Sheng
Date: Tue, 17 Oct 2023 18:34:23 +0800
Subject: [PATCH] Update Vitis AI quantization to support ORT 1.16, support
 TensorsData and QuantizationParams (#650)

## Describe your changes
Update the Vitis AI quantization pass to support ONNX Runtime (ORT) 1.16. Starting with ORT 1.16, calibrators return calibration ranges as a `TensorsData` object instead of a plain dict, and per-tensor quantization parameters are carried in `QuantizationParams` objects; the pass now handles both the pre-1.16 and 1.16+ representations (see the sketch below). The change also removes the ORT 1.16.0/1.16.1 skip markers from the e2e and unit tests, relaxes the accuracy and latency goals in the ResNet example config, and passes `save_as_external_data=True` to `quant_pre_process` so large models can be preprocessed.
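
As a rough sketch of the two API shifts being accommodated (illustrative only, not code from this patch: `collect_tensors_range` and `make_quant_params` are hypothetical helper names, and the import location of `compute_scale_zp_pof2s` is assumed):

```python
from onnxruntime import __version__ as OrtVersion
from packaging import version


def is_ort_version_below_1_16() -> bool:
    # Mirrors the helper this patch imports from olive.passes.onnx.vitis_ai.quant_utils.
    return version.parse(OrtVersion) < version.parse("1.16.0")


def collect_tensors_range(calibrator):
    """Collect calibration ranges in whichever shape the installed ORT produces."""
    if is_ort_version_below_1_16():
        # ORT < 1.16: a plain dict mapping tensor name -> (rmin, rmax).
        return calibrator.compute_range()
    # ORT >= 1.16: compute_data() returns a TensorsData whose entries expose range_value.
    return calibrator.compute_data()


def make_quant_params(rmin, rmax, qmin, qmax, symmetric):
    """Package the power-of-two scale/zero-point the way each ORT release expects."""
    from olive.passes.onnx.vitis_ai.quant_utils import compute_scale_zp_pof2s  # assumed location

    zero, scale = compute_scale_zp_pof2s(rmin, rmax, qmin, qmax, symmetric)
    if is_ort_version_below_1_16():
        # Pre-1.16 code consumed the raw (zero_point, scale) pair.
        return zero, scale
    # ORT >= 1.16 carries the pair in a QuantizationParams object.
    from onnxruntime.quantization.onnx_quantizer import QuantizationParams

    return QuantizationParams(zero_point=zero, scale=scale)
```

One case the sketch omits: the custom `PowerOfTwoMethod.MinMSE` calibrator still returns a plain dict on ORT 1.16+, so the pass wraps its output as `TensorsData(CalibrationMethod.MinMax, tensors_range)` before handing it downstream.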
## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Format your code by running `pre-commit run --all-files`
- [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.

## (Optional) Issue link
https://github.com/microsoft/Olive/issues/629
---
 examples/resnet/resnet_vitis_ai_ptq_cpu.json  |  4 +--
 examples/test/test_resnet_vitis_ai_ptq_cpu.py |  6 ----
 olive/passes/onnx/vitis_ai/quantize.py        | 10 ++++++-
 olive/passes/onnx/vitis_ai/quantizer.py       | 30 ++++++++++++-------
 olive/passes/onnx/vitis_ai_quantization.py    |  7 ++++-
 .../vitis_ai/test_vitis_ai_quantization.py    |  7 -----
 6 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/examples/resnet/resnet_vitis_ai_ptq_cpu.json b/examples/resnet/resnet_vitis_ai_ptq_cpu.json
index 1ee0242b3..cf9ae2076 100644
--- a/examples/resnet/resnet_vitis_ai_ptq_cpu.json
+++ b/examples/resnet/resnet_vitis_ai_ptq_cpu.json
@@ -24,7 +24,7 @@
                     {
                         "name": "accuracy_custom",
                         "priority": 1,
                         "higher_is_better": true,
-                        "goal": {"type": "max-degradation", "value": 0.01}
+                        "goal": {"type": "max-degradation", "value": 0.1}
                     }
                 ],
                 "user_config":{
@@ -41,7 +41,7 @@
                     {
                         "name": "avg",
                         "priority": 2,
-                        "goal": {"type": "percent-min-improvement", "value": 20}
+                        "goal": {"type": "percent-min-improvement", "value": 10}
                     }
                 ],
                 "user_config":{
diff --git a/examples/test/test_resnet_vitis_ai_ptq_cpu.py b/examples/test/test_resnet_vitis_ai_ptq_cpu.py
index fff11065e..c13f2c183 100644
--- a/examples/test/test_resnet_vitis_ai_ptq_cpu.py
+++ b/examples/test/test_resnet_vitis_ai_ptq_cpu.py
@@ -6,8 +6,6 @@ from pathlib import Path
 
 import pytest
-from onnxruntime import __version__ as OrtVersion
-from packaging import version
 from utils import check_output, patch_config
 
 from olive.common.utils import retry_func, run_subprocess
 
@@ -32,10 +30,6 @@ def setup():
 @pytest.mark.parametrize("execution_order", ["pass-by-pass"])
 @pytest.mark.parametrize("system", ["local_system", "aml_system"])
 @pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"])
-@pytest.mark.skipif(
-    version.parse(OrtVersion) == version.parse("1.16.0") or version.parse(OrtVersion) == version.parse("1.16.1"),
-    reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData",
-)
 def test_resnet(search_algorithm, execution_order, system, olive_json):
     # TODO(jambayk): add gpu e2e test
     from olive.workflows import run as olive_run
diff --git a/olive/passes/onnx/vitis_ai/quantize.py b/olive/passes/onnx/vitis_ai/quantize.py
index 33835f2cd..73dc526aa 100644
--- a/olive/passes/onnx/vitis_ai/quantize.py
+++ b/olive/passes/onnx/vitis_ai/quantize.py
@@ -207,7 +207,15 @@ def quantize_static(
     )
 
     calibrator.collect_data(calibration_data_reader)
-    tensors_range = calibrator.compute_range()
+    if is_ort_version_below_1_16():
+        tensors_range = calibrator.compute_range()
+    elif calibrate_method == PowerOfTwoMethod.MinMSE:
+        tensors_range = calibrator.compute_range()
+        from onnxruntime.quantization.calibrate import TensorsData
+
+        tensors_range = TensorsData(CalibrationMethod.MinMax, tensors_range)
+    else:
+        tensors_range = calibrator.compute_data()
     del calibrator
 
     if input_nodes or output_nodes:
diff --git a/olive/passes/onnx/vitis_ai/quantizer.py b/olive/passes/onnx/vitis_ai/quantizer.py
index 1d49d7e9c..e1b704ede 100644
--- a/olive/passes/onnx/vitis_ai/quantizer.py
+++ b/olive/passes/onnx/vitis_ai/quantizer.py
@@ -426,6 +426,8 @@ def quantize_weight_per_channel(
         return q_weight_name, zp_name, scale_name
 
     def calculate_quantization_params(self):
+        from olive.passes.onnx.vitis_ai.quant_utils import is_ort_version_below_1_16
+
         if self.tensors_range is None:
             return
 
@@ -439,17 +441,28 @@ def calculate_quantization_params(self):
                 continue
             if len(self.model.input_name_to_nodes()[node.input[0]]) != 1:
                 continue
-            if node.input[0] not in self.tensors_range.keys() or node.output[0] not in self.tensors_range.keys():
+            if node.input[0] not in self.tensors_range or node.output[0] not in self.tensors_range:
                 continue
             self.tensors_range[node.input[0]] = self.tensors_range[node.output[0]]
 
         quantization_params = {}
-        for tensor_name in self.tensors_range.keys():
-            rmin, rmax = self.tensors_range[tensor_name]
-            qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)
+        if is_ort_version_below_1_16():
+            for tensor_name in self.tensors_range.keys():
+                rmin, rmax = self.tensors_range[tensor_name]
+                qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)
 
-            quantization_params[tensor_name] = compute_scale_zp_pof2s(
-                rmin, rmax, qmin, qmax, self.is_activation_symmetric
-            )
+                quantization_params[tensor_name] = compute_scale_zp_pof2s(
+                    rmin, rmax, qmin, qmax, self.is_activation_symmetric
+                )
+        else:
+            from onnxruntime.quantization.onnx_quantizer import QuantizationParams
+
+            for tensor_name in self.tensors_range:
+                td = self.tensors_range[tensor_name]
+                rmin, rmax = td.range_value
+                qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)
+
+                zero, scale = compute_scale_zp_pof2s(rmin, rmax, qmin, qmax, self.is_activation_symmetric)
+                quantization_params[tensor_name] = QuantizationParams(zero_point=zero, scale=scale)
 
         return quantization_params
@@ -549,7 +562,6 @@ def __quantize_tensor(self, tensor_name, quant_sharing_param=None, tensor_type=Q
         """
         Quantize tensors.
         If quant_param_tensor is not None, tensor with name tensor_name will be quantized with same quantization parameters as tensor quant_param_tensor
-
         Args:
             tensor_name: name of the tensor to quantize
             quant_sharing_param: name of the tensor that provides quantization parameter
@@ -569,7 +581,6 @@ def quantize_activation_tensor(self, tensor_name, quant_sharing_param=None):
 
         Args:
             tensor_name: name of the tensor to quantize
             quant_sharing_param: name of the tensor that provides quantization parameter
-
         """
         return self.__quantize_tensor(tensor_name, quant_sharing_param, QDQQuantTensorType.ACTIVATION)
@@ -579,7 +590,6 @@ def quantize_weight_tensor(self, tensor_name, quant_sharing_param=None):
 
         Args:
             tensor_name: name of the tensor to quantize
             quant_sharing_param: name of the tensor that provides quantization parameter
-
         """
         return self.__quantize_tensor(tensor_name, quant_sharing_param, QDQQuantTensorType.WEIGHT)
diff --git a/olive/passes/onnx/vitis_ai_quantization.py b/olive/passes/onnx/vitis_ai_quantization.py
index afbc95e86..bb708723c 100644
--- a/olive/passes/onnx/vitis_ai_quantization.py
+++ b/olive/passes/onnx/vitis_ai_quantization.py
@@ -362,7 +362,12 @@ def _quant_preprocess(self, model: ONNXModel, output_model_path: str) -> ONNXMod
         from onnxruntime.quantization.preprocess import quant_pre_process
 
         try:
-            quant_pre_process(input_model_path=model.model_path, output_model_path=output_model_path, auto_merge=True)
+            quant_pre_process(
+                input_model_path=model.model_path,
+                output_model_path=str(output_model_path),
+                auto_merge=True,
+                save_as_external_data=True,
+            )
         except Exception as e:
             # TODO(xiaosheng): try with `skip_optimization = True`
             # quantization preprocessing will fail if the model is too large and `skip_optimization = False`
diff --git a/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py b/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py
index cbd34ab00..6282c6832 100644
--- a/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py
+++ b/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py
@@ -6,10 +6,7 @@ from test.unit_test.utils import get_onnx_model
 
 import numpy as np
-import pytest
-from onnxruntime import __version__ as OrtVersion
 from onnxruntime.quantization.calibrate import CalibrationDataReader
-from packaging import version
 
 from olive.passes.olive_pass import create_pass_from_dict
 from olive.passes.onnx.vitis_ai_quantization import VitisAIQuantization
 
@@ -36,10 +33,6 @@ def dummy_calibration_reader(data_dir=None, batch_size=1, *args, **kwargs):
     return RandomDataReader()
 
 
-@pytest.mark.skipif(
-    version.parse(OrtVersion) == version.parse("1.16.0") or version.parse(OrtVersion) == version.parse("1.16.1"),
-    reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData",
-)
 def test_vitis_ai_quantization_pass(tmp_path):
     # setup
     input_model = get_onnx_model()
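
For reference, the updated example can be exercised end-to-end the same way `test_resnet` above drives it (a hypothetical driver, assuming the working directory is `examples/resnet` and that `olive.workflows.run` accepts a config file path):

```python
# Run the Vitis AI PTQ workflow on the example config; mirrors test_resnet above.
from olive.workflows import run as olive_run

olive_run("resnet_vitis_ai_ptq_cpu.json")
```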