Update Vitis AI quantization to support ORT 1.16, support TensorsData and QuantizationParams (#650)

## Describe your changes
Update Vitis AI quantization to support ORT 1.16, support TensorsData and QuantizationParams

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Format your code by running `pre-commit run --all-files`
- [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.

## (Optional) Issue link
#629
microsoft · Oct 17, 2023 · 0bc3d5d · 0bc3d5d
1 parent efd83e0
commit 0bc3d5d
Show file tree

Hide file tree

Showing 6 changed files with 37 additions and 27 deletions.
diff --git a/examples/resnet/resnet_vitis_ai_ptq_cpu.json b/examples/resnet/resnet_vitis_ai_ptq_cpu.json
@@ -24,7 +24,7 @@
                         {
                             "name": "accuracy_custom",
                             "priority": 1, "higher_is_better": true,
-                            "goal": {"type": "max-degradation", "value": 0.01}
+                            "goal": {"type": "max-degradation", "value": 0.1}
                         }
                     ],
                     "user_config":{
@@ -41,7 +41,7 @@
                         {
                             "name": "avg",
                             "priority": 2,
-                            "goal": {"type": "percent-min-improvement", "value": 20}
+                            "goal": {"type": "percent-min-improvement", "value": 10}
                         }
                     ],
                     "user_config":{

diff --git a/examples/test/test_resnet_vitis_ai_ptq_cpu.py b/examples/test/test_resnet_vitis_ai_ptq_cpu.py
@@ -6,8 +6,6 @@
 from pathlib import Path
 
 import pytest
-from onnxruntime import __version__ as OrtVersion
-from packaging import version
 from utils import check_output, patch_config
 
 from olive.common.utils import retry_func, run_subprocess
@@ -32,10 +30,6 @@ def setup():
 @pytest.mark.parametrize("execution_order", ["pass-by-pass"])
 @pytest.mark.parametrize("system", ["local_system", "aml_system"])
 @pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"])
-@pytest.mark.skipif(
-    version.parse(OrtVersion) == version.parse("1.16.0") or version.parse(OrtVersion) == version.parse("1.16.1"),
-    reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData",
-)
 def test_resnet(search_algorithm, execution_order, system, olive_json):
     # TODO(jambayk): add gpu e2e test
     from olive.workflows import run as olive_run

diff --git a/olive/passes/onnx/vitis_ai/quantize.py b/olive/passes/onnx/vitis_ai/quantize.py
@@ -207,7 +207,15 @@ def quantize_static(
         )
 
         calibrator.collect_data(calibration_data_reader)
-        tensors_range = calibrator.compute_range()
+        if is_ort_version_below_1_16():
+            tensors_range = calibrator.compute_range()
+        elif calibrate_method == PowerOfTwoMethod.MinMSE:
+            tensors_range = calibrator.compute_range()
+            from onnxruntime.quantization.calibrate import TensorsData
+
+            tensors_range = TensorsData(CalibrationMethod.MinMax, tensors_range)
+        else:
+            tensors_range = calibrator.compute_data()
         del calibrator
 
     if input_nodes or output_nodes:

diff --git a/olive/passes/onnx/vitis_ai/quantizer.py b/olive/passes/onnx/vitis_ai/quantizer.py
@@ -426,6 +426,8 @@ def quantize_weight_per_channel(
         return q_weight_name, zp_name, scale_name
 
     def calculate_quantization_params(self):
+        from olive.passes.onnx.vitis_ai.quant_utils import is_ort_version_below_1_16
+
         if self.tensors_range is None:
             return
 
@@ -439,17 +441,28 @@ def calculate_quantization_params(self):
                 continue
             if len(self.model.input_name_to_nodes()[node.input[0]]) != 1:
                 continue
-            if node.input[0] not in self.tensors_range.keys() or node.output[0] not in self.tensors_range.keys():
+            if node.input[0] not in self.tensors_range or node.output[0] not in self.tensors_range:
                 continue
             self.tensors_range[node.input[0]] = self.tensors_range[node.output[0]]
         quantization_params = {}
-        for tensor_name in self.tensors_range.keys():
-            rmin, rmax = self.tensors_range[tensor_name]
-            qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)
+        if is_ort_version_below_1_16():
+            for tensor_name in self.tensors_range.keys():
+                rmin, rmax = self.tensors_range[tensor_name]
+                qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)
 
-            quantization_params[tensor_name] = compute_scale_zp_pof2s(
-                rmin, rmax, qmin, qmax, self.is_activation_symmetric
-            )
+                quantization_params[tensor_name] = compute_scale_zp_pof2s(
+                    rmin, rmax, qmin, qmax, self.is_activation_symmetric
+                )
+        else:
+            from onnxruntime.quantization.onnx_quantizer import QuantizationParams
+
+            for tensor_name in self.tensors_range:
+                td = self.tensors_range[tensor_name]
+                rmin, rmax = td.range_value
+                qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)
+
+                zero, scale = compute_scale_zp_pof2s(rmin, rmax, qmin, qmax, self.is_activation_symmetric)
+                quantization_params[tensor_name] = QuantizationParams(zero_point=zero, scale=scale)
 
         return quantization_params
 
@@ -549,7 +562,6 @@ def __quantize_tensor(self, tensor_name, quant_sharing_param=None, tensor_type=Q
         """
         Quantize tensors. If quant_param_tensor is not None, tensor with name tensor_name will be quantized with same
         quantization parameters as tensor quant_param_tensor
-
         Args:
             tensor_name: name of the tensor to quantize
             quant_sharing_param: name of the tensor that provides quantization parameter
@@ -569,7 +581,6 @@ def quantize_activation_tensor(self, tensor_name, quant_sharing_param=None):
         Args:
             tensor_name: name of the tensor to quantize
             quant_sharing_param: name of the tensor that provides quantization parameter
-
         """
         return self.__quantize_tensor(tensor_name, quant_sharing_param, QDQQuantTensorType.ACTIVATION)
 
@@ -579,7 +590,6 @@ def quantize_weight_tensor(self, tensor_name, quant_sharing_param=None):
         Args:
             tensor_name: name of the tensor to quantize
             quant_sharing_param: name of the tensor that provides quantization parameter
-
         """
         return self.__quantize_tensor(tensor_name, quant_sharing_param, QDQQuantTensorType.WEIGHT)
 

diff --git a/olive/passes/onnx/vitis_ai_quantization.py b/olive/passes/onnx/vitis_ai_quantization.py
@@ -362,7 +362,12 @@ def _quant_preprocess(self, model: ONNXModel, output_model_path: str) -> ONNXMod
         from onnxruntime.quantization.preprocess import quant_pre_process
 
         try:
-            quant_pre_process(input_model_path=model.model_path, output_model_path=output_model_path, auto_merge=True)
+            quant_pre_process(
+                input_model_path=model.model_path,
+                output_model_path=str(output_model_path),
+                auto_merge=True,
+                save_as_external_data=True,
+            )
         except Exception as e:
             # TODO(xiaosheng): try with `skip_optimization = True`
             # quantization preprocessing will fail if the model is too large and `skip_optimization = False`

diff --git a/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py b/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py
@@ -6,10 +6,7 @@
 from test.unit_test.utils import get_onnx_model
 
 import numpy as np
-import pytest
-from onnxruntime import __version__ as OrtVersion
 from onnxruntime.quantization.calibrate import CalibrationDataReader
-from packaging import version
 
 from olive.passes.olive_pass import create_pass_from_dict
 from olive.passes.onnx.vitis_ai_quantization import VitisAIQuantization
@@ -36,10 +33,6 @@ def dummy_calibration_reader(data_dir=None, batch_size=1, *args, **kwargs):
     return RandomDataReader()
 
 
-@pytest.mark.skipif(
-    version.parse(OrtVersion) == version.parse("1.16.0") or version.parse(OrtVersion) == version.parse("1.16.1"),
-    reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData",
-)
 def test_vitis_ai_quantization_pass(tmp_path):
     # setup
     input_model = get_onnx_model()