Skip to content

Commit

Permalink
Update Vitis AI quantization to support ORT 1.16, support TensorData …
Browse files Browse the repository at this point in the history
…and QuantizationParams (#650)

## Describe your changes
Update Vitis AI quantization to support ORT 1.16 by handling the
`TensorsData` and `QuantizationParams` types used by ORT 1.16 and later.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Format your code by running `pre-commit run --all-files`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.

## (Optional) Issue link
#629
  • Loading branch information
sheng-xiao authored Oct 17, 2023
1 parent efd83e0 commit 0bc3d5d
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 27 deletions.
4 changes: 2 additions & 2 deletions examples/resnet/resnet_vitis_ai_ptq_cpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
{
"name": "accuracy_custom",
"priority": 1, "higher_is_better": true,
"goal": {"type": "max-degradation", "value": 0.01}
"goal": {"type": "max-degradation", "value": 0.1}
}
],
"user_config":{
Expand All @@ -41,7 +41,7 @@
{
"name": "avg",
"priority": 2,
"goal": {"type": "percent-min-improvement", "value": 20}
"goal": {"type": "percent-min-improvement", "value": 10}
}
],
"user_config":{
Expand Down
6 changes: 0 additions & 6 deletions examples/test/test_resnet_vitis_ai_ptq_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from pathlib import Path

import pytest
from onnxruntime import __version__ as OrtVersion
from packaging import version
from utils import check_output, patch_config

from olive.common.utils import retry_func, run_subprocess
Expand All @@ -32,10 +30,6 @@ def setup():
@pytest.mark.parametrize("execution_order", ["pass-by-pass"])
@pytest.mark.parametrize("system", ["local_system", "aml_system"])
@pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"])
@pytest.mark.skipif(
version.parse(OrtVersion) == version.parse("1.16.0") or version.parse(OrtVersion) == version.parse("1.16.1"),
reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData",
)
def test_resnet(search_algorithm, execution_order, system, olive_json):
# TODO(jambayk): add gpu e2e test
from olive.workflows import run as olive_run
Expand Down
10 changes: 9 additions & 1 deletion olive/passes/onnx/vitis_ai/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,15 @@ def quantize_static(
)

calibrator.collect_data(calibration_data_reader)
tensors_range = calibrator.compute_range()
if is_ort_version_below_1_16():
tensors_range = calibrator.compute_range()
elif calibrate_method == PowerOfTwoMethod.MinMSE:
tensors_range = calibrator.compute_range()
from onnxruntime.quantization.calibrate import TensorsData

tensors_range = TensorsData(CalibrationMethod.MinMax, tensors_range)
else:
tensors_range = calibrator.compute_data()
del calibrator

if input_nodes or output_nodes:
Expand Down
30 changes: 20 additions & 10 deletions olive/passes/onnx/vitis_ai/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,8 @@ def quantize_weight_per_channel(
return q_weight_name, zp_name, scale_name

def calculate_quantization_params(self):
from olive.passes.onnx.vitis_ai.quant_utils import is_ort_version_below_1_16

if self.tensors_range is None:
return

Expand All @@ -439,17 +441,28 @@ def calculate_quantization_params(self):
continue
if len(self.model.input_name_to_nodes()[node.input[0]]) != 1:
continue
if node.input[0] not in self.tensors_range.keys() or node.output[0] not in self.tensors_range.keys():
if node.input[0] not in self.tensors_range or node.output[0] not in self.tensors_range:
continue
self.tensors_range[node.input[0]] = self.tensors_range[node.output[0]]
quantization_params = {}
for tensor_name in self.tensors_range.keys():
rmin, rmax = self.tensors_range[tensor_name]
qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)
if is_ort_version_below_1_16():
for tensor_name in self.tensors_range.keys():
rmin, rmax = self.tensors_range[tensor_name]
qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)

quantization_params[tensor_name] = compute_scale_zp_pof2s(
rmin, rmax, qmin, qmax, self.is_activation_symmetric
)
quantization_params[tensor_name] = compute_scale_zp_pof2s(
rmin, rmax, qmin, qmax, self.is_activation_symmetric
)
else:
from onnxruntime.quantization.onnx_quantizer import QuantizationParams

for tensor_name in self.tensors_range:
td = self.tensors_range[tensor_name]
rmin, rmax = td.range_value
qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)

zero, scale = compute_scale_zp_pof2s(rmin, rmax, qmin, qmax, self.is_activation_symmetric)
quantization_params[tensor_name] = QuantizationParams(zero_point=zero, scale=scale)

return quantization_params

Expand Down Expand Up @@ -549,7 +562,6 @@ def __quantize_tensor(self, tensor_name, quant_sharing_param=None, tensor_type=Q
"""
Quantize tensors. If quant_param_tensor is not None, tensor with name tensor_name will be quantized with same
quantization parameters as tensor quant_param_tensor
Args:
tensor_name: name of the tensor to quantize
quant_sharing_param: name of the tensor that provides quantization parameter
Expand All @@ -569,7 +581,6 @@ def quantize_activation_tensor(self, tensor_name, quant_sharing_param=None):
Args:
tensor_name: name of the tensor to quantize
quant_sharing_param: name of the tensor that provides quantization parameter
"""
return self.__quantize_tensor(tensor_name, quant_sharing_param, QDQQuantTensorType.ACTIVATION)

Expand All @@ -579,7 +590,6 @@ def quantize_weight_tensor(self, tensor_name, quant_sharing_param=None):
Args:
tensor_name: name of the tensor to quantize
quant_sharing_param: name of the tensor that provides quantization parameter
"""
return self.__quantize_tensor(tensor_name, quant_sharing_param, QDQQuantTensorType.WEIGHT)

Expand Down
7 changes: 6 additions & 1 deletion olive/passes/onnx/vitis_ai_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,12 @@ def _quant_preprocess(self, model: ONNXModel, output_model_path: str) -> ONNXMod
from onnxruntime.quantization.preprocess import quant_pre_process

try:
quant_pre_process(input_model_path=model.model_path, output_model_path=output_model_path, auto_merge=True)
quant_pre_process(
input_model_path=model.model_path,
output_model_path=str(output_model_path),
auto_merge=True,
save_as_external_data=True,
)
except Exception as e:
# TODO(xiaosheng): try with `skip_optimization = True`
# quantization preprocessing will fail if the model is too large and `skip_optimization = False`
Expand Down
7 changes: 0 additions & 7 deletions test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
from test.unit_test.utils import get_onnx_model

import numpy as np
import pytest
from onnxruntime import __version__ as OrtVersion
from onnxruntime.quantization.calibrate import CalibrationDataReader
from packaging import version

from olive.passes.olive_pass import create_pass_from_dict
from olive.passes.onnx.vitis_ai_quantization import VitisAIQuantization
Expand All @@ -36,10 +33,6 @@ def dummy_calibration_reader(data_dir=None, batch_size=1, *args, **kwargs):
return RandomDataReader()


@pytest.mark.skipif(
version.parse(OrtVersion) == version.parse("1.16.0") or version.parse(OrtVersion) == version.parse("1.16.1"),
reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData",
)
def test_vitis_ai_quantization_pass(tmp_path):
# setup
input_model = get_onnx_model()
Expand Down

0 comments on commit 0bc3d5d

Please sign in to comment.