From 42a9c0d7d145eaa312b016994b4c7520cbc76195 Mon Sep 17 00:00:00 2001 From: Chen Fu <1316708+chenfucn@users.noreply.github.com> Date: Wed, 8 Feb 2023 20:47:46 -0800 Subject: [PATCH 1/3] add symmetric quant in softmax --- .../tools/quantization/operators/softmax.py | 5 +++ .../python/quantization/test_op_softmax.py | 38 +++++++++++++++---- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/onnxruntime/python/tools/quantization/operators/softmax.py b/onnxruntime/python/tools/quantization/operators/softmax.py index e0a3bcabdc80e..caa82d3756a4b 100644 --- a/onnxruntime/python/tools/quantization/operators/softmax.py +++ b/onnxruntime/python/tools/quantization/operators/softmax.py @@ -80,6 +80,11 @@ def quantize(self): if self.quantizer.activation_qType == onnx.onnx_pb.TensorProto.UINT8: out_scale = 1 / 256.0 out_zero_point = 0 + elif self.quantizer.is_activation_symmetric: + # results are all greater or equal to 0, so we can only use + # half of the range + out_scale = 1 / 127.0 + out_zero_point = 0 else: out_scale = 1 / 256.0 out_zero_point = -128 diff --git a/onnxruntime/test/python/quantization/test_op_softmax.py b/onnxruntime/test/python/quantization/test_op_softmax.py index add97f9ebc00a..a41cecd1aec18 100644 --- a/onnxruntime/test/python/quantization/test_op_softmax.py +++ b/onnxruntime/test/python/quantization/test_op_softmax.py @@ -10,10 +10,11 @@ # -------------------------------------------------------------------------- import unittest +from pathlib import Path import numpy as np import onnx -from onnx import TensorProto, helper +from onnx import TensorProto, helper, numpy_helper from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type from onnxruntime.quantization import QuantFormat, QuantType, quantize_static @@ -148,13 +149,30 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}): weight_type=weight_type, extra_options=extra_options, ) - qdqnode_counts = { - "Conv": 1, - "QuantizeLinear": 3, - "DequantizeLinear": 4, - "Softmax": 1, - } - check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts) + + result_model = onnx.load(Path(model_q8_qdq_path)) + qnode_cnt = 0 + dqnode_cnt = 0 + softmax_cnt = 0 + qnode_zeropoints = [] + for node in result_model.graph.node: + match node.op_type: + case "QuantizeLinear": + qnode_cnt +=1 + qnode_zeropoints.append(node.input[2]) + case "DequantizeLinear": + dqnode_cnt += 1 + case "Softmax": + softmax_cnt += 1 + self.assertEqual(3, qnode_cnt, "Expected 3 QuantizeLinear nodes, found {}".format(qnode_cnt)) + self.assertEqual(4, dqnode_cnt, "Expected 4 DequantizeLinear nodes, found {}".format(dqnode_cnt)) + self.assertEqual(1, softmax_cnt, "Expected 1 Softmax node, found {}".format(softmax_cnt)) + if extra_options.get("ActivationSymmetric", False): + for tensor in result_model.graph.initializer: + if tensor.name in qnode_zeropoints: + np_value = numpy_helper.to_array(tensor) + self.assertEqual(0, np_value, "QuantizeLinear node zero point value must be 0, found {} instead!".format(np_value)) + qnode_io_qtypes = { "QuantizeLinear": [ ["i", 2, activation_proto_qtype], @@ -169,6 +187,10 @@ def test_quantize_softmax(self): self.quantize_softmax_test(QuantType.QUInt8, QuantType.QUInt8) def test_quantize_softmax_s8s8(self): + self.quantize_softmax_test( + QuantType.QInt8, + QuantType.QInt8, + ) self.quantize_softmax_test( QuantType.QInt8, QuantType.QInt8, From 544498ef9e9781cf8f85c654d02cee034b1ab581 Mon Sep 17 00:00:00 2001 From: Chen Fu <1316708+chenfucn@users.noreply.github.com> Date: Thu, 9 Feb 2023 09:01:43 -0800 Subject: [PATCH 2/3] build pipeline does not support 'match' keyword --- .../test/python/quantization/test_op_softmax.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/onnxruntime/test/python/quantization/test_op_softmax.py b/onnxruntime/test/python/quantization/test_op_softmax.py index a41cecd1aec18..44d32148485db 100644 --- a/onnxruntime/test/python/quantization/test_op_softmax.py +++ b/onnxruntime/test/python/quantization/test_op_softmax.py @@ -156,14 +156,13 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}): softmax_cnt = 0 qnode_zeropoints = [] for node in result_model.graph.node: - match node.op_type: - case "QuantizeLinear": - qnode_cnt +=1 - qnode_zeropoints.append(node.input[2]) - case "DequantizeLinear": - dqnode_cnt += 1 - case "Softmax": - softmax_cnt += 1 + if node.op_type == "QuantizeLinear": + qnode_cnt +=1 + qnode_zeropoints.append(node.input[2]) + elif node.op_type == "DequantizeLinear": + dqnode_cnt += 1 + elif node.op_type == "Softmax": + softmax_cnt += 1 self.assertEqual(3, qnode_cnt, "Expected 3 QuantizeLinear nodes, found {}".format(qnode_cnt)) self.assertEqual(4, dqnode_cnt, "Expected 4 DequantizeLinear nodes, found {}".format(dqnode_cnt)) self.assertEqual(1, softmax_cnt, "Expected 1 Softmax node, found {}".format(softmax_cnt)) From de834828fec59e1ef7815ca5ac1587e2e9ce6105 Mon Sep 17 00:00:00 2001 From: Chen Fu <1316708+chenfucn@users.noreply.github.com> Date: Thu, 9 Feb 2023 09:25:54 -0800 Subject: [PATCH 3/3] format --- onnxruntime/test/python/quantization/test_op_softmax.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/python/quantization/test_op_softmax.py b/onnxruntime/test/python/quantization/test_op_softmax.py index 44d32148485db..e8fb00a312084 100644 --- a/onnxruntime/test/python/quantization/test_op_softmax.py +++ b/onnxruntime/test/python/quantization/test_op_softmax.py @@ -157,7 +157,7 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}): qnode_zeropoints = [] for node in result_model.graph.node: if node.op_type == "QuantizeLinear": - qnode_cnt +=1 + qnode_cnt += 1 qnode_zeropoints.append(node.input[2]) elif node.op_type == "DequantizeLinear": dqnode_cnt += 1 @@ -170,7 +170,11 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}): for tensor in result_model.graph.initializer: if tensor.name in qnode_zeropoints: np_value = numpy_helper.to_array(tensor) - self.assertEqual(0, np_value, "QuantizeLinear node zero point value must be 0, found {} instead!".format(np_value)) + self.assertEqual( + 0, + np_value, + "QuantizeLinear node zero point value must be 0, found {} instead!".format(np_value), + ) qnode_io_qtypes = { "QuantizeLinear": [