Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add symmetric quant in softmax #14640

Merged
merged 3 commits into from
Feb 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions onnxruntime/python/tools/quantization/operators/softmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ def quantize(self):
if self.quantizer.activation_qType == onnx.onnx_pb.TensorProto.UINT8:
out_scale = 1 / 256.0
out_zero_point = 0
elif self.quantizer.is_activation_symmetric:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we also need to support symmetric quantization in QLinearSoftmax?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, a QLinearXXX operator is the result of a Q node being merged with other nodes.

# results are all greater than or equal to 0, so we can only use
# half of the range
out_scale = 1 / 127.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be 128.0, since we have the closed interval [0, 127]?

Copy link
Contributor Author

@chenfucn chenfucn Feb 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The symmetric range is [-127, 127]; we have to discard -128 to make it symmetric. This range has 254 unit intervals, and half of that is 127 intervals, so the correct value is 1/127.

out_zero_point = 0
else:
out_scale = 1 / 256.0
out_zero_point = -128
Expand Down
41 changes: 33 additions & 8 deletions onnxruntime/test/python/quantization/test_op_softmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
# --------------------------------------------------------------------------

import unittest
from pathlib import Path

import numpy as np
import onnx
from onnx import TensorProto, helper
from onnx import TensorProto, helper, numpy_helper
from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type

from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
Expand Down Expand Up @@ -148,13 +149,33 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
weight_type=weight_type,
extra_options=extra_options,
)
qdqnode_counts = {
"Conv": 1,
"QuantizeLinear": 3,
"DequantizeLinear": 4,
"Softmax": 1,
}
check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)

result_model = onnx.load(Path(model_q8_qdq_path))
qnode_cnt = 0
dqnode_cnt = 0
softmax_cnt = 0
qnode_zeropoints = []
for node in result_model.graph.node:
if node.op_type == "QuantizeLinear":
qnode_cnt += 1
qnode_zeropoints.append(node.input[2])
elif node.op_type == "DequantizeLinear":
dqnode_cnt += 1
elif node.op_type == "Softmax":
softmax_cnt += 1
self.assertEqual(3, qnode_cnt, "Expected 3 QuantizeLinear nodes, found {}".format(qnode_cnt))
self.assertEqual(4, dqnode_cnt, "Expected 4 DequantizeLinear nodes, found {}".format(dqnode_cnt))
self.assertEqual(1, softmax_cnt, "Expected 1 Softmax node, found {}".format(softmax_cnt))
if extra_options.get("ActivationSymmetric", False):
for tensor in result_model.graph.initializer:
if tensor.name in qnode_zeropoints:
np_value = numpy_helper.to_array(tensor)
self.assertEqual(
0,
np_value,
"QuantizeLinear node zero point value must be 0, found {} instead!".format(np_value),
)

qnode_io_qtypes = {
"QuantizeLinear": [
["i", 2, activation_proto_qtype],
Expand All @@ -169,6 +190,10 @@ def test_quantize_softmax(self):
self.quantize_softmax_test(QuantType.QUInt8, QuantType.QUInt8)

def test_quantize_softmax_s8s8(self):
self.quantize_softmax_test(
QuantType.QInt8,
QuantType.QInt8,
)
self.quantize_softmax_test(
QuantType.QInt8,
QuantType.QInt8,
Expand Down