From 42a9c0d7d145eaa312b016994b4c7520cbc76195 Mon Sep 17 00:00:00 2001
From: Chen Fu <1316708+chenfucn@users.noreply.github.com>
Date: Wed, 8 Feb 2023 20:47:46 -0800
Subject: [PATCH 1/3] Add symmetric activation quantization support for Softmax

---
 .../tools/quantization/operators/softmax.py   |  5 +++
 .../python/quantization/test_op_softmax.py    | 38 +++++++++++++++----
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/onnxruntime/python/tools/quantization/operators/softmax.py b/onnxruntime/python/tools/quantization/operators/softmax.py
index e0a3bcabdc80e..caa82d3756a4b 100644
--- a/onnxruntime/python/tools/quantization/operators/softmax.py
+++ b/onnxruntime/python/tools/quantization/operators/softmax.py
@@ -80,6 +80,11 @@ def quantize(self):
         if self.quantizer.activation_qType == onnx.onnx_pb.TensorProto.UINT8:
             out_scale = 1 / 256.0
             out_zero_point = 0
+        elif self.quantizer.is_activation_symmetric:
+            # Softmax results are all greater than or equal to 0, so with a zero
+            # point of 0 only the non-negative half of the signed range is used.
+            out_scale = 1 / 127.0
+            out_zero_point = 0
         else:
             out_scale = 1 / 256.0
             out_zero_point = -128
diff --git a/onnxruntime/test/python/quantization/test_op_softmax.py b/onnxruntime/test/python/quantization/test_op_softmax.py
index add97f9ebc00a..a41cecd1aec18 100644
--- a/onnxruntime/test/python/quantization/test_op_softmax.py
+++ b/onnxruntime/test/python/quantization/test_op_softmax.py
@@ -10,10 +10,11 @@
 # --------------------------------------------------------------------------
 
 import unittest
+from pathlib import Path
 
 import numpy as np
 import onnx
-from onnx import TensorProto, helper
+from onnx import TensorProto, helper, numpy_helper
 from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
 from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
@@ -148,13 +149,30 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
             weight_type=weight_type,
             extra_options=extra_options,
         )
-        qdqnode_counts = {
-            "Conv": 1,
-            "QuantizeLinear": 3,
-            "DequantizeLinear": 4,
-            "Softmax": 1,
-        }
-        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
+
+        result_model = onnx.load(Path(model_q8_qdq_path))
+        qnode_cnt = 0
+        dqnode_cnt = 0
+        softmax_cnt = 0
+        qnode_zeropoints = []
+        for node in result_model.graph.node:
+            match node.op_type:
+                case "QuantizeLinear":
+                    qnode_cnt +=1
+                    qnode_zeropoints.append(node.input[2])
+                case "DequantizeLinear":
+                    dqnode_cnt += 1
+                case "Softmax":
+                    softmax_cnt += 1
+        self.assertEqual(3, qnode_cnt, "Expected 3 QuantizeLinear nodes, found {}".format(qnode_cnt))
+        self.assertEqual(4, dqnode_cnt, "Expected 4 DequantizeLinear nodes, found {}".format(dqnode_cnt))
+        self.assertEqual(1, softmax_cnt, "Expected 1 Softmax node, found {}".format(softmax_cnt))
+        if extra_options.get("ActivationSymmetric", False):
+            for tensor in result_model.graph.initializer:
+                if tensor.name in qnode_zeropoints:
+                    np_value = numpy_helper.to_array(tensor)
+                    self.assertEqual(0, np_value, "QuantizeLinear node zero point value must be 0, found {} instead!".format(np_value))
+
         qnode_io_qtypes = {
             "QuantizeLinear": [
                 ["i", 2, activation_proto_qtype],
@@ -169,6 +187,10 @@ def test_quantize_softmax(self):
         self.quantize_softmax_test(QuantType.QUInt8, QuantType.QUInt8)
 
     def test_quantize_softmax_s8s8(self):
+        self.quantize_softmax_test(
+            QuantType.QInt8,
+            QuantType.QInt8,
+        )
         self.quantize_softmax_test(
             QuantType.QInt8,
             QuantType.QInt8,

From 544498ef9e9781cf8f85c654d02cee034b1ab581 Mon Sep 17 00:00:00 2001
From: Chen Fu <1316708+chenfucn@users.noreply.github.com>
Date: Thu, 9 Feb 2023 09:01:43 -0800
Subject: [PATCH 2/3] build pipeline does not support 'match' keyword

---
 .../test/python/quantization/test_op_softmax.py   | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/onnxruntime/test/python/quantization/test_op_softmax.py b/onnxruntime/test/python/quantization/test_op_softmax.py
index a41cecd1aec18..44d32148485db 100644
--- a/onnxruntime/test/python/quantization/test_op_softmax.py
+++ b/onnxruntime/test/python/quantization/test_op_softmax.py
@@ -156,14 +156,13 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
         softmax_cnt = 0
         qnode_zeropoints = []
         for node in result_model.graph.node:
-            match node.op_type:
-                case "QuantizeLinear":
-                    qnode_cnt +=1
-                    qnode_zeropoints.append(node.input[2])
-                case "DequantizeLinear":
-                    dqnode_cnt += 1
-                case "Softmax":
-                    softmax_cnt += 1
+            if node.op_type == "QuantizeLinear":
+                qnode_cnt +=1
+                qnode_zeropoints.append(node.input[2])
+            elif node.op_type == "DequantizeLinear":
+                dqnode_cnt += 1
+            elif node.op_type == "Softmax":
+                softmax_cnt += 1
         self.assertEqual(3, qnode_cnt, "Expected 3 QuantizeLinear nodes, found {}".format(qnode_cnt))
         self.assertEqual(4, dqnode_cnt, "Expected 4 DequantizeLinear nodes, found {}".format(dqnode_cnt))
         self.assertEqual(1, softmax_cnt, "Expected 1 Softmax node, found {}".format(softmax_cnt))

From de834828fec59e1ef7815ca5ac1587e2e9ce6105 Mon Sep 17 00:00:00 2001
From: Chen Fu <1316708+chenfucn@users.noreply.github.com>
Date: Thu, 9 Feb 2023 09:25:54 -0800
Subject: [PATCH 3/3] Format test_op_softmax.py

---
 onnxruntime/test/python/quantization/test_op_softmax.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/test/python/quantization/test_op_softmax.py b/onnxruntime/test/python/quantization/test_op_softmax.py
index 44d32148485db..e8fb00a312084 100644
--- a/onnxruntime/test/python/quantization/test_op_softmax.py
+++ b/onnxruntime/test/python/quantization/test_op_softmax.py
@@ -157,7 +157,7 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
         qnode_zeropoints = []
         for node in result_model.graph.node:
             if node.op_type == "QuantizeLinear":
-                qnode_cnt +=1
+                qnode_cnt += 1
                 qnode_zeropoints.append(node.input[2])
             elif node.op_type == "DequantizeLinear":
                 dqnode_cnt += 1
@@ -170,7 +170,11 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
             for tensor in result_model.graph.initializer:
                 if tensor.name in qnode_zeropoints:
                     np_value = numpy_helper.to_array(tensor)
-                    self.assertEqual(0, np_value, "QuantizeLinear node zero point value must be 0, found {} instead!".format(np_value))
+                    self.assertEqual(
+                        0,
+                        np_value,
+                        "QuantizeLinear node zero point value must be 0, found {} instead!".format(np_value),
+                    )
 
         qnode_io_qtypes = {
             "QuantizeLinear": [