Added unit test for qlinearconv
maggiesquadric committed Jan 17, 2025
1 parent f738d8b commit 03e3ea5
Showing 5 changed files with 397 additions and 14 deletions.
15 changes: 7 additions & 8 deletions onnxruntime/core/mlas/lib/quantize.cpp
@@ -2186,9 +2186,14 @@ MlasRequantizeOutputFixedPoint(
 // New MlasRequantizeOuput but for fixed point not floating point
 // Floating point conversion to fixed point is multiply by 2**n where n is the number of decimal places
 // Then, interpret this number as a 32 bit int
-int fractional_bits = 31;
+// Need to wrap into vector to use function scalarToQfp
+std::vector<float> ScaleValueVec = {*Scale}; // Create single-element vector
+auto p = dataToQfp(ScaleValueVec, -1, 32, false); // Returns std::make_pair(qfp, fracBits)
+int fracBits = p.second;
+int mulScale = fracBits - 2;
+
 int64_t* fpScale = new int64_t;
-*fpScale = static_cast<int64_t>(*Scale * (1LL << fractional_bits));
+*fpScale = static_cast<int64_t>(*Scale * (1LL << fracBits));


 const int32_t PerMatrixScaleValue = PerColumnScale ? 0 : *fpScale;
@@ -2229,12 +2234,6 @@ MlasRequantizeOutputFixedPoint(

 int64_t ScaleValue = PerColumnScale ? *fpscale++ : PerMatrixScaleValue;

-// Need to wrap into vector to use function scalarToQfp
-std::vector<float> ScaleValueVec = {*Scale}; // Create single-element vector
-auto p = dataToQfp(ScaleValueVec, -1, 32, false); // Returns std::make_pair(qfp, fracBits)
-int fracBits = p.second;
-int mulScale = fracBits - 2;
-
 int64_t largeInt = static_cast<int64_t>(IntegerValue) * ScaleValue; // This is a 29 fixed point
 largeInt = largeInt >> mulScale;
 IntegerValue = customRound<2>(static_cast<int32_t>(largeInt));
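For reference, the change above replaces a hard-coded 31 fractional bits with a fraction-bit count derived from the scale itself (via dataToQfp, hoisted out of the per-element loop), then applies the scale as an integer multiply, a right shift by fracBits - 2, and a final rounding of the remaining low bits with customRound<2>. The standalone C++ sketch below illustrates that multiply-shift-round flow; toFixedPoint and requantizeOne are simplified stand-ins invented for illustration, not the dataToQfp/customRound helpers used in this file.

// Standalone illustration of the float-scale -> fixed-point requantize flow.
// toFixedPoint() and the final rounding are simplified stand-ins for the
// dataToQfp()/customRound<2>() helpers in the actual code.
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <utility>

// Pick as many fraction bits as possible while keeping the quantized scale
// inside a signed 32-bit range, then return (fixed-point scale, fracBits).
static std::pair<int64_t, int> toFixedPoint(float scale) {
    int fracBits = 0;
    while (fracBits < 62 &&
           std::fabs(scale) * std::pow(2.0, fracBits + 1) < 2147483647.0) {
        ++fracBits;
    }
    int64_t q = static_cast<int64_t>(
        std::llround(static_cast<double>(scale) * std::pow(2.0, fracBits)));
    return {q, fracBits};
}

// Requantize one int32 accumulator: widen, multiply by the fixed-point scale,
// shift away all but 2 fraction bits, then round those last 2 bits.
static int32_t requantizeOne(int32_t acc, int64_t fpScale, int fracBits) {
    int mulScale = fracBits - 2;                   // keep 2 bits for rounding
    int64_t large = static_cast<int64_t>(acc) * fpScale;
    large >>= mulScale;
    return static_cast<int32_t>((large + 2) >> 2); // round half up on 2 bits
}

int main() {
    float scale = 0.0123f;                         // example requantize scale
    auto qf = toFixedPoint(scale);
    int32_t acc = 4096;                            // example conv accumulator
    int32_t out = requantizeOne(acc, qf.first, qf.second);
    std::printf("fracBits=%d fpScale=%lld requant=%d float=%f\n",
                qf.second, static_cast<long long>(qf.first), out, acc * scale);
    return 0;
}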
2 changes: 0 additions & 2 deletions onnxruntime/core/providers/cpu/quantization/qlinearconv.cc
@@ -523,8 +523,6 @@ Status QLinearConv<ActType>::Compute(OpKernelContext* context) const {
 // Test to see if we have access to enable_gpnpu flag
 const bool gpnpu_flag = session_options.enable_gpnpu;

-std::cout << "Check enable_gpnpu from qlinearconv.cc: " << gpnpu_flag << std::endl;
-
 const Tensor* X = context->Input<Tensor>(InputTensors::IN_X);
 const Tensor* W = is_W_packed_ ? nullptr : context->Input<Tensor>(InputTensors::IN_W);
 const auto& W_shape = W ? W->Shape() : W_shape_;
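The gpnpu_flag read that remains above pulls a session-level option into the kernel's Compute path, presumably to gate the fixed-point requantization route added in quantize.cpp. The minimal mock below sketches that pattern; the types are illustrative stand-ins only, not ONNX Runtime's actual SessionOptions or QLinearConv classes.

// Minimal mock of the "read a session-level flag inside the kernel" pattern
// kept above. MockSessionOptions/MockQLinearConv are stand-ins invented for
// illustration, not ONNX Runtime's real classes.
#include <cstdio>

struct MockSessionOptions {
    bool enable_gpnpu = false;  // analogous to session_options.enable_gpnpu
};

struct MockQLinearConv {
    const MockSessionOptions& session_options;

    void Compute() const {
        const bool gpnpu_flag = session_options.enable_gpnpu;
        if (gpnpu_flag) {
            std::puts("take the fixed-point (GPNPU) requantization path");
        } else {
            std::puts("take the default floating-point requantization path");
        }
    }
};

int main() {
    MockSessionOptions opts;
    opts.enable_gpnpu = true;
    MockQLinearConv kernel{opts};
    kernel.Compute();
    return 0;
}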
2 changes: 0 additions & 2 deletions onnxruntime/python/onnxruntime_inference_collection.py
@@ -452,8 +452,6 @@ def __init__(

 self._sess_options = sess_options
 self._sess_options_initial = sess_options
-print("here???")
-# print(sess_options.gpnpu_mode)
 self._enable_fallback = True
 if "read_config_from_model" in kwargs:
     self._read_config_from_model = int(kwargs["read_config_from_model"]) == 1
(Diffs for the remaining 2 changed files did not load in this view.)
