Added unit test for qlinearconv
maggiesquadric committed Jan 17, 2025
1 parent f738d8b commit 03e3ea5
Showing 5 changed files with 397 additions and 14 deletions.
15 changes: 7 additions & 8 deletions onnxruntime/core/mlas/lib/quantize.cpp
@@ -2186,9 +2186,14 @@ MlasRequantizeOutputFixedPoint(
 // New MlasRequantizeOuput but for fixed point not floating point
 // Floating point conversion to fixed point is multiply by 2**n where n is the number of decimal places
 // Then, interpret this number as a 32 bit int
-int fractional_bits = 31;
+// Need to wrap into vector to use function scalarToQfp
+std::vector<float> ScaleValueVec = {*Scale}; // Create single-element vector
+auto p = dataToQfp(ScaleValueVec, -1, 32, false); // Returns std::make_pair(qfp, fracBits)
+int fracBits = p.second;
+int mulScale = fracBits - 2;
+
 int64_t* fpScale = new int64_t;
-*fpScale = static_cast<int64_t>(*Scale * (1LL << fractional_bits));
+*fpScale = static_cast<int64_t>(*Scale * (1LL << fracBits));


 const int32_t PerMatrixScaleValue = PerColumnScale ? 0 : *fpScale;
@@ -2229,12 +2234,6 @@ MlasRequantizeOutputFixedPoint(

 int64_t ScaleValue = PerColumnScale ? *fpscale++ : PerMatrixScaleValue;

-// Need to wrap into vector to use function scalarToQfp
-std::vector<float> ScaleValueVec = {*Scale}; // Create single-element vector
-auto p = dataToQfp(ScaleValueVec, -1, 32, false); // Returns std::make_pair(qfp, fracBits)
-int fracBits = p.second;
-int mulScale = fracBits - 2;
-
 int64_t largeInt = static_cast<int64_t>(IntegerValue) * ScaleValue; // This is a 29 fixed point
 largeInt = largeInt >> mulScale;
 IntegerValue = customRound<2>(static_cast<int32_t>(largeInt));
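For reference, the change above replaces a hard-coded 31 fractional bits with a fraction-bit count derived from the scale itself (via dataToQfp, hoisted out of the per-element loop), then applies the scale as an integer multiply, a right shift by fracBits - 2, and a final rounding of the remaining low bits with customRound<2>. The standalone C++ sketch below illustrates that multiply-shift-round flow; toFixedPoint and requantizeOne are simplified stand-ins invented for illustration, not the dataToQfp/customRound helpers used in this file.

// Standalone illustration of the float-scale -> fixed-point requantize flow.
// toFixedPoint() and the final rounding are simplified stand-ins for the
// dataToQfp()/customRound<2>() helpers in the actual code.
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <utility>

// Pick as many fraction bits as possible while keeping the quantized scale
// inside a signed 32-bit range, then return (fixed-point scale, fracBits).
static std::pair<int64_t, int> toFixedPoint(float scale) {
    int fracBits = 0;
    while (fracBits < 62 &&
           std::fabs(scale) * std::pow(2.0, fracBits + 1) < 2147483647.0) {
        ++fracBits;
    }
    int64_t q = static_cast<int64_t>(
        std::llround(static_cast<double>(scale) * std::pow(2.0, fracBits)));
    return {q, fracBits};
}

// Requantize one int32 accumulator: widen, multiply by the fixed-point scale,
// shift away all but 2 fraction bits, then round those last 2 bits.
static int32_t requantizeOne(int32_t acc, int64_t fpScale, int fracBits) {
    int mulScale = fracBits - 2;                   // keep 2 bits for rounding
    int64_t large = static_cast<int64_t>(acc) * fpScale;
    large >>= mulScale;
    return static_cast<int32_t>((large + 2) >> 2); // round half up on 2 bits
}

int main() {
    float scale = 0.0123f;                         // example requantize scale
    auto qf = toFixedPoint(scale);
    int32_t acc = 4096;                            // example conv accumulator
    int32_t out = requantizeOne(acc, qf.first, qf.second);
    std::printf("fracBits=%d fpScale=%lld requant=%d float=%f\n",
                qf.second, static_cast<long long>(qf.first), out, acc * scale);
    return 0;
}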
2 changes: 0 additions & 2 deletions onnxruntime/core/providers/cpu/quantization/qlinearconv.cc
@@ -523,8 +523,6 @@ Status QLinearConv<ActType>::Compute(OpKernelContext* context) const {
 // Test to see if we have access to enable_gpnpu flag
 const bool gpnpu_flag = session_options.enable_gpnpu;

-std::cout << "Check enable_gpnpu from qlinearconv.cc: " << gpnpu_flag << std::endl;
-
 const Tensor* X = context->Input<Tensor>(InputTensors::IN_X);
 const Tensor* W = is_W_packed_ ? nullptr : context->Input<Tensor>(InputTensors::IN_W);
 const auto& W_shape = W ? W->Shape() : W_shape_;
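The gpnpu_flag read that remains above pulls a session-level option into the kernel's Compute path, presumably to gate the fixed-point requantization route added in quantize.cpp. The minimal mock below sketches that pattern; the types are illustrative stand-ins only, not ONNX Runtime's actual SessionOptions or QLinearConv classes.

// Minimal mock of the "read a session-level flag inside the kernel" pattern
// kept above. MockSessionOptions/MockQLinearConv are stand-ins invented for
// illustration, not ONNX Runtime's real classes.
#include <cstdio>

struct MockSessionOptions {
    bool enable_gpnpu = false;  // analogous to session_options.enable_gpnpu
};

struct MockQLinearConv {
    const MockSessionOptions& session_options;

    void Compute() const {
        const bool gpnpu_flag = session_options.enable_gpnpu;
        if (gpnpu_flag) {
            std::puts("take the fixed-point (GPNPU) requantization path");
        } else {
            std::puts("take the default floating-point requantization path");
        }
    }
};

int main() {
    MockSessionOptions opts;
    opts.enable_gpnpu = true;
    MockQLinearConv kernel{opts};
    kernel.Compute();
    return 0;
}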
2 changes: 0 additions & 2 deletions onnxruntime/python/onnxruntime_inference_collection.py
@@ -452,8 +452,6 @@ def __init__(

 self._sess_options = sess_options
 self._sess_options_initial = sess_options
-print("here???")
-# print(sess_options.gpnpu_mode)
 self._enable_fallback = True
 if "read_config_from_model" in kwargs:
     self._read_config_from_model = int(kwargs["read_config_from_model"]) == 1
(Diffs for the remaining 2 changed files did not load in this view.)
