diff --git a/onnxruntime/contrib_ops/cpu/quantize_linear.cc b/onnxruntime/contrib_ops/cpu/quantize_linear.cc index 48b90d847b02b..6f504c37aca78 100644 --- a/onnxruntime/contrib_ops/cpu/quantize_linear.cc +++ b/onnxruntime/contrib_ops/cpu/quantize_linear.cc @@ -4,6 +4,7 @@ #include "contrib_ops/cpu/quantize_linear.h" #include "core/providers/cpu/math/element_wise_ops.h" #include "core/providers/cpu/tensor/cast_op.h" +#include "core/providers/common.h" namespace onnxruntime { namespace contrib { @@ -40,51 +41,48 @@ Status DequantizeLinear::Compute(OpKernelContext* ctx) const { auto& x_zero_point = *ctx->Input(2); auto& y = *ctx->Output(0, x.Shape()); - TensorShape shape(0, 0); - std::unique_ptr reshaped_zero_point; - std::unique_ptr reshaped_scale; + const auto& x_shape = x.Shape(); + const auto& scale_shape = x_scale.Shape(); + const auto& zero_point_shape = x_zero_point.Shape(); + const int64_t axis = HandleNegativeAxis(axis_, x_shape.NumDimensions()); + + size_t stride = 0; + const auto& broadcastDim = x_shape[axis]; - // if an axis was provided, build the shape necessary for broadcasting across that axis if (has_axis_) { - ONNXRUNTIME_ENFORCE(axis_ < static_cast(x.Shape().NumDimensions()), "axis greater than input data dimension!"); - std::vector shape_; - shape_.push_back(x_zero_point.Size()); - if (axis_ > 0) { - for (int64_t i = axis_ - 1; i >= 0; i--) { - shape_.push_back(1); - } - } - shape = TensorShape(shape_); + // if an axis was specified, ensure the scale and zero point are compatible + ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 1 && scale_shape.Size() == broadcastDim, "x_scale must be 1D tensor with size ", broadcastDim); + ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 1 && zero_point_shape.Size() == broadcastDim, "x_zero_point must be 1D tensor with size ", broadcastDim); + stride = 1; + } else { + // if no axis, enforce that scale and zero point are scalars + ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 0, "x_scale must be a 
scalar if no axis is provided"); + ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 0, "x_zero_point must be a scalar if no axis is provided"); + } - // reshape copies of the inputs for broadcasting. - TensorAllocator tensorAllocatorUint8(*ctx); - reshaped_zero_point = tensorAllocatorUint8.Allocate(shape); - memcpy(reshaped_zero_point->MutableDataRaw(), x_zero_point.DataRaw(), sizeof(T) * x_zero_point.Size()); + size_t N = x_shape.SizeToDimension(axis); + const T* zero_point = x_zero_point.template Data(); + const float* scale = x_scale.template Data(); + size_t block_size = x_shape.SizeFromDimension(axis + 1); + const T* input = x.template Data(); + float* output = y.template MutableData(); - TensorAllocator tensorAllocatorFloat(*ctx); - reshaped_scale = tensorAllocatorFloat.Allocate(shape); - memcpy(reshaped_scale->MutableDataRaw(), x_scale.DataRaw(), sizeof(float) * x_scale.Size()); - } + for (size_t n = 0; n < N; n++) { + const float* current_scale = scale; + const T* current_zero_point = zero_point; - TBroadcaster bc(x, has_axis_ ? *reshaped_zero_point : x_zero_point); - TBroadcastOutput output(bc.GetSpanSize(), y); - BroadcastLoop(bc, output, - [](EigenVectorMap output, T input0, ConstEigenVectorMap input1) { - output = (int32_t(input0) - input1.template cast().array()).template cast(); - }, - [](EigenVectorMap output, ConstEigenVectorMap input0, T input1) { - output = (input0.template cast().array() - int32_t(input1)).template cast(); - }, - [](EigenVectorMap output, ConstEigenVectorMap input0, ConstEigenVectorMap input1) { - output = (input0.template cast() - input1.template cast()).template cast(); - }); - - TBroadcaster bc2(y, has_axis_ ? 
*reshaped_scale : x_scale); - TBroadcastOutput output2(bc2.GetSpanSize(), y); - BroadcastLoop(bc2, output2, - [](EigenVectorMap output, float input0, ConstEigenVectorMap input1) { output = input0 * input1.array(); }, - [](EigenVectorMap output, ConstEigenVectorMap input0, float input1) { output = input0.array() * input1; }, - [](EigenVectorMap output, ConstEigenVectorMap input0, ConstEigenVectorMap input1) { output = input0.array() * input1.array(); }); + for (size_t bd = 0; bd < static_cast(broadcastDim); bd++) { + auto zp = static_cast(*current_zero_point); + auto sc = *current_scale; + + for (size_t bs = 0; bs < block_size; bs++) { + *output++ = static_cast(static_cast(*input++) - zp) * sc; + } + + current_scale += stride; + current_zero_point += stride; + } + } return Status::OK(); } @@ -117,60 +115,49 @@ Status QuantizeLinear::Compute(OpKernelContext* ctx) const { auto& y_zero_point = *ctx->Input(2); auto& y = *ctx->Output(0, x.Shape()); - TensorShape shape(0, 0); - std::unique_ptr reshaped_scale; + const auto& x_shape = x.Shape(); + const auto& scale_shape = y_scale.Shape(); + const auto& zero_point_shape = y_zero_point.Shape(); + const int64_t axis = HandleNegativeAxis(axis_, x_shape.NumDimensions()); - TensorAllocator tensorAllocator(*ctx); + size_t stride = 0; + const auto& broadcastDim = x_shape[axis]; - // if an axis was provided, build the shape necessary for broadcasting across that axis if (has_axis_) { - ONNXRUNTIME_ENFORCE(axis_ < static_cast(x.Shape().NumDimensions()), "axis greater than input data dimension!"); - std::vector shape_; - shape_.push_back(y_zero_point.Size()); - if (axis_ > 0) { - for (int64_t i = axis_ - 1; i >= 0; i--) { - shape_.push_back(1); + // if an axis was specified, ensure the scale and zero point are compatible + ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 1 && scale_shape.Size() == broadcastDim, "x_scale must be 1D tensor with size ", broadcastDim); + ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 1 && 
zero_point_shape.Size() == broadcastDim, "x_zero_point must be 1D tensor with size ", broadcastDim); + stride = 1; + } else { + // if no axis, enforce that scale and zero point are scalars + ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 0, "x_scale must be a scalar if no axis is provided"); + ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 0, "x_zero_point must be a scalar if no axis is provided"); + } + + size_t N = x_shape.SizeToDimension(axis); + const uint8_t* zero_point = y_zero_point.template Data(); + const float* scale = y_scale.template Data(); + size_t block_size = x_shape.SizeFromDimension(axis + 1); + const float* input = x.template Data(); + uint8_t* output = y.template MutableData(); + + for (size_t n = 0; n < N; n++) { + const float* current_scale = scale; + const uint8_t* current_zero_point = zero_point; + + for (size_t bd = 0; bd < static_cast(broadcastDim); bd++) { + auto zp = static_cast(*current_zero_point); + auto sc = *current_scale; + + for (size_t bs = 0; bs < block_size; bs++) { + *output++ = static_cast(clamp(std::round(static_cast(*input++) / sc) + zp, 0.0f, float(UINT8_MAX))); } - } - shape = TensorShape(shape_); - reshaped_scale = tensorAllocator.Allocate(shape); - memcpy(reshaped_scale->MutableDataRaw(), y_scale.DataRaw(), sizeof(float) * y_scale.Size()); + current_scale += stride; + current_zero_point += stride; + } } - std::unique_ptr W = tensorAllocator.Allocate(x.Shape()); - Tensor* pW = W.get(); - - TBroadcaster bc(x, has_axis_ ? 
*reshaped_scale : y_scale); - TBroadcastOutput output2(bc.GetSpanSize(), *pW); - BroadcastLoop(bc, output2, - [](EigenVectorMap output, float input0, ConstEigenVectorMap input1) { output = (input0 / input1.array()).round(); }, - [](EigenVectorMap output, ConstEigenVectorMap input0, float input1) { output = (input0.array() / input1).round(); }, - [](EigenVectorMap output, ConstEigenVectorMap input0, ConstEigenVectorMap input1) { output = (input0.array() / input1.array()).round(); }); - - std::unique_ptr Zf = tensorAllocator.Allocate(has_axis_ ? shape : y_zero_point.Shape()); - Tensor* pZf = Zf.get(); - CastData(&y_zero_point, pZf, has_axis_ ? shape : y_zero_point.Shape()); - - TBroadcaster bc2(*pW, *pZf); - TBroadcastOutput output(bc2.GetSpanSize(), y); - BroadcastLoop(bc2, output, - [](EigenVectorMap output, float input0, ConstEigenVectorMap input1) { - for (std::ptrdiff_t i = 0; i < output.size(); i++) { - output[i] = uint8_t(clamp(input0 + float(input1[i]), 0.0f, float(UINT8_MAX))); - } - }, - [](EigenVectorMap output, ConstEigenVectorMap input0, float input1) { - for (std::ptrdiff_t i = 0; i < output.size(); i++) { - output[i] = uint8_t(clamp(input0[i] + float(input1), 0.0f, float(UINT8_MAX))); - } - }, - [](EigenVectorMap output, ConstEigenVectorMap input0, ConstEigenVectorMap input1) { - for (std::ptrdiff_t i = 0; i < output.size(); i++) { - output[i] = uint8_t(clamp(input0[i] + float(input1[i]), 0.0f, float(UINT8_MAX))); - } - }); - return Status::OK(); } } // namespace contrib diff --git a/onnxruntime/contrib_ops/cpu/quantize_linear.h b/onnxruntime/contrib_ops/cpu/quantize_linear.h index 1175e88fcf045..cc113af44551a 100644 --- a/onnxruntime/contrib_ops/cpu/quantize_linear.h +++ b/onnxruntime/contrib_ops/cpu/quantize_linear.h @@ -20,7 +20,7 @@ class DequantizeLinear final : public OpKernel { Status Compute(OpKernelContext* context) const override; private: - int64_t axis_; + int64_t axis_ = 0; bool has_axis_; }; @@ -34,7 +34,7 @@ class QuantizeLinear final : 
public OpKernel { Status Compute(OpKernelContext* context) const override; private: - int64_t axis_; + int64_t axis_ = 0; bool has_axis_; }; } // namespace contrib diff --git a/onnxruntime/test/contrib_ops/quantize_linear_test.cc b/onnxruntime/test/contrib_ops/quantize_linear_test.cc index b81536aee80cd..952f3f08f4aab 100644 --- a/onnxruntime/test/contrib_ops/quantize_linear_test.cc +++ b/onnxruntime/test/contrib_ops/quantize_linear_test.cc @@ -7,6 +7,7 @@ namespace onnxruntime { namespace test { +// scalar zero & scale with uint8 TEST(DequantizeLinearOpTest, DequantizeLinear_0) { OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); std::vector dims{4}; @@ -17,6 +18,7 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_0) { test.Run(); } +// scalar zero & scale with int8 TEST(DequantizeLinearOpTest, DequantizeLinear_1) { OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); std::vector dims{4}; @@ -27,6 +29,7 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_1) { test.Run(); } +// 1d zero & scale with uint8 broadcast axis 0 TEST(DequantizeLinearOpTest, DequantizeLinear_2) { OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); std::vector dims{3, 4}; @@ -34,9 +37,103 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_2) { {0, 1, 2, 3, 0, 1, 2, 3, 0, 10, 20, 30}); + test.AddAttribute("axis", 0); + test.AddInput("scale", {3}, + {1.0f, + 2.0f, + 4.0f}); + test.AddInput("zero_point", {3}, + {0, + 0, + 0}); + test.AddOutput("Y", dims, + {0, 1, 2, 3, + 0, 2, 4, 6, + 0, 40, 80, 120}); + test.Run(); +} + +// 1d zero & scale with int8 broadcast axis 1 +TEST(DequantizeLinearOpTest, DequantizeLinear_3) { + OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); + std::vector dims{3, 4}; + test.AddInput("X", dims, + {0, 1, 2, 3, + 0, 2, 4, 6, + 0, 10, 20, 30}); test.AddAttribute("axis", 1); - test.AddInput("scale", {3}, {1.0f, 2.0f, 4.0f}); - test.AddInput("zero_point", {3}, {0, 0, 0}); + test.AddInput("scale", {4}, {1, 2, 4, 8}); + 
test.AddInput("zero_point", {4}, {0, -10, -20, -30}); + test.AddOutput("Y", dims, + {0, 22, 88, 264, + 0, 24, 96, 288, + 0, 40, 160, 480}); + test.Run(); +} + +// 1d zero & scale with int8 broadcast axis 1 with 4d tensor input/output +TEST(DequantizeLinearOpTest, DequantizeLinear_4) { + OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); + std::vector dims{2, 3, 2, 4}; + test.AddInput("X", dims, + {7, 9, 10, 10, + 5, 8, 9, 1, + + 8, 6, 7, 9, + 10, 0, 7, 10, + + 8, 2, 6, 0, + 5, 9, 8, 1, + + 2, 7, 5, 3, + 2, 4, 1, 3, + + 8, 7, 4, 8, + 10, 1, 5, 5, + + 7, 7, 0, 2, + 4, 4, 0, 5}); + test.AddAttribute("axis", 1); + test.AddInput("scale", {3}, {1, 10, 7}); + test.AddInput("zero_point", {3}, {10, 2, 1}); + test.AddOutput("Y", dims, + {-3, -1, 0, 0, + -5, -2, -1, -9, + + 60, 40, 50, 70, + 80, -20, 50, 80, + + 49, 7, 35, -7, + 28, 56, 49, 0, + + -8, -3, -5, -7, + -8, -6, -9, -7, + + 60, 50, 20, 60, + 80, -10, 30, 30, + + 42, 42, -7, 7, + 21, 21, -7, 28}); + test.Run(); +} + +// 1d zero & scale with uint8 broadcast axis -2 (-2 resolves to axis 0) +TEST(DequantizeLinearOpTest, DequantizeLinear_5) { + OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); + std::vector dims{3, 4}; + test.AddInput("X", dims, + {0, 1, 2, 3, + 0, 1, 2, 3, + 0, 10, 20, 30}); + test.AddAttribute("axis", -2); + test.AddInput("scale", {3}, + {1.0f, + 2.0f, + 4.0f}); + test.AddInput("zero_point", {3}, + {0, + 0, + 0}); test.AddOutput("Y", dims, {0, 1, 2, 3, 0, 2, 4, 6, @@ -44,6 +141,7 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_2) { test.Run(); } +// quantize with scalar zero point and scale TEST(QuantizeLinearOpTest, QuantizeLinear_0) { OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain); std::vector dims{6}; @@ -54,19 +152,15 @@ TEST(QuantizeLinearOpTest, QuantizeLinear_0) { test.Run(); } -// TODO this test is failing for Mac. needs to be debugged. 
-#if defined(__MACH__) -TEST(QuantizeLinearOpTest, DISABLED_QuantizeLinear_1) { -#else +// quantize with broadcasting TEST(QuantizeLinearOpTest, QuantizeLinear_1) { -#endif OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain); std::vector dims{3, 4}; test.AddInput("X", dims, {0, 2, 3, 1000, 0, 2, 3, 1000, 0, 2, 3, 1000}); - test.AddAttribute("axis", 1); + test.AddAttribute("axis", 0); test.AddInput("scale", {3}, {1, 2, 4}); test.AddInput("zero_point", {3}, {0, 0, 0}); test.AddOutput("Y", dims, @@ -76,5 +170,22 @@ TEST(QuantizeLinearOpTest, QuantizeLinear_1) { test.Run(); } +// quantize with broadcasting and negative axis (-2 resolves to axis 0) +TEST(QuantizeLinearOpTest, QuantizeLinear_2) { + OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain); + std::vector dims{3, 4}; + test.AddInput("X", dims, + {0, 2, 3, 1000, + 0, 2, 3, 1000, + 0, 2, 3, 1000}); + test.AddAttribute("axis", -2); + test.AddInput("scale", {3}, {1, 2, 4}); + test.AddInput("zero_point", {3}, {0, 0, 0}); + test.AddOutput("Y", dims, + {0, 2, 3, 255, + 0, 1, 2, 255, + 0, 1, 1, 250}); + test.Run(); +} } // namespace test } // namespace onnxruntime