Skip to content

Commit

Permalink
(de)quantize_linear optimizations (#76)
Browse files — browse the repository at this point in the history
  • Loading branch information
martinb35 authored Dec 4, 2018
1 parent 37385ec commit 744ec28
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 97 deletions.
161 changes: 74 additions & 87 deletions onnxruntime/contrib_ops/cpu/quantize_linear.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "contrib_ops/cpu/quantize_linear.h"
#include "core/providers/cpu/math/element_wise_ops.h"
#include "core/providers/cpu/tensor/cast_op.h"
#include "core/providers/common.h"

namespace onnxruntime {
namespace contrib {
Expand Down Expand Up @@ -40,51 +41,48 @@ Status DequantizeLinear<T>::Compute(OpKernelContext* ctx) const {
auto& x_zero_point = *ctx->Input<Tensor>(2);
auto& y = *ctx->Output(0, x.Shape());

TensorShape shape(0, 0);
std::unique_ptr<Tensor> reshaped_zero_point;
std::unique_ptr<Tensor> reshaped_scale;
const auto& x_shape = x.Shape();
const auto& scale_shape = x_scale.Shape();
const auto& zero_point_shape = x_zero_point.Shape();
const int64_t axis = HandleNegativeAxis(axis_, x_shape.NumDimensions());

size_t stride = 0;
const auto& broadcastDim = x_shape[axis];

// if an axis was provided, build the shape necessary for broadcasting across that axis
if (has_axis_) {
ONNXRUNTIME_ENFORCE(axis_ < static_cast<int64_t>(x.Shape().NumDimensions()), "axis greater than input data dimension!");
std::vector<int64_t> shape_;
shape_.push_back(x_zero_point.Size());
if (axis_ > 0) {
for (int64_t i = axis_ - 1; i >= 0; i--) {
shape_.push_back(1);
}
}
shape = TensorShape(shape_);
// if an axis was specified, ensure the scale and zero point are compatible
ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 1 && scale_shape.Size() == broadcastDim, "x_scale must be 1D tensor with size ", broadcastDim);
ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 1 && zero_point_shape.Size() == broadcastDim, "x_zero_point must be 1D tensor with size ", broadcastDim);
stride = 1;
} else {
// if no axis, enforce that scale and zero point are scalars
ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 0, "x_scale must be a scalar if no axis is provided");
ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 0, "x_zero_point must be a scalar if no axis is provided");
}

// reshape copies of the inputs for broadcasting.
TensorAllocator<T> tensorAllocatorUint8(*ctx);
reshaped_zero_point = tensorAllocatorUint8.Allocate(shape);
memcpy(reshaped_zero_point->MutableDataRaw(), x_zero_point.DataRaw(), sizeof(T) * x_zero_point.Size());
size_t N = x_shape.SizeToDimension(axis);
const T* zero_point = x_zero_point.template Data<T>();
const float* scale = x_scale.template Data<float>();
size_t block_size = x_shape.SizeFromDimension(axis + 1);
const T* input = x.template Data<T>();
float* output = y.template MutableData<float>();

TensorAllocator<float> tensorAllocatorFloat(*ctx);
reshaped_scale = tensorAllocatorFloat.Allocate(shape);
memcpy(reshaped_scale->MutableDataRaw(), x_scale.DataRaw(), sizeof(float) * x_scale.Size());
}
for (size_t n = 0; n < N; n++) {
const float* current_scale = scale;
const T* current_zero_point = zero_point;

TBroadcaster<T> bc(x, has_axis_ ? *reshaped_zero_point : x_zero_point);
TBroadcastOutput<float> output(bc.GetSpanSize(), y);
BroadcastLoop(bc, output,
[](EigenVectorMap<float> output, T input0, ConstEigenVectorMap<T> input1) {
output = (int32_t(input0) - input1.template cast<int32_t>().array()).template cast<float>();
},
[](EigenVectorMap<float> output, ConstEigenVectorMap<T> input0, T input1) {
output = (input0.template cast<int32_t>().array() - int32_t(input1)).template cast<float>();
},
[](EigenVectorMap<float> output, ConstEigenVectorMap<T> input0, ConstEigenVectorMap<T> input1) {
output = (input0.template cast<int32_t>() - input1.template cast<int32_t>()).template cast<float>();
});

TBroadcaster<float> bc2(y, has_axis_ ? *reshaped_scale : x_scale);
TBroadcastOutput<float> output2(bc2.GetSpanSize(), y);
BroadcastLoop(bc2, output2,
[](EigenVectorMap<float> output, float input0, ConstEigenVectorMap<float> input1) { output = input0 * input1.array(); },
[](EigenVectorMap<float> output, ConstEigenVectorMap<float> input0, float input1) { output = input0.array() * input1; },
[](EigenVectorMap<float> output, ConstEigenVectorMap<float> input0, ConstEigenVectorMap<float> input1) { output = input0.array() * input1.array(); });
for (size_t bd = 0; bd < static_cast<size_t>(broadcastDim); bd++) {
auto zp = static_cast<const int>(*current_zero_point);
auto sc = *current_scale;

for (size_t bs = 0; bs < block_size; bs++) {
*output++ = static_cast<float>(static_cast<const int>(*input++) - zp) * sc;
}

current_scale += stride;
current_zero_point += stride;
}
}

return Status::OK();
}
Expand Down Expand Up @@ -117,60 +115,49 @@ Status QuantizeLinear<float>::Compute(OpKernelContext* ctx) const {
auto& y_zero_point = *ctx->Input<Tensor>(2);
auto& y = *ctx->Output(0, x.Shape());

TensorShape shape(0, 0);
std::unique_ptr<Tensor> reshaped_scale;
const auto& x_shape = x.Shape();
const auto& scale_shape = y_scale.Shape();
const auto& zero_point_shape = y_zero_point.Shape();
const int64_t axis = HandleNegativeAxis(axis_, x_shape.NumDimensions());

TensorAllocator<float> tensorAllocator(*ctx);
size_t stride = 0;
const auto& broadcastDim = x_shape[axis];

// if an axis was provided, build the shape necessary for broadcasting across that axis
if (has_axis_) {
ONNXRUNTIME_ENFORCE(axis_ < static_cast<int64_t>(x.Shape().NumDimensions()), "axis greater than input data dimension!");
std::vector<int64_t> shape_;
shape_.push_back(y_zero_point.Size());
if (axis_ > 0) {
for (int64_t i = axis_ - 1; i >= 0; i--) {
shape_.push_back(1);
// if an axis was specified, ensure the scale and zero point are compatible
ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 1 && scale_shape.Size() == broadcastDim, "x_scale must be 1D tensor with size ", broadcastDim);
ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 1 && zero_point_shape.Size() == broadcastDim, "x_zero_point must be 1D tensor with size ", broadcastDim);
stride = 1;
} else {
// if no axis, enforce that scale and zero point are scalars
ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 0, "x_scale must be a scalar if no axis is provided");
ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 0, "x_zero_point must be a scalar if no axis is provided");
}

size_t N = x_shape.SizeToDimension(axis);
const uint8_t* zero_point = y_zero_point.template Data<uint8_t>();
const float* scale = y_scale.template Data<float>();
size_t block_size = x_shape.SizeFromDimension(axis + 1);
const float* input = x.template Data<float>();
uint8_t* output = y.template MutableData<uint8_t>();

for (size_t n = 0; n < N; n++) {
const float* current_scale = scale;
const uint8_t* current_zero_point = zero_point;

for (size_t bd = 0; bd < static_cast<size_t>(broadcastDim); bd++) {
auto zp = static_cast<const float>(*current_zero_point);
auto sc = *current_scale;

for (size_t bs = 0; bs < block_size; bs++) {
*output++ = static_cast<uint8_t>(clamp(std::round(static_cast<float>(*input++) / sc) + zp, 0.0f, float(UINT8_MAX)));
}
}
shape = TensorShape(shape_);

reshaped_scale = tensorAllocator.Allocate(shape);
memcpy(reshaped_scale->MutableDataRaw(), y_scale.DataRaw(), sizeof(float) * y_scale.Size());
current_scale += stride;
current_zero_point += stride;
}
}

std::unique_ptr<Tensor> W = tensorAllocator.Allocate(x.Shape());
Tensor* pW = W.get();

TBroadcaster<float> bc(x, has_axis_ ? *reshaped_scale : y_scale);
TBroadcastOutput<float> output2(bc.GetSpanSize(), *pW);
BroadcastLoop(bc, output2,
[](EigenVectorMap<float> output, float input0, ConstEigenVectorMap<float> input1) { output = (input0 / input1.array()).round(); },
[](EigenVectorMap<float> output, ConstEigenVectorMap<float> input0, float input1) { output = (input0.array() / input1).round(); },
[](EigenVectorMap<float> output, ConstEigenVectorMap<float> input0, ConstEigenVectorMap<float> input1) { output = (input0.array() / input1.array()).round(); });

std::unique_ptr<Tensor> Zf = tensorAllocator.Allocate(has_axis_ ? shape : y_zero_point.Shape());
Tensor* pZf = Zf.get();
CastData<uint8_t, float>(&y_zero_point, pZf, has_axis_ ? shape : y_zero_point.Shape());

TBroadcaster<float> bc2(*pW, *pZf);
TBroadcastOutput<uint8_t> output(bc2.GetSpanSize(), y);
BroadcastLoop(bc2, output,
[](EigenVectorMap<uint8_t> output, float input0, ConstEigenVectorMap<float> input1) {
for (std::ptrdiff_t i = 0; i < output.size(); i++) {
output[i] = uint8_t(clamp(input0 + float(input1[i]), 0.0f, float(UINT8_MAX)));
}
},
[](EigenVectorMap<uint8_t> output, ConstEigenVectorMap<float> input0, float input1) {
for (std::ptrdiff_t i = 0; i < output.size(); i++) {
output[i] = uint8_t(clamp(input0[i] + float(input1), 0.0f, float(UINT8_MAX)));
}
},
[](EigenVectorMap<uint8_t> output, ConstEigenVectorMap<float> input0, ConstEigenVectorMap<float> input1) {
for (std::ptrdiff_t i = 0; i < output.size(); i++) {
output[i] = uint8_t(clamp(input0[i] + float(input1[i]), 0.0f, float(UINT8_MAX)));
}
});

return Status::OK();
}
} // namespace contrib
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/contrib_ops/cpu/quantize_linear.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class DequantizeLinear final : public OpKernel {
Status Compute(OpKernelContext* context) const override;

private:
int64_t axis_;
int64_t axis_ = 0;
bool has_axis_;
};

Expand All @@ -34,7 +34,7 @@ class QuantizeLinear final : public OpKernel {
Status Compute(OpKernelContext* context) const override;

private:
int64_t axis_;
int64_t axis_ = 0;
bool has_axis_;
};
} // namespace contrib
Expand Down
127 changes: 119 additions & 8 deletions onnxruntime/test/contrib_ops/quantize_linear_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
namespace onnxruntime {
namespace test {

// scalar zero & scale with uint8
TEST(DequantizeLinearOpTest, DequantizeLinear_0) {
OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{4};
Expand All @@ -17,6 +18,7 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_0) {
test.Run();
}

// scalar zero & scale with int8
TEST(DequantizeLinearOpTest, DequantizeLinear_1) {
OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{4};
Expand All @@ -27,23 +29,119 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_1) {
test.Run();
}

// 1d zero & scale with uint8 broadcast axis 0
TEST(DequantizeLinearOpTest, DequantizeLinear_2) {
OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{3, 4};
test.AddInput<uint8_t>("X", dims,
{0, 1, 2, 3,
0, 1, 2, 3,
0, 10, 20, 30});
test.AddAttribute<int64_t>("axis", 0);
test.AddInput<float>("scale", {3},
{1.0f,
2.0f,
4.0f});
test.AddInput<uint8_t>("zero_point", {3},
{0,
0,
0});
test.AddOutput<float>("Y", dims,
{0, 1, 2, 3,
0, 2, 4, 6,
0, 40, 80, 120});
test.Run();
}

// 1d zero & scale with int8 broadcast axis 1
TEST(DequantizeLinearOpTest, DequantizeLinear_3) {
OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{3, 4};
test.AddInput<int8_t>("X", dims,
{0, 1, 2, 3,
0, 2, 4, 6,
0, 10, 20, 30});
test.AddAttribute<int64_t>("axis", 1);
test.AddInput<float>("scale", {3}, {1.0f, 2.0f, 4.0f});
test.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
test.AddInput<float>("scale", {4}, {1, 2, 4, 8});
test.AddInput<int8_t>("zero_point", {4}, {0, -10, -20, -30});
test.AddOutput<float>("Y", dims,
{0, 22, 88, 264,
0, 24, 96, 288,
0, 40, 160, 480});
test.Run();
}

// 1d zero & scale with int8 broadcast axis 0 with 4d tensor input/output
TEST(DequantizeLinearOpTest, DequantizeLinear_4) {
OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{2, 3, 2, 4};
test.AddInput<int8_t>("X", dims,
{7, 9, 10, 10,
5, 8, 9, 1,

8, 6, 7, 9,
10, 0, 7, 10,

8, 2, 6, 0,
5, 9, 8, 1,

2, 7, 5, 3,
2, 4, 1, 3,

8, 7, 4, 8,
10, 1, 5, 5,

7, 7, 0, 2,
4, 4, 0, 5});
test.AddAttribute<int64_t>("axis", 1);
test.AddInput<float>("scale", {3}, {1, 10, 7});
test.AddInput<int8_t>("zero_point", {3}, {10, 2, 1});
test.AddOutput<float>("Y", dims,
{-3, -1, 0, 0,
-5, -2, -1, -9,

60, 40, 50, 70,
80, -20, 50, 80,

49, 7, 35, -7,
28, 56, 49, 0,

-8, -3, -5, -7,
-8, -6, -9, -7,

60, 50, 20, 60,
80, -10, 30, 30,

42, 42, -7, 7,
21, 21, -7, 28});
test.Run();
}

// 1d zero & scale with uint8 broadcast axis -2 (-2 resolves to axis 0)
TEST(DequantizeLinearOpTest, DequantizeLinear_5) {
OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{3, 4};
test.AddInput<uint8_t>("X", dims,
{0, 1, 2, 3,
0, 1, 2, 3,
0, 10, 20, 30});
test.AddAttribute<int64_t>("axis", -2);
test.AddInput<float>("scale", {3},
{1.0f,
2.0f,
4.0f});
test.AddInput<uint8_t>("zero_point", {3},
{0,
0,
0});
test.AddOutput<float>("Y", dims,
{0, 1, 2, 3,
0, 2, 4, 6,
0, 40, 80, 120});
test.Run();
}

// quantize with scalar zero point and scale
TEST(QuantizeLinearOpTest, QuantizeLinear_0) {
OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{6};
Expand All @@ -54,19 +152,15 @@ TEST(QuantizeLinearOpTest, QuantizeLinear_0) {
test.Run();
}

// TODO this test is failing for Mac. needs to be debugged.
#if defined(__MACH__)
TEST(QuantizeLinearOpTest, DISABLED_QuantizeLinear_1) {
#else
// quantize with broadcasting
TEST(QuantizeLinearOpTest, QuantizeLinear_1) {
#endif
OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{3, 4};
test.AddInput<float>("X", dims,
{0, 2, 3, 1000,
0, 2, 3, 1000,
0, 2, 3, 1000});
test.AddAttribute<int64_t>("axis", 1);
test.AddAttribute<int64_t>("axis", 0);
test.AddInput<float>("scale", {3}, {1, 2, 4});
test.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
test.AddOutput<uint8_t>("Y", dims,
Expand All @@ -76,5 +170,22 @@ TEST(QuantizeLinearOpTest, QuantizeLinear_1) {
test.Run();
}

// quantize with broadcasting and negative axis (-2 resolves to axis 0)
TEST(QuantizeLinearOpTest, QuantizeLinear_2) {
OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{3, 4};
test.AddInput<float>("X", dims,
{0, 2, 3, 1000,
0, 2, 3, 1000,
0, 2, 3, 1000});
test.AddAttribute<int64_t>("axis", -2);
test.AddInput<float>("scale", {3}, {1, 2, 4});
test.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
test.AddOutput<uint8_t>("Y", dims,
{0, 2, 3, 255,
0, 1, 2, 255,
0, 1, 1, 250});
test.Run();
}
} // namespace test
} // namespace onnxruntime

0 comments on commit 744ec28

Please sign in to comment.