Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(de)quantize_linear optimizations #76

Merged
merged 1 commit into from
Dec 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 74 additions & 87 deletions onnxruntime/contrib_ops/cpu/quantize_linear.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "contrib_ops/cpu/quantize_linear.h"
#include "core/providers/cpu/math/element_wise_ops.h"
#include "core/providers/cpu/tensor/cast_op.h"
#include "core/providers/common.h"

namespace onnxruntime {
namespace contrib {
Expand Down Expand Up @@ -40,51 +41,48 @@ Status DequantizeLinear<T>::Compute(OpKernelContext* ctx) const {
auto& x_zero_point = *ctx->Input<Tensor>(2);
auto& y = *ctx->Output(0, x.Shape());

TensorShape shape(0, 0);
std::unique_ptr<Tensor> reshaped_zero_point;
std::unique_ptr<Tensor> reshaped_scale;
const auto& x_shape = x.Shape();
const auto& scale_shape = x_scale.Shape();
const auto& zero_point_shape = x_zero_point.Shape();
const int64_t axis = HandleNegativeAxis(axis_, x_shape.NumDimensions());

size_t stride = 0;
const auto& broadcastDim = x_shape[axis];

// if an axis was provided, build the shape necessary for broadcasting across that axis
if (has_axis_) {
ONNXRUNTIME_ENFORCE(axis_ < static_cast<int64_t>(x.Shape().NumDimensions()), "axis greater than input data dimension!");
std::vector<int64_t> shape_;
shape_.push_back(x_zero_point.Size());
if (axis_ > 0) {
for (int64_t i = axis_ - 1; i >= 0; i--) {
shape_.push_back(1);
}
}
shape = TensorShape(shape_);
// if an axis was specified, ensure the scale and zero point are compatible
ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 1 && scale_shape.Size() == broadcastDim, "x_scale must be 1D tensor with size ", broadcastDim);
ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 1 && zero_point_shape.Size() == broadcastDim, "x_zero_point must be 1D tensor with size ", broadcastDim);
stride = 1;
} else {
// if no axis, enforce that scale and zero point are scalars
ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 0, "x_scale must be a scalar if no axis is provided");
ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 0, "x_zero_point must be a scalar if no axis is provided");
}

// reshape copies of the inputs for broadcasting.
TensorAllocator<T> tensorAllocatorUint8(*ctx);
reshaped_zero_point = tensorAllocatorUint8.Allocate(shape);
memcpy(reshaped_zero_point->MutableDataRaw(), x_zero_point.DataRaw(), sizeof(T) * x_zero_point.Size());
size_t N = x_shape.SizeToDimension(axis);
const T* zero_point = x_zero_point.template Data<T>();
const float* scale = x_scale.template Data<float>();
size_t block_size = x_shape.SizeFromDimension(axis + 1);
const T* input = x.template Data<T>();
float* output = y.template MutableData<float>();

TensorAllocator<float> tensorAllocatorFloat(*ctx);
reshaped_scale = tensorAllocatorFloat.Allocate(shape);
memcpy(reshaped_scale->MutableDataRaw(), x_scale.DataRaw(), sizeof(float) * x_scale.Size());
}
for (size_t n = 0; n < N; n++) {
const float* current_scale = scale;
const T* current_zero_point = zero_point;

TBroadcaster<T> bc(x, has_axis_ ? *reshaped_zero_point : x_zero_point);
TBroadcastOutput<float> output(bc.GetSpanSize(), y);
BroadcastLoop(bc, output,
[](EigenVectorMap<float> output, T input0, ConstEigenVectorMap<T> input1) {
output = (int32_t(input0) - input1.template cast<int32_t>().array()).template cast<float>();
},
[](EigenVectorMap<float> output, ConstEigenVectorMap<T> input0, T input1) {
output = (input0.template cast<int32_t>().array() - int32_t(input1)).template cast<float>();
},
[](EigenVectorMap<float> output, ConstEigenVectorMap<T> input0, ConstEigenVectorMap<T> input1) {
output = (input0.template cast<int32_t>() - input1.template cast<int32_t>()).template cast<float>();
});

TBroadcaster<float> bc2(y, has_axis_ ? *reshaped_scale : x_scale);
TBroadcastOutput<float> output2(bc2.GetSpanSize(), y);
BroadcastLoop(bc2, output2,
[](EigenVectorMap<float> output, float input0, ConstEigenVectorMap<float> input1) { output = input0 * input1.array(); },
[](EigenVectorMap<float> output, ConstEigenVectorMap<float> input0, float input1) { output = input0.array() * input1; },
[](EigenVectorMap<float> output, ConstEigenVectorMap<float> input0, ConstEigenVectorMap<float> input1) { output = input0.array() * input1.array(); });
for (size_t bd = 0; bd < static_cast<size_t>(broadcastDim); bd++) {
auto zp = static_cast<const int>(*current_zero_point);
auto sc = *current_scale;

for (size_t bs = 0; bs < block_size; bs++) {
*output++ = static_cast<float>(static_cast<const int>(*input++) - zp) * sc;
}

current_scale += stride;
current_zero_point += stride;
}
}

return Status::OK();
}
Expand Down Expand Up @@ -117,60 +115,49 @@ Status QuantizeLinear<float>::Compute(OpKernelContext* ctx) const {
auto& y_zero_point = *ctx->Input<Tensor>(2);
auto& y = *ctx->Output(0, x.Shape());

TensorShape shape(0, 0);
std::unique_ptr<Tensor> reshaped_scale;
const auto& x_shape = x.Shape();
const auto& scale_shape = y_scale.Shape();
const auto& zero_point_shape = y_zero_point.Shape();
const int64_t axis = HandleNegativeAxis(axis_, x_shape.NumDimensions());

TensorAllocator<float> tensorAllocator(*ctx);
size_t stride = 0;
const auto& broadcastDim = x_shape[axis];

// if an axis was provided, build the shape necessary for broadcasting across that axis
if (has_axis_) {
ONNXRUNTIME_ENFORCE(axis_ < static_cast<int64_t>(x.Shape().NumDimensions()), "axis greater than input data dimension!");
std::vector<int64_t> shape_;
shape_.push_back(y_zero_point.Size());
if (axis_ > 0) {
for (int64_t i = axis_ - 1; i >= 0; i--) {
shape_.push_back(1);
// if an axis was specified, ensure the scale and zero point are compatible
ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 1 && scale_shape.Size() == broadcastDim, "x_scale must be 1D tensor with size ", broadcastDim);
ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 1 && zero_point_shape.Size() == broadcastDim, "x_zero_point must be 1D tensor with size ", broadcastDim);
stride = 1;
} else {
// if no axis, enforce that scale and zero point are scalars
ONNXRUNTIME_ENFORCE(scale_shape.NumDimensions() == 0, "x_scale must be a scalar if no axis is provided");
ONNXRUNTIME_ENFORCE(zero_point_shape.NumDimensions() == 0, "x_zero_point must be a scalar if no axis is provided");
}

size_t N = x_shape.SizeToDimension(axis);
const uint8_t* zero_point = y_zero_point.template Data<uint8_t>();
const float* scale = y_scale.template Data<float>();
size_t block_size = x_shape.SizeFromDimension(axis + 1);
const float* input = x.template Data<float>();
uint8_t* output = y.template MutableData<uint8_t>();

for (size_t n = 0; n < N; n++) {
const float* current_scale = scale;
const uint8_t* current_zero_point = zero_point;

for (size_t bd = 0; bd < static_cast<size_t>(broadcastDim); bd++) {
auto zp = static_cast<const float>(*current_zero_point);
auto sc = *current_scale;

for (size_t bs = 0; bs < block_size; bs++) {
*output++ = static_cast<uint8_t>(clamp(std::round(static_cast<float>(*input++) / sc) + zp, 0.0f, float(UINT8_MAX)));
}
}
shape = TensorShape(shape_);

reshaped_scale = tensorAllocator.Allocate(shape);
memcpy(reshaped_scale->MutableDataRaw(), y_scale.DataRaw(), sizeof(float) * y_scale.Size());
current_scale += stride;
current_zero_point += stride;
}
}

std::unique_ptr<Tensor> W = tensorAllocator.Allocate(x.Shape());
Tensor* pW = W.get();

TBroadcaster<float> bc(x, has_axis_ ? *reshaped_scale : y_scale);
TBroadcastOutput<float> output2(bc.GetSpanSize(), *pW);
BroadcastLoop(bc, output2,
[](EigenVectorMap<float> output, float input0, ConstEigenVectorMap<float> input1) { output = (input0 / input1.array()).round(); },
[](EigenVectorMap<float> output, ConstEigenVectorMap<float> input0, float input1) { output = (input0.array() / input1).round(); },
[](EigenVectorMap<float> output, ConstEigenVectorMap<float> input0, ConstEigenVectorMap<float> input1) { output = (input0.array() / input1.array()).round(); });

std::unique_ptr<Tensor> Zf = tensorAllocator.Allocate(has_axis_ ? shape : y_zero_point.Shape());
Tensor* pZf = Zf.get();
CastData<uint8_t, float>(&y_zero_point, pZf, has_axis_ ? shape : y_zero_point.Shape());

TBroadcaster<float> bc2(*pW, *pZf);
TBroadcastOutput<uint8_t> output(bc2.GetSpanSize(), y);
BroadcastLoop(bc2, output,
[](EigenVectorMap<uint8_t> output, float input0, ConstEigenVectorMap<float> input1) {
for (std::ptrdiff_t i = 0; i < output.size(); i++) {
output[i] = uint8_t(clamp(input0 + float(input1[i]), 0.0f, float(UINT8_MAX)));
}
},
[](EigenVectorMap<uint8_t> output, ConstEigenVectorMap<float> input0, float input1) {
for (std::ptrdiff_t i = 0; i < output.size(); i++) {
output[i] = uint8_t(clamp(input0[i] + float(input1), 0.0f, float(UINT8_MAX)));
}
},
[](EigenVectorMap<uint8_t> output, ConstEigenVectorMap<float> input0, ConstEigenVectorMap<float> input1) {
for (std::ptrdiff_t i = 0; i < output.size(); i++) {
output[i] = uint8_t(clamp(input0[i] + float(input1[i]), 0.0f, float(UINT8_MAX)));
}
});

return Status::OK();
}
} // namespace contrib
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/contrib_ops/cpu/quantize_linear.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class DequantizeLinear final : public OpKernel {
Status Compute(OpKernelContext* context) const override;

private:
int64_t axis_;
int64_t axis_ = 0;
bool has_axis_;
};

Expand All @@ -34,7 +34,7 @@ class QuantizeLinear final : public OpKernel {
Status Compute(OpKernelContext* context) const override;

private:
int64_t axis_;
int64_t axis_ = 0;
bool has_axis_;
};
} // namespace contrib
Expand Down
127 changes: 119 additions & 8 deletions onnxruntime/test/contrib_ops/quantize_linear_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
namespace onnxruntime {
namespace test {

// scalar zero & scale with uint8
TEST(DequantizeLinearOpTest, DequantizeLinear_0) {
OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{4};
Expand All @@ -17,6 +18,7 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_0) {
test.Run();
}

// scalar zero & scale with int8
TEST(DequantizeLinearOpTest, DequantizeLinear_1) {
OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{4};
Expand All @@ -27,23 +29,119 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_1) {
test.Run();
}

// Per-row dequantization of a uint8 tensor: 1-D scale/zero_point broadcast along axis 0,
// so each of the 3 rows gets its own {scale, zero_point} pair.
TEST(DequantizeLinearOpTest, DequantizeLinear_2) {
  OpTester tester("DequantizeLinear", 1, onnxruntime::kMSDomain);
  std::vector<int64_t> input_shape{3, 4};
  tester.AddAttribute<int64_t>("axis", 0);
  tester.AddInput<uint8_t>("X", input_shape,
                           {0, 1, 2, 3,
                            0, 1, 2, 3,
                            0, 10, 20, 30});
  tester.AddInput<float>("scale", {3}, {1.0f, 2.0f, 4.0f});
  tester.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
  // row i of Y == (row i of X - zero_point[i]) * scale[i]
  tester.AddOutput<float>("Y", input_shape,
                          {0, 1, 2, 3,
                           0, 2, 4, 6,
                           0, 40, 80, 120});
  tester.Run();
}

// 1d zero & scale with int8 broadcast axis 1.
// Fix: the block contained two conflicting registrations of "scale" and "zero_point"
// (a stale {3}-sized pair alongside the {4}-sized pair). Only the {4}-sized, axis-1
// parameters are kept — they are the ones the expected output below corresponds to:
// Y[:, j] == (X[:, j] - zero_point[j]) * scale[j].
TEST(DequantizeLinearOpTest, DequantizeLinear_3) {
  OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
  std::vector<int64_t> dims{3, 4};
  test.AddInput<int8_t>("X", dims,
                        {0, 1, 2, 3,
                         0, 2, 4, 6,
                         0, 10, 20, 30});
  test.AddAttribute<int64_t>("axis", 1);
  // one {scale, zero_point} pair per column (broadcast dim has size 4)
  test.AddInput<float>("scale", {4}, {1, 2, 4, 8});
  test.AddInput<int8_t>("zero_point", {4}, {0, -10, -20, -30});
  test.AddOutput<float>("Y", dims,
                        {0, 22, 88, 264,
                         0, 24, 96, 288,
                         0, 40, 160, 480});
  test.Run();
}

// 1d zero & scale with int8 broadcast axis 1 with 4d tensor input/output.
// Fix: the comment previously claimed axis 0, but the test sets axis = 1 — the
// scale/zero_point vectors of size 3 broadcast over the second dimension of
// {2, 3, 2, 4}: each 2x4 slab along that dim uses one {scale, zero_point} pair,
// i.e. Y = (X - zero_point[c]) * scale[c] for channel c.
TEST(DequantizeLinearOpTest, DequantizeLinear_4) {
  OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain);
  std::vector<int64_t> dims{2, 3, 2, 4};
  test.AddInput<int8_t>("X", dims,
                        {7, 9, 10, 10,
                         5, 8, 9, 1,

                         8, 6, 7, 9,
                         10, 0, 7, 10,

                         8, 2, 6, 0,
                         5, 9, 8, 1,

                         2, 7, 5, 3,
                         2, 4, 1, 3,

                         8, 7, 4, 8,
                         10, 1, 5, 5,

                         7, 7, 0, 2,
                         4, 4, 0, 5});
  test.AddAttribute<int64_t>("axis", 1);
  test.AddInput<float>("scale", {3}, {1, 10, 7});
  test.AddInput<int8_t>("zero_point", {3}, {10, 2, 1});
  test.AddOutput<float>("Y", dims,
                        {-3, -1, 0, 0,
                         -5, -2, -1, -9,

                         60, 40, 50, 70,
                         80, -20, 50, 80,

                         49, 7, 35, -7,
                         28, 56, 49, 0,

                         -8, -3, -5, -7,
                         -8, -6, -9, -7,

                         60, 50, 20, 60,
                         80, -10, 30, 30,

                         42, 42, -7, 7,
                         21, 21, -7, 28});
  test.Run();
}

// Negative-axis handling: axis = -2 on a rank-2 input resolves to axis 0, so this
// must produce exactly the same result as the axis-0 uint8 broadcast case.
TEST(DequantizeLinearOpTest, DequantizeLinear_5) {
  OpTester tester("DequantizeLinear", 1, onnxruntime::kMSDomain);
  std::vector<int64_t> shape{3, 4};
  const std::vector<uint8_t> quantized{0, 1, 2, 3,
                                       0, 1, 2, 3,
                                       0, 10, 20, 30};
  tester.AddInput<uint8_t>("X", shape, quantized);
  tester.AddAttribute<int64_t>("axis", -2);
  tester.AddInput<float>("scale", {3}, {1.0f, 2.0f, 4.0f});
  tester.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
  // per-row: (X - 0) * {1, 2, 4}
  tester.AddOutput<float>("Y", shape,
                          {0, 1, 2, 3,
                           0, 2, 4, 6,
                           0, 40, 80, 120});
  tester.Run();
}

// quantize with scalar zero point and scale
TEST(QuantizeLinearOpTest, QuantizeLinear_0) {
OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{6};
Expand All @@ -54,19 +152,15 @@ TEST(QuantizeLinearOpTest, QuantizeLinear_0) {
test.Run();
}

// TODO this test is failing for Mac. needs to be debugged.
#if defined(__MACH__)
TEST(QuantizeLinearOpTest, DISABLED_QuantizeLinear_1) {
#else
// quantize with broadcasting
TEST(QuantizeLinearOpTest, QuantizeLinear_1) {
#endif
OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{3, 4};
test.AddInput<float>("X", dims,
{0, 2, 3, 1000,
0, 2, 3, 1000,
0, 2, 3, 1000});
test.AddAttribute<int64_t>("axis", 1);
test.AddAttribute<int64_t>("axis", 0);
test.AddInput<float>("scale", {3}, {1, 2, 4});
test.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
test.AddOutput<uint8_t>("Y", dims,
Expand All @@ -76,5 +170,22 @@ TEST(QuantizeLinearOpTest, QuantizeLinear_1) {
test.Run();
}

// Quantize with broadcasting and a negative axis: -2 on a rank-2 input resolves to
// axis 0, giving each row its own scale. Y = clamp(round(X / scale[row]) + 0, 0, 255).
TEST(QuantizeLinearOpTest, QuantizeLinear_2) {
  OpTester tester("QuantizeLinear", 1, onnxruntime::kMSDomain);
  std::vector<int64_t> shape{3, 4};
  tester.AddAttribute<int64_t>("axis", -2);
  tester.AddInput<float>("X", shape,
                         {0, 2, 3, 1000,
                          0, 2, 3, 1000,
                          0, 2, 3, 1000});
  tester.AddInput<float>("scale", {3}, {1, 2, 4});
  tester.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
  // 1000 saturates to 255 in every row; halves round away from zero (0.5 -> 1)
  tester.AddOutput<uint8_t>("Y", shape,
                            {0, 2, 3, 255,
                             0, 1, 2, 255,
                             0, 1, 1, 250});
  tester.Run();
}
} // namespace test
} // namespace onnxruntime