Add NNAPI support for QDQ Resize
guoyu-wang committed Jan 31, 2022
1 parent 68262cc commit 8ec8490
Showing 9 changed files with 132 additions and 33 deletions.
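For context, the pattern this commit handles is a DequantizeLinear → Resize → QuantizeLinear group, mapped to a single quantized Resize on NNAPI. A minimal sketch of that graph, using the ModelTestBuilder helpers from qdq_test_utils.cc below (shapes and quantization parameters are illustrative, not prescribed by this commit):

// Sketch only: the DQ -> Resize -> Q group that becomes one quantized NNAPI resize.
auto* dq_output = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(input_arg, .003f, 1, dq_output);     // DQ: x_scale, x_zp
auto* resize_output = builder.MakeIntermediate();
builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});  // Resize
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);  // Q: y_scale, y_zp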
@@ -68,6 +68,8 @@ QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
} else if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
if (op_type == "Conv")
return QuantizedOpType::QDQConv;
else if (op_type == "Resize")
return QuantizedOpType::QDQResize;
} else {
// throw?
}
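As an illustration (hypothetical caller, not part of this commit), downstream code can dispatch on the returned type; `quant_type` here is a local introduced only for this sketch:

// Hypothetical dispatch on the quantized op type.
const QuantizedOpType quant_type = GetQuantizedOpType(node_unit);
if (quant_type == QuantizedOpType::QDQResize) {
// Handle the DQ -> Resize -> Q group as a single quantized Resize.
}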
@@ -86,6 +86,7 @@ enum class QuantizedOpType : uint8_t {
// QLinearMul,
// QLinearReduceMean,
QDQConv,
QDQResize,
// TODO, add other QDQ NodeUnit types
};

@@ -2258,10 +2258,22 @@ class ResizeOpBuilder : public BaseOpBuilder {

private:
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder
};

/* static */ bool ResizeOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQResize;
}

void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
const auto& inputs = node_unit.Inputs();
if (IsQuantizedOp(node_unit)) {
AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[0].quant_param); // x_scale, x_zp
AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp
}

// We don't really use ROI here, so add it to the skipped list
model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); // ROI

@@ -2296,6 +2308,15 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
}
}

// Check that the quantization scale and ZP are valid
if (IsQuantizedOp(node_unit)) {
float x_scale = 0.0f;
int32_t x_zero_point = 0;
ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
initializers, node_unit.Inputs()[0], node_unit.ModelPath(), x_scale, x_zero_point));
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
}

bool is_linear_resize = helper.Get("mode", "nearest") == "linear";

int32_t operationCode = is_linear_resize ? ANEURALNETWORKS_RESIZE_BILINEAR
@@ -1466,8 +1466,16 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 11; }

bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpSupportChecker
};

/* static */ bool ResizeOpSupportChecker::IsQuantizedOp(const NodeUnit& node_unit) {
return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQResize;
}

bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const OpSupportCheckParams& params) const {
Shape input_shape;
@@ -1587,6 +1595,32 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
}
}
}

if (IsQuantizedOp(node_unit)) {
// For QDQResize, we only support uint8 output now
int32_t output_type;
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
return false;

if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
LOGS_DEFAULT(VERBOSE) << "[Resize] output type: [" << output_type
<< "] is not supported for now";
return false;
}

// Check input scales and ZPs
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
return false;
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
return false;

// Check output scale and ZP
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
return false;
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
return false;
}

return true;
}

41 changes: 41 additions & 0 deletions onnxruntime/test/optimizer/qdq_test_utils.cc
@@ -0,0 +1,41 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "qdq_test_utils.h"

namespace onnxruntime {
namespace test {

GetQDQTestCaseFn BuildQDQResizeTestCase(
const std::vector<int64_t>& input_shape,
const std::vector<int64_t>& sizes_data) {
return [input_shape, sizes_data](ModelTestBuilder& builder) {
auto* input1_arg = builder.MakeInput<uint8_t>(input_shape,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max());
auto* roi = builder.MakeInitializer<float>({0}, {});
auto* scales = builder.MakeInitializer<float>({0}, {});
auto* sizes = builder.Make1DInitializer<int64_t>(sizes_data);
auto* output_arg = builder.MakeOutput();

// add DQ
auto* dq_output = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);

// add Resize
auto* resize_output = builder.MakeIntermediate();
Node& resize_node = builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});

// NNAPI EP does not support the default setting of Resize Op
// Use bi-linear and asymmetric for NNAPI EP only
#ifdef USE_NNAPI
resize_node.AddAttribute("mode", "linear");
resize_node.AddAttribute("coordinate_transformation_mode", "asymmetric");
#endif
// add Q
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
};
}

} // namespace test
} // namespace onnxruntime
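A usage sketch for the new builder, mirroring the two call sites later in this diff (shape and sizes values are illustrative; the commented calls assume the test helpers shown below):

// Hypothetical usage of BuildQDQResizeTestCase; values are illustrative.
auto build_fn = BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */,
{1, 3, 32, 32} /* sizes_data */);
// Optimizer test: DQ/Q should fuse into a single quantized Resize.
// TransformerTester(build_fn, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
// NNAPI EP test (Android only): run the model end-to-end.
// RunQDQModelTest(build_fn, "nnapi_qdq_test_graph_resize");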
10 changes: 6 additions & 4 deletions onnxruntime/test/optimizer/qdq_test_utils.h
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "graph_transform_test_builder.h"

#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h"
@@ -12,7 +14,7 @@
namespace onnxruntime {
namespace test {

using GetQDQConvTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
using GetQDQTestCaseFn = std::function<void(ModelTestBuilder& builder)>;

template <typename T>
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
@@ -24,10 +26,8 @@ AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp =
return dq_output;
}

// TODO: for now it just builds a conv qdq graph.
// can be modified and made it shared among different qdq test graphs associated with other operators
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
return [input_shape, weights_shape](ModelTestBuilder& builder) {
auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
auto* output_arg = builder.MakeOutput();
@@ -78,5 +78,7 @@ GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shap
};
}

GetQDQTestCaseFn BuildQDQResizeTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& sizes_data);

} // namespace test
} // namespace onnxruntime
27 changes: 4 additions & 23 deletions onnxruntime/test/optimizer/qdq_transformer_test.cc
@@ -633,40 +633,21 @@ TEST(QDQTransformerTests, Transpose_No_Fusion) {
TEST(QDQTransformerTests, Resize) {
auto test_case = [&](const std::vector<int64_t>& input1_shape,
const std::vector<int64_t>& sizes_shape) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input1_arg = builder.MakeInput<uint8_t>(input1_shape,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max());
auto* roi = builder.MakeInitializer<float>({0}, {});
auto* scales = builder.MakeInitializer<float>({0}, {});
auto* sizes = builder.MakeInitializer<int64_t>(sizes_shape, 1, 16);
auto* output_arg = builder.MakeOutput();

// add DQ
auto* dq_output = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);

// add Resize
auto* resize_output = builder.MakeIntermediate();
builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});

// add Q
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
};

auto check_graph = [&](InferenceSessionWrapper& session) {
auto op_to_count = CountOpsInGraph(session.GetGraph());
EXPECT_EQ(op_to_count["Resize"], 1);
EXPECT_EQ(op_to_count["QuantizeLinear"], 0);
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
};

TransformerTester(build_test_case, check_graph,
TransformerTester(BuildQDQResizeTestCase(input1_shape, sizes_shape),
check_graph,
TransformerLevel::Level1,
TransformerLevel::Level2);
};

test_case({2, 13, 12, 37}, {4});
RandomValueGenerator rand_gen{optional<RandomValueGenerator::RandomSeedType>{2345}};
test_case({2, 13, 12, 37}, rand_gen.Uniform<int64_t>(std::vector<int64_t>{4}, 1, 16));
}

TEST(QDQTransformerTests, Resize_No_Fusion) {
25 changes: 19 additions & 6 deletions onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -240,13 +240,10 @@ TEST(NnapiExecutionProviderTest, TestNoShapeInputModel) {
}

#if defined(__ANDROID__)
TEST(NnapiExecutionProviderTest, TestQDQModel) {
onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger());
static void RunQDQModelTest(const GetQDQTestCaseFn& build_test_case, const char* test_description) {
onnxruntime::Model model(test_description, false, DefaultLoggingManager().DefaultLogger());
Graph& graph = model.MainGraph();
ModelTestBuilder helper(graph);

auto build_test_case = BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>({1, 1, 5, 5} /*input_shape*/,
{1, 1, 3, 3} /*weights_shape*/);
build_test_case(helper);
helper.SetGraphOutputs();
ASSERT_STATUS_OK(model.MainGraph().Resolve());
@@ -259,7 +256,23 @@ TEST(NnapiExecutionProviderTest, TestQDQModel) {
std::make_unique<NnapiExecutionProvider>(0),
helper.feeds_);

// TODO: can add test load only verification here later
}

TEST(NnapiExecutionProviderTest, TestQDQConv) {
RunQDQModelTest(BuildQDQConvTestCase<uint8_t /* InputType */,
uint8_t /* WeightType */,
int32_t /* BiasType */,
uint8_t /* OutputType */>(
{1, 1, 5, 5} /*input_shape*/,
{1, 1, 3, 3} /*weights_shape*/),
"nnapi_qdq_test_graph_conv");
}

TEST(NnapiExecutionProviderTest, TestQDQResize) {
RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */,
{1, 3, 32, 32} /* sizes_data */),
"nnapi_qdq_test_graph_conv");
}
#endif // defined(__ANDROID__)

4 changes: 4 additions & 0 deletions onnxruntime/test/util/test_utils.cc
@@ -35,6 +35,10 @@ static void VerifyOutputs(const std::vector<std::string>& output_names,
EXPECT_THAT(ltensor.DataAsSpan<int64_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<int64_t>()))
<< " mismatch for " << output_names[i];
break;
case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
EXPECT_THAT(ltensor.DataAsSpan<uint8_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<uint8_t>()))
<< " mismatch for " << output_names[i];
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
constexpr float abs_err = 1e-5f;

