Add NNAPI support for QDQ Resize
guoyu-wang committed Jan 31, 2022
1 parent 68262cc commit 8ec8490
Showing 9 changed files with 132 additions and 33 deletions.
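For context, the pattern this commit handles is a DequantizeLinear → Resize → QuantizeLinear group, mapped to a single quantized Resize on NNAPI. A minimal sketch of that graph, using the ModelTestBuilder helpers from qdq_test_utils.cc below (shapes and quantization parameters are illustrative, not prescribed by this commit):

// Sketch only: the DQ -> Resize -> Q group that becomes one quantized NNAPI resize.
auto* dq_output = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(input_arg, .003f, 1, dq_output);     // DQ: x_scale, x_zp
auto* resize_output = builder.MakeIntermediate();
builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});  // Resize
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);  // Q: y_scale, y_zp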
@@ -68,6 +68,8 @@ QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
} else if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
if (op_type == "Conv")
return QuantizedOpType::QDQConv;
else if (op_type == "Resize")
return QuantizedOpType::QDQResize;
} else {
// throw?
}
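As an illustration (hypothetical caller, not part of this commit), downstream code can dispatch on the returned type; `quant_type` here is a local introduced only for this sketch:

// Hypothetical dispatch on the quantized op type.
const QuantizedOpType quant_type = GetQuantizedOpType(node_unit);
if (quant_type == QuantizedOpType::QDQResize) {
// Handle the DQ -> Resize -> Q group as a single quantized Resize.
}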
@@ -86,6 +86,7 @@ enum class QuantizedOpType : uint8_t {
// QLinearMul,
// QLinearReduceMean,
QDQConv,
QDQResize,
// TODO, add other QDQ NodeUnit types
};

@@ -2258,10 +2258,22 @@ class ResizeOpBuilder : public BaseOpBuilder {

private:
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder
};

/* static */ bool ResizeOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQResize;
}

void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
const auto& inputs = node_unit.Inputs();
if (IsQuantizedOp(node_unit)) {
AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[0].quant_param); // x_scale, x_zp
AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp
}

// We don't really use ROI here, so add it to the skipped list
model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); // ROI

@@ -2296,6 +2308,15 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
}
}

// Check that the quantization scale and ZP are valid
if (IsQuantizedOp(node_unit)) {
float x_scale = 0.0f;
int32_t x_zero_point = 0;
ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
initializers, node_unit.Inputs()[0], node_unit.ModelPath(), x_scale, x_zero_point));
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
}

bool is_linear_resize = helper.Get("mode", "nearest") == "linear";

int32_t operationCode = is_linear_resize ? ANEURALNETWORKS_RESIZE_BILINEAR
@@ -1466,8 +1466,16 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 11; }

bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpSupportChecker
};

/* static */ bool ResizeOpSupportChecker::IsQuantizedOp(const NodeUnit& node_unit) {
return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQResize;
}

bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const OpSupportCheckParams& params) const {
Shape input_shape;
@@ -1587,6 +1595,32 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
}
}
}

if (IsQuantizedOp(node_unit)) {
// For QDQResize, we only support uint8 output now
int32_t output_type;
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
return false;

if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
LOGS_DEFAULT(VERBOSE) << "[Resize] output type: [" << output_type
<< "] is not supported for now";
return false;
}

// Check input scales and ZPs
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
return false;
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
return false;

// Check output scale and ZP
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
return false;
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
return false;
}

return true;
}

41 changes: 41 additions & 0 deletions onnxruntime/test/optimizer/qdq_test_utils.cc
@@ -0,0 +1,41 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "qdq_test_utils.h"

namespace onnxruntime {
namespace test {

GetQDQTestCaseFn BuildQDQResizeTestCase(
const std::vector<int64_t>& input_shape,
const std::vector<int64_t>& sizes_data) {
return [input_shape, sizes_data](ModelTestBuilder& builder) {
auto* input1_arg = builder.MakeInput<uint8_t>(input_shape,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max());
auto* roi = builder.MakeInitializer<float>({0}, {});
auto* scales = builder.MakeInitializer<float>({0}, {});
auto* sizes = builder.Make1DInitializer<int64_t>(sizes_data);
auto* output_arg = builder.MakeOutput();

// add DQ
auto* dq_output = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);

// add Resize
auto* resize_output = builder.MakeIntermediate();
Node& resize_node = builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});

// NNAPI EP does not support the default setting of Resize Op
// Use bi-linear and asymmetric for NNAPI EP only
#ifdef USE_NNAPI
resize_node.AddAttribute("mode", "linear");
resize_node.AddAttribute("coordinate_transformation_mode", "asymmetric");
#endif
// add Q
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
};
}

} // namespace test
} // namespace onnxruntime
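A usage sketch for the new builder, mirroring the two call sites later in this diff (shape and sizes values are illustrative; the commented calls assume the test helpers shown below):

// Hypothetical usage of BuildQDQResizeTestCase; values are illustrative.
auto build_fn = BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */,
{1, 3, 32, 32} /* sizes_data */);
// Optimizer test: DQ/Q should fuse into a single quantized Resize.
// TransformerTester(build_fn, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
// NNAPI EP test (Android only): run the model end-to-end.
// RunQDQModelTest(build_fn, "nnapi_qdq_test_graph_resize");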
10 changes: 6 additions & 4 deletions onnxruntime/test/optimizer/qdq_test_utils.h
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "graph_transform_test_builder.h"

#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h"
@@ -12,7 +14,7 @@
namespace onnxruntime {
namespace test {

using GetQDQConvTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
using GetQDQTestCaseFn = std::function<void(ModelTestBuilder& builder)>;

template <typename T>
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
@@ -24,10 +26,8 @@ AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp =
return dq_output;
}

// TODO: for now it just builds a conv qdq graph.
// can be modified and made it shared among different qdq test graphs associated with other operators
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
return [input_shape, weights_shape](ModelTestBuilder& builder) {
auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
auto* output_arg = builder.MakeOutput();
@@ -78,5 +78,7 @@ GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shap
};
}

GetQDQTestCaseFn BuildQDQResizeTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& sizes_data);

} // namespace test
} // namespace onnxruntime
27 changes: 4 additions & 23 deletions onnxruntime/test/optimizer/qdq_transformer_test.cc
@@ -633,40 +633,21 @@ TEST(QDQTransformerTests, Transpose_No_Fusion) {
TEST(QDQTransformerTests, Resize) {
auto test_case = [&](const std::vector<int64_t>& input1_shape,
const std::vector<int64_t>& sizes_shape) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input1_arg = builder.MakeInput<uint8_t>(input1_shape,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max());
auto* roi = builder.MakeInitializer<float>({0}, {});
auto* scales = builder.MakeInitializer<float>({0}, {});
auto* sizes = builder.MakeInitializer<int64_t>(sizes_shape, 1, 16);
auto* output_arg = builder.MakeOutput();

// add DQ
auto* dq_output = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(input1_arg, .003f, 1, dq_output);

// add Resize
auto* resize_output = builder.MakeIntermediate();
builder.AddNode("Resize", {dq_output, roi, scales, sizes}, {resize_output});

// add Q
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
};

auto check_graph = [&](InferenceSessionWrapper& session) {
auto op_to_count = CountOpsInGraph(session.GetGraph());
EXPECT_EQ(op_to_count["Resize"], 1);
EXPECT_EQ(op_to_count["QuantizeLinear"], 0);
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
};

TransformerTester(build_test_case, check_graph,
TransformerTester(BuildQDQResizeTestCase(input1_shape, sizes_shape),
check_graph,
TransformerLevel::Level1,
TransformerLevel::Level2);
};

test_case({2, 13, 12, 37}, {4});
RandomValueGenerator rand_gen{optional<RandomValueGenerator::RandomSeedType>{2345}};
test_case({2, 13, 12, 37}, rand_gen.Uniform<int64_t>(std::vector<int64_t>{4}, 1, 16));
}

TEST(QDQTransformerTests, Resize_No_Fusion) {
25 changes: 19 additions & 6 deletions onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -240,13 +240,10 @@ TEST(NnapiExecutionProviderTest, TestNoShapeInputModel) {
}

#if defined(__ANDROID__)
TEST(NnapiExecutionProviderTest, TestQDQModel) {
onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger());
static void RunQDQModelTest(const GetQDQTestCaseFn& build_test_case, const char* test_description) {
onnxruntime::Model model(test_description, false, DefaultLoggingManager().DefaultLogger());
Graph& graph = model.MainGraph();
ModelTestBuilder helper(graph);

auto build_test_case = BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>({1, 1, 5, 5} /*input_shape*/,
{1, 1, 3, 3} /*weights_shape*/);
build_test_case(helper);
helper.SetGraphOutputs();
ASSERT_STATUS_OK(model.MainGraph().Resolve());
@@ -259,7 +256,23 @@ TEST(NnapiExecutionProviderTest, TestQDQModel) {
std::make_unique<NnapiExecutionProvider>(0),
helper.feeds_);

// TODO: can add test load only verification here later
}

TEST(NnapiExecutionProviderTest, TestQDQConv) {
RunQDQModelTest(BuildQDQConvTestCase<uint8_t /* InputType */,
uint8_t /* WeightType */,
int32_t /* BiasType */,
uint8_t /* OutputType */>(
{1, 1, 5, 5} /*input_shape*/,
{1, 1, 3, 3} /*weights_shape*/),
"nnapi_qdq_test_graph_conv");
}

TEST(NnapiExecutionProviderTest, TestQDQResize) {
RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */,
{1, 3, 32, 32} /* sizes_data */),
"nnapi_qdq_test_graph_conv");
}
#endif // defined(__ANDROID__)

4 changes: 4 additions & 0 deletions onnxruntime/test/util/test_utils.cc
@@ -35,6 +35,10 @@ static void VerifyOutputs(const std::vector<std::string>& output_names,
EXPECT_THAT(ltensor.DataAsSpan<int64_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<int64_t>()))
<< " mismatch for " << output_names[i];
break;
case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
EXPECT_THAT(ltensor.DataAsSpan<uint8_t>(), ::testing::ContainerEq(rtensor.DataAsSpan<uint8_t>()))
<< " mismatch for " << output_names[i];
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
constexpr float abs_err = 1e-5f;

