[NNAPI QDQ] Add QDQReshape op support #10533

Merged: 13 commits, Feb 15, 2022
Changes from 6 commits
@@ -80,6 +80,8 @@ QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
       return QuantizedOpType::QDQMul;
     else if (op_type == "Transpose")
       return QuantizedOpType::QDQTranspose;
+    else if (op_type == "Reshape")
+      return QuantizedOpType::QDQReshape;
   } else {
     // throw?
   }
@@ -91,6 +91,7 @@ enum class QuantizedOpType : uint8_t {
   QDQAdd,
   QDQMul,
   QDQTranspose,
+  QDQReshape,
   // TODO, add other QDQ NodeUnit types
 };

@@ -861,18 +861,28 @@ class ReshapeOpBuilder : public BaseOpBuilder {
  public:
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
   static Status AddReshapeOperator(ModelBuilder& model_builder, const NodeUnit& node_unit,
-                                   const std::string& input, const std::vector<int32_t>& shape);
+                                   const std::string& input, const std::vector<int32_t>& shape,
+                                   float scale = 0.0f, int32_t zero_point = 0);

  private:
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
   static bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit,
                              size_t input_rank, size_t output_rank);
+  static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
Contributor:

> see if we want to move this to BaseOpBuilder

doesn't have to be in this PR, but when will the decision be made?

Contributor (author):

yes, @gwang-msft do you think we should move it to BaseOpBuilder now, or keep it here at the individual op builder level since we don't have that many QDQ ops supported?

Contributor:

We can move it as a virtual function on BaseOpBuilder that returns false by default; each individual builder will override it if necessary.
Same for BaseOpSupportChecker.
 };

 void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
+  if (IsQuantizedOp(node_unit)) {
+    AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Inputs()[0].quant_param);   // x_scale, x_zp
+    AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param);  // y_scale, y_zp
+  }
   model_builder.AddInitializerToSkip(node_unit.Inputs()[1].node_arg.Name());
 }

+/* static */ bool ReshapeOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
+  return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQReshape;
+}
+
 // We can skip the Reshape if all the output edges satisfy both of the following conditions:
 // 1. The output of the reshape/flatten is not an output of the graph
 // 2. The output of the reshape/flatten is the input 0 of one or more GEMM/Matmul operators,
@@ -947,7 +957,8 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const
 /* static */ Status ReshapeOpBuilder::AddReshapeOperator(ModelBuilder& model_builder,
                                                          const NodeUnit& node_unit,
                                                          const std::string& input,
-                                                         const std::vector<int32_t>& shape) {
+                                                         const std::vector<int32_t>& shape,
+                                                         float scale, int32_t zero_point) {
Contributor:

It would be easier to get the scale and zero_point from the input inside this function instead of passing them in; use something like this:

+  // For reshape, the output type should be the same as the input type except the shape is different
+  auto output_operand_type = operand_types.at(input);
+  output_operand_type.SetDimensions(shaper[output]);
+
   // Since Reshape is not running using hardware in NNAPI for some CPU (e.g. Qualcomm SD for now)
   // We will try to see if we can skip the Reshape to prevent context switching between
   // NNAPI CPU impl and NNAPI hardware accelerator impl
   if (CanSkipReshape(model_builder, node_unit, input_rank, output_rank)) {
     // Since reshape can be skipped, only register the dimension and type, with same index and new name
-    const OperandType output_operand_type(operand_types.at(input).type, shaper[output], scale, zero_point);
     model_builder.RegisterOperand(output, operand_indices.at(input), output_operand_type, false);
   } else {
     // We still need to perform a reshape here
     // Add new shape
     Shape shape_dimen = {static_cast<uint32_t>(shape.size())};
     std::string shape_name = model_builder.GetUniqueName(node_unit.Name() + input + "newshape");
-    OperandType shape_operand_type(Type::TENSOR_INT32, shape_dimen, scale, zero_point);
+    OperandType shape_operand_type(Type::TENSOR_INT32, shape_dimen);
     ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(shape_name, shape.data(), shape_operand_type));
     input_indices.push_back(operand_indices.at(shape_name));
-
-    const OperandType output_operand_type(operand_types.at(input).type, shaper[output], scale, zero_point);
     ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_RESHAPE, input_indices, {output}, {output_operand_type}, {false}));
   }

   auto& shaper(model_builder.GetShaper());
   const auto& operand_indices(model_builder.GetOperandIndices());
   const auto& operand_types(model_builder.GetOperandTypes());
@@ -961,7 +972,7 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const
   // NNAPI CPU impl and NNAPI hardware accelerator impl
   if (CanSkipReshape(model_builder, node_unit, input_rank, output_rank)) {
     // Since reshape can be skipped, only register the dimension and type, with same index and new name
-    const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);
+    const OperandType output_operand_type(operand_types.at(input).type, shaper[output], scale, zero_point);
     model_builder.RegisterOperand(output, operand_indices.at(input), output_operand_type, false);
   } else {
     // We still need to perform a reshape here
@@ -971,11 +982,11 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const
     // Add new shape
     Shape shape_dimen = {static_cast<uint32_t>(shape.size())};
     std::string shape_name = model_builder.GetUniqueName(node_unit.Name() + input + "newshape");
-    OperandType shape_operand_type(Type::TENSOR_INT32, shape_dimen);
+    OperandType shape_operand_type(Type::TENSOR_INT32, shape_dimen, scale, zero_point);
Contributor:

This is the new shape of the Reshape op, it does not need scale and zero_point.

     ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(shape_name, shape.data(), shape_operand_type));
     input_indices.push_back(operand_indices.at(shape_name));

-    const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);
+    const OperandType output_operand_type(operand_types.at(input).type, shaper[output], scale, zero_point);
     ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_RESHAPE, input_indices, {output}, {output_operand_type}, {false}));
   }

@@ -1006,7 +1017,16 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons
     shape[i] = dim == 0 ? input_shape[i] : dim;
   }

-  return AddReshapeOperator(model_builder, node_unit, input, shape);
+  // Check if the quantization scale and ZP are correct
+  float x_scale = 0.0f;
+  int32_t x_zero_point = 0;
+  if (IsQuantizedOp(node_unit)) {
+    ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
+        initializers, node_unit.Inputs()[0], node_unit.ModelPath(), x_scale, x_zero_point));
+    ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
+  }
+
+  return AddReshapeOperator(model_builder, node_unit, input, shape, x_scale, x_zero_point);
 }

 #pragma endregion op_reshape
@@ -653,8 +653,17 @@ class ReshapeOpSupportChecker : public BaseOpSupportChecker {

   // Reshape opset 4- uses attributes for new shape which we do not support for now
   int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 5; }
+  bool HasSupportedInputOutputsImpl(
+      const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
+      const OpSupportCheckParams& /* params */) const override;
+  bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
+  static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
 };

+/* static */ bool ReshapeOpSupportChecker::IsQuantizedOp(const NodeUnit& node_unit) {
+  return GetQuantizedOpType(node_unit) == QuantizedOpType::QDQReshape;
+}
+
 bool ReshapeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
                                                 const OpSupportCheckParams& /* params */) const {
   const auto& inputs = node_unit.Inputs();
@@ -685,7 +694,7 @@ bool ReshapeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& init
   const auto perm_size = SafeInt<uint32_t>(perm_tensor.dims()[0]);

   NodeAttrHelper helper(node_unit);
-  const bool allow_zero = helper.Get("allowzero ", 0) == 1;
+  const bool allow_zero = helper.Get("allowzero", 0) == 1;
   for (uint32_t i = 0; i < perm_size; i++) {
     // NNAPI reshape does not support 0 as dimension
     if (raw_perm[i] == 0) {
@@ -704,6 +713,24 @@ bool ReshapeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& init
   return true;
 }

+bool ReshapeOpSupportChecker::HasSupportedInputOutputsImpl(
+    const InitializedTensorSet& initializers, const NodeUnit& node_unit,
+    const OpSupportCheckParams& params) const {
+  if (!IsQuantizedOp(node_unit)) {
+    return BaseOpSupportChecker::HasSupportedInputOutputsImpl(initializers, node_unit, params);
+  }
+
+  if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Input)) {
+    return false;
+  }
+
+  if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Output)) {
+    return false;
+  }
+
+  return true;
+}

 #pragma endregion

 #pragma region op_batchnormalization
22 changes: 22 additions & 0 deletions onnxruntime/test/optimizer/qdq_test_utils.cc
@@ -36,5 +36,27 @@ GetQDQTestCaseFn BuildQDQResizeTestCase(
   };
 }

+GetQDQTestCaseFn BuildQDQReshapeTestCase(const std::vector<int64_t>& input_shape,
+                                         const std::vector<int64_t>& reshape_shape) {
+  return [input_shape, reshape_shape](ModelTestBuilder& builder) {
+    auto* input_arg = builder.MakeInput<uint8_t>(input_shape,
+                                                 std::numeric_limits<uint8_t>::min(),
+                                                 std::numeric_limits<uint8_t>::max());
+    auto* output_arg = builder.MakeOutput();
+
+    // add DQ
+    auto* dq_output = builder.MakeIntermediate();
+    builder.AddDequantizeLinearNode<uint8_t>(input_arg, .003f, 1, dq_output);
+
+    // add Reshape
+    auto* reshape_output = builder.MakeIntermediate();
+    auto* shape = builder.Make1DInitializer<int64_t>(reshape_shape);
+    builder.AddNode("Reshape", {dq_output, shape}, {reshape_output});
+
+    // add Q
+    builder.AddQuantizeLinearNode<uint8_t>(reshape_output, .003f, 1, output_arg);
+  };
+}

 }  // namespace test
 }  // namespace onnxruntime
3 changes: 3 additions & 0 deletions onnxruntime/test/optimizer/qdq_test_utils.h
@@ -212,5 +212,8 @@ GetQDQTestCaseFn BuildQDQTransposeTestCase(
     builder.AddQuantizeLinearNode<OutputType>(transpose_output, .003f, q_zp, output_arg);
   };
 }

+GetQDQTestCaseFn BuildQDQReshapeTestCase(const std::vector<int64_t>& input_shape,
+                                         const std::vector<int64_t>& reshape_shape);
 }  // namespace test
 }  // namespace onnxruntime
9 changes: 9 additions & 0 deletions onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -370,6 +370,15 @@ TEST(NnapiExecutionProviderTest, TestQDQTranspose) {
       });
 }

+TEST(NnapiExecutionProviderTest, TestQDQReshape) {
+  RunQDQModelTest(BuildQDQReshapeTestCase({1, 3, 64, 64} /* input_shape */,
+                                          {1, 64, 64, 3} /* reshape_shape */),
+                  "nnapi_qdq_test_graph_reshape",
+                  {
+                      true /* verify_entire_graph_use_ep */
+                  });
+}

 #endif  // !(ORT_MINIMAL_BUILD)

 TEST(NnapiExecutionProviderTest, NNAPIFlagsTest) {