[NNAPI QDQ] Add QDQ Conv support #10418

Merged: 4 commits, Jan 31, 2022
Changes from 3 commits
86 changes: 49 additions & 37 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@@ -48,24 +48,31 @@ std::string GetErrorCause(int error_code) {
}
}

QLinearOpType GetQLinearOpType(const onnxruntime::Node& node) {
const auto& op_type = node.OpType();
if (op_type == "DequantizeLinear")
return QLinearOpType::DequantizeLinear;
else if (op_type == "QuantizeLinear")
return QLinearOpType::QuantizeLinear;
else if (op_type == "QLinearConv")
return QLinearOpType::QLinearConv;
else if (op_type == "QLinearMatMul")
return QLinearOpType::QLinearMatMul;
else if (op_type == "QLinearAdd")
return QLinearOpType::QLinearAdd;
else if (op_type == "QLinearSigmoid")
return QLinearOpType::QLinearSigmoid;
else if (op_type == "QLinearAveragePool")
return QLinearOpType::QLinearAveragePool;

return QLinearOpType::Unknown;
QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
const auto& op_type = node_unit.OpType();
if (node_unit.UnitType() == NodeUnit::Type::SingleNode) {
if (op_type == "DequantizeLinear")
return QuantizedOpType::DequantizeLinear;
else if (op_type == "QuantizeLinear")
return QuantizedOpType::QuantizeLinear;
else if (op_type == "QLinearConv")
return QuantizedOpType::QLinearConv;
else if (op_type == "QLinearMatMul")
return QuantizedOpType::QLinearMatMul;
else if (op_type == "QLinearAdd")
return QuantizedOpType::QLinearAdd;
else if (op_type == "QLinearSigmoid")
return QuantizedOpType::QLinearSigmoid;
else if (op_type == "QLinearAveragePool")
return QuantizedOpType::QLinearAveragePool;
} else if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
if (op_type == "Conv")
return QuantizedOpType::QDQConv;
} else {
// throw?
}

return QuantizedOpType::Unknown;
}

ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers) {
@@ -89,10 +96,15 @@ ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& init
return ConvType::Grouped;
}

bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type) {
return qlinear_op_type == QLinearOpType::QLinearConv ||
qlinear_op_type == QLinearOpType::QLinearMatMul ||
qlinear_op_type == QLinearOpType::QLinearAdd;
bool IsQuantizedConv(QuantizedOpType quant_op_type) {
return (quant_op_type == QuantizedOpType::QLinearConv) ||
(quant_op_type == QuantizedOpType::QDQConv);
}

bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type) {
return quant_op_type == QuantizedOpType::QLinearMatMul ||
quant_op_type == QuantizedOpType::QLinearAdd ||
IsQuantizedConv(quant_op_type);
}

bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit) {
@@ -111,9 +123,9 @@ bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit) {
}

bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {
auto op_type = GetQLinearOpType(node_unit.GetNode());
auto quant_op_type = GetQuantizedOpType(node_unit);
int32_t a_input_type, b_input_type;
if (!IsQLinearBinaryOp(op_type)) {
if (!IsQuantizedBinaryOp(quant_op_type)) {
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] is not a binary qlinear op";
return false;
}
@@ -126,14 +138,14 @@ bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {

// QlinearConv supports u8u8 or u8s8
// QLinearMatMul/Add only support u8u8
bool is_qlinear_conv = op_type == QLinearOpType::QLinearConv;
bool is_quant_conv = IsQuantizedConv(quant_op_type);
bool has_valid_qlinear_conv_weight =
(b_input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
b_input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8);

if (a_input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
(!is_qlinear_conv && a_input_type != b_input_type) ||
(is_qlinear_conv && !has_valid_qlinear_conv_weight)) {
(!is_quant_conv && a_input_type != b_input_type) ||
(is_quant_conv && !has_valid_qlinear_conv_weight)) {
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType()
<< "] A Input type: [" << a_input_type
<< "] B Input type: [" << b_input_type
@@ -147,9 +159,9 @@ bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input) {
const auto& op_type = node_unit.OpType();
auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode());
bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv);
bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul);
auto quant_op_type = GetQuantizedOpType(node_unit);
bool is_quant_conv = IsQuantizedConv(quant_op_type);
bool is_quant_matmul = (quant_op_type == QuantizedOpType::QLinearMatMul);
const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
for (const auto idx : indices) {
if (idx >= io_defs.size()) {
@@ -174,7 +186,7 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
}

// If this op is Qlinear[Conv/MatMul], we want to check u8s8 support for weight tensor (or B tensor for QlinearMatMul)
bool is_conv_matmul_weight = is_input && (is_qlinear_conv || is_qlinear_matmul) && idx == 1;
bool is_conv_matmul_weight = is_input && (is_quant_conv || is_quant_matmul) && idx == 1;
bool is_conv_matmul_u8s8_weight = false;

if (is_conv_matmul_weight) {
@@ -194,7 +206,7 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
// For u8s8 Qlinear[Conv/MatMul], we support
// 1. Per-tensor, the weight will be transformed to uint8 later
// 2. Per-channel, only from Android API level 29
if (is_qlinear_matmul) {
if (is_quant_matmul) {
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
return false;
}
@@ -221,9 +233,9 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, bool is_input) {
const auto& op_type = node_unit.OpType();
auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode());
bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv);
bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul);
auto quant_op_type = GetQuantizedOpType(node_unit);
bool is_quant_conv = IsQuantizedConv(quant_op_type);
bool is_quant_matmul = (quant_op_type == QuantizedOpType::QLinearMatMul);

const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
for (const auto idx : indices) {
@@ -251,7 +263,7 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
return false;
}

bool is_conv_matmul_weight = is_input && (is_qlinear_conv || is_qlinear_matmul) && idx == 1;
bool is_conv_matmul_weight = is_input && (is_quant_conv || is_quant_matmul) && idx == 1;
bool is_conv_matmul_u8s8_weight = false;

if (is_conv_matmul_weight) {
@@ -279,7 +291,7 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
}

if (zero_dim != 1) {
if (is_qlinear_matmul) {
if (is_quant_matmul) {
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
return false;
}
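In short, HasValidBinaryOpQuantizedInputs enforces: input A must be u8, a quantized Conv (QLinearConv or QDQConv) may take a u8 or s8 weight, and QLinearMatMul/QLinearAdd require u8u8. A minimal standalone sketch of that rule (a hypothetical helper, not part of this PR; the ONNX TensorProto enum values UINT8 = 2 and INT8 = 3 are hard-coded for illustration):

```cpp
#include <cstdint>

// Minimal sketch (hypothetical helper) of the type rule enforced by
// HasValidBinaryOpQuantizedInputs above.
bool InputTypesSupported(bool is_quant_conv, int32_t a_input_type, int32_t b_input_type) {
  constexpr int32_t kUint8 = 2;  // ONNX_NAMESPACE::TensorProto_DataType_UINT8
  constexpr int32_t kInt8 = 3;   // ONNX_NAMESPACE::TensorProto_DataType_INT8
  if (a_input_type != kUint8)
    return false;  // input A must always be u8
  if (is_quant_conv)  // QLinearConv/QDQConv: weight may be u8 or s8
    return b_input_type == kUint8 || b_input_type == kInt8;
  return b_input_type == a_input_type;  // QLinearMatMul/Add: u8u8 only
}
```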
17 changes: 11 additions & 6 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
@@ -73,8 +73,8 @@ struct OpSupportCheckParams;

std::string GetErrorCause(int error_code);

enum class QLinearOpType : uint8_t {
Unknown, // Unknown or not a linear quantized op
enum class QuantizedOpType : uint8_t {
Unknown, // Unknown or not a quantized NodeUnit
DequantizeLinear,
QuantizeLinear,
QLinearConv,
@@ -85,6 +85,8 @@ enum class QLinearOpType : uint8_t {
// Not yet supported
// QLinearMul,
// QLinearReduceMean,
QDQConv,
// TODO, add other QDQ NodeUnit types
};

enum class ConvType : uint8_t {
@@ -93,15 +95,18 @@ enum class ConvType : uint8_t {
Grouped,
};

QLinearOpType GetQLinearOpType(const onnxruntime::Node& node);
QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit);

// Return the type of the conv ops,
// This function assumes the input is a 2d conv node
ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers);

// This qlinear op is an operator takes 2 inputs and produces 1 output
// Such as QLinearConv, QLinearMatMul, QLinearAdd, ...
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type);
// If this is a quantized Conv (QLinearConv or QDQConv)
bool IsQuantizedConv(QuantizedOpType quant_op_type);

// A quantized binary op is an operator or QDQ node unit that takes 2 inputs and produces 1 output,
// such as QLinearConv, QLinearMatMul, QLinearAdd, QDQConv, ...
bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type);

// Check if a qlinear unary op has valid inputs, Qlinear[Sigmoid/AveragePool]
bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit);
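A quick sketch of how an op support checker might consume the reworked helpers (CheckQuantizedNodeUnit is hypothetical; the other names are the declarations above):

```cpp
// Hypothetical op-support check built on the helpers declared above:
// classify the NodeUnit once, then branch on the quantized op kind.
bool CheckQuantizedNodeUnit(const NodeUnit& node_unit) {
  const QuantizedOpType quant_op_type = GetQuantizedOpType(node_unit);
  if (quant_op_type == QuantizedOpType::Unknown)
    return false;  // neither a quantized single node nor a QDQ group

  // QLinearConv and QDQConv share one validation path after this PR.
  if (IsQuantizedConv(quant_op_type))
    return HasValidBinaryOpQuantizedInputs(node_unit);

  return true;  // other quantized ops: further checks done elsewhere
}
```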
@@ -158,14 +158,10 @@ void ModelBuilder::PreprocessNodeUnits() {
// Help to get all quantized operators' input and the NodeUnit(s) using the input
void ModelBuilder::GetAllQuantizedOpInputs() {
for (const auto& node_unit : node_unit_holder_) {
// TODO, hookup getting quantized inputs with QDQ NodeUnits and remove the ORT_ENFORCE
ORT_ENFORCE(node_unit->UnitType() == NodeUnit::Type::SingleNode, "QDQ NodeUnit is not yet implemented");
auto quant_op_type = GetQuantizedOpType(*node_unit);

auto qlinear_op_type = GetQLinearOpType(node_unit->GetNode());

// Not a qlinear op
// TODO, add handling for QDQ NodeUnit
if (qlinear_op_type == QLinearOpType::Unknown)
// Not a qlinear op or qdq node group
if (quant_op_type == QuantizedOpType::Unknown)
continue;

const auto add_quantized_input =
@@ -174,12 +170,12 @@ void ModelBuilder::GetAllQuantizedOpInputs() {
all_quantized_op_inputs[input_name].push_back(&node_unit);
};

// All qlinear ops EXCEPT QuantizeLinear has quantized input
if (qlinear_op_type != QLinearOpType::QuantizeLinear) {
// All quantized ops EXCEPT QuantizeLinear have a quantized input
if (quant_op_type != QuantizedOpType::QuantizeLinear) {
add_quantized_input(*node_unit, 0);
}

if (IsQLinearBinaryOp(qlinear_op_type)) {
if (IsQuantizedBinaryOp(quant_op_type)) {
add_quantized_input(*node_unit, 1);
}

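The two conditionals above amount to a simple rule: every quantized op except QuantizeLinear has a quantized input 0, and binary ops also have a quantized input 1. A hypothetical summary helper, assuming the QuantizedOpType enum and IsQuantizedBinaryOp from helper.h:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical summary of the collection logic above: which input indices
// of a quantized NodeUnit carry quantized data.
std::vector<size_t> QuantizedInputIndices(QuantizedOpType quant_op_type) {
  std::vector<size_t> indices;
  if (quant_op_type == QuantizedOpType::Unknown)
    return indices;  // not a qlinear op or QDQ node group
  if (quant_op_type != QuantizedOpType::QuantizeLinear)
    indices.push_back(0);  // all quantized ops except QuantizeLinear
  if (IsQuantizedBinaryOp(quant_op_type))
    indices.push_back(1);  // binary ops also have a quantized input 1
  return indices;
}
```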
@@ -494,14 +490,29 @@ Status ModelBuilder::AddOperandFromPersistMemoryBuffer(

Status ModelBuilder::AddOperations() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
std::unordered_set<const NodeUnit*> processed_node_units;
processed_node_units.reserve(node_unit_holder_.size());
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer_.GetNode(node_indices[i]));
for (const auto node_idx : node_indices) {
LOGS_DEFAULT(VERBOSE) << "Adding node [" << node_idx << "]";
const auto* node(graph_viewer_.GetNode(node_idx));
const NodeUnit& node_unit = GetNodeUnit(node);

// Since a NodeUnit may contain multiple nodes, avoid processing the same NodeUnit multiple times
if (Contains(processed_node_units, &node_unit))
// A NodeUnit may contain multiple nodes. Inserting a NodeUnit at the first
// occurrence of any of its nodes in topological order can break the
// topological order of the inserted NodeUnits. For example:
//      Q1
//      |
//  DQ1 DQ2
//    \  |
//     CONV
//      |
//      Q2
// In the above graph we have 2 NodeUnits, NU1 [Q1] and NU2 [DQ1, DQ2, CONV, Q2].
// Q1 and DQ2 have the same topological order. If we insert NU2 when we first
// visit DQ2, the input produced by Q1 and required by NU2 is not yet inserted,
// and finding the inputs for NU2 will fail.
//
// So we only insert the NodeUnit when we hit its target node, which preserves
// the topological order of the NodeUnits.
if (node != &node_unit.GetNode())
continue;

if (const auto* op_builder = GetOpBuilder(node_unit)) {
@@ -510,8 +521,6 @@ Status ModelBuilder::AddOperations() {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Node [", node_unit.Name(), "], type [", node_unit.OpType(), "] is not supported");
}

processed_node_units.insert(&node_unit);
}

return Status::OK();
@@ -535,6 +544,8 @@ Status ModelBuilder::AddOperation(int op, const std::vector<uint32_t>& input_ind
"op = " + std::to_string(op));

num_nnapi_ops_++;

LOGS_DEFAULT(VERBOSE) << "Added NNAPI Operation Type [" << op << "]";
return Status::OK();
}

@@ -640,8 +651,9 @@ int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit) {

// TODO, add support of activation fusion for quantized node group (qdq or qlinear)
// We do not support activation fusion for quantized operators for now
auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode());
if (qlinear_op_type != QLinearOpType::Unknown)
// (usually the activations are fused already in the quantization)
auto quant_op_type = GetQuantizedOpType(node_unit);
if (quant_op_type != QuantizedOpType::Unknown)
return fuse_code;

for (auto it = output_node.OutputEdgesBegin(), end = output_node.OutputEdgesEnd(); it != end; ++it) {
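The NodeUnit ordering rule from the AddOperations comment, distilled: walk the nodes in topological order, but materialize a NodeUnit only when the walk reaches its target node. A sketch with simplified stand-in types (Node, NodeUnit, and ProcessInTopologicalOrder here are illustrative, not the real ORT types):

```cpp
#include <functional>
#include <vector>

// Stand-in for onnxruntime::Node.
struct Node {};

// Stand-in for the real NodeUnit: possibly several nodes, one target node.
struct NodeUnit {
  const Node* target_node;  // e.g. the Conv inside [DQ, DQ, Conv, Q]
  const Node& GetNode() const { return *target_node; }
};

void ProcessInTopologicalOrder(
    const std::vector<const Node*>& topo_order,
    const std::function<const NodeUnit&(const Node*)>& get_node_unit,
    const std::function<void(const NodeUnit&)>& process) {
  for (const Node* node : topo_order) {
    const NodeUnit& node_unit = get_node_unit(node);
    if (node != &node_unit.GetNode())
      continue;  // skip the unit's auxiliary Q/DQ nodes
    process(node_unit);  // each NodeUnit is processed exactly once
  }
}
```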
@@ -1260,8 +1260,7 @@ class ConvOpBuilder : public BaseOpBuilder {
};

/* static */ bool ConvOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
// TODO, add support for QDQ NodeUnit
return node_unit.OpType() == "QLinearConv";
return IsQuantizedConv(GetQuantizedOpType(node_unit));
}

/* static */ void
@@ -1296,7 +1295,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
const auto& initializers(model_builder.GetInitializerTensors());
NodeAttrHelper helper(node_unit);
const auto inputs = node_unit.Inputs();
bool is_qlinear_conv = IsQuantizedOp(node_unit);
bool is_quant_conv = IsQuantizedOp(node_unit);

// onnx strides are in the order height, width
// while nnapi strides are in the order width, height
@@ -1341,7 +1340,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
// this is for per-channel quantization weights
optional<std::vector<float>> w_scales;
bool is_per_tensor_u8s8 = false;
if (is_qlinear_conv) {
if (is_quant_conv) {
ORT_RETURN_IF_ERROR(GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node_unit,
x_scale, w_scale, y_scale,
x_zero_point, w_zero_point, y_zero_point,
@@ -1379,7 +1378,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
// Get weight operand type
// Per-channel quantized weight is handled differently
OperandType onnx_weight_operand_type =
(is_qlinear_conv && w_scales.has_value())
(is_quant_conv && w_scales.has_value())
? OperandType{onnx_weight_type, onnx_weight_shape,
SymmPerChannelQuantParams{w_scales.value(),
depthwise_conv_2d ? 3u : 0u}} // channelDim is 3 for depthwise-conv
@@ -1392,7 +1391,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
ORT_RETURN_IF_ERROR(AddInitializerInNewLayout(model_builder, weight, onnx_weight_operand_type, L_1230, is_per_tensor_u8s8));
}

if (is_qlinear_conv) {
if (is_quant_conv) {
// Verify that the scale and zero point match between the ONNX input/weight and the NNAPI input/weight
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
ORT_RETURN_IF_ERROR(IsValidConvWeightQuantizedType(model_builder, weight, w_scale, w_zero_point, w_scales));
@@ -1420,7 +1419,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unknown weight type ", TypeToStr(weight_type));
}
} else if (is_qlinear_conv) {
} else if (is_quant_conv) {
// QLinearConv's bias type needs special handling to add the scale for the quantized input
const auto& bias_tensor = *model_builder.GetInitializerTensors().at(bias);
ORT_RETURN_IF_NOT(bias_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32,
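For context on the bias handling above: NNAPI expects the int32 bias of a quantized conv to use scale = x_scale * w_scale with zero point 0 (assumed from the NNAPI documentation; this diff only shows the surrounding plumbing). A tiny sketch:

```cpp
#include <cstdint>

// Sketch of the bias quantization parameters referenced above (assumption
// based on NNAPI's documented requirement for quantized conv bias).
struct QuantParams {
  float scale;
  int32_t zero_point;
};

QuantParams MakeQuantConvBiasParams(float x_scale, float w_scale) {
  return QuantParams{x_scale * w_scale, /*zero_point=*/0};
}
```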