Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QDQ] Move NNAPI EP to use NodeUnitIODef for non-QDQ ops #10237

Merged
merged 28 commits into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5331711
clear using in code
guoyu-wang Dec 17, 2021
e0f844c
add qlinear iodef generation
guoyu-wang Dec 17, 2021
72f0324
Move iodef out of nodeunit
guoyu-wang Dec 17, 2021
b55527b
Move AddInitializersToSkip to use iodef
guoyu-wang Dec 18, 2021
5252da7
minor fix
guoyu-wang Dec 19, 2021
24eb8fd
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 3, 2022
6189bb1
Move get quantized inputs to NodeUnit
guoyu-wang Jan 3, 2022
2baac12
move isvalidzp to nodeunit, fix some minor bugs
guoyu-wang Jan 4, 2022
35b6565
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 4, 2022
5504358
Let HasValidQuantizationZeroPoints handle output def of node_unit
guoyu-wang Jan 4, 2022
87c1204
move op_support_checked to node_unit
guoyu-wang Jan 6, 2022
1367e84
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 6, 2022
7ee1ecb
minor update
guoyu-wang Jan 7, 2022
221776b
add activation handling for node_unit
guoyu-wang Jan 7, 2022
35292cb
add bin and relu support
guoyu-wang Jan 8, 2022
21d0555
move more ops to node_unit
guoyu-wang Jan 9, 2022
61b7e7e
move conv to node_unit
guoyu-wang Jan 10, 2022
7aa4f9d
move gemm/cast to node_unit
guoyu-wang Jan 10, 2022
cc6d468
remove redundant functions
guoyu-wang Jan 11, 2022
edff0b4
remove function postfix
guoyu-wang Jan 11, 2022
09dbdb5
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 11, 2022
a551aff
fix NNAPI CI failure
guoyu-wang Jan 11, 2022
718e6b8
address CR comments
guoyu-wang Jan 12, 2022
8d0692b
remove redundant ORT_MUST_USE_RESULT, and clear includes
guoyu-wang Jan 13, 2022
e075eaa
Simplify FindActivation
guoyu-wang Jan 13, 2022
613035c
address CR comments
guoyu-wang Jan 15, 2022
e0191e5
address CR comments
guoyu-wang Jan 15, 2022
80caa9b
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 15, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 126 additions & 83 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc

Large diffs are not rendered by default.

39 changes: 24 additions & 15 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ namespace onnxruntime {
using Shape = std::vector<uint32_t>;
using InitializerMap = std::unordered_map<std::string, const ONNX_NAMESPACE::TensorProto&>;

class GraphViewer;
class Node;
class NodeArg;
class NodeUnit;
class GraphViewer;
class Path;

struct NodeUnitIODef;

namespace nnapi {

Expand Down Expand Up @@ -94,36 +97,40 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node);

// Return the type of the conv ops,
// This function assumes the input is a 2d conv node
ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& initializers);
ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers);

// This qlinear op is an operator takes 2 inputs and produces 1 output
// Such as QLinearConv, QLinearMatMul, QLinearAdd, ...
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type);

// Check if a qlinear unary op has valid inputs, Qlinear[Sigmoid/AveragePool]
bool HasValidUnaryOpQuantizedInputs(const Node& node);
bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit);
// Check if a qlinear binary op has valid inputs, Qlinear[Conv/MatMul/Add]
bool HasValidBinaryOpQuantizedInputs(const Node& node);
bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit);

// Check if a qlinear op has valid scales for given indices
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices, const OpSupportCheckParams& params);
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input);

// Check if a qlinear op has valid zero points for given indices
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices);
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, bool is_input);

common::Status GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node,
size_t idx, float& scale);
common::Status GetQuantizationScaleAndZeroPoint(
const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path,
float& scale, int32_t& zero_point);

common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers,
const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT;
common::Status GetQuantizationScaleAndZeroPoint(
const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::string& name,
float& scale, int32_t& zero_point, bool is_input = true);

// Get Shape/Type of a NodeArg
// TODO, move to shared_utils
bool GetShape(const NodeArg& node_arg, Shape& shape);
bool GetType(const NodeArg& node_arg, int32_t& type);

// Get the output shape of Flatten Op
void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2);
void GetFlattenOutputShape(const NodeUnit& node_unit, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2);

// If a node is supported by NNAPI
bool IsNodeSupported(const NodeUnit& node_unit, const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
Expand All @@ -144,8 +151,10 @@ bool IsValidSupportedNodeGroup(const std::vector<const Node*>& supported_node_gr
std::string Shape2String(const std::vector<uint32_t>& shape);

// Check the given input is an initializer tensor
bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node,
size_t index, const char* input_name) ORT_MUST_USE_RESULT;
// input_name is the name of the initializer
// input_description is the string describing the input in the output message (if any)
bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::string& input_name, const char* input_description);

} // namespace nnapi
} // namespace onnxruntime
177 changes: 112 additions & 65 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <core/common/logging/logging.h>
#include <core/common/safeint.h>
#include <core/framework/tensorprotoutils.h>
#include "model_builder.h"

#include "core/common/logging/logging.h"
#include "core/common/safeint.h"
#include "core/common/status.h"
#include "core/framework/tensorprotoutils.h"
#include "core/graph/graph_viewer.h"
#include "core/providers/common.h"
#include "core/providers/shared/node_unit/node_unit.h"
#include "core/providers/shared/utils/utils.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"

#include "helper.h"
#include "model_builder.h"
#include "op_builder.h"
#include "op_support_checker.h"

using onnxruntime::NodeUnit;
using namespace android::nn::wrapper;
using std::vector;

namespace onnxruntime {
namespace nnapi {
Expand All @@ -31,7 +32,7 @@ int32_t ModelBuilder::GetNNAPIFeatureLevel() const {
// Scalar operand is copied into the model, no need to persist
#define DEFINE_ADD_OPERAND_FROM_SCALAR(scalar_type, op_type) \
Status ModelBuilder::AddOperandFromScalar(scalar_type value, uint32_t& index) { \
OperandType operandType(Type::op_type, vector<uint32_t>{}); \
OperandType operandType(Type::op_type, std::vector<uint32_t>{}); \
ORT_RETURN_IF_ERROR(AddNewNNAPIOperand(operandType, index)); \
RETURN_STATUS_ON_ERROR_WITH_NOTE( \
nnapi_->ANeuralNetworksModel_setOperandValue( \
Expand All @@ -50,13 +51,12 @@ void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) {
skipped_initializers_.insert(tensor_name);
}

static std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer);

Status ModelBuilder::Prepare() {
nnapi_model_ = std::unique_ptr<Model>(new Model());
RETURN_STATUS_ON_ERROR(nnapi_->ANeuralNetworksModel_create(&nnapi_model_->model_));
ORT_RETURN_IF_ERROR(GetTargetDevices());
all_quantized_op_inputs_ = GetAllQuantizedOpInputs(graph_viewer_);
PreprocessNodeUnits();
GetAllQuantizedOpInputs();
PreprocessInitializers();
PreprocessActivations();
ORT_RETURN_IF_ERROR(RegisterInitializers());
Expand Down Expand Up @@ -118,74 +118,89 @@ Status ModelBuilder::GetTargetDevices() {
}

void ModelBuilder::PreprocessInitializers() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer_.GetNode(node_indices[i]));
if (const auto* op_builder = GetOpBuilder(*node)) {
const NodeUnit node_unit(*node);
op_builder->AddInitializersToSkip(*this, node_unit);
for (const auto& node_unit : node_unit_holder_) {
if (const auto* op_builder = GetOpBuilder(*node_unit)) {
op_builder->AddInitializersToSkip(*this, *node_unit);
}
}
}

void ModelBuilder::PreprocessActivations() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer_.GetNode(node_indices[i]));
const auto& op_type(node->OpType());

for (const auto& node_unit : node_unit_holder_) {
const auto& node = node_unit->GetNode();
const auto& op_type(node.OpType());
if (op_type == "Relu") {
activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU);
activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU);
} else if (op_type == "Clip") { // Relu1 or Relu6
float min, max;
if (!GetClipMinMax(GetInitializerTensors(), *node, min, max, logging::LoggingManager::DefaultLogger()))
if (!GetClipMinMax(GetInitializerTensors(), node, min, max, logging::LoggingManager::DefaultLogger()))
continue;

if (min == -1.0f && max == 1.0f) {
activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU1);
activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU1);
} else if (min == 0.0f && max == 6.0f) {
activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU6);
activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU6);
}
}
}
}

// Help to get all quantized operators' input and the node(s) using the input
static std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer) {
std::unordered_map<std::string, vector<const Node*>> all_quantized_op_inputs;
const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
for (const auto& node_idx : node_indices) {
const auto* node(graph_viewer.GetNode(node_idx));
auto qlinear_op_type = GetQLinearOpType(*node);
const NodeUnit& ModelBuilder::GetNodeUnit(const Node* node) const {
// In theory, if node_unit_map_ is generated correctly, see PreprocessNodeUnits(), a NodeUnit can be
// found for any single node in the graph_viewer_, unless the given node is not from graph_viewer_
return *node_unit_map_.at(node);
}

void ModelBuilder::PreprocessNodeUnits() {
// TODO, hookup shared QDQ selectors here to identify all the qdq NodeUnit in the graph
Copy link
Contributor

@YUNQIUGUO YUNQIUGUO Jan 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// TODO, hookup shared QDQ selectors here to identify all the qdq NodeUnit in the graph

a quick question regarding this:
Since the shared GetQDQselection() method now returns a std::vector<NodeGroup> . and if I understand correctly in order to make NodeUnits here from a NodeGroup type, we need the QDQGroup type NodeUnit to be implemented first before adding the hookup right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we will update the NodeUnit to take NodeGroup, for now the NodeUnit can only handle single node
This will be part of task 5

const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
const auto node_idx = node_indices[i];
// TODO, check if the node is already part of a qdq group
const auto* node(graph_viewer_.GetNode(node_idx));
auto node_unit = std::make_unique<NodeUnit>(*node);
node_unit_map_.insert({node, node_unit.get()});
node_unit_holder_.push_back(std::move(node_unit));
}
}

// Help to get all quantized operators' input and the NodeUnit(s) using the input
void ModelBuilder::GetAllQuantizedOpInputs() {
for (const auto& node_unit : node_unit_holder_) {
// TODO, hookup getting quantized inputs with QDQ NodeUnits and remove the ORT_ENFORCE
ORT_ENFORCE(node_unit->UnitType() == NodeUnit::Type::SingleNode, "QDQ NodeUnit is not yet implemented");

auto qlinear_op_type = GetQLinearOpType(node_unit->GetNode());

// Not a qlinear op
if (qlinear_op_type == QLinearOpType::Unknown)
continue;

const auto add_quantized_input =
[&all_quantized_op_inputs = all_quantized_op_inputs_](const NodeUnit& node_unit, size_t input_idx) {
const auto& input_name = node_unit.Inputs()[input_idx].node_arg.Name();
if (Contains(all_quantized_op_inputs, input_name))
all_quantized_op_inputs.at(input_name).push_back(&node_unit);
else
all_quantized_op_inputs.emplace(input_name, std::vector<const NodeUnit*>{&node_unit});
guoyu-wang marked this conversation as resolved.
Show resolved Hide resolved
};

// All qlinear ops EXCEPT QuantizeLinear has quantized input
if (qlinear_op_type != QLinearOpType::QuantizeLinear) {
const auto& input_name = node->InputDefs()[0]->Name();
if (Contains(all_quantized_op_inputs, input_name))
all_quantized_op_inputs.at(input_name).push_back(node);
else
all_quantized_op_inputs.emplace(input_name, vector<const Node*>{node});
add_quantized_input(*node_unit, 0);
}

if (IsQLinearBinaryOp(qlinear_op_type)) {
const auto& input_name = node->InputDefs()[3]->Name();
if (Contains(all_quantized_op_inputs, input_name))
all_quantized_op_inputs.at(input_name).push_back(node);
else
all_quantized_op_inputs.emplace(input_name, vector<const Node*>{node});
add_quantized_input(*node_unit, 1);
}
}

return all_quantized_op_inputs;
// TODO, add handling for variadic nodes such as QLinearConcat
}
}

static Status GetInputDataType(
const InitializedTensorSet& initializers,
const std::unordered_map<std::string, std::vector<const Node*>>& all_quantized_op_inputs,
const std::unordered_map<std::string, std::vector<const NodeUnit*>>& all_quantized_op_inputs,
const std::string& name, int32_t data_type, const Shape& shape,
OperandType& operand_type) {
Type type = Type::TENSOR_FLOAT32;
Expand All @@ -208,10 +223,9 @@ static Status GetInputDataType(
}

// TODO, verify the scale and zero point match if there are multiple op using same input
const auto* node = all_quantized_op_inputs.at(name)[0];
const NodeUnit node_unit(*node);
ORT_RETURN_IF_ERROR(GetQuantizedInputScaleAndZeroPoint(
initializers, node_unit, name, scale, zero_point));
const auto* node_unit = all_quantized_op_inputs.at(name)[0];
ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
initializers, *node_unit, name, scale, zero_point, true /* is_input */));
break;
}
// case ONNX_NAMESPACE::TensorProto_DataType_INT8:
Expand Down Expand Up @@ -491,15 +505,23 @@ Status ModelBuilder::AddOperandFromPersistMemoryBuffer(

Status ModelBuilder::AddOperations() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
std::unordered_set<const NodeUnit*> processed_node_units;
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer_.GetNode(node_indices[i]));
if (const auto* op_builder = GetOpBuilder(*node)) {
const NodeUnit node_unit(*node);
const NodeUnit& node_unit = GetNodeUnit(node);

// Since a NodeUnit may contain multiple nodes, avoid processing the same NodeUnit multiple times
if (Contains(processed_node_units, &node_unit))
continue;

if (const auto* op_builder = GetOpBuilder(node_unit)) {
ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, node_unit));
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Node [", node->Name(), "], type [", node->OpType(), "] is not supported");
"Node [", node_unit.Name(), "], type [", node_unit.OpType(), "] is not supported");
}

processed_node_units.insert(&node_unit);
}

return Status::OK();
Expand Down Expand Up @@ -605,20 +627,40 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
return Status::OK();
}

int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) {
int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit) {
int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE;
const auto& output_nodes = node_unit.GetOutputNodes();
if (node_unit.GetOutputNodes().size() != 1) {
LOGS_DEFAULT(VERBOSE) << "FindActivation does not support, NodeUnit [" << node_unit.Name()
<< "] type [" << node_unit.OpType()
<< "], with " << output_nodes.size() << " output nodes";
return fuse_code;
}

const auto& outputs = node_unit.Outputs();
if (outputs.size() != 1) {
LOGS_DEFAULT(VERBOSE) << "FindActivation does not support, NodeUnit [" << node_unit.Name()
<< "] type [" << node_unit.OpType()
<< "], with " << outputs.size() << " outputs";
return fuse_code;
}

const NodeArg& output = outputs[0].node_arg;
const auto& output_node = *output_nodes[0];

// TODO, add support of activation fusion for quantized node group (qdq or qlinear)
// We do not support activation fusion for quantized operators for now
auto qlinear_op_type = GetQLinearOpType(node);
auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode());
if (qlinear_op_type != QLinearOpType::Unknown)
return fuse_code;

for (auto it = node.OutputEdgesBegin(), end = node.OutputEdgesEnd(); it != end; ++it) {
for (auto it = output_node.OutputEdgesBegin(), end = output_node.OutputEdgesEnd(); it != end; ++it) {
const auto& dst_node = it->GetNode();
const auto* dst_input = dst_node.InputDefs()[it->GetDstArgIndex()];
if (Contains(activation_nodes_, dst_node.Index())) {
const auto& dst_node_unit = GetNodeUnit(&dst_node);
if (Contains(activation_node_units_, &dst_node_unit)) {
if (&output == dst_input) {
fuse_code = activation_nodes_.at(dst_node.Index());
fuse_code = activation_node_units_.at(&dst_node_unit);
}
} else {
// if there is any other non-relu node using the output
Expand All @@ -628,14 +670,14 @@ int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) {
}
}

// if output is a graph output, will add relu separately
// if output is a graph output, will add activation separately
if (fuse_code != ANEURALNETWORKS_FUSED_NONE) {
for (const auto* graph_output : graph_viewer_.GetOutputs()) {
if (&output == graph_output)
return ANEURALNETWORKS_FUSED_NONE;
const auto& graph_outputs = graph_viewer_.GetOutputs();
if (std::find(graph_outputs.cbegin(), graph_outputs.cend(), &output) != graph_outputs.cend()) {
return ANEURALNETWORKS_FUSED_NONE;
}

LOGS_DEFAULT(VERBOSE) << "Node [" << node.Name() << "] type [" << node.OpType()
LOGS_DEFAULT(VERBOSE) << "Node [" << node_unit.Name() << "] type [" << node_unit.OpType()
<< "], fused the output [" << output.Name() << "]";

fused_activations_.insert(output.Name());
Expand All @@ -644,12 +686,13 @@ int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) {
return fuse_code;
}

/* static */ const IOpBuilder* ModelBuilder::GetOpBuilder(const Node& node) {
/* static */ const IOpBuilder* ModelBuilder::GetOpBuilder(const NodeUnit& node_unit) {
const auto& op_builders = GetOpBuilders();
if (!Contains(op_builders, node.OpType()))
const auto& op_type = node_unit.GetNode().OpType();
if (!Contains(op_builders, op_type))
return nullptr;

return op_builders.at(node.OpType());
return op_builders.at(op_type);
}

std::string ModelBuilder::GetUniqueName(const std::string& base_name) {
Expand All @@ -663,6 +706,10 @@ std::string ModelBuilder::GetUniqueName(const std::string& base_name) {
return unique_name;
}

const InitializedTensorSet& ModelBuilder::GetInitializerTensors() const {
return graph_viewer_.GetAllInitializedTensors();
}

void ModelBuilder::RegisterNHWCOperand(const std::string& name) {
nhwc_operands_.insert(name);
}
Expand Down
Loading