From 5331711e2245c3456ca36fa9da0411d628e3e9fb Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Thu, 16 Dec 2021 22:34:57 -0800 Subject: [PATCH 01/23] clear using in code --- .../nnapi/nnapi_builtin/builders/helper.cc | 4 - .../nnapi_builtin/builders/model_builder.cc | 14 ++- .../nnapi_builtin/builders/op_builder.cc | 86 +++++++++---------- .../builders/op_support_checker.cc | 7 +- .../nnapi/nnapi_builtin/builders/shaper.cc | 21 ++--- .../nnapi_builtin/nnapi_execution_provider.cc | 2 - 6 files changed, 59 insertions(+), 75 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 2fc1afd435279..edbcaf590c379 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -17,10 +17,6 @@ #include "helper.h" #include "op_support_checker.h" -using onnxruntime::NodeUnit; -using std::string; -using std::vector; - namespace onnxruntime { namespace nnapi { diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index 645ab23a85109..7136251927e09 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -14,9 +14,7 @@ #include "op_builder.h" #include "op_support_checker.h" -using onnxruntime::NodeUnit; using namespace android::nn::wrapper; -using std::vector; namespace onnxruntime { namespace nnapi { @@ -31,7 +29,7 @@ int32_t ModelBuilder::GetNNAPIFeatureLevel() const { // Scalar operand is copied into the model, no need to persist #define DEFINE_ADD_OPERAND_FROM_SCALAR(scalar_type, op_type) \ Status ModelBuilder::AddOperandFromScalar(scalar_type value, uint32_t& index) { \ - OperandType operandType(Type::op_type, vector{}); \ + OperandType operandType(Type::op_type, std::vector{}); \ ORT_RETURN_IF_ERROR(AddNewNNAPIOperand(operandType, index)); \ RETURN_STATUS_ON_ERROR_WITH_NOTE( \ nnapi_->ANeuralNetworksModel_setOperandValue( \ @@ -50,7 +48,7 @@ void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) { skipped_initializers_.insert(tensor_name); } -static std::unordered_map> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer); +static std::unordered_map> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer); Status ModelBuilder::Prepare() { nnapi_model_ = std::unique_ptr(new Model()); @@ -151,8 +149,8 @@ void ModelBuilder::PreprocessActivations() { } // Help to get all quantized operators' input and the node(s) using the input -static std::unordered_map> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer) { - std::unordered_map> all_quantized_op_inputs; +static std::unordered_map> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer) { + std::unordered_map> all_quantized_op_inputs; const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder(); for (const auto& node_idx : node_indices) { const auto* node(graph_viewer.GetNode(node_idx)); @@ -168,7 +166,7 @@ static std::unordered_map> GetAllQuantizedOpInp if (Contains(all_quantized_op_inputs, input_name)) all_quantized_op_inputs.at(input_name).push_back(node); else - all_quantized_op_inputs.emplace(input_name, vector{node}); + all_quantized_op_inputs.emplace(input_name, std::vector{node}); } if (IsQLinearBinaryOp(qlinear_op_type)) { @@ -176,7 +174,7 @@ static std::unordered_map> GetAllQuantizedOpInp if 
(Contains(all_quantized_op_inputs, input_name)) all_quantized_op_inputs.at(input_name).push_back(node); else - all_quantized_op_inputs.emplace(input_name, vector{node}); + all_quantized_op_inputs.emplace(input_name, std::vector{node}); } } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 66f870df15074..45f4c975843bd 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -16,9 +16,7 @@ #include "model_builder.h" #include "op_support_checker.h" -using onnxruntime::NodeUnit; using namespace android::nn::wrapper; -using std::vector; namespace onnxruntime { namespace nnapi { @@ -40,13 +38,13 @@ struct OpBuilderRegistrations { Status AddTransposeOperator(ModelBuilder& model_builder, const std::string& input, const std::string& perm_name, - vector perm, + std::vector perm, const std::string& output, bool output_is_nhwc) ORT_MUST_USE_RESULT; Status AddTransposeOperator(ModelBuilder& model_builder, const std::string& input, const std::string& perm_name, - vector perm, + std::vector perm, const std::string& output, bool output_is_nhwc) { auto& shaper(model_builder.GetShaper()); @@ -83,7 +81,7 @@ Status TransposeBetweenNCHWAndNHWC(ModelBuilder& model_builder, "TransposeBetweenNCHWAndNHWC input has to be a 4d tensor, actual dimensions: ", shaper[input].size()); std::string perm_name; - vector perm; + std::vector perm; if (nchw_to_nhwc) { perm_name = model_builder.GetUniqueName(input + "nchw_to_nhwc_perm"); perm = {0, 2, 3, 1}; @@ -222,11 +220,11 @@ static Status AddBinaryOperator(int32_t op_type, static Status AddSqueezeOp(ModelBuilder& model_builder, const std::string& node_name, const std::string& input, const std::string& output, - vector axes) ORT_MUST_USE_RESULT; + std::vector axes) ORT_MUST_USE_RESULT; static Status AddSqueezeOp(ModelBuilder& model_builder, const std::string& node_name, const std::string& input, const std::string& output, - vector axes) { + std::vector axes) { if (model_builder.GetNNAPIFeatureLevel() < ANEURALNETWORKS_FEATURE_LEVEL_2) { return ORT_MAKE_STATUS( ONNXRUNTIME, FAIL, "Squeeze is not supported on API level ", model_builder.GetNNAPIFeatureLevel()); @@ -430,13 +428,13 @@ static Status ComputeConvPads( const uint32_t weight_size_y, const uint32_t weight_size_x, const std::vector& onnx_pads, const std::vector& onnx_strides, const std::vector& onnx_dilations, AutoPadType auto_pad_type, bool nchw, - vector& pads_out) ORT_MUST_USE_RESULT; + std::vector& pads_out) ORT_MUST_USE_RESULT; static Status ComputeConvPads( const Shape& input_dimen, const uint32_t weight_size_y, const uint32_t weight_size_x, const std::vector& onnx_pads, const std::vector& onnx_strides, const std::vector& onnx_dilations, AutoPadType auto_pad_type, bool nchw, - vector& pads_out) { + std::vector& pads_out) { const int32_t input_size_y = nchw ? input_dimen[2] : input_dimen[1]; const int32_t input_size_x = nchw ? 
input_dimen[3] : input_dimen[2]; const int32_t stride_y = onnx_strides[0]; @@ -467,21 +465,21 @@ static Status ComputeConvPads( static Status HandleAutoPad(const Shape& input_shape, const uint32_t weight_size_y, const uint32_t weight_size_x, - const vector& onnx_strides, - const vector& onnx_dilations, + const std::vector& onnx_strides, + const std::vector& onnx_dilations, AutoPadType auto_pad_type, bool use_nchw, - vector& onnx_pads, + std::vector& onnx_pads, int32_t& nnapi_padding_code, bool& use_auto_pad) ORT_MUST_USE_RESULT; static Status HandleAutoPad(const Shape& input_shape, const uint32_t weight_size_y, const uint32_t weight_size_x, - const vector& onnx_strides, - const vector& onnx_dilations, + const std::vector& onnx_strides, + const std::vector& onnx_dilations, AutoPadType auto_pad_type, bool use_nchw, - vector& onnx_pads, + std::vector& onnx_pads, int32_t& nnapi_padding_code, bool& use_auto_pad) { use_auto_pad = false; @@ -498,7 +496,7 @@ static Status HandleAutoPad(const Shape& input_shape, } } else if (onnx_dilations == std::vector{1, 1}) { // Since NNAPI runs more efficiently using auto_pad, we try to map the NOTSET padding to auto_pad - vector same_upper_pads; + std::vector same_upper_pads; ORT_RETURN_IF_ERROR(ComputeConvPads(input_shape, weight_size_y, weight_size_x, onnx_pads, onnx_strides, onnx_dilations, AutoPadType::SAME_UPPER, use_nchw, @@ -547,12 +545,12 @@ static Status GetConvMatMulOpQuantizationScaleAndZeroPoint( const ModelBuilder& model_builder, const Node& node, float& a_scale, float& w_scale, float& y_scale, int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point, - optional>& w_scales, bool& is_per_tensor_u8s8) ORT_MUST_USE_RESULT; + optional>& w_scales, bool& is_per_tensor_u8s8) ORT_MUST_USE_RESULT; static Status GetConvMatMulOpQuantizationScaleAndZeroPoint( const ModelBuilder& model_builder, const Node& node, float& a_scale, float& w_scale, float& y_scale, int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point, - optional>& w_scales, bool& is_per_tensor_u8s8) { + optional>& w_scales, bool& is_per_tensor_u8s8) { is_per_tensor_u8s8 = false; // Get scale and zero points // We will handle per-channel weight scale and zero point later @@ -593,7 +591,7 @@ static Status GetConvMatMulOpQuantizationScaleAndZeroPoint( ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(scale_tensor, unpacked_tensor)); const float* scales = reinterpret_cast(unpacked_tensor.data()); const size_t scales_size = scale_tensor.dims().empty() ? 
1 : scale_tensor.dims()[0]; - vector scales_vec(scales, scales + scales_size); + std::vector scales_vec(scales, scales + scales_size); w_scales = onnxruntime::make_optional(std::move(scales_vec)); return Status::OK(); } @@ -631,12 +629,12 @@ static Status IsValidConvWeightQuantizedType(const ModelBuilder& model_builder, const std::string& input_name, float scale, int32_t zero_point, - const optional>& scales) ORT_MUST_USE_RESULT; + const optional>& scales) ORT_MUST_USE_RESULT; static Status IsValidConvWeightQuantizedType(const ModelBuilder& model_builder, const std::string& input_name, float scale, int32_t zero_point, - const optional>& scales) { + const optional>& scales) { // first verify as the weight has no per-channel quantization ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input_name, scale, zero_point)); @@ -902,7 +900,7 @@ Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co auto input = node.InputDefs()[0]->Name(); const auto& output = node.OutputDefs()[0]->Name(); NodeAttrHelper helper(node); - vector perm = helper.Get("perm", vector()); + std::vector perm = helper.Get("perm", std::vector()); auto input_dims = shaper[input].size(); if (perm.empty()) { for (int32_t i = input_dims - 1; i >= 0; i--) @@ -1118,7 +1116,7 @@ Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_bu const auto eps = helper.Get("epsilon", 1e-5f); const auto size = SafeInt(scale_tensor.dims()[0]); - vector a, b; + std::vector a, b; a.reserve(size); b.reserve(size); @@ -1267,15 +1265,15 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N else // (op_type == "MaxPool" || op_type == "GlobalMaxPool") op_code = ANEURALNETWORKS_MAX_POOL_2D; - vector onnx_pads, onnx_strides, kernel_shape; + std::vector onnx_pads, onnx_strides, kernel_shape; bool use_auto_pad = false; int32_t nnapi_padding_code = ANEURALNETWORKS_PADDING_VALID; const auto& input_shape = shaper[input]; if (is_average_pool || op_type == "MaxPool") { const auto auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET")); - kernel_shape = helper.Get("kernel_shape", vector{0, 0}); - onnx_strides = helper.Get("strides", vector{1, 1}); - onnx_pads = helper.Get("pads", vector{0, 0, 0, 0}); + kernel_shape = helper.Get("kernel_shape", std::vector{0, 0}); + onnx_strides = helper.Get("strides", std::vector{1, 1}); + onnx_pads = helper.Get("pads", std::vector{0, 0, 0, 0}); const auto weight_size_y = static_cast(kernel_shape[0]); const auto weight_size_x = static_cast(kernel_shape[1]); ORT_RETURN_IF_ERROR( @@ -1286,14 +1284,14 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } else { // (op_type == "GlobalAveragePool" || op_type == "GlobalMaxPool") use_auto_pad = true; nnapi_padding_code = ANEURALNETWORKS_PADDING_VALID; - onnx_strides = vector{1, 1}; - onnx_pads = vector{0, 0, 0, 0}; + onnx_strides = std::vector{1, 1}; + onnx_pads = std::vector{0, 0, 0, 0}; if (use_nchw) { - kernel_shape = vector{static_cast(input_shape[2]), - static_cast(input_shape[3])}; + kernel_shape = std::vector{static_cast(input_shape[2]), + static_cast(input_shape[3])}; } else { - kernel_shape = vector{static_cast(input_shape[1]), - static_cast(input_shape[2])}; + kernel_shape = std::vector{static_cast(input_shape[1]), + static_cast(input_shape[2])}; } } @@ -1403,15 +1401,15 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N // onnx strides are in the order height, width // while nnapi strides are in the order 
width, height - const auto onnx_strides = helper.Get("strides", vector{1, 1}); + const auto onnx_strides = helper.Get("strides", std::vector{1, 1}); // onnx pads are in the order top, left, bottom, right // while nnapi pads is in the order left, right, top, bottom - auto onnx_pads = helper.Get("pads", vector{0, 0, 0, 0}); + auto onnx_pads = helper.Get("pads", std::vector{0, 0, 0, 0}); // onnx dilations is in the order height, width // while nnapi dilations are in the order width, height - const auto onnx_dilations = helper.Get("dilations", vector{1, 1}); + const auto onnx_dilations = helper.Get("dilations", std::vector{1, 1}); const auto group = helper.Get("group", 1); size_t x_idx = 0, @@ -1446,7 +1444,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N y_zero_point = 0; // this is for per-channel quantization weights - optional> w_scales; + optional> w_scales; bool is_per_tensor_u8s8 = false; if (is_qlinear_conv) { ORT_RETURN_IF_ERROR(GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node, @@ -1517,11 +1515,11 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N const auto& weight_type = operand_types.at(weight).type; if (weight_type == Type::TENSOR_FLOAT32) { - vector buffer(bias_dimen[0], 0.0f); + std::vector buffer(bias_dimen[0], 0.0f); OperandType bias_operand_type(Type::TENSOR_FLOAT32, bias_dimen, x_scale * w_scale); ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(bias, buffer.data(), bias_operand_type)); } else if (weight_type == Type::TENSOR_QUANT8_ASYMM || weight_type == Type::TENSOR_QUANT8_SYMM_PER_CHANNEL) { - vector buffer(bias_dimen[0], 0); + std::vector buffer(bias_dimen[0], 0); OperandType bias_operand_type(Type::TENSOR_INT32, bias_dimen, x_scale * w_scale); ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(bias, buffer.data(), bias_operand_type)); } else { @@ -1819,7 +1817,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N bool is_per_tensor_u8s8 = false; if (is_qlinear_matmul) { - optional> w_scales; + optional> w_scales; ORT_RETURN_IF_ERROR( GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node, a_scale, b_scale, y_scale, @@ -2118,7 +2116,7 @@ class SqueezeOpBuilder : public BaseOpBuilder { private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; - static Status GetAxes(ModelBuilder& model_builder, const Node& node, vector& axes); + static Status GetAxes(ModelBuilder& model_builder, const Node& node, std::vector& axes); }; void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { @@ -2129,7 +2127,7 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const } /* static */ Status SqueezeOpBuilder::GetAxes(ModelBuilder& model_builder, - const Node& node, vector& axes) { + const Node& node, std::vector& axes) { // Squeeze opset 13 use input as axes if (node.SinceVersion() > 12) { // If axes is not supplied, return an empty axes as default to squeeze all @@ -2148,7 +2146,7 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const } } else { NodeAttrHelper helper(node); - axes = helper.Get("axes", vector()); + axes = helper.Get("axes", std::vector()); } return Status::OK(); @@ -2163,7 +2161,7 @@ Status SqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node, 0, input)); } - vector 
axes; + std::vector axes; ORT_RETURN_IF_ERROR(GetAxes(model_builder, node, axes)); return AddSqueezeOp(model_builder, node.Name(), input, node.OutputDefs()[0]->Name(), axes); } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc index ca2ba5e90fb93..331b2d2e9e745 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc @@ -12,9 +12,6 @@ #include "helper.h" #include "op_support_checker.h" -using onnxruntime::NodeUnit; -using std::vector; - namespace onnxruntime { namespace nnapi { @@ -777,8 +774,8 @@ bool ConvOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial return false; } - const auto onnx_dilations = helper.Get("dilations", vector{1, 1}); - if (onnx_dilations != vector{1, 1}) { + const auto onnx_dilations = helper.Get("dilations", std::vector{1, 1}); + if (onnx_dilations != std::vector{1, 1}) { if (group != 1 && tensor.dims()[1] != 1) { LOGS_DEFAULT(VERBOSE) << "dilation is not supported on grouped conv"; return false; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc index 9d675ecfa84c0..2bcc167622b3e 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc @@ -9,14 +9,11 @@ namespace onnxruntime { namespace nnapi { -using std::string; -using std::vector; - std::pair ComputeConvOutputShape(const uint32_t input_size_y, const uint32_t input_size_x, const uint32_t weight_size_y, const uint32_t weight_size_x, - const vector& onnx_pads, - const vector& onnx_strides, - const vector& onnx_dilations) { + const std::vector& onnx_pads, + const std::vector& onnx_strides, + const std::vector& onnx_dilations) { int32_t padding_top = onnx_pads[0]; int32_t padding_bottom = onnx_pads[2]; int32_t padding_left = onnx_pads[1]; @@ -53,9 +50,9 @@ std::pair ComputeConvOutputShape(const uint32_t input_size_y Status Shaper::Conv(const std::string& input_name, const std::string& weight_name, - const vector& onnx_pads, - const vector& onnx_strides, - const vector& onnx_dilations, + const std::vector& onnx_pads, + const std::vector& onnx_strides, + const std::vector& onnx_dilations, bool nchw, const std::string& output_name) { SHAPER_FUNC(Conv, @@ -150,9 +147,9 @@ Status Shaper::ResizeUsingOutputSizes(const std::string& input_name, Status Shaper::ConvImpl(const std::string& input_name, const std::string& weight_name, - const vector& onnx_pads, - const vector& onnx_strides, - const vector& onnx_dilations, + const std::vector& onnx_pads, + const std::vector& onnx_strides, + const std::vector& onnx_dilations, bool nchw, const std::string& output_name) { const Shape& input_dimen = shape_map_.at(input_name); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc index fa876a7ef6bca..150db8f593e11 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc @@ -21,8 +21,6 @@ #include "core/providers/nnapi/nnapi_builtin/model.h" #endif -using onnxruntime::NodeUnit; - namespace onnxruntime { namespace { From e0f844c477099b8bcacde90319261b4a61e2df24 Mon Sep 17 
00:00:00 2001 From: Guoyu Wang Date: Fri, 17 Dec 2021 11:46:15 -0800 Subject: [PATCH 02/23] add qlinear iodef generation --- .../providers/shared/node_unit/node_unit.cc | 160 ++++++++++++++++-- 1 file changed, 147 insertions(+), 13 deletions(-) diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.cc b/onnxruntime/core/providers/shared/node_unit/node_unit.cc index 80492ce701eff..49133f5d66bf0 100644 --- a/onnxruntime/core/providers/shared/node_unit/node_unit.cc +++ b/onnxruntime/core/providers/shared/node_unit/node_unit.cc @@ -6,6 +6,81 @@ namespace onnxruntime { +namespace { + +// The QLinearOpType GetQLinearOpType, is very similar to the one in NNAPI +// However, the NNAPI ones are only the subset of the ones here, +// TODO, make these shared +enum class QLinearOpType : uint8_t { + Unknown, // Unknown or not a linear quantized op + DequantizeLinear, + QuantizeLinear, + QLinearConv, + QLinearMatMul, + QLinearAdd, + QLinearSigmoid, + QLinearAveragePool, + QLinearMul, + QLinearReduceMean, + QLinearConcat, + QLinearGlobalAveragePool, + QLinearLeakyRelu, +}; + +QLinearOpType GetQLinearOpType(const onnxruntime::Node& node) { + const auto& op_type = node.OpType(); + if (op_type == "DequantizeLinear") + return QLinearOpType::DequantizeLinear; + else if (op_type == "QuantizeLinear") + return QLinearOpType::QuantizeLinear; + else if (op_type == "QLinearConv") + return QLinearOpType::QLinearConv; + else if (op_type == "QLinearMatMul") + return QLinearOpType::QLinearMatMul; + else if (op_type == "QLinearAdd") + return QLinearOpType::QLinearAdd; + else if (op_type == "QLinearSigmoid") + return QLinearOpType::QLinearSigmoid; + else if (op_type == "QLinearAveragePool") + return QLinearOpType::QLinearAveragePool; + else if (op_type == "QLinearMul") + return QLinearOpType::QLinearMul; + else if (op_type == "QLinearReduceMean") + return QLinearOpType::QLinearReduceMean; + else if (op_type == "QLinearConcat") + return QLinearOpType::QLinearConcat; + else if (op_type == "QLinearGlobalAveragePool") + return QLinearOpType::QLinearGlobalAveragePool; + else if (op_type == "QLinearLeakyRelu") + return QLinearOpType::QLinearLeakyRelu; + + return QLinearOpType::Unknown; +} + +// Ops have 1 input +bool IsUnaryQLinearOp(QLinearOpType type) { + return type == QLinearOpType::QLinearSigmoid || + type == QLinearOpType::QLinearAveragePool || + type == QLinearOpType::QLinearGlobalAveragePool || + type == QLinearOpType::QLinearLeakyRelu || + type == QLinearOpType::QLinearReduceMean; +} + +// Ops have 2 inputs +bool IsBinaryQLinearOp(QLinearOpType type) { + return type == QLinearOpType::QLinearConv || + type == QLinearOpType::QLinearMatMul || + type == QLinearOpType::QLinearAdd || + type == QLinearOpType::QLinearMul; +} + +// Ops have 1 or more inputs +bool IsVariadicQLinearOp(QLinearOpType type) { + return type == QLinearOpType::QLinearConcat; +} + +} // namespace + NodeUnit::NodeUnit(const Node& node) : nodes_{&node}, node_(node), @@ -26,20 +101,79 @@ void NodeUnit::InitForNode() { const auto& output_defs = node_.OutputDefs(); // The 1st step is to hookup the NodeUnit with the NNAPI builder interface // So we are not handling quantization here now - // TODO, enable quantization - // auto qlinear_type = GetQLinearOpType(node_); - // if (qlinear_type == QLinearOpType::Unknown) { - // Not a Qlinear op, add all inputs/outputs - auto add_all_io = [](std::vector& defs, - const ConstPointerContainer>& node_defs) { - defs.reserve(node_defs.size()); - - for (const auto def : node_defs) { - 
defs.push_back(NodeUnit::IODef{*def, std::nullopt}); + auto qlinear_type = GetQLinearOpType(node_); + if (qlinear_type == QLinearOpType::Unknown) { + //Not a Qlinear op, add all inputs / outputs + auto add_all_io = [](std::vector& defs, + const ConstPointerContainer>& node_defs) { + defs.reserve(node_defs.size()); + + for (const auto def : node_defs) { + defs.push_back(NodeUnit::IODef{*def, std::nullopt}); + } + }; + add_all_io(input_defs_, input_defs); + add_all_io(output_defs_, output_defs); + } else if (IsUnaryQLinearOp(qlinear_type)) { + // Unary QLinear Op has 5 inputs + // x, x_scale, x_zp, y_scale, y_zp (optional) + input_defs_.push_back(NodeUnit::IODef{ + *input_defs[0], + NodeUnit::IODef::QuantParam{*input_defs[1], input_defs[2]}}); + + output_defs_.push_back(NodeUnit::IODef{ + *output_defs[0], + NodeUnit::IODef::QuantParam{*input_defs[3], + input_defs_.size() > 4 + ? input_defs[4] + : nullptr}}); + } else if (IsBinaryQLinearOp(qlinear_type)) { + // Binary QLinear Op has 9 inputs + // x1, x1_scale, x1_zp, x2/w, x2_scale, x2_zp, y_scale , y_zp, B + input_defs_.push_back(NodeUnit::IODef{ + *input_defs[0], + NodeUnit::IODef::QuantParam{*input_defs[1], input_defs[2]}}); + input_defs_.push_back(NodeUnit::IODef{ + *input_defs[3], + NodeUnit::IODef::QuantParam{*input_defs[4], input_defs[5]}}); + + if (input_defs_.size() == 9) { // has Bias + input_defs_.push_back(NodeUnit::IODef{ + *input_defs[8], + std::nullopt}); // for Bias the scale and zp are optional } - }; - add_all_io(input_defs_, input_defs); - add_all_io(output_defs_, output_defs); + + output_defs_.push_back(NodeUnit::IODef{ + *output_defs[0], + NodeUnit::IODef::QuantParam{*input_defs[6], input_defs[7]}}); + } else if (IsVariadicQLinearOp(qlinear_type)) { + // TODO, add variadic support + ORT_NOT_IMPLEMENTED(); + } else if (qlinear_type == QLinearOpType::DequantizeLinear) { + // DequantizeLinear has 3 inputs + // x, x_scale, x_zp + // output is not quantized + input_defs_.push_back(NodeUnit::IODef{ + *input_defs[0], + NodeUnit::IODef::QuantParam{*input_defs[1], + input_defs_.size() == 3 + ? input_defs[2] + : nullptr}}); + output_defs_.push_back(NodeUnit::IODef{*output_defs[0], std::nullopt}); + } else if (qlinear_type == QLinearOpType::QuantizeLinear) { + // QuantizeLinear the input is not quantized and has 3 inputs + // x, y_scale, y_zp (optional) + // The output is quantized + input_defs_.push_back(NodeUnit::IODef{*input_defs[0], std::nullopt}); + output_defs_.push_back(NodeUnit::IODef{ + *output_defs[0], + NodeUnit::IODef::QuantParam{*input_defs[1], + input_defs_.size() == 3 + ? 
input_defs[2] + : nullptr}}); + } else { + ORT_THROW("The QLinear op [", static_cast(qlinear_type), "] is not supported"); + } } } // namespace onnxruntime From 72f0324465dfb6206b433bff63253e23ad1a3525 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 17 Dec 2021 15:47:22 -0800 Subject: [PATCH 03/23] Move iodef out of nodeunit --- .../providers/shared/node_unit/node_unit.cc | 56 +++++++++---------- .../providers/shared/node_unit/node_unit.h | 36 ++++++------ 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.cc b/onnxruntime/core/providers/shared/node_unit/node_unit.cc index 49133f5d66bf0..340662e66b6e4 100644 --- a/onnxruntime/core/providers/shared/node_unit/node_unit.cc +++ b/onnxruntime/core/providers/shared/node_unit/node_unit.cc @@ -104,12 +104,12 @@ void NodeUnit::InitForNode() { auto qlinear_type = GetQLinearOpType(node_); if (qlinear_type == QLinearOpType::Unknown) { //Not a Qlinear op, add all inputs / outputs - auto add_all_io = [](std::vector& defs, + auto add_all_io = [](std::vector& defs, const ConstPointerContainer>& node_defs) { defs.reserve(node_defs.size()); for (const auto def : node_defs) { - defs.push_back(NodeUnit::IODef{*def, std::nullopt}); + defs.push_back(NodeUnitIODef{*def, std::nullopt}); } }; add_all_io(input_defs_, input_defs); @@ -117,35 +117,35 @@ void NodeUnit::InitForNode() { } else if (IsUnaryQLinearOp(qlinear_type)) { // Unary QLinear Op has 5 inputs // x, x_scale, x_zp, y_scale, y_zp (optional) - input_defs_.push_back(NodeUnit::IODef{ + input_defs_.push_back(NodeUnitIODef{ *input_defs[0], - NodeUnit::IODef::QuantParam{*input_defs[1], input_defs[2]}}); + NodeUnitIODef::QuantParam{*input_defs[1], input_defs[2]}}); - output_defs_.push_back(NodeUnit::IODef{ + output_defs_.push_back(NodeUnitIODef{ *output_defs[0], - NodeUnit::IODef::QuantParam{*input_defs[3], - input_defs_.size() > 4 - ? input_defs[4] - : nullptr}}); + NodeUnitIODef::QuantParam{*input_defs[3], + input_defs_.size() > 4 + ? input_defs[4] + : nullptr}}); } else if (IsBinaryQLinearOp(qlinear_type)) { // Binary QLinear Op has 9 inputs // x1, x1_scale, x1_zp, x2/w, x2_scale, x2_zp, y_scale , y_zp, B - input_defs_.push_back(NodeUnit::IODef{ + input_defs_.push_back(NodeUnitIODef{ *input_defs[0], - NodeUnit::IODef::QuantParam{*input_defs[1], input_defs[2]}}); - input_defs_.push_back(NodeUnit::IODef{ + NodeUnitIODef::QuantParam{*input_defs[1], input_defs[2]}}); + input_defs_.push_back(NodeUnitIODef{ *input_defs[3], - NodeUnit::IODef::QuantParam{*input_defs[4], input_defs[5]}}); + NodeUnitIODef::QuantParam{*input_defs[4], input_defs[5]}}); if (input_defs_.size() == 9) { // has Bias - input_defs_.push_back(NodeUnit::IODef{ + input_defs_.push_back(NodeUnitIODef{ *input_defs[8], std::nullopt}); // for Bias the scale and zp are optional } - output_defs_.push_back(NodeUnit::IODef{ + output_defs_.push_back(NodeUnitIODef{ *output_defs[0], - NodeUnit::IODef::QuantParam{*input_defs[6], input_defs[7]}}); + NodeUnitIODef::QuantParam{*input_defs[6], input_defs[7]}}); } else if (IsVariadicQLinearOp(qlinear_type)) { // TODO, add variadic support ORT_NOT_IMPLEMENTED(); @@ -153,24 +153,24 @@ void NodeUnit::InitForNode() { // DequantizeLinear has 3 inputs // x, x_scale, x_zp // output is not quantized - input_defs_.push_back(NodeUnit::IODef{ + input_defs_.push_back(NodeUnitIODef{ *input_defs[0], - NodeUnit::IODef::QuantParam{*input_defs[1], - input_defs_.size() == 3 - ? 
input_defs[2]
-                                        : nullptr}});
-    output_defs_.push_back(NodeUnit::IODef{*output_defs[0], std::nullopt});
+        NodeUnitIODef::QuantParam{*input_defs[1],
+                                  input_defs_.size() == 3
+                                      ? input_defs[2]
+                                      : nullptr}});
+    output_defs_.push_back(NodeUnitIODef{*output_defs[0], std::nullopt});
   } else if (qlinear_type == QLinearOpType::QuantizeLinear) {
     // QuantizeLinear the input is not quantized and has 3 inputs
     // x, y_scale, y_zp (optional)
     // The output is quantized
-    input_defs_.push_back(NodeUnit::IODef{*input_defs[0], std::nullopt});
-    output_defs_.push_back(NodeUnit::IODef{
+    input_defs_.push_back(NodeUnitIODef{*input_defs[0], std::nullopt});
+    output_defs_.push_back(NodeUnitIODef{
         *output_defs[0],
-        NodeUnit::IODef::QuantParam{*input_defs[1],
-                                    input_defs_.size() == 3
-                                        ? input_defs[2]
-                                        : nullptr}});
+        NodeUnitIODef::QuantParam{*input_defs[1],
+                                  input_defs_.size() == 3
+                                      ? input_defs[2]
+                                      : nullptr}});
   } else {
     ORT_THROW("The QLinear op [", static_cast<uint8_t>(qlinear_type), "] is not supported");
   }
 }
diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.h b/onnxruntime/core/providers/shared/node_unit/node_unit.h
index d94b0e2fc55e7..0e4e9687f6c98 100644
--- a/onnxruntime/core/providers/shared/node_unit/node_unit.h
+++ b/onnxruntime/core/providers/shared/node_unit/node_unit.h
@@ -21,6 +21,20 @@ namespace QDQ {
 struct NodeGroup;
 }
 
+// Definition of one input or output
+// If the optional quant_param is present, then this is a quantized input,
+// otherwise this is a regular input
+struct NodeUnitIODef {
+  // The quantization parmeter, scale is manadatory, and zero_point is optional
+  struct QuantParam {
+    const NodeArg& scale;
+    const NodeArg* zero_point{nullptr};
+  };
+
+  const NodeArg& node_arg;
+  const std::optional<QuantParam> quant_param;
+};
+
 /**
 @class NodeUnit
 Class to represent a single node or a QDQ group of nodes, which will be used as a single unit.
@@ -33,27 +47,13 @@ class NodeUnit { QDQGroup, // The NodeUnit contain a QDQ group of nodes, such as "DQ->Sigmoid->Q" }; - // Definition of one input or output - // If the optional quant_param is present, then this is a quantized input, - // otherwise this is a regular input - struct IODef { - // The quantization parmeter, scale is manadatory, and zero_point is optional - struct QuantParam { - const NodeArg& scale; - const NodeArg* zero_point{nullptr}; - }; - - const NodeArg& node_arg; - const std::optional quant_param; - }; - public: explicit NodeUnit(const Node& node); Type UnitType() const noexcept { return type_; } - const std::vector& Inputs() const noexcept { return input_defs_; } - const std::vector& Outputs() const noexcept { return output_defs_; } + const std::vector& Inputs() const noexcept { return input_defs_; } + const std::vector& Outputs() const noexcept { return output_defs_; } const std::string& Domain() const noexcept; const std::string& OpType() const noexcept; @@ -68,8 +68,8 @@ class NodeUnit { const std::vector GetAllNodes() const noexcept { return nodes_; } private: - std::vector input_defs_; - std::vector output_defs_; + std::vector input_defs_; + std::vector output_defs_; const std::vector nodes_; // all nodes in this NodeUnit const Node& node_; // target Node From b55527b9a98fa4be51ea2db4b2cb47914471fe6a Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 17 Dec 2021 22:02:36 -0800 Subject: [PATCH 04/23] Move AddInitializersToSkip to use iodef --- .../nnapi_builtin/builders/op_builder.cc | 213 +++++++++--------- 1 file changed, 107 insertions(+), 106 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 45f4c975843bd..17acf39a4635c 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -654,15 +654,17 @@ static Status IsValidConvWeightQuantizedType(const ModelBuilder& model_builder, return Status::OK(); } -static void AddBinaryOpQuantizationScaleAndZeroPointToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) { - const auto& node = node_unit.GetNode(); - const auto input_defs(node.InputDefs()); - model_builder.AddInitializerToSkip(input_defs[1]->Name()); // a_scale - model_builder.AddInitializerToSkip(input_defs[2]->Name()); // a_zero_point - model_builder.AddInitializerToSkip(input_defs[4]->Name()); // b_scale - model_builder.AddInitializerToSkip(input_defs[5]->Name()); // b_zero_point - model_builder.AddInitializerToSkip(input_defs[6]->Name()); // y_scale - model_builder.AddInitializerToSkip(input_defs[7]->Name()); // y_zero_point +static void AddQuantizationScaleAndZeroPointToSkip(ModelBuilder& model_builder, const NodeUnitIODef& io_def) { + // If we reach here, we assume the io_def has quant_param + model_builder.AddInitializerToSkip(io_def.quant_param->scale.Name()); // scale + if (io_def.quant_param->zero_point) { + model_builder.AddInitializerToSkip(io_def.quant_param->zero_point->Name()); // zero_point + } +} + +static void AddInputToSkip(ModelBuilder& model_builder, const NodeUnitIODef& io_def) { + model_builder.AddInitializerToSkip(io_def.node_arg.Name()); // main input + AddQuantizationScaleAndZeroPointToSkip(model_builder, io_def); } Status GetQuantizedInputScaleAndZeroPoint(const InitializedTensorSet& initializers, @@ -759,14 +761,23 @@ class BinaryOpBuilder : public BaseOpBuilder { static void 
CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: + static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; +/* static */ bool BinaryOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) { + // TODO, add support for QDQ NodeUnit + return node_unit.OpType() == "QLinearAdd"; +} + void BinaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& op = node_unit.OpType(); - if (op == "QLinearAdd") { - AddBinaryOpQuantizationScaleAndZeroPointToSkip(model_builder, node_unit); - } + if (!IsQuantizedOp(node_unit)) + return; + + const auto& inputs = node_unit.Inputs(); + AddQuantizationScaleAndZeroPointToSkip(model_builder, inputs[0]); // a_scale, a_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, inputs[1]); // b_scale, b_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp } /* static */ void BinaryOpBuilder::CreateSharedOpBuilder( @@ -945,8 +956,7 @@ class ReshapeOpBuilder : public BaseOpBuilder { }; void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); + model_builder.AddInitializerToSkip(node_unit.Inputs()[1].node_arg.Name()); } // We can skip the Reshape if all the output edges satisfies both the following conditions @@ -1089,12 +1099,11 @@ class BatchNormalizationOpBuilder : public BaseOpBuilder { }; void BatchNormalizationOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); // skip everything except input0 for BatchNormalization - model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); // scale - model_builder.AddInitializerToSkip(node.InputDefs()[2]->Name()); // B - model_builder.AddInitializerToSkip(node.InputDefs()[3]->Name()); // mean - model_builder.AddInitializerToSkip(node.InputDefs()[4]->Name()); //var + model_builder.AddInitializerToSkip(node_unit.Inputs()[1].node_arg.Name()); // scale + model_builder.AddInitializerToSkip(node_unit.Inputs()[2].node_arg.Name()); // B + model_builder.AddInitializerToSkip(node_unit.Inputs()[3].node_arg.Name()); // mean + model_builder.AddInitializerToSkip(node_unit.Inputs()[4].node_arg.Name()); //var } Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { @@ -1199,24 +1208,22 @@ class PoolOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: + static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; +/* static */ bool PoolOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) { + // TODO, add support for QDQ NodeUnit + return node_unit.OpType() == "QLinearAveragePool"; +} + void PoolOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - const auto& op = node.OpType(); - if (op != "QLinearAveragePool") + if (!IsQuantizedOp(node_unit)) return; - const 
auto input_defs = node.InputDefs(); - // skip input/output scales and zeropoints - model_builder.AddInitializerToSkip(input_defs[1]->Name()); // X_scale - model_builder.AddInitializerToSkip(input_defs[2]->Name()); // X_zero_point - model_builder.AddInitializerToSkip(input_defs[3]->Name()); // Y_scale - - if (input_defs.size() == 5) // has Y_zero_point input - model_builder.AddInitializerToSkip(input_defs[4]->Name()); // Y_zero_point + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Inputs()[0]); // x_scale, x_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp } /* static */ void PoolOpBuilder::CreateSharedOpBuilder( @@ -1359,10 +1366,17 @@ class ConvOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: + static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; -/* static */ void ConvOpBuilder::CreateSharedOpBuilder( +/* static */ bool ConvOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) { + // TODO, add support for QDQ NodeUnit + return node_unit.OpType() == "QLinearConv"; +} + +/* static */ void +ConvOpBuilder::CreateSharedOpBuilder( const std::string& op_type, OpBuilderRegistrations& op_registrations) { CreateSharedOpBuilderImpl( op_type, op_registrations, @@ -1373,18 +1387,16 @@ class ConvOpBuilder : public BaseOpBuilder { } void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - const auto& op = node.OpType(); - const auto input_defs = node.InputDefs(); - + const auto& inputs = node_unit.Inputs(); // skip the weight for conv as we need to transpose - if (op == "QLinearConv") { - AddBinaryOpQuantizationScaleAndZeroPointToSkip(model_builder, node_unit); - model_builder.AddInitializerToSkip(input_defs[3]->Name()); // w - if (input_defs.size() > 8) - model_builder.AddInitializerToSkip(input_defs[8]->Name()); // B + if (IsQuantizedOp(node_unit)) { + AddQuantizationScaleAndZeroPointToSkip(model_builder, inputs[0]); // x_scale, x_zp + AddInputToSkip(model_builder, inputs[1]); // w, w_scale, w_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp + if (inputs.size() > 2) + AddInputToSkip(model_builder, inputs[2]); // B, B_scale, B_zp } else { - model_builder.AddInitializerToSkip(input_defs[1]->Name()); // w + model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); // w } } @@ -1396,8 +1408,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N const auto& initializers(model_builder.GetInitializerTensors()); NodeAttrHelper helper(node); const auto input_defs = node.InputDefs(); - const auto& op_type = node.OpType(); - bool is_qlinear_conv = (op_type == "QLinearConv"); + bool is_qlinear_conv = IsQuantizedOp(node_unit); // onnx strides are in the order height, width // while nnapi strides are in the order width, height @@ -1754,9 +1765,15 @@ class GemmOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: + static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) 
const override ORT_MUST_USE_RESULT; }; +/* static */ bool GemmOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) { + // TODO, add support for QDQ NodeUnit + return node_unit.OpType() == "QLinearMatMul"; +} + /* static */ void GemmOpBuilder::CreateSharedOpBuilder( const std::string& op_type, OpBuilderRegistrations& op_registrations) { CreateSharedOpBuilderImpl( @@ -1769,20 +1786,22 @@ class GemmOpBuilder : public BaseOpBuilder { } void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - - const auto& op = node.OpType(); - const auto input_defs(node.InputDefs()); - if (op == "MatMul") { - model_builder.AddInitializerToSkip(input_defs[1]->Name()); - } else if (op == "Gemm") { - NodeAttrHelper helper(node); - const auto transB = helper.Get("transB", 0); - if (transB == 0) - model_builder.AddInitializerToSkip(input_defs[1]->Name()); - } else if (op == "QLinearMatMul") { - AddBinaryOpQuantizationScaleAndZeroPointToSkip(model_builder, node_unit); - model_builder.AddInitializerToSkip(input_defs[3]->Name()); // b + const auto& inputs = node_unit.Inputs(); + if (IsQuantizedOp(node_unit)) { + AddQuantizationScaleAndZeroPointToSkip(model_builder, inputs[0]); // b_scale, b_zp + AddInputToSkip(model_builder, inputs[1]); // b, b_scale, b_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp + } else { + const auto& op = node_unit.OpType(); + const auto& inputs = node_unit.Inputs(); + if (op == "MatMul") { + model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); + } else if (op == "Gemm") { + NodeAttrHelper helper(node_unit.GetNode()); + const auto transB = helper.Get("transB", 0); + if (transB == 0) + model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); + } } } @@ -1913,24 +1932,21 @@ class UnaryOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: + static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; +/* static */ bool UnaryOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) { + // TODO, add support for QDQ NodeUnit + return node_unit.OpType() == "QLinearSigmoid"; +} + void UnaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - const auto& op = node.OpType(); - if (op != "QLinearSigmoid") + if (!IsQuantizedOp(node_unit)) return; - const auto input_defs = node.InputDefs(); - - // skip input/output scales and zeropoints - model_builder.AddInitializerToSkip(input_defs[1]->Name()); // X_scale - model_builder.AddInitializerToSkip(input_defs[2]->Name()); // X_zero_point - model_builder.AddInitializerToSkip(input_defs[3]->Name()); // Y_scale - - if (input_defs.size() == 5) // has Y_zero_point input - model_builder.AddInitializerToSkip(input_defs[4]->Name()); // Y_zero_point + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Inputs()[0]); // x_scale, x_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp } /* static */ void UnaryOpBuilder::CreateSharedOpBuilder( @@ -2120,9 +2136,8 @@ class SqueezeOpBuilder : public BaseOpBuilder { }; void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) 
const { - const auto& node = node_unit.GetNode(); - if (node.SinceVersion() > 12 && node.InputDefs().size() > 1) { - model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); + if (node_unit.SinceVersion() > 12 && node_unit.Inputs().size() > 1) { + model_builder.AddInitializerToSkip(node_unit.Inputs()[1].node_arg.Name()); } } @@ -2179,13 +2194,7 @@ class QuantizeLinearOpBuilder : public BaseOpBuilder { }; void QuantizeLinearOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - const auto input_defs(node.InputDefs()); - - model_builder.AddInitializerToSkip(input_defs[1]->Name()); - - if (input_defs.size() == 3) // has zero_point input - model_builder.AddInitializerToSkip(input_defs[2]->Name()); + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp } Status QuantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { @@ -2229,13 +2238,7 @@ class DequantizeLinearOpBuilder : public BaseOpBuilder { }; void DequantizeLinearOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - const auto input_defs(node.InputDefs()); - - model_builder.AddInitializerToSkip(input_defs[1]->Name()); - - if (input_defs.size() == 3) // has zero_point input - model_builder.AddInitializerToSkip(input_defs[2]->Name()); + AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Inputs()[0]); // x_scale, x_zp } Status DequantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { @@ -2340,12 +2343,12 @@ class ClipOpBuilder : public BaseOpBuilder { }; void ClipOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - if (node.InputDefs().size() > 1) - model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); // min + const auto& inputs = node_unit.Inputs(); + if (inputs.size() > 1) + model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); // min - if (node.InputDefs().size() > 2) - model_builder.AddInitializerToSkip(node.InputDefs()[2]->Name()); // max + if (inputs.size() > 2) + model_builder.AddInitializerToSkip(inputs[2].node_arg.Name()); // max } Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { @@ -2400,16 +2403,16 @@ class ResizeOpBuilder : public BaseOpBuilder { }; void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); + const auto& inputs = node_unit.Inputs(); // We don't really use ROI here, so add them to skipped list - model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); // ROI + model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); // ROI // We will still add scales to the skipped list even sizes are present // since there is no use of it, we will not process it later - model_builder.AddInitializerToSkip(node.InputDefs()[2]->Name()); // scales + model_builder.AddInitializerToSkip(inputs[2].node_arg.Name()); // scales - if (node.InputDefs().size() > 3) - model_builder.AddInitializerToSkip(node.InputDefs()[3]->Name()); // sizes + if (inputs.size() > 3) + model_builder.AddInitializerToSkip(inputs[3].node_arg.Name()); // sizes } Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& 
node_unit) const { @@ -2641,16 +2644,14 @@ class SliceOpBuilder : public BaseOpBuilder { }; void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - // Skip everything except input0 for Slice - const auto input_defs = node.InputDefs(); - model_builder.AddInitializerToSkip(input_defs[1]->Name()); // starts - model_builder.AddInitializerToSkip(input_defs[2]->Name()); // ends - if (input_defs.size() > 3) { - model_builder.AddInitializerToSkip(input_defs[3]->Name()); // axes - if (input_defs.size() > 4) { - model_builder.AddInitializerToSkip(input_defs[4]->Name()); // steps + const auto& inputs = node_unit.Inputs(); + model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); // starts + model_builder.AddInitializerToSkip(inputs[2].node_arg.Name()); // ends + if (inputs.size() > 3) { + model_builder.AddInitializerToSkip(inputs[3].node_arg.Name()); // axes + if (inputs.size() > 4) { + model_builder.AddInitializerToSkip(inputs[4].node_arg.Name()); // steps } } } From 5252da79b9bfe93aae4d45f9f6bb5fbe82a079cc Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Sat, 18 Dec 2021 19:35:00 -0800 Subject: [PATCH 05/23] minor fix --- .../nnapi_builtin/builders/op_builder.cc | 5 ++- .../providers/shared/node_unit/node_unit.cc | 40 +++++++++---------- .../providers/shared/node_unit/node_unit.h | 8 ++-- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 17acf39a4635c..c4e1f103a0212 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -657,14 +657,17 @@ static Status IsValidConvWeightQuantizedType(const ModelBuilder& model_builder, static void AddQuantizationScaleAndZeroPointToSkip(ModelBuilder& model_builder, const NodeUnitIODef& io_def) { // If we reach here, we assume the io_def has quant_param model_builder.AddInitializerToSkip(io_def.quant_param->scale.Name()); // scale + LOGS_DEFAULT(VERBOSE) << io_def.quant_param->scale.Name() << "is skipped"; if (io_def.quant_param->zero_point) { model_builder.AddInitializerToSkip(io_def.quant_param->zero_point->Name()); // zero_point + LOGS_DEFAULT(VERBOSE) << io_def.quant_param->zero_point->Name() << "is skipped"; } } static void AddInputToSkip(ModelBuilder& model_builder, const NodeUnitIODef& io_def) { model_builder.AddInitializerToSkip(io_def.node_arg.Name()); // main input - AddQuantizationScaleAndZeroPointToSkip(model_builder, io_def); + if (io_def.quant_param) + AddQuantizationScaleAndZeroPointToSkip(model_builder, io_def); } Status GetQuantizedInputScaleAndZeroPoint(const InitializedTensorSet& initializers, diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.cc b/onnxruntime/core/providers/shared/node_unit/node_unit.cc index 340662e66b6e4..3d761dfbe971f 100644 --- a/onnxruntime/core/providers/shared/node_unit/node_unit.cc +++ b/onnxruntime/core/providers/shared/node_unit/node_unit.cc @@ -102,8 +102,9 @@ void NodeUnit::InitForNode() { // The 1st step is to hookup the NodeUnit with the NNAPI builder interface // So we are not handling quantization here now auto qlinear_type = GetQLinearOpType(node_); - if (qlinear_type == QLinearOpType::Unknown) { - //Not a Qlinear op, add all inputs / outputs + if (qlinear_type == QLinearOpType::Unknown || + IsVariadicQLinearOp(qlinear_type)) { // TODO, 
add variadic support + // Not a Qlinear op, add all inputs / outputs auto add_all_io = [](std::vector& defs, const ConstPointerContainer>& node_defs) { defs.reserve(node_defs.size()); @@ -112,63 +113,60 @@ void NodeUnit::InitForNode() { defs.push_back(NodeUnitIODef{*def, std::nullopt}); } }; - add_all_io(input_defs_, input_defs); - add_all_io(output_defs_, output_defs); + add_all_io(inputs_, input_defs); + add_all_io(outputs_, output_defs); } else if (IsUnaryQLinearOp(qlinear_type)) { // Unary QLinear Op has 5 inputs // x, x_scale, x_zp, y_scale, y_zp (optional) - input_defs_.push_back(NodeUnitIODef{ + inputs_.push_back(NodeUnitIODef{ *input_defs[0], NodeUnitIODef::QuantParam{*input_defs[1], input_defs[2]}}); - output_defs_.push_back(NodeUnitIODef{ + outputs_.push_back(NodeUnitIODef{ *output_defs[0], NodeUnitIODef::QuantParam{*input_defs[3], - input_defs_.size() > 4 + input_defs.size() > 4 ? input_defs[4] : nullptr}}); } else if (IsBinaryQLinearOp(qlinear_type)) { // Binary QLinear Op has 9 inputs // x1, x1_scale, x1_zp, x2/w, x2_scale, x2_zp, y_scale , y_zp, B - input_defs_.push_back(NodeUnitIODef{ + inputs_.push_back(NodeUnitIODef{ *input_defs[0], NodeUnitIODef::QuantParam{*input_defs[1], input_defs[2]}}); - input_defs_.push_back(NodeUnitIODef{ + inputs_.push_back(NodeUnitIODef{ *input_defs[3], NodeUnitIODef::QuantParam{*input_defs[4], input_defs[5]}}); - if (input_defs_.size() == 9) { // has Bias - input_defs_.push_back(NodeUnitIODef{ + if (input_defs.size() == 9) { // has Bias + inputs_.push_back(NodeUnitIODef{ *input_defs[8], std::nullopt}); // for Bias the scale and zp are optional } - output_defs_.push_back(NodeUnitIODef{ + outputs_.push_back(NodeUnitIODef{ *output_defs[0], NodeUnitIODef::QuantParam{*input_defs[6], input_defs[7]}}); - } else if (IsVariadicQLinearOp(qlinear_type)) { - // TODO, add variadic support - ORT_NOT_IMPLEMENTED(); } else if (qlinear_type == QLinearOpType::DequantizeLinear) { // DequantizeLinear has 3 inputs // x, x_scale, x_zp // output is not quantized - input_defs_.push_back(NodeUnitIODef{ + inputs_.push_back(NodeUnitIODef{ *input_defs[0], NodeUnitIODef::QuantParam{*input_defs[1], - input_defs_.size() == 3 + input_defs.size() == 3 ? input_defs[2] : nullptr}}); - output_defs_.push_back(NodeUnitIODef{*output_defs[0], std::nullopt}); + outputs_.push_back(NodeUnitIODef{*output_defs[0], std::nullopt}); } else if (qlinear_type == QLinearOpType::QuantizeLinear) { // QuantizeLinear the input is not quantized and has 3 inputs // x, y_scale, y_zp (optional) // The output is quantized - input_defs_.push_back(NodeUnitIODef{*input_defs[0], std::nullopt}); - output_defs_.push_back(NodeUnitIODef{ + inputs_.push_back(NodeUnitIODef{*input_defs[0], std::nullopt}); + outputs_.push_back(NodeUnitIODef{ *output_defs[0], NodeUnitIODef::QuantParam{*input_defs[1], - input_defs_.size() == 3 + input_defs.size() == 3 ? 
input_defs[2] : nullptr}}); } else { diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.h b/onnxruntime/core/providers/shared/node_unit/node_unit.h index 0e4e9687f6c98..1d02854c327b4 100644 --- a/onnxruntime/core/providers/shared/node_unit/node_unit.h +++ b/onnxruntime/core/providers/shared/node_unit/node_unit.h @@ -52,8 +52,8 @@ class NodeUnit { Type UnitType() const noexcept { return type_; } - const std::vector& Inputs() const noexcept { return input_defs_; } - const std::vector& Outputs() const noexcept { return output_defs_; } + const std::vector& Inputs() const noexcept { return inputs_; } + const std::vector& Outputs() const noexcept { return outputs_; } const std::string& Domain() const noexcept; const std::string& OpType() const noexcept; @@ -68,8 +68,8 @@ class NodeUnit { const std::vector GetAllNodes() const noexcept { return nodes_; } private: - std::vector input_defs_; - std::vector output_defs_; + std::vector inputs_; + std::vector outputs_; const std::vector nodes_; // all nodes in this NodeUnit const Node& node_; // target Node From 6189bb16234f970995126a184b39d3eec7e15ccb Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 3 Jan 2022 15:20:39 -0800 Subject: [PATCH 06/23] Move get quantized inputs to NodeUnit --- .../nnapi/nnapi_builtin/builders/helper.cc | 57 ++++++++++++++ .../nnapi/nnapi_builtin/builders/helper.h | 13 +++- .../nnapi_builtin/builders/model_builder.cc | 74 ++++++++++++------- .../nnapi_builtin/builders/model_builder.h | 22 +++++- 4 files changed, 135 insertions(+), 31 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index edbcaf590c379..b26d8cef94f76 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -331,6 +331,63 @@ common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers return Status::OK(); } +common::Status GetQuantizationScaleAndZeroPoint( + const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path, + float& scale, int32_t& zero_point) { + scale = 0.0f; + zero_point = 0; + + if (!io_def.quant_param) { // Not a quantized IO + return Status::OK(); + } + + const auto unpack_tensor = [&model_path](const InitializedTensorSet& initializers, + const std::string& name, std::vector& unpacked_tensor) { + unpacked_tensor.clear(); + const auto& tensor = *initializers.at(name); + ORT_RETURN_IF_ERROR( + onnxruntime::utils::UnpackInitializerData(tensor, model_path, unpacked_tensor)); + return Status::OK(); + }; + + const auto& quant_param = *io_def.quant_param; + { // get the scale + std::vector unpacked_tensor; + const auto& name = quant_param.scale.Name(); + ORT_RETURN_IF_ERROR(unpack_tensor(initializers, name, unpacked_tensor)); + // The scale should be one or more floats + ORT_RETURN_IF(unpacked_tensor.size() < 4, + "The initializer [", name, "] should have one or more floats ", + "with size no less than 4, actual size: ", unpacked_tensor.size()); + scale = reinterpret_cast(unpacked_tensor.data())[0]; + } + + if (quant_param.zero_point) { // get the zero point if it's there + std::vector unpacked_tensor; + const auto& name = quant_param.zero_point->Name(); + ORT_RETURN_IF_ERROR(unpack_tensor(initializers, name, unpacked_tensor)); + ORT_RETURN_IF(unpacked_tensor.empty(), "The initializer [", name, "] is empty"); + // Onnx quantization uses uint8 [int8 not yet supported], need 
to cast to int32_t used by NNAPI + zero_point = static_cast(unpacked_tensor[0]); + } + + return Status::OK(); +} + +common::Status GetQuantizationScaleAndZeroPoint( + const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::string& name, + float& scale, int32_t& zero_point, bool is_input) { + const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs(); + for (const auto& io_def : io_defs) { + if (io_def.node_arg.Name() == name) + return GetQuantizationScaleAndZeroPoint(initializers, io_def, node_unit.GetNode().ModelPath(), + scale, zero_point); + } + + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Unknown input: ", name, ", for NodeUnit with node index: ", node_unit.Index()); +} + bool GetShape(const NodeArg& node_arg, Shape& shape) { shape.clear(); const auto* shape_proto = node_arg.Shape(); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index d8d89269c9f55..62d0d8e87915a 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -26,10 +26,13 @@ namespace onnxruntime { using Shape = std::vector; using InitializerMap = std::unordered_map; +class GraphViewer; class Node; class NodeArg; class NodeUnit; -class GraphViewer; +class Path; + +struct NodeUnitIODef; namespace nnapi { @@ -117,6 +120,14 @@ common::Status GetQuantizationScale(const InitializedTensorSet& initializers, co common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT; +common::Status GetQuantizationScaleAndZeroPoint( + const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path, + float& scale, int32_t& zero_point); + +common::Status GetQuantizationScaleAndZeroPoint( + const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::string& name, + float& scale, int32_t& zero_point, bool is_input = true); + // Get Shape/Type of a NodeArg // TODO, move to shared_utils bool GetShape(const NodeArg& node_arg, Shape& shape); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index 7136251927e09..8664646a9af5c 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -48,13 +48,12 @@ void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) { skipped_initializers_.insert(tensor_name); } -static std::unordered_map> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer); - Status ModelBuilder::Prepare() { nnapi_model_ = std::unique_ptr(new Model()); RETURN_STATUS_ON_ERROR(nnapi_->ANeuralNetworksModel_create(&nnapi_model_->model_)); ORT_RETURN_IF_ERROR(GetTargetDevices()); - all_quantized_op_inputs_ = GetAllQuantizedOpInputs(graph_viewer_); + PreprocessNodeUnits(); + GetAllQuantizedOpInputs(); PreprocessInitializers(); PreprocessActivations(); ORT_RETURN_IF_ERROR(RegisterInitializers()); @@ -148,42 +147,66 @@ void ModelBuilder::PreprocessActivations() { } } -// Help to get all quantized operators' input and the node(s) using the input -static std::unordered_map> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer) { - std::unordered_map> all_quantized_op_inputs; - const auto& node_indices = 
graph_viewer.GetNodesInTopologicalOrder(); +const NodeUnit& ModelBuilder::GetNodeUnit(const Node* node) const { + // Do we want to throw here if the node is not in the map? + return *node_unit_map_.at(node); +} + +void ModelBuilder::PreprocessNodeUnits() { + // TODO, hookup shared QDQ selectors here to identify all the qdq NodeUnit in the graph + const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); + for (size_t i = 0; i < node_indices.size(); i++) { + const auto node_idx = node_indices[i]; + // TODO, check if the node is already part of a qdq group + const auto* node(graph_viewer_.GetNode(node_idx)); + auto node_unit = std::make_unique(*node); + node_unit_map_.insert({node, node_unit.get()}); + node_unit_holder_.push_back(std::move(node_unit)); + } +} + +// Help to get all quantized operators' input and the NodeUnit(s) using the input +void ModelBuilder::GetAllQuantizedOpInputs() { + const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); for (const auto& node_idx : node_indices) { - const auto* node(graph_viewer.GetNode(node_idx)); - auto qlinear_op_type = GetQLinearOpType(*node); + const auto* node(graph_viewer_.GetNode(node_idx)); + // TODO check if the node_unit has already been processed + const auto& node_unit = GetNodeUnit(node); + + // TODO, hookup getting quantized inputs with QDQ NodeUnits and remove the ORT_ENFORCE + ORT_ENFORCE(node_unit.UnitType() == NodeUnit::Type::SingleNode, "QDQ NodeUnit is not yet implemented"); + + auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode()); // Not a qlinear op if (qlinear_op_type == QLinearOpType::Unknown) continue; + const auto add_quantized_input = + [&all_quantized_op_inputs = all_quantized_op_inputs_](const NodeUnit& node_unit, size_t input_idx) { + const auto& input_name = node_unit.Inputs()[input_idx].node_arg.Name(); + if (Contains(all_quantized_op_inputs, input_name)) + all_quantized_op_inputs.at(input_name).push_back(&node_unit); + else + all_quantized_op_inputs.emplace(input_name, std::vector{&node_unit}); + }; + // All qlinear ops EXCEPT QuantizeLinear has quantized input if (qlinear_op_type != QLinearOpType::QuantizeLinear) { - const auto& input_name = node->InputDefs()[0]->Name(); - if (Contains(all_quantized_op_inputs, input_name)) - all_quantized_op_inputs.at(input_name).push_back(node); - else - all_quantized_op_inputs.emplace(input_name, std::vector{node}); + add_quantized_input(node_unit, 0); } if (IsQLinearBinaryOp(qlinear_op_type)) { - const auto& input_name = node->InputDefs()[3]->Name(); - if (Contains(all_quantized_op_inputs, input_name)) - all_quantized_op_inputs.at(input_name).push_back(node); - else - all_quantized_op_inputs.emplace(input_name, std::vector{node}); + add_quantized_input(node_unit, 1); } - } - return all_quantized_op_inputs; + // TODO, add handling for varidiac nodes such as QLinearConcat + } } static Status GetInputDataType( const InitializedTensorSet& initializers, - const std::unordered_map>& all_quantized_op_inputs, + const std::unordered_map>& all_quantized_op_inputs, const std::string& name, int32_t data_type, const Shape& shape, OperandType& operand_type) { Type type = Type::TENSOR_FLOAT32; @@ -206,10 +229,9 @@ static Status GetInputDataType( } // TODO, verify the scale and zero point match if there are multiple op using same input - const auto* node = all_quantized_op_inputs.at(name)[0]; - const NodeUnit node_unit(*node); - ORT_RETURN_IF_ERROR(GetQuantizedInputScaleAndZeroPoint( - initializers, node_unit, name, scale, zero_point)); + const auto* 
node_unit = all_quantized_op_inputs.at(name)[0]; + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + initializers, *node_unit, name, scale, zero_point, true /* is_input */)); break; } // case ONNX_NAMESPACE::TensorProto_DataType_INT8: diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index d7dfd78ac0a43..73c99486a959a 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -12,6 +12,9 @@ #include "shaper.h" namespace onnxruntime { + +class NodeUnit; + namespace nnapi { class IOpBuilder; @@ -33,7 +36,6 @@ class ModelBuilder { }; ModelBuilder(const GraphViewer& graph_viewer); - ~ModelBuilder() = default; Status Compile(std::unique_ptr& model) ORT_MUST_USE_RESULT; @@ -149,9 +151,14 @@ class ModelBuilder { std::vector input_index_vec_; std::vector output_index_vec_; - // Contains all quantized operators' input and the node(s) using the input - // In the form of {input_name, [node(s) using the input]} - std::unordered_map> all_quantized_op_inputs_; + // Contains all quantized operators' input and the NodeUnit(s) using the input + // In the form of {input_name, [NodeUnit(s) using the input]} + std::unordered_map> all_quantized_op_inputs_; + + // Holder for the NodeUnits in the graph, this will guarantee the NodeUnits is + // valid throughout the lifetime of the ModelBuilder + std::vector> node_unit_holder_; + std::unordered_map node_unit_map_; std::unordered_set unique_names_; @@ -180,6 +187,13 @@ class ModelBuilder { // After constructing the NNAPI model, will set the shape inferencing record to the Model void RegisterModelShaper(); + // Get all quantized inputs in the underlying graph_viewer + void GetAllQuantizedOpInputs(); + // Go through the underlying graph_viewer, and generate NodeUnits + void PreprocessNodeUnits(); + // Get the NodeUnit which contains the given node + const NodeUnit& GetNodeUnit(const Node* node) const; + Status SetOperandValue(uint32_t index, Model::NNMemory* memory, size_t size, size_t offset) ORT_MUST_USE_RESULT; From 2baac120265f9804468d2c5031590fa0080666d3 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 3 Jan 2022 22:44:32 -0800 Subject: [PATCH 07/23] move isvalidzp to nodeunit, fix some minor bug --- .../nnapi/nnapi_builtin/builders/helper.cc | 100 ++++++++++++++++++ .../nnapi/nnapi_builtin/builders/helper.h | 3 + .../nnapi_builtin/builders/model_builder.cc | 17 +-- .../nnapi_builtin/builders/model_builder.h | 7 +- 4 files changed, 116 insertions(+), 11 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index b26d8cef94f76..785eca5753334 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -214,6 +214,106 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const return true; } +bool HasValidQuantizationZeroPoint(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const std::vector& indices) { + const auto& op_type = node_unit.OpType(); + auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode()); + bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv); + bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul); + + const auto& inputs = 
node_unit.Inputs(); + for (const auto idx : indices) { + if (idx >= inputs.size()) { + LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx + << " >= input number, " << inputs.size(); + return false; + } + + const auto& input = inputs[idx]; + if (!input.quant_param.has_value()) { + LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx + << " has no quant_param"; + return false; + } + + // zero point is optional here + if (!input.quant_param->zero_point) + return true; + + const auto& zero_point_name = input.quant_param->zero_point->Name(); + const auto& weight_tensor = *initializers.at(input.node_arg.Name()); + if (!Contains(initializers, zero_point_name)) { + LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be an initializer tensor"; + return false; + } + + bool is_conv_matmul_weight = (is_qlinear_conv || is_qlinear_matmul) && idx == 2; + bool is_conv_matmul_u8s8_weight = false; + + if (is_conv_matmul_weight) { + is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8; + } + + const auto& zero_tensor = *initializers.at(zero_point_name); + int64_t zero_dim = zero_tensor.dims().empty() ? 1 : zero_tensor.dims()[0]; + + if (!is_conv_matmul_u8s8_weight) { + if (zero_dim != 1) { + LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, " + << " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+"; + return false; + } + } else { + // For u8s8 Qlinear[Conv/MatMul], we support + // 1. Per-tensor, the weight will be transformed to uint8 later + // 2. Per-channel, only from Android API level 29 + if (zero_tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) { + LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only supports int8 zero point for weight, " + << "actual zero point type: [" << zero_tensor.data_type() << "]"; + return false; + } + + if (zero_dim != 1) { + if (is_qlinear_matmul) { + LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization"; + return false; + } + } + + // For onnx, u8s8 QlinearConv, the weight zero point can be a scalar, + // or a tensor with same channel as weight, for NNAPI we only support it be + // 0 (scalar) or all 0 (tensor), NNAPI will assume the zero point for per-channel + // quantization is 0 there is no input for it + if (weight_tensor.dims()[0] != zero_dim && zero_dim != 1) { + LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight," + << " weight dimension[0] " << weight_tensor.dims()[0] + << " zero point dimension " << zero_dim; + return false; + } + + std::vector unpacked_tensor; + auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, node_unit.ModelPath(), unpacked_tensor); + if (!status.IsOK()) { + LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name + << ", error msg: " << status.ErrorMessage(); + return false; + } + + // Verify all onnx weight zero point(s) are 0(s) + const int8_t* zero_points = reinterpret_cast(unpacked_tensor.data()); + for (size_t i = 0; i < unpacked_tensor.size(); i++) { + if (zero_points[i] != 0) { + LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only support 0 as zero point, " + << "zero_points[" << i << "] has value: " << zero_points[i]; + return false; + } + } + } + } + + return true; +} + bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node, const std::vector& indices) { const auto& op_type = 
node.OpType(); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 62d0d8e87915a..215d21857fd55 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -120,6 +120,9 @@ common::Status GetQuantizationScale(const InitializedTensorSet& initializers, co common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT; +bool HasValidQuantizationZeroPoint(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const std::vector& indices); + common::Status GetQuantizationScaleAndZeroPoint( const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path, float& scale, int32_t& zero_point); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index 8664646a9af5c..f0c209ddf8d25 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -118,8 +118,8 @@ void ModelBuilder::PreprocessInitializers() { const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer_.GetNode(node_indices[i])); - if (const auto* op_builder = GetOpBuilder(*node)) { - const NodeUnit node_unit(*node); + const auto& node_unit = GetNodeUnit(node); + if (const auto* op_builder = GetOpBuilder(node_unit)) { op_builder->AddInitializersToSkip(*this, node_unit); } } @@ -513,12 +513,12 @@ Status ModelBuilder::AddOperations() { const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer_.GetNode(node_indices[i])); - if (const auto* op_builder = GetOpBuilder(*node)) { - const NodeUnit node_unit(*node); + const NodeUnit& node_unit = GetNodeUnit(node); + if (const auto* op_builder = GetOpBuilder(node_unit)) { ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, node_unit)); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Node [", node->Name(), "], type [", node->OpType(), "] is not supported"); + "Node [", node_unit.Name(), "], type [", node_unit.OpType(), "] is not supported"); } } @@ -664,12 +664,13 @@ int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) { return fuse_code; } -/* static */ const IOpBuilder* ModelBuilder::GetOpBuilder(const Node& node) { +/* static */ const IOpBuilder* ModelBuilder::GetOpBuilder(const NodeUnit& node_unit) { const auto& op_builders = GetOpBuilders(); - if (!Contains(op_builders, node.OpType())) + const auto& op_type = node_unit.GetNode().OpType(); + if (!Contains(op_builders, op_type)) return nullptr; - return op_builders.at(node.OpType()); + return op_builders.at(op_type); } std::string ModelBuilder::GetUniqueName(const std::string& base_name) { diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 73c99486a959a..9b7dd0a289aad 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -109,6 +109,9 @@ class ModelBuilder { bool 
GetNCHWOperand(const std::string& nhwc_name, std::string& nchw_name); bool GetNHWCOperand(const std::string& nchw_name, std::string& nhwc_name); + // Get the NodeUnit which contains the given node + const NodeUnit& GetNodeUnit(const Node* node) const; + Status SetNHWCToNCHWOperandMap(const std::string& nhwc_name, const std::string& nchw_name) ORT_MUST_USE_RESULT; Status SetNCHWToNHWCOperandMap(const std::string& nchw_name, @@ -191,8 +194,6 @@ class ModelBuilder { void GetAllQuantizedOpInputs(); // Go through the underlying graph_viewer, and generate NodeUnits void PreprocessNodeUnits(); - // Get the NodeUnit which contains the given node - const NodeUnit& GetNodeUnit(const Node* node) const; Status SetOperandValue(uint32_t index, Model::NNMemory* memory, size_t size, size_t offset) ORT_MUST_USE_RESULT; @@ -203,7 +204,7 @@ class ModelBuilder { bool is_nhwc, uint32_t& index) ORT_MUST_USE_RESULT; - static const IOpBuilder* GetOpBuilder(const Node& node); + static const IOpBuilder* GetOpBuilder(const NodeUnit& node_unit); }; } // namespace nnapi From 5504358ae33d294c46c0a60c56083dab5ccee835 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 3 Jan 2022 23:34:08 -0800 Subject: [PATCH 08/23] Let HasValidQuantizationZeroPoints handle output def of node_unit --- .../nnapi/nnapi_builtin/builders/helper.cc | 25 ++++++++++--------- .../nnapi/nnapi_builtin/builders/helper.h | 4 +-- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 785eca5753334..af038c08eeffc 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -214,40 +214,41 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const return true; } -bool HasValidQuantizationZeroPoint(const InitializedTensorSet& initializers, const NodeUnit& node_unit, - const std::vector& indices) { +bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const std::vector& indices, bool is_input) { const auto& op_type = node_unit.OpType(); auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode()); bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv); bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul); - const auto& inputs = node_unit.Inputs(); + const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs(); for (const auto idx : indices) { - if (idx >= inputs.size()) { - LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx - << " >= input number, " << inputs.size(); + if (idx >= io_defs.size()) { + LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, " + << (is_input ? 
"Input" : "Output") << " index, " << idx + << " >= input number, " << io_defs.size(); return false; } - const auto& input = inputs[idx]; - if (!input.quant_param.has_value()) { + const auto& io_def = io_defs[idx]; + if (!io_def.quant_param.has_value()) { LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx << " has no quant_param"; return false; } // zero point is optional here - if (!input.quant_param->zero_point) + if (!io_def.quant_param->zero_point) return true; - const auto& zero_point_name = input.quant_param->zero_point->Name(); - const auto& weight_tensor = *initializers.at(input.node_arg.Name()); + const auto& zero_point_name = io_def.quant_param->zero_point->Name(); + const auto& weight_tensor = *initializers.at(io_def.node_arg.Name()); if (!Contains(initializers, zero_point_name)) { LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be an initializer tensor"; return false; } - bool is_conv_matmul_weight = (is_qlinear_conv || is_qlinear_matmul) && idx == 2; + bool is_conv_matmul_weight = is_input && (is_qlinear_conv || is_qlinear_matmul) && idx == 2; bool is_conv_matmul_u8s8_weight = false; if (is_conv_matmul_weight) { diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 215d21857fd55..7d920cfa1a8eb 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -120,8 +120,8 @@ common::Status GetQuantizationScale(const InitializedTensorSet& initializers, co common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT; -bool HasValidQuantizationZeroPoint(const InitializedTensorSet& initializers, const NodeUnit& node_unit, - const std::vector& indices); +bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const std::vector& indices, bool is_input); common::Status GetQuantizationScaleAndZeroPoint( const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path, From 87c1204cd4e8b7ce2ac8b49afd144c1342c9e72e Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Thu, 6 Jan 2022 15:48:34 -0800 Subject: [PATCH 09/23] move op_support_checked to node_unit --- .../nnapi/nnapi_builtin/builders/helper.cc | 153 +++------- .../nnapi/nnapi_builtin/builders/helper.h | 21 +- .../nnapi_builtin/builders/op_builder.cc | 42 --- .../nnapi/nnapi_builtin/builders/op_builder.h | 5 - .../builders/op_support_checker.cc | 271 +++++++++--------- 5 files changed, 179 insertions(+), 313 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index af038c08eeffc..71f5628211655 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -100,13 +100,13 @@ bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type) { qlinear_op_type == QLinearOpType::QLinearAdd; } -bool HasValidUnaryOpQuantizedInputs(const Node& node) { +bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit) { int32_t input_type; - if (!GetType(*node.InputDefs()[0], input_type)) + if (!GetType(node_unit.Inputs()[0].node_arg, input_type)) return false; if (input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) { - LOGS_DEFAULT(VERBOSE) << "[" << 
node.OpType() + LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] Input type: [" << input_type << "] is not supported for now"; return false; @@ -115,18 +115,18 @@ bool HasValidUnaryOpQuantizedInputs(const Node& node) { return true; } -bool HasValidBinaryOpQuantizedInputs(const Node& node) { - auto op_type = GetQLinearOpType(node); +bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) { + auto op_type = GetQLinearOpType(node_unit.GetNode()); int32_t a_input_type, b_input_type; if (!IsQLinearBinaryOp(op_type)) { - LOGS_DEFAULT(VERBOSE) << "[" << node.OpType() << "] is not a binary qlinear op"; + LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] is not a binary qlinear op"; return false; } - const auto input_defs(node.InputDefs()); - if (!GetType(*input_defs[0], a_input_type)) + const auto& inputs = node_unit.Inputs(); + if (!GetType(inputs[0].node_arg, a_input_type)) return false; - if (!GetType(*input_defs[3], b_input_type)) + if (!GetType(inputs[1].node_arg, b_input_type)) return false; // QlinearConv supports u8u8 or u8s8 @@ -139,7 +139,7 @@ bool HasValidBinaryOpQuantizedInputs(const Node& node) { if (a_input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 || (!is_qlinear_conv && a_input_type != b_input_type) || (is_qlinear_conv && !has_valid_qlinear_conv_weight)) { - LOGS_DEFAULT(VERBOSE) << "[" << node.OpType() + LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] A Input type: [" << a_input_type << "] B Input type: [" << b_input_type << "] is not supported for now"; @@ -149,32 +149,41 @@ bool HasValidBinaryOpQuantizedInputs(const Node& node) { return true; } -bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const Node& node, - const std::vector& indices, const OpSupportCheckParams& params) { - const auto& op_type = node.OpType(); - auto qlinear_op_type = GetQLinearOpType(node); +bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const std::vector& indices, const OpSupportCheckParams& params, bool is_input) { + const auto& op_type = node_unit.OpType(); + auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode()); bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv); bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul); - const auto input_defs(node.InputDefs()); + const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs(); for (const auto idx : indices) { - if (idx >= input_defs.size()) { - LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationScales, Input index, " << idx - << " >= input number, " << input_defs.size(); + if (idx >= io_defs.size()) { + LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationScales, " + << (is_input ? 
"Input" : "Output") << " index, " << idx + << " >= size, " << io_defs.size(); return false; } - const auto scale_name = input_defs[idx]->Name(); + const auto& io_def = io_defs[idx]; + if (!io_def.quant_param.has_value()) { + LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx + << " has no quant_param"; + return false; + } + + const auto scale_name = io_def.quant_param->scale.Name(); + if (!Contains(initializers, scale_name)) { LOGS_DEFAULT(VERBOSE) << "The scale of " << op_type << " must be an initializer tensor"; return false; } // If this op is Qlinear[Conv/MatMul], we want to check u8s8 support for weight tensor (or B tensor for QlinearMatMul) - bool is_conv_matmul_weight = (is_qlinear_conv || is_qlinear_matmul) && idx == 4; + bool is_conv_matmul_weight = is_input && (is_qlinear_conv || is_qlinear_matmul) && idx == 1; bool is_conv_matmul_u8s8_weight = false; if (is_conv_matmul_weight) { - const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name()); + const auto& weight_tensor = *initializers.at(io_def.node_arg.Name()); is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8; } @@ -201,7 +210,7 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const return false; } - const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name()); + const auto& weight_tensor = *initializers.at(io_def.node_arg.Name()); if (weight_tensor.dims()[0] != scales_dim) { LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight," << " weight dimension[0] " << weight_tensor.dims()[0] @@ -226,7 +235,7 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co if (idx >= io_defs.size()) { LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, " << (is_input ? 
"Input" : "Output") << " index, " << idx - << " >= input number, " << io_defs.size(); + << " >= size, " << io_defs.size(); return false; } @@ -242,16 +251,16 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co return true; const auto& zero_point_name = io_def.quant_param->zero_point->Name(); - const auto& weight_tensor = *initializers.at(io_def.node_arg.Name()); if (!Contains(initializers, zero_point_name)) { LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be an initializer tensor"; return false; } - bool is_conv_matmul_weight = is_input && (is_qlinear_conv || is_qlinear_matmul) && idx == 2; + bool is_conv_matmul_weight = is_input && (is_qlinear_conv || is_qlinear_matmul) && idx == 1; bool is_conv_matmul_u8s8_weight = false; if (is_conv_matmul_weight) { + const auto& weight_tensor = *initializers.at(io_def.node_arg.Name()); is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8; } @@ -285,6 +294,7 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co // or a tensor with same channel as weight, for NNAPI we only support it be // 0 (scalar) or all 0 (tensor), NNAPI will assume the zero point for per-channel // quantization is 0 there is no input for it + const auto& weight_tensor = *initializers.at(io_def.node_arg.Name()); if (weight_tensor.dims()[0] != zero_dim && zero_dim != 1) { LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight," << " weight dimension[0] " << weight_tensor.dims()[0] @@ -315,94 +325,6 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co return true; } -bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node, - const std::vector& indices) { - const auto& op_type = node.OpType(); - auto qlinear_op_type = GetQLinearOpType(node); - bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv); - bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul); - const auto input_defs(node.InputDefs()); - for (const auto idx : indices) { - if (idx >= input_defs.size()) { - LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx - << " >= input number, " << input_defs.size(); - return false; - } - - const auto zero_point_name = input_defs[idx]->Name(); - if (!Contains(initializers, zero_point_name)) { - LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be an initializer tensor"; - return false; - } - - bool is_conv_matmul_weight = is_qlinear_conv && idx == 5; - bool is_conv_matmul_u8s8_weight = false; - if (is_conv_matmul_weight) { - const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name()); - is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8; - } - - const auto& zero_tensor = *initializers.at(zero_point_name); - int64_t zero_dim = zero_tensor.dims().empty() ? 1 : zero_tensor.dims()[0]; - - if (!is_conv_matmul_u8s8_weight) { - if (zero_dim != 1) { - LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, " - << " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+"; - return false; - } - } else { - // For u8s8 Qlinear[Conv/MatMul], we support - // 1. Per-tensor, the weight will be transformed to uint8 later - // 2. 
Per-channel, only from Android API level 29 - if (zero_tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) { - LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only supports int8 zero point for weight, " - << "actual zero point type: [" << zero_tensor.data_type() << "]"; - return false; - } - - if (zero_dim != 1) { - if (is_qlinear_matmul) { - LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization"; - return false; - } - } - - // For onnx, u8s8 QlinearConv, the weight zero point can be a scalar, - // or a tensor with same channel as weight, for NNAPI we only support it be - // 0 (scalar) or all 0 (tensor), NNAPI will assume the zero point for per-channel - // quantization is 0 there is no input for it - const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name()); - if (weight_tensor.dims()[0] != zero_dim && zero_dim != 1) { - LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight," - << " weight dimension[0] " << weight_tensor.dims()[0] - << " zero point dimension " << zero_dim; - return false; - } - - std::vector unpacked_tensor; - auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, node.ModelPath(), unpacked_tensor); - if (!status.IsOK()) { - LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name - << ", error msg: " << status.ErrorMessage(); - return false; - } - - // Verify all onnx weight zero point(s) are 0(s) - const int8_t* zero_points = reinterpret_cast(unpacked_tensor.data()); - for (size_t i = 0; i < unpacked_tensor.size(); i++) { - if (zero_points[i] != 0) { - LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only support 0 as zero point, " - << "zero_points[" << i << "] has value: " << zero_points[i]; - return false; - } - } - } - } - - return true; -} - common::Status GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node, size_t idx, float& scale) { std::vector unpacked_tensor; @@ -439,7 +361,8 @@ common::Status GetQuantizationScaleAndZeroPoint( zero_point = 0; if (!io_def.quant_param) { // Not a quantized IO - return Status::OK(); + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "NodeArg: ", io_def.node_arg.Name(), " is not quantized"); } const auto unpack_tensor = [&model_path](const InitializedTensorSet& initializers, @@ -481,7 +404,7 @@ common::Status GetQuantizationScaleAndZeroPoint( const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs(); for (const auto& io_def : io_defs) { if (io_def.node_arg.Name() == name) - return GetQuantizationScaleAndZeroPoint(initializers, io_def, node_unit.GetNode().ModelPath(), + return GetQuantizationScaleAndZeroPoint(initializers, io_def, node_unit.ModelPath(), scale, zero_point); } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 7d920cfa1a8eb..35f24dda439fb 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -103,23 +103,22 @@ ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& // Such as QLinearConv, QLinearMatMul, QLinearAdd, ... 
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type); -// Check if a qlinear unary op has valid inputs, Qlinear[Sigmoid/AveragePool] -bool HasValidUnaryOpQuantizedInputs(const Node& node); -// Check if a qlinear binary op has valid inputs, Qlinear[Conv/MatMul/Add] -bool HasValidBinaryOpQuantizedInputs(const Node& node); -// Check if a qlinear op has valid scales for given indices -bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const Node& node, - const std::vector& indices, const OpSupportCheckParams& params); -// Check if a qlinear op has valid zero points for given indices -bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node, - const std::vector& indices); - common::Status GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node, size_t idx, float& scale); common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT; +// Check if a qlinear unary op has valid inputs, Qlinear[Sigmoid/AveragePool] +bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit); +// Check if a qlinear binary op has valid inputs, Qlinear[Conv/MatMul/Add] +bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit); + +// Check if a qlinear op has valid scales for given indices +bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const std::vector& indices, const OpSupportCheckParams& params, bool is_input); + +// Check if a qlinear op has valid zero points for given indices bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::vector& indices, bool is_input); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index c4e1f103a0212..bc9fa92f30e06 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -670,48 +670,6 @@ static void AddInputToSkip(ModelBuilder& model_builder, const NodeUnitIODef& io_ AddQuantizationScaleAndZeroPointToSkip(model_builder, io_def); } -Status GetQuantizedInputScaleAndZeroPoint(const InitializedTensorSet& initializers, - const NodeUnit& node_unit, - const std::string& input_name, - float& scale, - int32_t& zero_point) { - const auto& node = node_unit.GetNode(); - const auto& op_type = node.OpType(); - auto qlinear_op_type = GetQLinearOpType(node); - assert(qlinear_op_type != QLinearOpType::Unknown && - qlinear_op_type != QLinearOpType::QuantizeLinear); - - size_t scale_idx, zero_point_idx; - if (qlinear_op_type == QLinearOpType::DequantizeLinear || - qlinear_op_type == QLinearOpType::QLinearSigmoid || - qlinear_op_type == QLinearOpType::QLinearAveragePool) { - scale_idx = 1; - zero_point_idx = 2; - } else if (IsQLinearBinaryOp(qlinear_op_type)) { - const auto input_defs(node.InputDefs()); - if (input_name == input_defs[0]->Name()) { - scale_idx = 1; - zero_point_idx = 2; - } else if (input_name == input_defs[3]->Name()) { - scale_idx = 4; - zero_point_idx = 5; - } else { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Unknown input: ", input_name, ", for op: ", op_type); - } - } else { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported op: ", op_type); - } - - ORT_RETURN_IF_ERROR(GetQuantizationScale(initializers, node, scale_idx, scale)); - zero_point = 
0; - if (node.InputDefs().size() > zero_point_idx) { - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, zero_point_idx, zero_point)); - } - - return Status::OK(); -} - template void CreateSharedOpBuilderImpl(const std::string& op_type, OpBuilderRegistrations& op_registrations, diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.h index 46acbc4eff4b9..f25a6591040bc 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.h @@ -43,10 +43,5 @@ const std::unordered_map& GetOpBuilders(); common::Status TransposeNHWCToNCHW(ModelBuilder& model_builder, const std::string& input, const std::string& output) ORT_MUST_USE_RESULT; -// Get the quantized input's scale and zero point for the given input -common::Status GetQuantizedInputScaleAndZeroPoint(const InitializedTensorSet& initializers, - const NodeUnit& node_unit, const std::string& input_name, - float& scale, int32_t& zero_point) ORT_MUST_USE_RESULT; - } // namespace nnapi } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc index 331b2d2e9e745..262440859843c 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc @@ -22,19 +22,37 @@ struct OpSupportCheckerRegistrations { std::unordered_map op_support_checker_map; }; -bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node) { - for (const auto* node_arg : node.InputDefs()) { - const auto& input_name(node_arg->Name()); - if (!Contains(initializers, input_name)) - continue; - - const auto& tensor = *initializers.at(input_name); - if (tensor.has_data_location() && - tensor.data_location() == ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL) { - LOGS_DEFAULT(VERBOSE) << "Initializer [" << input_name - << "] with external data location are not currently supported"; +bool HasExternalInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit) { + const auto is_ext_initializer = + [&](const NodeArg& node_arg) { + const auto& input_name(node_arg.Name()); + if (!Contains(initializers, input_name)) + return false; + + const auto& tensor = *initializers.at(input_name); + if (tensor.has_data_location() && + tensor.data_location() == ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL) { + LOGS_DEFAULT(VERBOSE) << "Initializer [" << input_name + << "] with external data location are not currently supported"; + return true; + } + + return false; + }; + + const auto& inputs = node_unit.Inputs(); + for (const auto& input : inputs) { + if (is_ext_initializer(input.node_arg)) + return true; + + if (!input.quant_param) + return false; + + if (is_ext_initializer(input.quant_param->scale)) + return true; + + if (input.quant_param->zero_point && is_ext_initializer(*input.quant_param->zero_point)) return true; - } } return false; @@ -115,10 +133,8 @@ bool BaseOpSupportChecker::IsOpSupported(const InitializedTensorSet& initializer if (!HasSupportedInputs(node_unit)) return false; - const auto& node = node_unit.GetNode(); - // We do not support external initializers for now - if (HasExternalInitializer(initializers, node)) + if (HasExternalInitializer(initializers, node_unit)) return false; if 
(!HasSupportedOpSet(node_unit)) @@ -244,30 +260,25 @@ int BinaryOpSupportChecker::GetMinSupportedOpSet(const NodeUnit& node_unit) cons } bool BinaryOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const { - // TODO, change to use node unit and quant_param of IODef - const auto& node = node_unit.GetNode(); - bool is_qlinear_add = node.OpType() == "QLinearAdd"; - bool is_pow = node.OpType() == "Pow"; + bool is_qlinear_add = node_unit.OpType() == "QLinearAdd"; + bool is_pow = node_unit.OpType() == "Pow"; if (!is_qlinear_add && !is_pow) return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit); if (is_qlinear_add) { // QLinearAdd - if (!HasValidBinaryOpQuantizedInputs(node)) + if (!HasValidBinaryOpQuantizedInputs(node_unit)) return false; } // Pow we only support both input as fp32 now if (is_pow) { - const auto& input1 = *node.InputDefs()[0]; - const auto& input2 = *node.InputDefs()[1]; - int32_t input_type_1; - if (!GetType(input1, input_type_1)) + if (!GetType(node_unit.Inputs()[0].node_arg, input_type_1)) return false; int32_t input_type_2; - if (!GetType(input2, input_type_2)) + if (!GetType(node_unit.Inputs()[1].node_arg, input_type_2)) return false; if (input_type_1 != ONNX_NAMESPACE::TensorProto_DataType_FLOAT || input_type_1 != input_type_2) { @@ -283,24 +294,18 @@ bool BinaryOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) c bool BinaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); - - const auto& op_type(node.OpType()); - const auto input_defs(node.InputDefs()); + const auto& op_type(node_unit.OpType()); + const auto& inputs = node_unit.Inputs(); bool op_is_qlinear = op_type == "QLinearAdd"; - size_t a_idx = 0, b_idx = 1; - if (op_is_qlinear) { - b_idx = 3; - } Shape input1_shape, input2_shape; - if (!GetShape(*input_defs[a_idx], input1_shape) || - !GetShape(*input_defs[b_idx], input2_shape)) + if (!GetShape(inputs[0].node_arg, input1_shape) || + !GetShape(inputs[1].node_arg, input2_shape)) return false; const auto input1_size = input1_shape.size(); const auto input2_size = input2_shape.size(); if (input1_size > 4 || input2_size > 4) { - LOGS_DEFAULT(VERBOSE) << node.OpType() << " only support up to 4d shape, input1 is " + LOGS_DEFAULT(VERBOSE) << op_type << " only support up to 4d shape, input1 is " << input1_size << "d shape, input 2 is " << input2_size << "d shape"; return false; @@ -309,7 +314,7 @@ bool BinaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi if (op_is_qlinear) { // For QLinearAdd, we only support uint8 output now int32_t output_type; - if (!GetType(*node.OutputDefs()[0], output_type)) + if (!GetType(inputs[0].node_arg, output_type)) return false; if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) { @@ -319,13 +324,16 @@ bool BinaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi return false; } - // All scale/zero points are initializer scalars - // a/b/y_scale - if (!HasValidQuantizationScales(initializers, node, {1, 4, 6}, params)) + // Check input scales and ZPs + if (!HasValidQuantizationScales(initializers, node_unit, {0, 1}, params, true /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0, 1}, true /* is_input */)) return false; - // a/b/y_zero_point - if (!HasValidQuantizationZeroPoints(initializers, node, {2, 5, 7})) + // Check output scale and ZP + if 
(!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */)) return false; } @@ -351,9 +359,8 @@ class TransposeOpSupportChecker : public BaseOpSupportChecker { bool TransposeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(node_unit.Inputs()[0].node_arg, input_shape)) return false; const auto input_size = input_shape.size(); @@ -397,15 +404,15 @@ class ReshapeOpSupportChecker : public BaseOpSupportChecker { bool ReshapeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); - const auto& perm_name = node.InputDefs()[1]->Name(); + const auto& inputs = node_unit.Inputs(); + const auto& perm_name = inputs[1].node_arg.Name(); if (!Contains(initializers, perm_name)) { LOGS_DEFAULT(VERBOSE) << "New shape of reshape must be known"; return false; } Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(inputs[0].node_arg, input_shape)) return false; if (input_shape.size() > 4 || input_shape.empty()) { @@ -424,7 +431,7 @@ bool ReshapeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& init const int64_t* raw_perm = reinterpret_cast(unpacked_tensor.data()); const auto perm_size = SafeInt(perm_tensor.dims()[0]); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit.GetNode()); const bool allow_zero = helper.Get("allowzero ", 0) == 1; for (uint32_t i = 0; i < perm_size; i++) { // NNAPI reshape does not support 0 as dimension @@ -610,15 +617,18 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial // the output zero point can be optional bool has_output_zp = input_defs.size() == 5; - if (!HasValidQuantizationScales(initializers, node, {1, 3}, params)) + // Check input scales and ZPs + if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */)) return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */)) + return false; + + // Check output scale and ZP - if (!HasValidQuantizationZeroPoints(initializers, node, - has_output_zp - ? 
std::vector{2} - : std::vector{2, 4})) { + if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */)) return false; - } // NNAPI requires Quantized Average Pool has same scale and zero point for both input and output float input_scale = 0.0f; @@ -678,26 +688,24 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial } bool PoolOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const { - // TODO, change to use node unit and quant_param of IODef - const auto& node = node_unit.GetNode(); - bool is_max_pool = node.OpType() == "MaxPool"; - bool is_qlinear_average_pool = node.OpType() == "QLinearAveragePool"; + bool is_max_pool = node_unit.OpType() == "MaxPool"; + bool is_qlinear_average_pool = node_unit.OpType() == "QLinearAveragePool"; if (!is_max_pool && !is_qlinear_average_pool) return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit); if (is_qlinear_average_pool) { - return HasValidUnaryOpQuantizedInputs(node); + return HasValidUnaryOpQuantizedInputs(node_unit); } // is_max_pool // For max pool, we can support both float and uint8 input int32_t input_type; - if (!GetType(*node.InputDefs()[0], input_type)) + if (!GetType(node_unit.Inputs()[0].node_arg, input_type)) return false; if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT && input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) { - LOGS_DEFAULT(VERBOSE) << "[" << node.OpType() + LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] Input type: [" << input_type << "] is not supported for now"; return false; @@ -738,13 +746,11 @@ class ConvOpSupportChecker : public BaseOpSupportChecker { } bool ConvOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const { - // TODO, change to use node unit and quant_param of IODef - const auto& node = node_unit.GetNode(); - if (node.OpType() != "QLinearConv") + if (node_unit.OpType() != "QLinearConv") return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit); // QLinearConv only supports input of uint8 for now - if (!HasValidBinaryOpQuantizedInputs(node)) + if (!HasValidBinaryOpQuantizedInputs(node_unit)) return false; return true; @@ -810,12 +816,16 @@ bool ConvOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial return false; } - // a/b/y_scale - if (!HasValidQuantizationScales(initializers, node, {1, 4, 6}, params)) + // Check input scales and ZPs + if (!HasValidQuantizationScales(initializers, node_unit, {0, 1}, params, true /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0, 1}, true /* is_input */)) return false; - // a/b/y_zero_point - if (!HasValidQuantizationZeroPoints(initializers, node, {2, 5, 7})) + // Check output scale and ZP + if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */)) return false; } @@ -914,13 +924,11 @@ class GemmOpSupportChecker : public BaseOpSupportChecker { }; bool GemmOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const { - // TODO, change to use node unit and quant_param of IODef - const auto& node = node_unit.GetNode(); - if (node.OpType() != "QLinearMatMul") + if (node_unit.OpType() != "QLinearMatMul") return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit); // QLinearMatMul - if 
(!HasValidBinaryOpQuantizedInputs(node)) + if (!HasValidBinaryOpQuantizedInputs(node_unit)) return false; return true; @@ -1078,12 +1086,16 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial } // All scale/zero points are initializer scalars - // a/b/y_scale - if (!HasValidQuantizationScales(initializers, node, {1, 4, 6}, params)) + // Check input scales and ZPs + if (!HasValidQuantizationScales(initializers, node_unit, {0, 1}, params, true /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0, 1}, true /* is_input */)) return false; - // a/b/y_zero_point - if (!HasValidQuantizationZeroPoints(initializers, node, {2, 5, 7})) + // Check output scale and ZP + if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */)) return false; } } else { @@ -1113,7 +1125,7 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker { int GetMinSupportedOpSet(const NodeUnit& node_unit) const override; - static bool IsQuantizedOpSupported(const InitializedTensorSet& initializers, const Node& node, + static bool IsQuantizedOpSupported(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params); }; @@ -1137,9 +1149,8 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker { bool UnaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); - if (node.OpType() == "QLinearSigmoid") - return IsQuantizedOpSupported(initializers, node, params); + if (node_unit.OpType() == "QLinearSigmoid") + return IsQuantizedOpSupported(initializers, node_unit, params); else // Everything except "QLinearSigmoid" are by default supported return true; } @@ -1160,13 +1171,11 @@ int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const NodeUnit& } bool UnaryOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const { - // TODO, change to use node unit and quant_param of IODef - const auto& node = node_unit.GetNode(); // We only need to override input check for QLinearSigmoid - if (node.OpType() != "QLinearSigmoid") + if (node_unit.OpType() != "QLinearSigmoid") return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit); - return HasValidUnaryOpQuantizedInputs(node); + return HasValidUnaryOpQuantizedInputs(node_unit); } // All ops except "Sin" opset 5- uses consumed_inputs attribute which is not supported for now @@ -1180,35 +1189,35 @@ int UnaryOpSupportChecker::GetMinSupportedOpSet(const NodeUnit& node_unit) const } /* static */ bool UnaryOpSupportChecker::IsQuantizedOpSupported( - const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) { - const auto& op_type = node.OpType(); + const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) { + const auto& op_type = node_unit.OpType(); ORT_ENFORCE(op_type == "QLinearSigmoid"); - const auto& op_name = node.Name(); - const auto input_defs(node.InputDefs()); - // const auto output_defs(node.OutputDefs()); + const auto& op_name = node_unit.Name(); - if (input_defs.size() < 4) + // Check input scales and ZPs + if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */)) return false; - - bool has_output_zp = input_defs.size() == 5; - - if 
(!HasValidQuantizationScales(initializers, node, {1, 3}, params)) + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */)) return false; - if (!HasValidQuantizationZeroPoints(initializers, node, - has_output_zp - ? std::vector{2} - : std::vector{2, 4})) + // Check output scale and ZP + if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */)) return false; + return false; + // NNAPI requires the scale be 1.f/256 and zero point to be 0 // See https://android.googlesource.com/platform/frameworks/ml/+/refs/heads/android10-c2f2-release/nn/common/operations/Activation.cpp#180 float output_scale = 0.0f; - auto status = GetQuantizationScale(initializers, node, 3, output_scale); + int32_t output_zp = 0; + auto status = GetQuantizationScaleAndZeroPoint(initializers, node_unit.Outputs()[0], node_unit.ModelPath(), + output_scale, output_zp); if (!status.IsOK()) { LOGS_DEFAULT(ERROR) << "Op [" << op_type << "] name [" << op_name - << "] GetQuantizationScale failed, message: " << status.ErrorMessage(); + << "] GetQuantizationScaleAndZeroPoint failed, message: " << status.ErrorMessage(); return false; } @@ -1218,20 +1227,10 @@ int UnaryOpSupportChecker::GetMinSupportedOpSet(const NodeUnit& node_unit) const return false; } - int32_t output_zp; - if (has_output_zp) { - status = GetQuantizationZeroPoint(initializers, node, 4, output_zp); - if (!status.IsOK()) { - LOGS_DEFAULT(ERROR) << "Op [" << op_type << "] name [" << op_name - << "] GetQuantizationZeroPoint failed, message: " << status.ErrorMessage(); - return false; - } - - if (output_zp != 0) { - LOGS_DEFAULT(VERBOSE) << "Op [" << op_type << "] name [" << op_name - << "] output zero point can only be 0, actual zero point: " << output_scale; - return false; - } + if (output_zp != 0) { + LOGS_DEFAULT(VERBOSE) << "Op [" << op_type << "] name [" << op_name + << "] output zero point can only be 0, actual zero point: " << output_scale; + return false; } return true; @@ -1299,12 +1298,12 @@ class SqueezeOpSupportChecker : public BaseOpSupportChecker { bool SqueezeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); + const auto& inputs = node_unit.Inputs(); Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(inputs[0].node_arg, input_shape)) return false; - const auto input_size = input_shape.size(); + const auto input_size = inputs.size(); if (input_size > 4 || input_size == 0) { LOGS_DEFAULT(VERBOSE) << "Squeeze only supports 1-4d shape, input is " << input_size << "d shape"; @@ -1312,8 +1311,8 @@ bool SqueezeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& init } // Squeeze opset 13 use input 1 as axes, if we have input 1 then it need to be an initializer - if (node.SinceVersion() > 12 && node.InputDefs().size() > 1) { - const auto& axes_name = node.InputDefs()[1]->Name(); + if (node_unit.SinceVersion() > 12 && input_size > 1) { + const auto& axes_name = inputs[1].node_arg.Name(); if (!Contains(initializers, axes_name)) { LOGS_DEFAULT(VERBOSE) << "Input axes of Squeeze must be known"; return false; @@ -1340,28 +1339,23 @@ class QuantizeLinearOpSupportChecker : public BaseOpSupportChecker { bool QuantizeLinearOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const 
NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); - const auto input_defs(node.InputDefs()); - const auto output_defs(node.OutputDefs()); - int32_t output_type; - if (!GetType(*output_defs[0], output_type)) + if (!GetType(node_unit.Outputs()[0].node_arg, output_type)) return false; if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) { - LOGS_DEFAULT(VERBOSE) << "[" << node.OpType() + LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] output type: [" << output_type << "] is not supported for now"; return false; } - if (!HasValidQuantizationScales(initializers, node, {1}, params)) + // For QuantizeLinear only output is quantized + // Check output scale and ZP + if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */)) return false; - - if (input_defs.size() == 3) { // has zero_point input - if (!HasValidQuantizationZeroPoints(initializers, node, {2})) - return false; - } return true; } @@ -1384,15 +1378,12 @@ class DequantizeLinearOpSupportChecker : public BaseOpSupportChecker { bool DequantizeLinearOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); - const auto input_defs(node.InputDefs()); - if (!HasValidQuantizationScales(initializers, node, {1}, params)) + // For DequantizeLinear only input is quantized + // Check input scale and ZP + if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */)) + return false; + if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */)) return false; - - if (input_defs.size() == 3) { // has zero_point input - if (!HasValidQuantizationZeroPoints(initializers, node, {2})) - return false; - } return true; } From 7ee1ecb7474b6fa9dad9444d7d94f0a2a0dc9976 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Thu, 6 Jan 2022 23:46:18 -0800 Subject: [PATCH 10/23] minor update --- .../providers/nnapi/nnapi_builtin/builders/helper.h | 12 ++++++------ .../nnapi/nnapi_builtin/builders/op_builder.cc | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 35f24dda439fb..8a87f119c72cf 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -103,12 +103,6 @@ ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& // Such as QLinearConv, QLinearMatMul, QLinearAdd, ... 
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type); -common::Status GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node, - size_t idx, float& scale); - -common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, - const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT; - // Check if a qlinear unary op has valid inputs, Qlinear[Sigmoid/AveragePool] bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit); // Check if a qlinear binary op has valid inputs, Qlinear[Conv/MatMul/Add] @@ -122,6 +116,12 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::vector& indices, bool is_input); +common::Status GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node, + size_t idx, float& scale); + +common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, + const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT; + common::Status GetQuantizationScaleAndZeroPoint( const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path, float& scale, int32_t& zero_point); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index bc9fa92f30e06..a1ae271f7d066 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -664,6 +664,8 @@ static void AddQuantizationScaleAndZeroPointToSkip(ModelBuilder& model_builder, } } +// Ignore the input (with quantization scale and ZP if available) +// The input (usually weight) is already embedded in the NNAPI model static void AddInputToSkip(ModelBuilder& model_builder, const NodeUnitIODef& io_def) { model_builder.AddInitializerToSkip(io_def.node_arg.Name()); // main input if (io_def.quant_param) From 221776b9a0082832b37426c55b6dcee3e61a843c Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 7 Jan 2022 12:42:28 -0800 Subject: [PATCH 11/23] add activation handleing for node_unit --- .../nnapi_builtin/builders/model_builder.cc | 99 ++++++++++++++++--- .../nnapi_builtin/builders/model_builder.h | 9 +- .../providers/shared/node_unit/node_unit.cc | 1 + .../providers/shared/node_unit/node_unit.h | 7 +- 4 files changed, 96 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index f0c209ddf8d25..a96c67960b7d3 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -56,6 +56,7 @@ Status ModelBuilder::Prepare() { GetAllQuantizedOpInputs(); PreprocessInitializers(); PreprocessActivations(); + PreprocessActivations_nu(); ORT_RETURN_IF_ERROR(RegisterInitializers()); ORT_RETURN_IF_ERROR(RegisterModelInputs()); ORT_RETURN_IF_ERROR(AddOperations()); @@ -115,12 +116,9 @@ Status ModelBuilder::GetTargetDevices() { } void ModelBuilder::PreprocessInitializers() { - const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); - for (size_t i = 0; i < node_indices.size(); i++) { - const auto* node(graph_viewer_.GetNode(node_indices[i])); - const auto& node_unit = GetNodeUnit(node); - if (const auto* op_builder = 
GetOpBuilder(node_unit)) { - op_builder->AddInitializersToSkip(*this, node_unit); + for (const auto& node_unit : node_unit_holder_) { + if (const auto* op_builder = GetOpBuilder(*node_unit)) { + op_builder->AddInitializersToSkip(*this, *node_unit); } } } @@ -147,6 +145,26 @@ void ModelBuilder::PreprocessActivations() { } } +void ModelBuilder::PreprocessActivations_nu() { + for (const auto& node_unit : node_unit_holder_) { + const auto& node = node_unit->GetNode(); + const auto& op_type(node.OpType()); + if (op_type == "Relu") { + activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU); + } else if (op_type == "Clip") { // Relu1 or Relu6 + float min, max; + if (!GetClipMinMax(GetInitializerTensors(), node, min, max, logging::LoggingManager::DefaultLogger())) + continue; + + if (min == -1.0f && max == 1.0f) { + activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU1); + } else if (min == 0.0f && max == 6.0f) { + activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU6); + } + } + } +} + const NodeUnit& ModelBuilder::GetNodeUnit(const Node* node) const { // Do we want to throw here if the node is not in the map? return *node_unit_map_.at(node); @@ -167,16 +185,11 @@ void ModelBuilder::PreprocessNodeUnits() { // Help to get all quantized operators' input and the NodeUnit(s) using the input void ModelBuilder::GetAllQuantizedOpInputs() { - const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); - for (const auto& node_idx : node_indices) { - const auto* node(graph_viewer_.GetNode(node_idx)); - // TODO check if the node_unit has already been processed - const auto& node_unit = GetNodeUnit(node); - + for (const auto& node_unit : node_unit_holder_) { // TODO, hookup getting quantized inputs with QDQ NodeUnits and remove the ORT_ENFORCE - ORT_ENFORCE(node_unit.UnitType() == NodeUnit::Type::SingleNode, "QDQ NodeUnit is not yet implemented"); + ORT_ENFORCE(node_unit->UnitType() == NodeUnit::Type::SingleNode, "QDQ NodeUnit is not yet implemented"); - auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode()); + auto qlinear_op_type = GetQLinearOpType(node_unit->GetNode()); // Not a qlinear op if (qlinear_op_type == QLinearOpType::Unknown) @@ -193,11 +206,11 @@ void ModelBuilder::GetAllQuantizedOpInputs() { // All qlinear ops EXCEPT QuantizeLinear has quantized input if (qlinear_op_type != QLinearOpType::QuantizeLinear) { - add_quantized_input(node_unit, 0); + add_quantized_input(*node_unit, 0); } if (IsQLinearBinaryOp(qlinear_op_type)) { - add_quantized_input(node_unit, 1); + add_quantized_input(*node_unit, 1); } // TODO, add handling for varidiac nodes such as QLinearConcat @@ -511,15 +524,23 @@ Status ModelBuilder::AddOperandFromPersistMemoryBuffer( Status ModelBuilder::AddOperations() { const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); + std::unordered_set processed_node_units; for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer_.GetNode(node_indices[i])); const NodeUnit& node_unit = GetNodeUnit(node); + + // Since a NodeUnit may contain multiple nodes, avoid processing the same NodeUnit multiple times + if (Contains(processed_node_units, &node_unit)) + continue; + if (const auto* op_builder = GetOpBuilder(node_unit)) { ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, node_unit)); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Node [", node_unit.Name(), "], type [", node_unit.OpType(), "] is not supported"); } + + 
processed_node_units.insert(&node_unit); } return Status::OK(); @@ -625,6 +646,52 @@ Status ModelBuilder::Compile(std::unique_ptr& model) { return Status::OK(); } +int32_t ModelBuilder::FindActivation_nu(const NodeUnit& node_unit, const NodeArg& output) { + (void)node_unit; + int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE; + if (node_unit.GetOutputNodes().size() != 1) + return fuse_code; + + const auto& output_node = *node_unit.GetOutputNodes()[0]; + + // TODO, add support of activation fusion for quantized node group (qdq or qlinear) + // We do not support activation fusion for quantized operators for now + auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode()); + if (qlinear_op_type != QLinearOpType::Unknown) + return fuse_code; + + for (auto it = output_node.OutputEdgesBegin(), end = output_node.OutputEdgesEnd(); it != end; ++it) { + const auto& dst_node = it->GetNode(); + const auto* dst_input = dst_node.InputDefs()[it->GetDstArgIndex()]; + const auto& dst_node_unit = GetNodeUnit(&dst_node); + if (Contains(activation_node_units_, &dst_node_unit)) { + if (&output == dst_input) { + fuse_code = activation_node_units_.at(&dst_node_unit); + } + } else { + // if there is any other non-relu node using the output + // will add relu separately + if (&output == dst_input) + return ANEURALNETWORKS_FUSED_NONE; + } + } + + // if output is a graph output, will add activation separately + if (fuse_code != ANEURALNETWORKS_FUSED_NONE) { + const auto& graph_outputs = graph_viewer_.GetOutputs(); + if (std::find(graph_outputs.cbegin(), graph_outputs.cend(), &output) != graph_outputs.cend()) { + return ANEURALNETWORKS_FUSED_NONE; + } + + LOGS_DEFAULT(VERBOSE) << "Node [" << node_unit.Name() << "] type [" << node_unit.OpType() + << "], fused the output [" << output.Name() << "]"; + + fused_activations_.insert(output.Name()); + } + + return fuse_code; +} + int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) { int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 9b7dd0a289aad..07b8035d96f5a 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -49,6 +49,7 @@ class ModelBuilder { // Find if an output has a fuseable activation (Relu) int32_t FindActivation(const Node& node, const NodeArg& output); + int32_t FindActivation_nu(const NodeUnit& node_unit, const NodeArg& output); // Add an NNAPI scalar operand Status AddOperandFromScalar(bool value, uint32_t& index) ORT_MUST_USE_RESULT; @@ -142,6 +143,9 @@ class ModelBuilder { // All activation nodes (Relu, Relu1, Relu6) as a map std::unordered_map activation_nodes_; + // All activation nodes (Relu, Relu1, Relu6) as a map + std::unordered_map activation_node_units_; + std::unordered_map> op_support_checkers_; // Operands in nhwc @@ -182,6 +186,7 @@ class ModelBuilder { void PreprocessInitializers(); // Preprocess all the activation nodes (Relu/Relu1/Relu6) for easy query later void PreprocessActivations(); + void PreprocessActivations_nu(); // Copy and process all the initializers to NNAPI model Status RegisterInitializers() ORT_MUST_USE_RESULT; Status RegisterModelInputs() ORT_MUST_USE_RESULT; @@ -192,7 +197,9 @@ class ModelBuilder { // Get all quantized inputs in the underlying graph_viewer void GetAllQuantizedOpInputs(); - // Go through the underlying graph_viewer, 
and generate NodeUnits + + // Go through the underlying graph_viewer, and generate NodeUnits, Many initializing functions are + // using the result of PreprocessNodeUnits, this need to run early in the Prepare() void PreprocessNodeUnits(); Status SetOperandValue(uint32_t index, Model::NNMemory* memory, diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.cc b/onnxruntime/core/providers/shared/node_unit/node_unit.cc index 3d761dfbe971f..3c5a829a4d497 100644 --- a/onnxruntime/core/providers/shared/node_unit/node_unit.cc +++ b/onnxruntime/core/providers/shared/node_unit/node_unit.cc @@ -83,6 +83,7 @@ bool IsVariadicQLinearOp(QLinearOpType type) { NodeUnit::NodeUnit(const Node& node) : nodes_{&node}, + output_nodes_{&node}, node_(node), type_(Type::SingleNode) { InitForNode(); diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.h b/onnxruntime/core/providers/shared/node_unit/node_unit.h index 1d02854c327b4..73fcca032a013 100644 --- a/onnxruntime/core/providers/shared/node_unit/node_unit.h +++ b/onnxruntime/core/providers/shared/node_unit/node_unit.h @@ -64,15 +64,16 @@ class NodeUnit { ProviderType GetExecutionProviderType() const noexcept; const Node& GetNode() const noexcept { return node_; } - + const std::vector GetOutputNodes() const noexcept { return output_nodes_; } const std::vector GetAllNodes() const noexcept { return nodes_; } private: std::vector inputs_; std::vector outputs_; - const std::vector nodes_; // all nodes in this NodeUnit - const Node& node_; // target Node + const std::vector nodes_; // all nodes in this NodeUnit + const std::vector output_nodes_; // all the nodes producing outputs for this NodeUnit + const Node& node_; // target Node Type type_; void InitForNode(); // Initializing for single Node From 35292cb58e16e841b3126eb4b35b69bab483ea4c Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 7 Jan 2022 16:03:43 -0800 Subject: [PATCH 12/23] add bin and relu support --- .../nnapi_builtin/builders/op_builder.cc | 104 ++++++++++++++---- 1 file changed, 84 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index a1ae271f7d066..88a2e2824d70c 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -126,6 +126,28 @@ Status TransposeNCHWToNHWC(ModelBuilder& model_builder, return TransposeBetweenNCHWAndNHWC(model_builder, input, output, true /* nchw_to_nhwc */); } +// Convert the input from nchw to nhwc +// Caller should ensure input is currently in nchw format using ModelBuilder::IsOperandNHWC +Status GetNHWCInput_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, size_t input_index, std::string& nhwc_input) { + const auto& nchw_input = node_unit.Inputs()[input_index].node_arg.Name(); + if (!model_builder.GetNHWCOperand(nchw_input, nhwc_input)) { + nhwc_input = model_builder.GetUniqueName(nchw_input + "_nchw_to_nhwc"); + ORT_RETURN_IF_ERROR(TransposeNCHWToNHWC(model_builder, nchw_input, nhwc_input)); + } + return Status::OK(); +} + +// Convert the input from nhwc to nchw +// Caller should ensure input is currently in nhwc format using ModelBuilder::IsOperandNHWC +Status GetNCHWInput_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, size_t input_index, std::string& nchw_input) { + const auto& nhwc_input = node_unit.Inputs()[input_index].node_arg.Name(); + if 
(!model_builder.GetNCHWOperand(nhwc_input, nchw_input)) { + nchw_input = model_builder.GetUniqueName(nhwc_input + "_nhwc_to_nchw"); + ORT_RETURN_IF_ERROR(TransposeNHWCToNCHW(model_builder, nhwc_input, nchw_input)); + } + return Status::OK(); +} + // Convert the input from nchw to nhwc // Caller should ensure input is currently in nchw format using ModelBuilder::IsOperandNHWC Status GetNHWCInput(ModelBuilder& model_builder, const Node& node, size_t input_index, std::string& input) { @@ -148,6 +170,33 @@ Status GetNCHWInput(ModelBuilder& model_builder, const Node& node, size_t input_ return Status::OK(); } +// Transpose layouts if necessary for element wise operators with 2 inputs +// and return the layout type of output tensor +// If both inputs have same layout, the output will have the same layout +// Otherwise we will need transpose the nhwc input back to nchw, and output will be nchw +Status TransposeBinaryOpInputLayout_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, + std::string& input1, std::string& input2, + bool& output_is_nhwc) ORT_MUST_USE_RESULT; +Status TransposeBinaryOpInputLayout_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, + std::string& input1, std::string& input2, + bool& output_is_nhwc) { + bool input1_is_nhwc = model_builder.IsOperandNHWC(input1); + bool input2_is_nhwc = model_builder.IsOperandNHWC(input2); + output_is_nhwc = false; + + if (input1_is_nhwc == input2_is_nhwc) { + output_is_nhwc = input1_is_nhwc; + } else if (input1_is_nhwc) { + // need transpose input1 back to nchw + ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input1)); + } else { // input2_is_nhwc + // need transpose input2 back to nchw + ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 1, input2)); + } + + return Status::OK(); +} + // Transpose layouts if necessary for element wise operators with 2 inputs // and return the layout type of output tensor // If both inputs have same layout, the output will have the same layout @@ -510,6 +559,28 @@ static Status HandleAutoPad(const Shape& input_shape, return Status::OK(); } +// Get scales and zero points for the qlinear binary ops (which has 2 input and 1 output) +// QLinearConv, QLinearMatmul, QLinearAdd +// a, b are inputs, and y is output +static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( + const ModelBuilder& model_builder, const NodeUnit& node_unit, + float& a_scale, float& b_scale, float& y_scale, + int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) ORT_MUST_USE_RESULT; +static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( + const ModelBuilder& model_builder, const NodeUnit& node_unit, + float& a_scale, float& b_scale, float& y_scale, + int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) { + const auto& initializers = model_builder.GetInitializerTensors(); + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + initializers, node_unit.Inputs()[0], node_unit.ModelPath(), a_scale, a_zero_point)); + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + initializers, node_unit.Inputs()[1], node_unit.ModelPath(), b_scale, b_zero_point)); + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + initializers, node_unit.Outputs()[0], node_unit.ModelPath(), y_scale, y_zero_point)); + + return Status::OK(); +} + // Get scales and zero points for the qlinear binary ops (which has 2 input and 1 output) // QLinearConv, QLinearMatmul, QLinearAdd // a, b are inputs, and y is output @@ -758,9 +829,8 @@ void 
BinaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const N } Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - const auto& op_type(node.OpType()); - const auto input_defs(node.InputDefs()); + const auto& op_type(node_unit.OpType()); + const auto& inputs = node_unit.Inputs(); int32_t op_code; bool add_activation = true; @@ -780,18 +850,13 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "UnaryOpBuilder, unknown op: ", op_type); } - size_t a_idx = 0, b_idx = 1; - if (op_is_qlinear) { - b_idx = 3; - } - - std::string input1 = input_defs[a_idx]->Name(); - std::string input2 = input_defs[b_idx]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + std::string input1 = inputs[0].node_arg.Name(); + std::string input2 = inputs[1].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = false; ORT_RETURN_IF_ERROR( - TransposeBinaryOpInputLayout(model_builder, node, a_idx, b_idx, input1, input2, output_is_nhwc)); + TransposeBinaryOpInputLayout_nu(model_builder, node_unit, input1, input2, output_is_nhwc)); float a_scale = 0.0f, b_scale = 0.0f, @@ -801,9 +866,9 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const y_zero_point = 0; if (op_is_qlinear) { - ORT_RETURN_IF_ERROR(GetBinaryOpQuantizationScaleAndZeroPoint(model_builder, node, - a_scale, b_scale, y_scale, - a_zero_point, b_zero_point, y_zero_point)); + ORT_RETURN_IF_ERROR(GetBinaryOpQuantizationScaleAndZeroPoint_nu(model_builder, node_unit, + a_scale, b_scale, y_scale, + a_zero_point, b_zero_point, y_zero_point)); } // Verify if the scale and zero point matchs from onnx input and nnapi input match @@ -814,7 +879,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE; if (add_activation) { - fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]); + fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[0].node_arg); } return AddBinaryOperator(op_code, model_builder, @@ -833,20 +898,19 @@ class ReluOpBuilder : public BaseOpBuilder { }; Status ReluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - const auto& input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); ORT_RETURN_IF_ERROR(shaper.Identity(input, output)); const OperandType output_operand_type(operand_types.at(input).type, shaper[output]); // skip this relu if it is some op's fuse output if (Contains(model_builder.GetFusedActivations(), input)) { - LOGS_DEFAULT(VERBOSE) << "Relu Node [" << node.Name() << "] fused"; + LOGS_DEFAULT(VERBOSE) << "Relu Node [" << node_unit.Name() << "] fused"; model_builder.RegisterOperand(output, operand_indices.at(input), output_operand_type, output_is_nhwc); } else { std::vector input_indices; From 21d055558db5b460d664f5ce47a41998140699c6 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Sat, 8 
Jan 2022 22:28:50 -0800 Subject: [PATCH 13/23] move more ops to node_unit --- .../nnapi_builtin/builders/op_builder.cc | 45 +++++++++---------- .../core/providers/shared/utils/utils.cc | 4 ++ .../core/providers/shared/utils/utils.h | 8 +++- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 88a2e2824d70c..141319bb2be3f 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -563,14 +563,13 @@ static Status HandleAutoPad(const Shape& input_shape, // QLinearConv, QLinearMatmul, QLinearAdd // a, b are inputs, and y is output static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( - const ModelBuilder& model_builder, const NodeUnit& node_unit, + const InitializedTensorSet& initializers, const NodeUnit& node_unit, float& a_scale, float& b_scale, float& y_scale, int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) ORT_MUST_USE_RESULT; static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( - const ModelBuilder& model_builder, const NodeUnit& node_unit, + const InitializedTensorSet& initializers, const NodeUnit& node_unit, float& a_scale, float& b_scale, float& y_scale, int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) { - const auto& initializers = model_builder.GetInitializerTensors(); ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( initializers, node_unit.Inputs()[0], node_unit.ModelPath(), a_scale, a_zero_point)); ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( @@ -866,9 +865,10 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const y_zero_point = 0; if (op_is_qlinear) { - ORT_RETURN_IF_ERROR(GetBinaryOpQuantizationScaleAndZeroPoint_nu(model_builder, node_unit, - a_scale, b_scale, y_scale, - a_zero_point, b_zero_point, y_zero_point)); + ORT_RETURN_IF_ERROR(GetBinaryOpQuantizationScaleAndZeroPoint_nu( + model_builder.GetInitializerTensors(), node_unit, + a_scale, b_scale, y_scale, + a_zero_point, b_zero_point, y_zero_point)); } // Verify if the scale and zero point matchs from onnx input and nnapi input match @@ -932,12 +932,11 @@ class TransposeOpBuilder : public BaseOpBuilder { }; Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); - auto input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); - NodeAttrHelper helper(node); + auto input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); + NodeAttrHelper helper(node_unit); std::vector perm = helper.Get("perm", std::vector()); auto input_dims = shaper[input].size(); if (perm.empty()) { @@ -956,7 +955,7 @@ Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co perm[i] = axis_nchw_to_nhwc[perm[i]]; } - std::string perm_name = model_builder.GetUniqueName(node.Name() + input + "perm"); + std::string perm_name = model_builder.GetUniqueName(node_unit.Name() + input + "perm"); // It is possible this onnx transpose operator can be nchw->nhwc, but so far I don't see // any scenario will do this since onnx is nchw only, assume the output is always not nhwc @@ -1267,15 +1266,13 @@ void PoolOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod 
} Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); - auto input = node.InputDefs()[0]->Name(); + auto input = node_unit.Inputs()[0].node_arg.Name(); bool use_nchw = model_builder.UseNCHW(); bool input_is_nhwc = model_builder.IsOperandNHWC(input); bool output_is_nhwc = false; @@ -1284,12 +1281,12 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } else { output_is_nhwc = true; if (!input_is_nhwc) { - ORT_RETURN_IF_ERROR(GetNHWCInput(model_builder, node, 0, input)); + ORT_RETURN_IF_ERROR(GetNHWCInput_nu(model_builder, node_unit, 0, input)); } } - const auto& output = node.OutputDefs()[0]->Name(); - const auto& op_type = node.OpType(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); + const auto& op_type = node_unit.OpType(); int32_t op_code; bool is_qlinear_average_pool = op_type == "QLinearAveragePool"; @@ -1329,7 +1326,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } } - int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]); + int32_t fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[0].node_arg); // Get output scale and zero point if this is QLinearAveragePool // Otherwise we will use the scale and zero point of the input @@ -1339,16 +1336,14 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N if (is_qlinear_average_pool) { const auto& initializers = model_builder.GetInitializerTensors(); float x_scale = 0.0f; - ORT_RETURN_IF_ERROR(GetQuantizationScale(initializers, node, 1 /* idx */, x_scale)); int32_t x_zero_point = 0; - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 2 /* idx */, x_zero_point)); + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + initializers, node_unit.Inputs()[0], node_unit.ModelPath(), x_scale, x_zero_point)); // Verify if the scale and zero point values from onnx input and nnapi input match ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point)); - - ORT_RETURN_IF_ERROR(GetQuantizationScale(initializers, node, 3 /* idx */, y_scale)); - if (node.InputDefs().size() > 4) - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 4 /* idx */, y_zero_point)); + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + initializers, node_unit.Outputs()[0], node_unit.ModelPath(), y_scale, y_zero_point)); } std::vector input_indices; diff --git a/onnxruntime/core/providers/shared/utils/utils.cc b/onnxruntime/core/providers/shared/utils/utils.cc index 6f38a8e368ea4..d0e33062d7f00 100644 --- a/onnxruntime/core/providers/shared/utils/utils.cc +++ b/onnxruntime/core/providers/shared/utils/utils.cc @@ -8,6 +8,7 @@ #include #include #include +#include "core/providers/shared/node_unit/node_unit.h" namespace onnxruntime { @@ -81,6 +82,9 @@ bool GetClipMinMax(const InitializedTensorSet& initializers, const Node& node, NodeAttrHelper::NodeAttrHelper(const onnxruntime::Node& node) : node_attributes_(node.GetAttributes()) {} +NodeAttrHelper::NodeAttrHelper(const NodeUnit& node_unit) + : node_attributes_(node_unit.GetNode().GetAttributes()) {} + float NodeAttrHelper::Get(const std::string& key, float def_val) const { if 
(!HasAttr(key)) return def_val; diff --git a/onnxruntime/core/providers/shared/utils/utils.h b/onnxruntime/core/providers/shared/utils/utils.h index 925df731fcee3..b6884f53d1c2e 100644 --- a/onnxruntime/core/providers/shared/utils/utils.h +++ b/onnxruntime/core/providers/shared/utils/utils.h @@ -17,6 +17,7 @@ class Logger; class Node; class NodeArg; +class NodeUnit; // Get the min/max of a Clip operator. // If min/max are not known initializer tensors, will return false @@ -34,7 +35,10 @@ bool GetType(const NodeArg& node_arg, int32_t& type, const logging::Logger& logg */ class NodeAttrHelper { public: - NodeAttrHelper(const onnxruntime::Node& node); + NodeAttrHelper(const Node& node); + + // Get the attributes from the target node of the node_unit + NodeAttrHelper(const NodeUnit& node_unit); float Get(const std::string& key, float def_val) const; @@ -52,7 +56,7 @@ class NodeAttrHelper { bool HasAttr(const std::string& key) const; private: - const onnxruntime::NodeAttributes& node_attributes_; + const NodeAttributes& node_attributes_; }; } // namespace onnxruntime From 61b7e7e4ad23e9c98b83e491d368360bd4ce4b55 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 10 Jan 2022 13:42:37 -0800 Subject: [PATCH 14/23] move conv to node_unit --- .../nnapi/nnapi_builtin/builders/helper.cc | 21 +++++ .../nnapi/nnapi_builtin/builders/helper.h | 1 + .../nnapi_builtin/builders/op_builder.cc | 91 ++++++++++++++----- 3 files changed, 91 insertions(+), 22 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 71f5628211655..c0ded705a6c18 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -68,6 +68,27 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node) { return QLinearOpType::Unknown; } +ConvType GetConvType_nu(const NodeUnit& node_unit, const InitializedTensorSet& initializers) { + NodeAttrHelper helper(node_unit); + const auto group = helper.Get("group", 1); + + const auto& weight = node_unit.Inputs()[1].node_arg.Name(); + const auto& weight_tensor = *initializers.at(weight); + + // For ONNX we only have 1 conv ops + // For NNAPI we have 3 + // Input is (N, C, H, W) + // group == 1, --> regular conv + // group != 1 && weight is (M, 1, kH, kW), --> depthwise conv + // group != 1 && weight is (M, C/group, kH, kW), --> grouped conv + if (group == 1) + return ConvType::Regular; + else if ((weight_tensor.dims()[1] == 1)) + return ConvType::Depthwise; + else + return ConvType::Grouped; +} + ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& initializers) { const auto& op_type = node.OpType(); bool is_qlinear_conv = (op_type == "QLinearConv"); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 8a87f119c72cf..281157539c3dc 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -98,6 +98,7 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node); // Return the type of the conv ops, // This function assumes the input is a 2d conv node ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& initializers); +ConvType GetConvType_nu(const NodeUnit& node_unit, const InitializedTensorSet& initializers); // This qlinear op is an operator takes 2 
inputs and produces 1 output // Such as QLinearConv, QLinearMatMul, QLinearAdd, ... diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 141319bb2be3f..0b734a4a62773 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -580,9 +580,6 @@ static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( return Status::OK(); } -// Get scales and zero points for the qlinear binary ops (which has 2 input and 1 output) -// QLinearConv, QLinearMatmul, QLinearAdd -// a, b are inputs, and y is output static Status GetBinaryOpQuantizationScaleAndZeroPoint( const ModelBuilder& model_builder, const Node& node, float& a_scale, float& b_scale, float& y_scale, @@ -611,6 +608,61 @@ static Status GetBinaryOpQuantizationScaleAndZeroPoint( // If the Qlinear[Conv/MatMul] is using per-tensor u8s8, the weight/B tensor // will be convert to uint8 later, will return the same scale and 128 as zero point // Also will set is_per_tensor_u8s8 to true to be used later +static Status GetConvMatMulOpQuantizationScaleAndZeroPoint_nu( + const ModelBuilder& model_builder, const NodeUnit& node_unit, + float& a_scale, float& w_scale, float& y_scale, + int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point, + optional>& w_scales, bool& is_per_tensor_u8s8) ORT_MUST_USE_RESULT; +static Status GetConvMatMulOpQuantizationScaleAndZeroPoint_nu( + const ModelBuilder& model_builder, const NodeUnit& node_unit, + float& a_scale, float& w_scale, float& y_scale, + int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point, + optional>& w_scales, bool& is_per_tensor_u8s8) { + is_per_tensor_u8s8 = false; + const auto& initializers(model_builder.GetInitializerTensors()); + // Get scale and zero points + // We will handle per-channel weight scale and zero point later + ORT_RETURN_IF_ERROR( + GetBinaryOpQuantizationScaleAndZeroPoint_nu(initializers, node_unit, + a_scale, w_scale, y_scale, + a_zero_point, w_zero_point, y_zero_point)); + + const auto& inputs = node_unit.Inputs(); + const auto& weight_tensor = *initializers.at(inputs[1].node_arg.Name()); + + // We are done here is this is u8u8 QLinearConv + if (weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_UINT8) + return Status::OK(); + + // This is per-tensor u8s8 + // NNAPI does not support per-tensor u8s8 + // For this case we will need to convert the int8 weight tensor to uint8 + // And have same scale and 128 as zero point + // The conversion of the weight tensor itself will be done in the OpBuilder + const auto& scale_tensor = *initializers.at(inputs[1].quant_param->scale.Name()); + int64_t scale_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0]; + if (scale_dim == 1) { + w_zero_point = 128; + is_per_tensor_u8s8 = true; + return Status::OK(); + } + + // Now we have u8s8 per-channel QlinearConv + // u8s8 QlinearConv always have 0 as zero point so we are not getting it here + // and we do not use w_scale here, so we reset them back to 0 + w_scale = 0.0f; + w_zero_point = 0; + + // We need to copy the 1d scales array for per-channel quantization + std::vector unpacked_tensor; + ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(scale_tensor, unpacked_tensor)); + const float* scales = reinterpret_cast(unpacked_tensor.data()); + const size_t scales_size = scale_tensor.dims().empty() ? 
1 : scale_tensor.dims()[0]; + std::vector scales_vec(scales, scales + scales_size); + w_scales = onnxruntime::make_optional(std::move(scales_vec)); + return Status::OK(); +} + static Status GetConvMatMulOpQuantizationScaleAndZeroPoint( const ModelBuilder& model_builder, const Node& node, float& a_scale, float& w_scale, float& y_scale, @@ -1423,13 +1475,12 @@ void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod } Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); const auto& initializers(model_builder.GetInitializerTensors()); - NodeAttrHelper helper(node); - const auto input_defs = node.InputDefs(); + NodeAttrHelper helper(node_unit); + const auto inputs = node_unit.Inputs(); bool is_qlinear_conv = IsQuantizedOp(node_unit); // onnx strides are in the order height, width @@ -1445,11 +1496,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N const auto onnx_dilations = helper.Get("dilations", std::vector{1, 1}); const auto group = helper.Get("group", 1); - size_t x_idx = 0, - w_idx = is_qlinear_conv ? 3 : 1, - b_idx = is_qlinear_conv ? 8 : 2; - - auto input = input_defs[x_idx]->Name(); + auto input = inputs[0].node_arg.Name(); bool use_nchw = model_builder.UseNCHW(); bool input_is_nhwc = model_builder.IsOperandNHWC(input); bool output_is_nhwc = false; @@ -1458,13 +1505,13 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } else { output_is_nhwc = true; if (!input_is_nhwc) { - ORT_RETURN_IF_ERROR(GetNHWCInput(model_builder, node, x_idx, input)); + ORT_RETURN_IF_ERROR(GetNHWCInput_nu(model_builder, node_unit, 0, input)); } } - const auto& weight = input_defs[w_idx]->Name(); + const auto& weight = inputs[1].node_arg.Name(); const auto& weight_tensor = *initializers.at(weight); - auto conv_type = GetConvType(node, model_builder.GetGraphViewer().GetAllInitializedTensors()); + auto conv_type = GetConvType_nu(node_unit, model_builder.GetInitializerTensors()); bool conv_2d = (conv_type == ConvType::Regular), depthwise_conv_2d = (conv_type == ConvType::Depthwise), grouped_conv_2d = (conv_type == ConvType::Grouped); @@ -1480,10 +1527,10 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N optional> w_scales; bool is_per_tensor_u8s8 = false; if (is_qlinear_conv) { - ORT_RETURN_IF_ERROR(GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node, - x_scale, w_scale, y_scale, - x_zero_point, w_zero_point, y_zero_point, - w_scales, is_per_tensor_u8s8)); + ORT_RETURN_IF_ERROR(GetConvMatMulOpQuantizationScaleAndZeroPoint_nu(model_builder, node_unit, + x_scale, w_scale, y_scale, + x_zero_point, w_zero_point, y_zero_point, + w_scales, is_per_tensor_u8s8)); } Shape onnx_weight_shape; @@ -1536,8 +1583,8 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N ORT_RETURN_IF_ERROR(IsValidConvWeightQuantizedType(model_builder, weight, w_scale, w_zero_point, w_scales)); } - bool hasBias = (input_defs.size() > b_idx); - std::string bias = hasBias ? input_defs[b_idx]->Name() : weight + "_bias"; + bool hasBias = (inputs.size() > 2); + std::string bias = hasBias ? 
inputs[2].node_arg.Name() : weight + "_bias"; if (!hasBias) { const auto weight_dimen = shaper[weight]; Shape bias_dimen; @@ -1613,7 +1660,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } } - int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]); + int32_t fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[1].node_arg); ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code); if (model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) { @@ -1631,7 +1678,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } int32_t operationCode; - const auto& output = node.OutputDefs()[0]->Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); if (conv_2d || grouped_conv_2d) { operationCode = conv_2d ? ANEURALNETWORKS_CONV_2D : ANEURALNETWORKS_GROUPED_CONV_2D; From 7aa4f9d91f233423d36cbfc55d3ebd4ba944c916 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 10 Jan 2022 14:11:39 -0800 Subject: [PATCH 15/23] move gemm/cast to node_unit --- .../nnapi_builtin/builders/op_builder.cc | 119 +++--------------- 1 file changed, 19 insertions(+), 100 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 0b734a4a62773..f116b37d200e2 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -580,25 +580,6 @@ static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( return Status::OK(); } -static Status GetBinaryOpQuantizationScaleAndZeroPoint( - const ModelBuilder& model_builder, const Node& node, - float& a_scale, float& b_scale, float& y_scale, - int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) ORT_MUST_USE_RESULT; -static Status GetBinaryOpQuantizationScaleAndZeroPoint( - const ModelBuilder& model_builder, const Node& node, - float& a_scale, float& b_scale, float& y_scale, - int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) { - const auto& initializers = model_builder.GetInitializerTensors(); - ORT_RETURN_IF_ERROR(GetQuantizationScale(initializers, node, 1, a_scale)); - ORT_RETURN_IF_ERROR(GetQuantizationScale(initializers, node, 4, b_scale)); - ORT_RETURN_IF_ERROR(GetQuantizationScale(initializers, node, 6, y_scale)); - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 2, a_zero_point)); - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 5, b_zero_point)); - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 7, y_zero_point)); - - return Status::OK(); -} - // Get scale and zero point for // [QlinearConv] input, weight, output // [QlinearMatMul] A, B, Y @@ -663,61 +644,6 @@ static Status GetConvMatMulOpQuantizationScaleAndZeroPoint_nu( return Status::OK(); } -static Status GetConvMatMulOpQuantizationScaleAndZeroPoint( - const ModelBuilder& model_builder, const Node& node, - float& a_scale, float& w_scale, float& y_scale, - int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point, - optional>& w_scales, bool& is_per_tensor_u8s8) ORT_MUST_USE_RESULT; -static Status GetConvMatMulOpQuantizationScaleAndZeroPoint( - const ModelBuilder& model_builder, const Node& node, - float& a_scale, float& w_scale, float& y_scale, - int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point, - optional>& w_scales, bool& is_per_tensor_u8s8) { - 
is_per_tensor_u8s8 = false; - // Get scale and zero points - // We will handle per-channel weight scale and zero point later - ORT_RETURN_IF_ERROR( - GetBinaryOpQuantizationScaleAndZeroPoint(model_builder, node, - a_scale, w_scale, y_scale, - a_zero_point, w_zero_point, y_zero_point)); - - const auto input_defs = node.InputDefs(); - const auto& initializers(model_builder.GetInitializerTensors()); - const auto& weight_tensor = *initializers.at(input_defs[3]->Name()); - - // We are done here is this is u8u8 QLinearConv - if (weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_UINT8) - return Status::OK(); - - // This is per-tensor u8s8 - // NNAPI does not support per-tensor u8s8 - // For this case we will need to convert the int8 weight tensor to uint8 - // And have same scale and 128 as zero point - // The conversion of the weight tensor itself will be done in the OpBuilder - const auto& scale_tensor = *initializers.at(input_defs[4]->Name()); - int64_t scale_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0]; - if (scale_dim == 1) { - w_zero_point = 128; - is_per_tensor_u8s8 = true; - return Status::OK(); - } - - // Now we have u8s8 per-channel QlinearConv - // u8s8 QlinearConv always have 0 as zero point so we are not getting it here - // and we do not use w_scale here, so we reset them back to 0 - w_scale = 0.0f; - w_zero_point = 0; - - // We need to copy the 1d scales array for per-channel quantization - std::vector unpacked_tensor; - ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(scale_tensor, unpacked_tensor)); - const float* scales = reinterpret_cast(unpacked_tensor.data()); - const size_t scales_size = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0]; - std::vector scales_vec(scales, scales + scales_size); - w_scales = onnxruntime::make_optional(std::move(scales_vec)); - return Status::OK(); -} - // NNAPI has the quantization scale and zero point embedded in the ANeuralNetworksOperandType // ONNX has the quantization scale and zero point as the inputs of the qlinear operators // We want to verify the scale and zeropoint of the ONNX inputs matches the values embedded in the NNAPI inputs @@ -1710,13 +1636,12 @@ class CastOpBuilder : public BaseOpBuilder { }; Status CastOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); - const auto& input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); auto to = helper.Get("to", 0); @@ -1875,25 +1800,19 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod } Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); const auto& initializers(model_builder.GetInitializerTensors()); - const auto& op = node.OpType(); - const auto input_defs(node.InputDefs()); - NodeAttrHelper helper(node); + const auto& op = node_unit.OpType(); + const auto& inputs = 
node_unit.Inputs(); + NodeAttrHelper helper(node_unit); bool is_qlinear_matmul = op == "QLinearMatMul"; - size_t a_idx = 0, - b_idx = is_qlinear_matmul ? 3 : 1, - c_idx = 2; // QLinearMatMul has no bias - - const auto& input1 = input_defs[a_idx]->Name(); - const auto& input2 = input_defs[b_idx]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input1 = inputs[0].node_arg.Name(); + const auto& input2 = inputs[1].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); const auto transB = helper.Get("transB", 0); float a_scale = 0.0f, @@ -1907,10 +1826,10 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N if (is_qlinear_matmul) { optional> w_scales; ORT_RETURN_IF_ERROR( - GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node, - a_scale, b_scale, y_scale, - a_zero_point, b_zero_point, y_zero_point, - w_scales, is_per_tensor_u8s8)); + GetConvMatMulOpQuantizationScaleAndZeroPoint_nu(model_builder, node_unit, + a_scale, b_scale, y_scale, + a_zero_point, b_zero_point, y_zero_point, + w_scales, is_per_tensor_u8s8)); } uint32_t input_2_idx; @@ -1939,14 +1858,14 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } uint32_t bias_idx; - bool has_bias = (op == "Gemm") && (input_defs.size() > 2); + bool has_bias = inputs.size() > 2; if (has_bias) { - const auto& bias = input_defs[c_idx]->Name(); + const auto& bias = inputs[2].node_arg.Name(); // We need squeeze the input tensor to 1d if necessary if (shaper[bias].size() > 1) { - std::string bias_squeezed = model_builder.GetUniqueName(node.Name() + op + "_bias_squeezed"); + std::string bias_squeezed = model_builder.GetUniqueName(node_unit.Name() + op + "_bias_squeezed"); // We will use squeeze all here - ORT_RETURN_IF_ERROR(AddSqueezeOp(model_builder, node.Name(), + ORT_RETURN_IF_ERROR(AddSqueezeOp(model_builder, node_unit.Name(), bias, bias_squeezed, {} /* axes */)); bias_idx = operand_indices.at(bias_squeezed); @@ -1959,7 +1878,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } } else { // No C supplied, we need a vector of 0 - std::string bias = model_builder.GetUniqueName(node.Name() + op + "_bias"); + std::string bias = model_builder.GetUniqueName(node_unit.Name() + op + "_bias"); const auto& bias_type = operand_types.at(input2).type; const Shape& bias_dimen = {shaper[input2][0]}; if (bias_type == Type::TENSOR_FLOAT32) { @@ -1981,7 +1900,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N input_indices.push_back(operand_indices.at(input1)); // A input_indices.push_back(input_2_idx); // B input_indices.push_back(bias_idx); // C - int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]); + int32_t fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[0].node_arg); ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code); ORT_RETURN_IF_ERROR(shaper.FC(input1, input2, output)); From cc6d468823f168bb5c2d93063bd1ea04d29d1677 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 10 Jan 2022 21:13:48 -0800 Subject: [PATCH 16/23] remove redundant functions --- .../nnapi/nnapi_builtin/builders/helper.cc | 68 +--- .../nnapi/nnapi_builtin/builders/helper.h | 15 +- .../nnapi_builtin/builders/model_builder.cc | 62 --- .../nnapi_builtin/builders/model_builder.h | 2 - .../nnapi_builtin/builders/op_builder.cc | 354 +++++++----------- .../builders/op_support_checker.cc | 179 ++++----- 6 files changed, 225 insertions(+), 455 
deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index c0ded705a6c18..3cf271ae317b8 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -89,32 +89,6 @@ ConvType GetConvType_nu(const NodeUnit& node_unit, const InitializedTensorSet& i return ConvType::Grouped; } -ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& initializers) { - const auto& op_type = node.OpType(); - bool is_qlinear_conv = (op_type == "QLinearConv"); - ORT_ENFORCE(op_type == "Conv" || is_qlinear_conv); - - NodeAttrHelper helper(node); - const auto group = helper.Get("group", 1); - - size_t w_idx = is_qlinear_conv ? 3 : 1; - const auto& weight = node.InputDefs()[w_idx]->Name(); - const auto& weight_tensor = *initializers.at(weight); - - // For ONNX we only have 1 conv ops - // For NNAPI we have 3 - // Input is (N, C, H, W) - // group == 1, --> regular conv - // group != 1 && weight is (M, 1, kH, kW), --> depthwise conv - // group != 1 && weight is (M, C/group, kH, kW), --> grouped conv - if (group == 1) - return ConvType::Regular; - else if ((weight_tensor.dims()[1] == 1)) - return ConvType::Depthwise; - else - return ConvType::Grouped; -} - bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type) { return qlinear_op_type == QLinearOpType::QLinearConv || qlinear_op_type == QLinearOpType::QLinearMatMul || @@ -346,35 +320,6 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co return true; } -common::Status GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node, - size_t idx, float& scale) { - std::vector unpacked_tensor; - const auto& name = node.InputDefs()[idx]->Name(); - const auto& scale_tensor = *initializers.at(name); - ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(scale_tensor, node.ModelPath(), unpacked_tensor)); - - // The scale should be one or more floats - ORT_RETURN_IF(unpacked_tensor.size() < 4, "The initializer [", name, "] should have one or more floats ", - "with size no less than 4, actual size: ", unpacked_tensor.size()); - scale = reinterpret_cast(unpacked_tensor.data())[0]; - return Status::OK(); -} - -common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, - const Node& node, size_t idx, int32_t& zero_point) { - std::vector unpacked_tensor; - const auto& name = node.InputDefs()[idx]->Name(); - const auto& zero_point_tensor = *initializers.at(name); - ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(), unpacked_tensor)); - - ORT_RETURN_IF(unpacked_tensor.empty(), "The initializer [", name, "] is empty"); - // Onnx quantization uses uint8 [int8 not yet supported], need to cast to int32_t used by NNAPI - zero_point = static_cast(unpacked_tensor[0]); - return Status::OK(); -} - common::Status GetQuantizationScaleAndZeroPoint( const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path, float& scale, int32_t& zero_point) { @@ -461,9 +406,9 @@ bool GetType(const NodeArg& node_arg, int32_t& type) { return true; } -void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2) { +void GetFlattenOutputShape_nu(const NodeUnit& node_unit, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2) { int32_t rank = static_cast(input_shape.size()); - 
NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); int32_t axis = helper.Get("axis", 1); // axis == rank is a valid input, but invalid for HandleNegativeAxis // Skip non-negative axis here @@ -589,10 +534,11 @@ std::string Shape2String(const std::vector& shape) { return os.str(); } -bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node, - size_t input_idx, const char* input_name) { - if (!Contains(initializers, node.InputDefs()[input_idx]->Name())) { - LOGS_DEFAULT(VERBOSE) << input_name << " of " << node.OpType() << " must be an initializer tensor"; +bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const std::string& input_name, const char* input_string) { + if (!Contains(initializers, input_name)) { + LOGS_DEFAULT(VERBOSE) << input_string << " of " << node_unit.Name() << " of type [" + << node_unit.OpType() << "] must be an initializer tensor"; return false; } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 281157539c3dc..b3e3ffe43f072 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -97,7 +97,6 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node); // Return the type of the conv ops, // This function assumes the input is a 2d conv node -ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& initializers); ConvType GetConvType_nu(const NodeUnit& node_unit, const InitializedTensorSet& initializers); // This qlinear op is an operator takes 2 inputs and produces 1 output // Such as QLinearConv, QLinearMatMul, QLinearAdd, ... @@ -117,12 +116,6 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::vector& indices, bool is_input); -common::Status GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node, - size_t idx, float& scale); - -common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, - const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT; - common::Status GetQuantizationScaleAndZeroPoint( const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path, float& scale, int32_t& zero_point); @@ -137,7 +130,7 @@ bool GetShape(const NodeArg& node_arg, Shape& shape); bool GetType(const NodeArg& node_arg, int32_t& type); // Get the output shape of Flatten Op -void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2); +void GetFlattenOutputShape_nu(const NodeUnit& node_unit, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2); // If a node is supported by NNAPI bool IsNodeSupported(const NodeUnit& node_unit, const GraphViewer& graph_viewer, const OpSupportCheckParams& params); @@ -158,8 +151,10 @@ bool IsValidSupportedNodeGroup(const std::vector& supported_node_gr std::string Shape2String(const std::vector& shape); // Check the given input is an initializer tensor -bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node, - size_t index, const char* input_name) ORT_MUST_USE_RESULT; +// input_name is the name of the initializer +// input_string is the string describing the input in the output message (if any) +bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit, + const
std::string& input_name, const char* input_string) ORT_MUST_USE_RESULT; } // namespace nnapi } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index a96c67960b7d3..7b5ac1aa6b0fb 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -55,7 +55,6 @@ Status ModelBuilder::Prepare() { PreprocessNodeUnits(); GetAllQuantizedOpInputs(); PreprocessInitializers(); - PreprocessActivations(); PreprocessActivations_nu(); ORT_RETURN_IF_ERROR(RegisterInitializers()); ORT_RETURN_IF_ERROR(RegisterModelInputs()); @@ -123,28 +122,6 @@ void ModelBuilder::PreprocessInitializers() { } } -void ModelBuilder::PreprocessActivations() { - const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); - for (size_t i = 0; i < node_indices.size(); i++) { - const auto* node(graph_viewer_.GetNode(node_indices[i])); - const auto& op_type(node->OpType()); - - if (op_type == "Relu") { - activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU); - } else if (op_type == "Clip") { // Relu1 or Relu6 - float min, max; - if (!GetClipMinMax(GetInitializerTensors(), *node, min, max, logging::LoggingManager::DefaultLogger())) - continue; - - if (min == -1.0f && max == 1.0f) { - activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU1); - } else if (min == 0.0f && max == 6.0f) { - activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU6); - } - } - } -} - void ModelBuilder::PreprocessActivations_nu() { for (const auto& node_unit : node_unit_holder_) { const auto& node = node_unit->GetNode(); @@ -692,45 +669,6 @@ int32_t ModelBuilder::FindActivation_nu(const NodeUnit& node_unit, const NodeArg return fuse_code; } -int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) { - int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE; - - // We do not support activation fusion for quantized operators for now - auto qlinear_op_type = GetQLinearOpType(node); - if (qlinear_op_type != QLinearOpType::Unknown) - return fuse_code; - - for (auto it = node.OutputEdgesBegin(), end = node.OutputEdgesEnd(); it != end; ++it) { - const auto& dst_node = it->GetNode(); - const auto* dst_input = dst_node.InputDefs()[it->GetDstArgIndex()]; - if (Contains(activation_nodes_, dst_node.Index())) { - if (&output == dst_input) { - fuse_code = activation_nodes_.at(dst_node.Index()); - } - } else { - // if there is any other non-relu node using the output - // will add relu separately - if (&output == dst_input) - return ANEURALNETWORKS_FUSED_NONE; - } - } - - // if output is a graph output, will add relu separately - if (fuse_code != ANEURALNETWORKS_FUSED_NONE) { - for (const auto* graph_output : graph_viewer_.GetOutputs()) { - if (&output == graph_output) - return ANEURALNETWORKS_FUSED_NONE; - } - - LOGS_DEFAULT(VERBOSE) << "Node [" << node.Name() << "] type [" << node.OpType() - << "], fused the output [" << output.Name() << "]"; - - fused_activations_.insert(output.Name()); - } - - return fuse_code; -} - /* static */ const IOpBuilder* ModelBuilder::GetOpBuilder(const NodeUnit& node_unit) { const auto& op_builders = GetOpBuilders(); const auto& op_type = node_unit.GetNode().OpType(); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 
07b8035d96f5a..45a05773b853c 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -48,7 +48,6 @@ class ModelBuilder { const std::vector& is_nhwc_vec) ORT_MUST_USE_RESULT; // Find if an output has a fuseable activation (Relu) - int32_t FindActivation(const Node& node, const NodeArg& output); int32_t FindActivation_nu(const NodeUnit& node_unit, const NodeArg& output); // Add an NNAPI scalar operand @@ -185,7 +184,6 @@ class ModelBuilder { // If a NNAPI operation will use initializers directly, we will add the initializers to the skip list void PreprocessInitializers(); // Preprocess all the activation nodes (Relu/Relu1/Relu6) for easy query later - void PreprocessActivations(); void PreprocessActivations_nu(); // Copy and process all the initializers to NNAPI model Status RegisterInitializers() ORT_MUST_USE_RESULT; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index f116b37d200e2..da6e1e9841620 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -148,28 +148,6 @@ Status GetNCHWInput_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, s return Status::OK(); } -// Convert the input from nchw to nhwc -// Caller should ensure input is currently in nchw format using ModelBuilder::IsOperandNHWC -Status GetNHWCInput(ModelBuilder& model_builder, const Node& node, size_t input_index, std::string& input) { - const auto& nchw_input = node.InputDefs()[input_index]->Name(); - if (!model_builder.GetNHWCOperand(nchw_input, input)) { - input = model_builder.GetUniqueName(nchw_input + "_nchw_to_nhwc"); - ORT_RETURN_IF_ERROR(TransposeNCHWToNHWC(model_builder, nchw_input, input)); - } - return Status::OK(); -} - -// Convert the input from nhwc to nchw -// Caller should ensure input is currently in nhwc format using ModelBuilder::IsOperandNHWC -Status GetNCHWInput(ModelBuilder& model_builder, const Node& node, size_t input_index, std::string& input) { - const auto& nhwc_input = node.InputDefs()[input_index]->Name(); - if (!model_builder.GetNCHWOperand(nhwc_input, input)) { - input = model_builder.GetUniqueName(nhwc_input + "_nhwc_to_nchw"); - ORT_RETURN_IF_ERROR(TransposeNHWCToNCHW(model_builder, nhwc_input, input)); - } - return Status::OK(); -} - // Transpose layouts if necessary for element wise operators with 2 inputs // and return the layout type of output tensor // If both inputs have same layout, the output will have the same layout @@ -197,35 +175,6 @@ Status TransposeBinaryOpInputLayout_nu(ModelBuilder& model_builder, const NodeUn return Status::OK(); } -// Transpose layouts if necessary for element wise operators with 2 inputs -// and return the layout type of output tensor -// If both inputs have same layout, the output will have the same layout -// Otherwise we will need transpose the nhwc input back to nchw, and output will be nchw -Status TransposeBinaryOpInputLayout(ModelBuilder& model_builder, const Node& node, - size_t input1_idx, size_t input2_idx, - std::string& input1, std::string& input2, - bool& output_is_nhwc) ORT_MUST_USE_RESULT; -Status TransposeBinaryOpInputLayout(ModelBuilder& model_builder, const Node& node, - size_t input1_idx, size_t input2_idx, - std::string& input1, std::string& input2, - bool& output_is_nhwc) { - bool input1_is_nhwc = 
model_builder.IsOperandNHWC(input1); - bool input2_is_nhwc = model_builder.IsOperandNHWC(input2); - output_is_nhwc = false; - - if (input1_is_nhwc == input2_is_nhwc) { - output_is_nhwc = input1_is_nhwc; - } else if (input1_is_nhwc) { - // need transpose input1 back to nchw - ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node, input1_idx, input1)); - } else { // input2_is_nhwc - // need transpose input2 back to nchw - ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node, input2_idx, input2)); - } - - return Status::OK(); -} - static Status AddBinaryOperator(int32_t op_type, ModelBuilder& model_builder, const std::string& input1, @@ -746,16 +695,20 @@ class BaseOpBuilder : public IOpBuilder { protected: virtual Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const ORT_MUST_USE_RESULT = 0; + static bool IsOpSupported(const ModelBuilder& model_builder, const NodeUnit& node_unit) ORT_MUST_USE_RESULT; }; -Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const { +/* static */ bool BaseOpBuilder::IsOpSupported(const ModelBuilder& model_builder, const NodeUnit& node_unit) { OpSupportCheckParams params{ model_builder.GetNNAPIFeatureLevel(), model_builder.UseNCHW(), }; - ORT_RETURN_IF_NOT(IsNodeSupported(node_unit, model_builder.GetGraphViewer(), params), - "Unsupported operator ", node_unit.OpType()); + return IsNodeSupported(node_unit, model_builder.GetGraphViewer(), params); +} + +Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const { + ORT_RETURN_IF_NOT(IsOpSupported(model_builder, node_unit), "Unsupported operator ", node_unit.OpType()); ORT_RETURN_IF_ERROR(AddToModelBuilderImpl(model_builder, node_unit)); LOGS_DEFAULT(VERBOSE) << "Operator name: [" << node_unit.Name() << "] type: [" << node_unit.OpType() << "] was added"; @@ -951,12 +904,13 @@ Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co class ReshapeOpBuilder : public BaseOpBuilder { public: void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override; - static Status AddReshapeOperator(ModelBuilder& model_builder, const Node& node, + static Status AddReshapeOperator(ModelBuilder& model_builder, const NodeUnit& node_unit, const std::string& input, const std::vector& shape) ORT_MUST_USE_RESULT; private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; - static bool CanSkipReshape(const ModelBuilder& model_builder, const Node& node, size_t input_rank, size_t output_rank); + static bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit, + size_t input_rank, size_t output_rank); }; void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { @@ -975,25 +929,34 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const // between NNAPI CPU impl and Hardware Accelerator impl and will speed up the execution // If we are going to skip the reshape, we will still add correct shape and operand type for the output in // onnxruntime::nnapi::Model. 
-/* static */ bool ReshapeOpBuilder::CanSkipReshape(const ModelBuilder& model_builder, const Node& node, +/* static */ bool ReshapeOpBuilder::CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit, size_t input_rank, size_t output_rank) { - const auto& output = node.OutputDefs()[0]->Name(); + const auto& output_node_arg = node_unit.Outputs()[0].node_arg; + const auto& output_name = output_node_arg.Name(); + const auto& output_node = *node_unit.GetOutputNodes()[0]; + // We will go through all the output edges - for (auto it = node.OutputEdgesBegin(), end = node.OutputEdgesEnd(); it != end; ++it) { - const auto& op_type = it->GetNode().OpType(); + for (auto it = output_node.OutputEdgesBegin(), end = output_node.OutputEdgesEnd(); it != end; ++it) { + const auto& dest_node_unit = model_builder.GetNodeUnit(&it->GetNode()); + const auto& op_type = dest_node_unit.OpType(); // TODO add quantized matmul when reshape support quantized input if (op_type != "Gemm" && op_type != "MatMul") { LOGS_DEFAULT(VERBOSE) << "Reshape/Flatten can only be skipped when the output is Gemm/Matmul" << " or no op is using the output (output is graph output)" - << ", output name, " << output + << ", output name, " << output_name << " is used by " << op_type; return false; } + // Now the dest node is Gemm/Matmul, we want to make sure it is supported + if (!BaseOpBuilder::IsOpSupported(model_builder, dest_node_unit)) { + return false; + } + // NNAPI ANEURALNETWORKS_FULLY_CONNECTED will only flatten the input 0 - if (it->GetDstArgIndex() != 0) { + if (&output_node_arg != &dest_node_unit.Inputs()[0].node_arg) { LOGS_DEFAULT(VERBOSE) << "Reshape/Flatten can only be skipped when the output is input 0 of Gemm/Matmul" - << ", output name, " << output; + << ", output name, " << output_name; return false; } @@ -1001,7 +964,7 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const // And NNAPI ANEURALNETWORKS_FULLY_CONNECTED will only flatten input rank >= 2 if (input_rank < 2 || output_rank != 2) { LOGS_DEFAULT(VERBOSE) << "Reshape/Flatten can only be skipped when input_rank >= 2 and output_rank == 2" - << ", output name, " << output + << ", output name, " << output_name << ", the actual input_rank, " << input_rank << ", the actual output_rank, " << output_rank; return false; @@ -1012,26 +975,26 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const // Check if the Reshape output is a graph output, if so we cannot skip the Reshape // We do not care the case where the Reshape output is a dead end for (const auto* node_arg : model_builder.GetGraphViewer().GetOutputs()) { - if (node_arg->Name() == output) { + if (node_arg == &output_node_arg) { LOGS_DEFAULT(VERBOSE) << "Reshape/Flatten can not be skipped when the output is a graph output" - << ", output name, " << output; + << ", output name, " << output_name; return false; } } LOGS_DEFAULT(VERBOSE) << "Skipping Reshape/Flatten node [" - << node.Name() << "] with output, " << output + << node_unit.Name() << "] with output, " << output_name; return true; } /* static */ Status ReshapeOpBuilder::AddReshapeOperator(ModelBuilder& model_builder, - const Node& node, + const NodeUnit& node_unit, const std::string& input, const std::vector& shape) { auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& output = 
node_unit.Outputs()[0].node_arg.Name(); ORT_RETURN_IF_ERROR(shaper.Reshape(input, shape, output)); auto input_rank = shaper[input].size(); auto output_rank = shaper[output].size(); @@ -1039,7 +1002,7 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const // Since Reshape is not running using hardware in NNAPI for some CPU (e.g. Qualcomm SD for now) // We will try to see if we the skip the Reshape to prevent context switching between // NNAPI CPU impl and NNAPI hardware accelerator impl - if (CanSkipReshape(model_builder, node, input_rank, output_rank)) { + if (CanSkipReshape(model_builder, node_unit, input_rank, output_rank)) { // Since reshape can be skipped, only register the dimension and type, with same index and new name const OperandType output_operand_type(operand_types.at(input).type, shaper[output]); model_builder.RegisterOperand(output, operand_indices.at(input), output_operand_type, false); @@ -1050,7 +1013,7 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const input_indices.push_back(operand_indices.at(input)); // Add new shape Shape shape_dimen = {static_cast(shape.size())}; - std::string shape_name = model_builder.GetUniqueName(node.Name() + input + "newshape"); + std::string shape_name = model_builder.GetUniqueName(node_unit.Name() + input + "newshape"); OperandType shape_operand_type(Type::TENSOR_INT32, shape_dimen); ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(shape_name, shape.data(), shape_operand_type)); input_indices.push_back(operand_indices.at(shape_name)); @@ -1063,17 +1026,16 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const } Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& initializers(model_builder.GetInitializerTensors()); - auto input = node.InputDefs()[0]->Name(); + auto input = node_unit.Inputs()[0].node_arg.Name(); if (model_builder.IsOperandNHWC(input)) { // We want to transpose nhwc operand back to nchw before reshape - ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node, 0, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input)); } - const auto& shape_tensor = *initializers.at(node.InputDefs()[1]->Name()); + const auto& shape_tensor = *initializers.at(node_unit.Inputs()[1].node_arg.Name()); std::vector unpacked_tensor; ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(shape_tensor, unpacked_tensor)); const int64_t* raw_shape = reinterpret_cast(unpacked_tensor.data()); @@ -1087,7 +1049,7 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons shape[i] = dim == 0 ? 
input_shape[i] : dim; } - return AddReshapeOperator(model_builder, node, input, shape); + return AddReshapeOperator(model_builder, node_unit, input, shape); } #pragma endregion op_reshape @@ -1111,21 +1073,21 @@ void BatchNormalizationOpBuilder::AddInitializersToSkip(ModelBuilder& model_buil } Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_types(model_builder.GetOperandTypes()); const auto& initializers(model_builder.GetInitializerTensors()); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); + const auto& inputs = node_unit.Inputs(); // For reshape we are not really doing anything but // register a new operand with new shape - const auto& input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = inputs[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); - const auto& scale_tensor = *initializers.at(node.InputDefs()[1]->Name()); - const auto& bias_tensor = *initializers.at(node.InputDefs()[2]->Name()); - const auto& mean_tensor = *initializers.at(node.InputDefs()[3]->Name()); - const auto& var_tensor = *initializers.at(node.InputDefs()[4]->Name()); + const auto& scale_tensor = *initializers.at(inputs[1].node_arg.Name()); + const auto& bias_tensor = *initializers.at(inputs[2].node_arg.Name()); + const auto& mean_tensor = *initializers.at(inputs[3].node_arg.Name()); + const auto& var_tensor = *initializers.at(inputs[4].node_arg.Name()); const auto eps = helper.Get("epsilon", 1e-5f); const auto size = SafeInt(scale_tensor.dims()[0]); @@ -1155,9 +1117,9 @@ Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_bu bias_data[i]); } - const auto tensor_a_name = model_builder.GetUniqueName(node.Name() + input + "_imm_a"); - const auto tensor_b_name = model_builder.GetUniqueName(node.Name() + input + "_imm_b"); - const auto tensor_imm_product_name = model_builder.GetUniqueName(node.Name() + input + "_imm_mul"); + const auto tensor_a_name = model_builder.GetUniqueName(node_unit.Name() + input + "_imm_a"); + const auto tensor_b_name = model_builder.GetUniqueName(node_unit.Name() + input + "_imm_b"); + const auto tensor_imm_product_name = model_builder.GetUniqueName(node_unit.Name() + input + "_imm_mul"); Shape tensor_a_dimen = {size}; bool input_is_nhwc = model_builder.IsOperandNHWC(input); @@ -1191,7 +1153,7 @@ Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_bu output_is_nhwc)); // Add - int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]); + int32_t fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[0].node_arg); ORT_RETURN_IF_ERROR(AddBinaryOperator(ANEURALNETWORKS_ADD, model_builder, tensor_imm_product_name, tensor_b_name, @@ -1676,21 +1638,20 @@ class SoftMaxOpBuilder : public BaseOpBuilder { }; Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); const auto android_feature_level = model_builder.GetNNAPIFeatureLevel(); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); - auto input = node.InputDefs()[0]->Name(); + auto input = 
node_unit.Inputs()[0].node_arg.Name(); bool input_is_nhwc = model_builder.IsOperandNHWC(input); bool output_is_nhwc = input_is_nhwc; if (android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) { if (model_builder.IsOperandNHWC(input)) { output_is_nhwc = false; // We want to transpose nhwc operand back to nchw before softmax - ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node, 0, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input)); } } @@ -1700,7 +1661,7 @@ Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons axis = axis_nchw_to_nhwc[axis]; } - const auto& output = node.OutputDefs()[0]->Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); float beta = 1.f; std::vector input_indices; input_indices.push_back(operand_indices.at(input)); @@ -1728,16 +1689,14 @@ class IdentityOpBuilder : public BaseOpBuilder { }; Status IdentityOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - // Identity is not really going to do anything // Just register the dimension and type, with same index and new name auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - const auto& input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); std::vector input_indices; @@ -1791,7 +1750,7 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod if (op == "MatMul") { model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); } else if (op == "Gemm") { - NodeAttrHelper helper(node_unit.GetNode()); + NodeAttrHelper helper(node_unit); const auto transB = helper.Get("transB", 0); if (transB == 0) model_builder.AddInitializerToSkip(inputs[1].node_arg.Name()); @@ -1956,14 +1915,13 @@ void UnaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const No } Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - const auto& op_type(node.OpType()); + const auto& op_type(node_unit.OpType()); - const auto& input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); ORT_RETURN_IF_ERROR(shaper.Identity(input, output)); @@ -1997,9 +1955,9 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const if (is_qlinear_sigmoid) { const auto& initializers = model_builder.GetInitializerTensors(); float x_scale = 0.0f; - ORT_RETURN_IF_ERROR(GetQuantizationScale(initializers, node, 1, x_scale)); int32_t x_zero_point = 0; - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 2, x_zero_point)); + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + initializers, node_unit.Inputs()[0], node_unit.ModelPath(), x_scale, x_zero_point)); // Verify if the scale and zero point values from onnx input and nnapi input match 
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point)); @@ -2027,17 +1985,17 @@ class ConcatOpBuilder : public BaseOpBuilder { }; Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); + const auto& inputs = node_unit.Inputs(); std::vector input_indices; - const auto& input0 = node.InputDefs()[0]->Name(); + const auto& input0 = inputs[0].node_arg.Name(); bool all_input_have_same_layout = true; bool output_is_nhwc = false; - const auto node_input_size = node.InputDefs().size(); + const auto node_input_size = inputs.size(); // First if the inputs are uint8, we need verify all the inputs have same scale and zero points if (operand_types.at(input0).type == android::nn::wrapper::Type::TENSOR_QUANT8_ASYMM) { @@ -2046,7 +2004,7 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const // Compare scale and zp of input0 to input1~n for (size_t i = 1; i < node_input_size; i++) { - const auto& type = operand_types.at(node.InputDefs()[i]->Name()); + const auto& type = operand_types.at(inputs[i].node_arg.Name()); ORT_RETURN_IF_NOT(scale == type.operandType.scale, "Input[", i, "]'s scale: ", type.operandType.scale, " is different than input[0]'s scale: ", scale); @@ -2061,31 +2019,31 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const for (size_t i = 0; i < node_input_size - 1; i++) { all_input_have_same_layout = all_input_have_same_layout && - model_builder.IsOperandNHWC(node.InputDefs()[i]->Name()) == - model_builder.IsOperandNHWC(node.InputDefs()[i + 1]->Name()); + model_builder.IsOperandNHWC(inputs[i].node_arg.Name()) == + model_builder.IsOperandNHWC(inputs[i + 1].node_arg.Name()); } - std::vector inputs; - inputs.reserve(node_input_size); + std::vector input_names; + input_names.reserve(node_input_size); if (all_input_have_same_layout) { // if all the inputs are of same layout, output will be the same layout output_is_nhwc = model_builder.IsOperandNHWC(input0); for (size_t i = 0; i < node_input_size; i++) { - auto input = node.InputDefs()[i]->Name(); + auto input = inputs[i].node_arg.Name(); input_indices.push_back(operand_indices.at(input)); - inputs.push_back(input); + input_names.push_back(input); } } else { // if all the inputs are not same layout, // will need transpos those nhwc tensors back to nchw for (size_t i = 0; i < node_input_size; i++) { - auto input = node.InputDefs()[i]->Name(); + auto input = inputs[i].node_arg.Name(); if (model_builder.IsOperandNHWC(input)) { - ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node, i, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, i, input)); } input_indices.push_back(operand_indices.at(input)); - inputs.push_back(input); + input_names.push_back(input); } } @@ -2101,8 +2059,8 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const } ADD_SCALAR_OPERAND(model_builder, input_indices, axis); - const auto& output = node.OutputDefs()[0]->Name(); - ORT_RETURN_IF_ERROR(shaper.Concat(inputs, axis, output)); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); + ORT_RETURN_IF_ERROR(shaper.Concat(input_names, axis, output)); OperandType output_operand_type = 
operand_types.at(input0); output_operand_type.SetDimensions(shaper[output]); ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_CONCATENATION, input_indices, @@ -2120,7 +2078,7 @@ class SqueezeOpBuilder : public BaseOpBuilder { private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; - static Status GetAxes(ModelBuilder& model_builder, const Node& node, std::vector& axes); + static Status GetAxes(ModelBuilder& model_builder, const NodeUnit& node_unit, std::vector& axes); }; void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { @@ -2130,13 +2088,13 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const } /* static */ Status SqueezeOpBuilder::GetAxes(ModelBuilder& model_builder, - const Node& node, std::vector& axes) { + const NodeUnit& node_unit, std::vector& axes) { // Squeeze opset 13 use input as axes - if (node.SinceVersion() > 12) { + if (node_unit.SinceVersion() > 12) { // If axes is not supplied, return an empty axes as default to squeeze all - if (node.InputDefs().size() > 1) { + if (node_unit.Inputs().size() > 1) { const auto& initializers(model_builder.GetInitializerTensors()); - const auto& axes_tensor = *initializers.at(node.InputDefs()[1]->Name()); + const auto& axes_tensor = *initializers.at(node_unit.Inputs()[1].node_arg.Name()); std::vector unpacked_tensor; ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(axes_tensor, unpacked_tensor)); const int64_t* raw_axes = reinterpret_cast(unpacked_tensor.data()); @@ -2148,7 +2106,7 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const } } } else { - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); axes = helper.Get("axes", std::vector()); } @@ -2156,17 +2114,15 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const } Status SqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - - auto input = node.InputDefs()[0]->Name(); + auto input = node_unit.Inputs()[0].node_arg.Name(); if (model_builder.IsOperandNHWC(input)) { // We want to transpose nhwc operand back to nchw before squeeze - ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node, 0, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input)); } std::vector axes; - ORT_RETURN_IF_ERROR(GetAxes(model_builder, node, axes)); - return AddSqueezeOp(model_builder, node.Name(), input, node.OutputDefs()[0]->Name(), axes); + ORT_RETURN_IF_ERROR(GetAxes(model_builder, node_unit, axes)); + return AddSqueezeOp(model_builder, node_unit.Name(), input, node_unit.Outputs()[0].node_arg.Name(), axes); } #pragma endregion @@ -2186,24 +2142,19 @@ void QuantizeLinearOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, } Status QuantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); - const auto input_defs(node.InputDefs()); - const auto& input = input_defs[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); float scale = 0.0f; - 
ORT_RETURN_IF_ERROR(GetQuantizationScale(model_builder.GetInitializerTensors(), node, 1, scale)); int32_t zero_point = 0; - Type output_type = Type::TENSOR_QUANT8_ASYMM; - - if (input_defs.size() == 3) { // Get zero point - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder.GetInitializerTensors(), node, 2, zero_point)); - } + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + model_builder.GetInitializerTensors(), node_unit.Outputs()[0], node_unit.ModelPath(), scale, zero_point)); + Type output_type = Type::TENSOR_QUANT8_ASYMM; ORT_RETURN_IF_ERROR(shaper.Identity(input, output)); const OperandType output_operand_type(output_type, shaper[output], scale, zero_point); std::vector input_indices; @@ -2230,22 +2181,18 @@ void DequantizeLinearOpBuilder::AddInitializersToSkip(ModelBuilder& model_builde } Status DequantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); - const auto input_defs(node.InputDefs()); + const auto& inputs = node_unit.Inputs(); - const auto& input = input_defs[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = inputs[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); float scale = 0.0; - ORT_RETURN_IF_ERROR(GetQuantizationScale(model_builder.GetInitializerTensors(), node, 1, scale)); int32_t zero_point = 0; - if (input_defs.size() == 3) { // Get zero point - ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder.GetInitializerTensors(), node, 2, zero_point)); - } + ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint( + model_builder.GetInitializerTensors(), node_unit.Inputs()[0], node_unit.ModelPath(), scale, zero_point)); ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, scale, zero_point)); @@ -2269,21 +2216,20 @@ class LRNOpBuilder : public BaseOpBuilder { }; Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); const auto android_feature_level = model_builder.GetNNAPIFeatureLevel(); - auto input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + auto input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); if (android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) { // on android api level 28, we need to transpose the nchw input to nhwc output_is_nhwc = true; if (!model_builder.IsOperandNHWC(input)) { - ORT_RETURN_IF_ERROR(GetNHWCInput(model_builder, node, 0, input)); + ORT_RETURN_IF_ERROR(GetNHWCInput_nu(model_builder, node_unit, 0, input)); } } @@ -2340,27 +2286,26 @@ void ClipOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod } Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& 
operand_types(model_builder.GetOperandTypes()); - const auto& input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); ORT_RETURN_IF_ERROR(shaper.Identity(input, output)); const OperandType output_operand_type(operand_types.at(input).type, shaper[output]); if (Contains(model_builder.GetFusedActivations(), input)) { - LOGS_DEFAULT(VERBOSE) << "Clip Node [" << node.Name() << "] fused"; + LOGS_DEFAULT(VERBOSE) << "Clip Node [" << node_unit.Name() << "] fused"; model_builder.RegisterOperand(output, operand_indices.at(input), output_operand_type, output_is_nhwc); return Status::OK(); } float min, max; - GetClipMinMax(model_builder.GetInitializerTensors(), node, min, max, logging::LoggingManager::DefaultLogger()); + GetClipMinMax(model_builder.GetInitializerTensors(), node_unit.GetNode(), min, max, + logging::LoggingManager::DefaultLogger()); int32_t op_code; if (min == 0.0f && max == 6.0f) @@ -2404,17 +2349,16 @@ void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const N } Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); const auto& initializers(model_builder.GetInitializerTensors()); - NodeAttrHelper helper(node); - const auto input_defs = node.InputDefs(); + NodeAttrHelper helper(node_unit); + const auto& inputs = node_unit.Inputs(); const auto android_feature_level = model_builder.GetNNAPIFeatureLevel(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); - auto input = input_defs[0]->Name(); + auto input = inputs[0].node_arg.Name(); bool use_nchw = model_builder.UseNCHW(); bool input_is_nhwc = model_builder.IsOperandNHWC(input); bool output_is_nhwc = false; @@ -2423,7 +2367,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const } else { output_is_nhwc = true; if (!input_is_nhwc) { - ORT_RETURN_IF_ERROR(GetNHWCInput(model_builder, node, 0, input)); + ORT_RETURN_IF_ERROR(GetNHWCInput_nu(model_builder, node_unit, 0, input)); } } @@ -2436,8 +2380,8 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const bool using_half_pixel = coord_trans_mode == "half_pixel"; bool using_align_corners = coord_trans_mode == "align_corners"; - if (input_defs.size() == 3) { // we are using scales - const auto& scales_name = input_defs[2]->Name(); + if (inputs.size() == 3) { // we are using scales + const auto& scales_name = inputs[2].node_arg.Name(); const auto& scales_tensor = *initializers.at(scales_name); std::vector unpacked_tensor; ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(scales_tensor, unpacked_tensor)); @@ -2447,7 +2391,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const ORT_RETURN_IF_ERROR( shaper.ResizeUsingScales(input, scale_h, scale_w, use_nchw, output)); } else { // we are using sizes - const auto& sizes_name = input_defs[3]->Name(); + const auto& sizes_name = inputs[3].node_arg.Name(); const auto& sizes_tensor = *initializers.at(sizes_name); std::vector unpacked_tensor; 
ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(sizes_tensor, unpacked_tensor)); @@ -2498,27 +2442,25 @@ class FlattenOpBuilder : public BaseOpBuilder { }; Status FlattenOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - - auto input = node.InputDefs()[0]->Name(); + auto input = node_unit.Inputs()[0].node_arg.Name(); if (model_builder.IsOperandNHWC(input)) { // We want to transpose nhwc operand back to nchw before reshape - ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node, 0, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input)); } // Flatten is basically a reshape to 2d tensor // Get the shape for Reshape here Shape input_shape; - GetShape(*node.InputDefs()[0], input_shape); + GetShape(node_unit.Inputs()[0].node_arg, input_shape); int32_t dim_1 = 1; int32_t dim_2 = 1; - GetFlattenOutputShape(node, input_shape, dim_1, dim_2); + GetFlattenOutputShape_nu(node_unit, input_shape, dim_1, dim_2); // If the input is of dynamic shape, replace 0 (dynamic) dimension with -1 // We cannot have dim_1 and dim_2 both be 0 here, it was checked in IsOpSupportedImpl dim_1 = dim_1 == 0 ? -1 : dim_1; dim_2 = dim_2 == 0 ? -1 : dim_2; std::vector shape{dim_1, dim_2}; - return ReshapeOpBuilder::AddReshapeOperator(model_builder, node, input, shape); + return ReshapeOpBuilder::AddReshapeOperator(model_builder, node_unit, input, shape); } #pragma endregion @@ -2528,12 +2470,12 @@ Status FlattenOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons class MinMaxOpBuilder : public BaseOpBuilder { public: static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); - static Status AddMinMaxOperator(ModelBuilder& model_builder, const Node& node, - const std::string& input1, const std::string& input2, - bool output_is_nhwc) ORT_MUST_USE_RESULT; private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; + static Status AddMinMaxOperator(ModelBuilder& model_builder, const NodeUnit& node_unit, + const std::string& input1, const std::string& input2, + bool output_is_nhwc) ORT_MUST_USE_RESULT; }; /* static */ void MinMaxOpBuilder::CreateSharedOpBuilder( @@ -2546,16 +2488,16 @@ class MinMaxOpBuilder : public BaseOpBuilder { }); } -/* static */ Status MinMaxOpBuilder::AddMinMaxOperator(ModelBuilder& model_builder, const Node& node, +/* static */ Status MinMaxOpBuilder::AddMinMaxOperator(ModelBuilder& model_builder, const NodeUnit& node_unit, const std::string& input1, const std::string& input2, bool output_is_nhwc) { auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); - const auto& op_type(node.OpType()); + const auto& op_type(node_unit.OpType()); int32_t op_code; if (op_type == "Min") op_code = ANEURALNETWORKS_MINIMUM; @@ -2577,17 +2519,14 @@ class MinMaxOpBuilder : public BaseOpBuilder { } Status MinMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - const auto input_defs(node.InputDefs()); - std::string input1 = input_defs[0]->Name(); - std::string input2 = input_defs[1]->Name(); + const auto& inputs = node_unit.Inputs(); + std::string input1 = 
inputs[0].node_arg.Name(); + std::string input2 = inputs[1].node_arg.Name(); bool output_is_nhwc = false; - ORT_RETURN_IF_ERROR(TransposeBinaryOpInputLayout(model_builder, node, - 0 /* input1_idx */, - 1 /* input2_idx */, - input1, input2, output_is_nhwc)); + ORT_RETURN_IF_ERROR(TransposeBinaryOpInputLayout_nu(model_builder, node_unit, + input1, input2, output_is_nhwc)); - return AddMinMaxOperator(model_builder, node, input1, input2, output_is_nhwc); + return AddMinMaxOperator(model_builder, node_unit, input1, input2, output_is_nhwc); } #pragma endregion @@ -2600,17 +2539,15 @@ class EluOpBuilder : public BaseOpBuilder { }; Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - const auto& input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = node_unit.Inputs()[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = model_builder.IsOperandNHWC(input); ORT_RETURN_IF_ERROR(shaper.Identity(input, output)); const OperandType output_operand_type(operand_types.at(input).type, shaper[output]); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); const auto alpha = helper.Get("alpha", 1.0f); std::vector input_indices; input_indices.push_back(operand_indices.at(input)); @@ -2645,13 +2582,11 @@ void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const No } Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - const auto& node = node_unit.GetNode(); - auto& shaper(model_builder.GetShaper()); const auto& operand_indices(model_builder.GetOperandIndices()); const auto& operand_types(model_builder.GetOperandTypes()); - const auto input_defs = node.InputDefs(); - const auto& input_shape = shaper[input_defs[0]->Name()]; + const auto& inputs = node_unit.Inputs(); + const auto& input_shape = shaper[inputs[0].node_arg.Name()]; std::vector input_shape_64(input_shape.cbegin(), input_shape.cend()); SliceOp::PrepareForComputeMetadata compute_metadata(input_shape_64); @@ -2665,15 +2600,14 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const std::vector input_axes; std::vector input_steps; - const auto CopyInputData = [&node, &model_builder](size_t input_idx, std::vector& data) { + const auto CopyInputData = [&inputs, &model_builder](size_t input_idx, std::vector& data) { data.clear(); - const auto input_defs = node.InputDefs(); // This is an optional input, return empty vector - if (input_defs.size() <= input_idx) + if (inputs.size() <= input_idx) return Status::OK(); - const auto& input_name = input_defs[input_idx]->Name(); + const auto& input_name = inputs[input_idx].node_arg.Name(); const auto& initializers(model_builder.GetInitializerTensors()); const auto& tensor = *initializers.at(input_name); @@ -2715,8 +2649,8 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const std::back_inserter(nnapi_output_shape), [](int64_t i) { return SafeInt(i); }); - const auto& input = node.InputDefs()[0]->Name(); - const auto& output = node.OutputDefs()[0]->Name(); + const auto& input = inputs[0].node_arg.Name(); + const auto& output = node_unit.Outputs()[0].node_arg.Name(); bool output_is_nhwc = 
model_builder.IsOperandNHWC(input); // No shape inference for Slice, everything is calculated here, we only need to add the output shape @@ -2731,14 +2665,14 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Shape param_dimen = {static_cast(input_shape.size())}; // helper function to add begin/end/strides of ANEURALNETWORKS_STRIDED_SLICE - const auto AddOperand = [&model_builder, &node, &input_indices, &operand_indices]( + const auto AddOperand = [&model_builder, &node_unit, &input_indices, &operand_indices]( const char* name, const Shape& shape, const std::vector& param_raw_data) { std::vector param_data; param_data.reserve(param_raw_data.size()); std::transform(param_raw_data.cbegin(), param_raw_data.cend(), std::back_inserter(param_data), [](int64_t i) { return SafeInt(i); }); - std::string param_name = model_builder.GetUniqueName(node.Name() + name); + std::string param_name = model_builder.GetUniqueName(node_unit.Name() + name); OperandType param_operand_type(Type::TENSOR_INT32, shape); ORT_RETURN_IF_ERROR( model_builder.AddOperandFromPersistMemoryBuffer(param_name, param_data.data(), param_operand_type)); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc index 262440859843c..30854ca06cb52 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc @@ -431,7 +431,7 @@ bool ReshapeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& init const int64_t* raw_perm = reinterpret_cast(unpacked_tensor.data()); const auto perm_size = SafeInt(perm_tensor.dims()[0]); - NodeAttrHelper helper(node_unit.GetNode()); + NodeAttrHelper helper(node_unit); const bool allow_zero = helper.Get("allowzero ", 0) == 1; for (uint32_t i = 0; i < perm_size; i++) { // NNAPI reshape does not support 0 as dimension @@ -466,16 +466,15 @@ class BatchNormalizationOpSupportChecker : public BaseOpSupportChecker { bool BatchNormalizationOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); - if (node.OutputDefs().size() != 1) { + if (node_unit.Outputs().size() != 1) { LOGS_DEFAULT(VERBOSE) << "Your onnx model may be in training mode, please export " "it in test mode."; return false; } - const auto& input_defs = node.InputDefs(); + const auto& inputs = node_unit.Inputs(); Shape input_shape; - if (!GetShape(*input_defs[0], input_shape)) + if (!GetShape(inputs[0].node_arg, input_shape)) return false; const auto input_size = input_shape.size(); @@ -485,17 +484,17 @@ bool BatchNormalizationOpSupportChecker::IsOpSupportedImpl(const InitializedTens return false; } - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); const auto spatial = helper.Get("spatial", 1); if (spatial != 1) { LOGS_DEFAULT(VERBOSE) << "Non-spatial BN is not supported"; return false; } - const auto& scale_name = input_defs[1]->Name(); - const auto& b_name = input_defs[2]->Name(); - const auto& mean_name = input_defs[3]->Name(); - const auto& var_name = input_defs[4]->Name(); + const auto& scale_name = inputs[1].node_arg.Name(); + const auto& b_name = inputs[2].node_arg.Name(); + const auto& mean_name = inputs[3].node_arg.Name(); + const auto& var_name = inputs[4].node_arg.Name(); if (!Contains(initializers, scale_name)) { 
LOGS_DEFAULT(VERBOSE) << "Scale of BN must be known"; return false; @@ -552,25 +551,24 @@ class PoolOpSupportChecker : public BaseOpSupportChecker { bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); - const auto& op_name = node.Name(); - const auto& op_type = node.OpType(); - const auto& input_defs = node.InputDefs(); + const auto& op_name = node_unit.Name(); + const auto& op_type = node_unit.OpType(); + const auto& inputs = node_unit.Inputs(); Shape input_shape; - if (!GetShape(*input_defs[0], input_shape)) + if (!GetShape(inputs[0].node_arg, input_shape)) return false; const auto input_size = input_shape.size(); if (input_size != 4) { LOGS_DEFAULT(VERBOSE) << op_type << " only supports rank-4 tensor, input [" - << input_defs[0]->Name() << "] has actual dim count " << input_size; + << inputs[0].node_arg.Name() << "] has actual dim count " << input_size; return false; } bool is_qlinear_average_pool = op_type == "QLinearAveragePool"; if (op_type == "AveragePool" || op_type == "MaxPool" || is_qlinear_average_pool) { - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); const auto count_include_pad = helper.Get("count_include_pad", 0); if (count_include_pad == 1) { @@ -600,7 +598,7 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial return false; } - if (node.OutputDefs().size() != 1) { + if (node_unit.Outputs().size() != 1) { LOGS_DEFAULT(VERBOSE) << "Argmax in maxpooling is not supported"; return false; } @@ -611,12 +609,6 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial // We need to check if we have valid scales and zero points for QLinearAveragePool if (is_qlinear_average_pool) { - if (input_defs.size() < 4) - return false; - - // the output zero point can be optional - bool has_output_zp = input_defs.size() == 5; - // Check input scales and ZPs if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */)) return false; @@ -632,19 +624,23 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial // NNAPI requires Quantized Average Pool has same scale and zero point for both input and output float input_scale = 0.0f; - auto status = GetQuantizationScale(initializers, node, 1, input_scale); + int32_t input_zp = 0; + auto status = GetQuantizationScaleAndZeroPoint( + initializers, node_unit.Inputs()[0], node_unit.ModelPath(), input_scale, input_zp); if (!status.IsOK()) { LOGS_DEFAULT(ERROR) << "Op [" << op_type << "] name [" << op_name - << "] GetQuantizationScale for input_scale failed, message: " + << "] GetQuantizationScaleAndZeroPoint for input_scale/zp failed, message: " << status.ErrorMessage(); return false; } float output_scale = 0.0f; - status = GetQuantizationScale(initializers, node, 3, output_scale); + int32_t output_zp = 0; + status = GetQuantizationScaleAndZeroPoint( + initializers, node_unit.Outputs()[0], node_unit.ModelPath(), output_scale, output_zp); if (!status.IsOK()) { LOGS_DEFAULT(ERROR) << "Op [" << op_type << "] name [" << op_name - << "] GetQuantizationScale for output_scale failed, message: " + << "] GetQuantizationScaleAndZeroPoint for output_scale/zp failed, message: " << status.ErrorMessage(); return false; } @@ -656,26 +652,6 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial return false; } - int32_t input_zp = 0; - int32_t output_zp = 0; - status 
= GetQuantizationZeroPoint(initializers, node, 2, input_zp); - if (!status.IsOK()) { - LOGS_DEFAULT(ERROR) << "Op [" << op_type << "] name [" << op_name - << "] GetQuantizationZeroPoint for input_zp failed, message: " - << status.ErrorMessage(); - return false; - } - - if (has_output_zp) { - status = GetQuantizationZeroPoint(initializers, node, 4, output_zp); - if (!status.IsOK()) { - LOGS_DEFAULT(ERROR) << "Op [" << op_type << "] name [" << op_name - << "] GetQuantizationZeroPoint for output_zp failed, message: " - << status.ErrorMessage(); - return false; - } - } - if (input_zp != output_zp) { LOGS_DEFAULT(VERBOSE) << "Op [" << op_type << "] name [" << op_name << "] has different input_zp: " << input_zp @@ -758,21 +734,19 @@ bool ConvOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) con bool ConvOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); - const auto& op_type = node.OpType(); + const auto& op_type = node_unit.OpType(); const bool is_qlinear_conv = (op_type == "QLinearConv"); // We don't support nhwc com.microsoft.QLinearConv for now - if (is_qlinear_conv && node.Domain() == kMSDomain) { + if (is_qlinear_conv && node_unit.Domain() == kMSDomain) { LOGS_DEFAULT(VERBOSE) << "com.microsoft.QLinearConv is not supported"; return false; } - const auto input_defs = node.InputDefs(); - NodeAttrHelper helper(node); - size_t w_idx = is_qlinear_conv ? 3 : 1; + const auto& inputs = node_unit.Inputs(); + NodeAttrHelper helper(node_unit); const auto group = helper.Get("group", 1); - const auto weight_name = input_defs[w_idx]->Name(); + const auto weight_name = inputs[1].node_arg.Name(); if (Contains(initializers, weight_name)) { const auto& tensor = *initializers.at(weight_name); if (tensor.dims().size() != 4) { @@ -801,7 +775,7 @@ bool ConvOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial if (is_qlinear_conv) { // For QLinearConv, we only support uint8 output now int32_t output_type; - if (!GetType(*node.OutputDefs()[0], output_type)) + if (!GetType(node_unit.Outputs()[0].node_arg, output_type)) return false; if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) { @@ -811,7 +785,7 @@ bool ConvOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial return false; } - if (input_defs.size() > 8 && !Contains(initializers, input_defs[8]->Name())) { + if (inputs.size() > 2 && !Contains(initializers, inputs[2].node_arg.Name())) { LOGS_DEFAULT(VERBOSE) << "Bias of QLinearConv must be known"; return false; } @@ -852,8 +826,7 @@ class CastOpSupportChecker : public BaseOpSupportChecker { bool CastOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); const auto to = helper.Get("to", 0); if (to != ONNX_NAMESPACE::TensorProto::FLOAT && to != ONNX_NAMESPACE::TensorProto::INT32) { @@ -881,9 +854,8 @@ class SoftMaxOpSupportChecker : public BaseOpSupportChecker { bool SoftMaxOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(node_unit.Inputs()[0].node_arg, input_shape)) 
return false; const auto input_size = input_shape.size(); @@ -894,7 +866,7 @@ bool SoftMaxOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* i } if (params.android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) { - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); int32_t axis = helper.Get("axis", 1); if (axis != 1) { LOGS_DEFAULT(VERBOSE) @@ -990,19 +962,13 @@ int GemmOpSupportChecker::GetMinSupportedOpSet(const NodeUnit& node_unit) const bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); - const auto& op_type = node.OpType(); - const auto input_defs(node.InputDefs()); - size_t a_idx = 0, b_idx = 1, c_idx = 2; // A*B+C + const auto& op_type = node_unit.OpType(); + const auto& inputs = node_unit.Inputs(); bool is_qlinear_matmul = op_type == "QLinearMatMul"; - if (is_qlinear_matmul) { - a_idx = 0; - b_idx = 3; - } Shape a_shape; { - if (!GetShape(*input_defs[a_idx], a_shape)) + if (!GetShape(inputs[0].node_arg, a_shape)) return false; if (a_shape.size() != 2) { @@ -1013,7 +979,7 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial Shape b_shape; { - if (!GetShape(*input_defs[b_idx], b_shape)) + if (!GetShape(inputs[1].node_arg, b_shape)) return false; if (b_shape.size() != 2) { @@ -1026,7 +992,7 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial // Only support // 1. A*B'+C // 2. A*B+C and B is an initializer - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); const auto transA = helper.Get("transA", 0); const auto transB = helper.Get("transB", 0); const auto alpha = helper.Get("alpha", 1.0f); @@ -1042,14 +1008,14 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial return false; } - if (transB == 0 && !Contains(initializers, input_defs[b_idx]->Name())) { + if (transB == 0 && !Contains(initializers, inputs[1].node_arg.Name())) { LOGS_DEFAULT(VERBOSE) << "B of Gemm must be known if transB != 1"; return false; } - if (input_defs.size() == 3) { + if (inputs.size() == 3) { Shape c_shape; - if (!GetShape(*input_defs[c_idx], c_shape)) + if (!GetShape(inputs[2].node_arg, c_shape)) return false; uint32_t c_size; @@ -1067,7 +1033,7 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial } } else if (op_type == "MatMul" || is_qlinear_matmul) { // Only support A*B B is an initializer - if (!Contains(initializers, input_defs[b_idx]->Name())) { + if (!Contains(initializers, inputs[1].node_arg.Name())) { LOGS_DEFAULT(VERBOSE) << "B of MatMul must be known"; return false; } @@ -1075,7 +1041,7 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial if (is_qlinear_matmul) { // For QLinearMatMul, we only support uint8 output now int32_t output_type; - if (!GetType(*node.OutputDefs()[0], output_type)) + if (!GetType(node_unit.Outputs()[0].node_arg, output_type)) return false; if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) { @@ -1250,9 +1216,8 @@ class ConcatOpSupportChecker : public BaseOpSupportChecker { bool ConcatOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(node_unit.Inputs()[0].node_arg, input_shape)) return 
false; const auto input_size = input_shape.size(); @@ -1420,9 +1385,8 @@ class LRNOpSupportChecker : public BaseOpSupportChecker { bool LRNOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(node_unit.Inputs()[0].node_arg, input_shape)) return false; const auto input_size = input_shape.size(); @@ -1447,9 +1411,8 @@ class ClipOpSupportChecker : public BaseOpSupportChecker { bool ClipOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); float min, max; - if (!GetClipMinMax(initializers, node, min, max, logging::LoggingManager::DefaultLogger())) + if (!GetClipMinMax(initializers, node_unit.GetNode(), min, max, logging::LoggingManager::DefaultLogger())) return false; // We only supoort relu6 or relu1 @@ -1484,9 +1447,8 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker { bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) const { - const auto& node = node_unit.GetNode(); Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(node_unit.Inputs()[0].node_arg, input_shape)) return false; const auto input_size = input_shape.size(); @@ -1497,7 +1459,7 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi } { // check attributes - NodeAttrHelper helper(node); + NodeAttrHelper helper(node_unit); const auto mode = helper.Get("mode", "nearest"); bool is_linear_resize = mode == "linear"; bool is_nearest_resize = mode == "nearest"; @@ -1544,27 +1506,27 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi } { // scales and sizes (if present) must be initializers - const auto input_defs = node.InputDefs(); - if (input_defs.size() < 3) { + const auto inputs = node_unit.Inputs(); + if (inputs.size() < 3) { LOGS_DEFAULT(VERBOSE) << "Input scales or sizes of Resize must be known"; return false; } // scales - if (input_defs.size() == 3 && !Contains(initializers, input_defs[2]->Name())) { + if (inputs.size() == 3 && !Contains(initializers, inputs[2].node_arg.Name())) { LOGS_DEFAULT(VERBOSE) << "Input scales of Resize must be known"; return false; } // sizes - if (input_defs.size() > 3 && !Contains(initializers, input_defs[3]->Name())) { + if (inputs.size() > 3 && !Contains(initializers, inputs[3].node_arg.Name())) { LOGS_DEFAULT(VERBOSE) << "Input sizes of Resize must be known"; return false; } // We want to check if the scales or sizes are not trying to resize on N/C channels here - if (input_defs.size() == 3) { // we are using scales - const auto& scales_tensor = *initializers.at(input_defs[2]->Name()); + if (inputs.size() == 3) { // we are using scales + const auto& scales_tensor = *initializers.at(inputs[2].node_arg.Name()); std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(scales_tensor, unpacked_tensor); if (!status.IsOK()) { @@ -1582,7 +1544,7 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi } } else { // we are using sizes - const auto& sizes_name = input_defs[3]->Name(); + const auto& sizes_name = inputs[3].node_arg.Name(); const auto& sizes_tensor = 
*initializers.at(sizes_name); std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(sizes_tensor, unpacked_tensor); @@ -1647,9 +1609,8 @@ class FlattenOpSupportChecker : public BaseOpSupportChecker { bool FlattenOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(node_unit.Inputs()[0].node_arg, input_shape)) return false; if (input_shape.size() > 4 || input_shape.empty()) { @@ -1660,7 +1621,7 @@ bool FlattenOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* i int32_t dim_1 = 1; int32_t dim_2 = 1; - GetFlattenOutputShape(node, input_shape, dim_1, dim_2); + GetFlattenOutputShape_nu(node_unit, input_shape, dim_1, dim_2); if (dim_1 == 0 && dim_2 == 0) { LOGS_DEFAULT(VERBOSE) << "The dynamical input shape " << Shape2String(input_shape) @@ -1705,11 +1666,10 @@ class MinMaxOpSupportChecker : public BaseOpSupportChecker { bool MinMaxOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); // TODO support 2+ inputs for Min/Max op - if (node.InputDefs().size() != 2) { - LOGS_DEFAULT(VERBOSE) << "[" << node.OpType() << "] only supports 2 inputs, " - << "actual input number, " << node.InputDefs().size(); + if (node_unit.Inputs().size() != 2) { + LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] only supports 2 inputs, " + << "actual input number, " << node_unit.Inputs().size(); return false; } @@ -1751,9 +1711,8 @@ class SliceOpSupportChecker : public BaseOpSupportChecker { bool SliceOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const { - const auto& node = node_unit.GetNode(); Shape input_shape; - if (!GetShape(*node.InputDefs()[0], input_shape)) + if (!GetShape(node_unit.Inputs()[0].node_arg, input_shape)) return false; if (input_shape.size() > 4) { @@ -1768,19 +1727,19 @@ bool SliceOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initia return false; } - if (!CheckIsInitializer(initializers, node, 1, "starts")) { + if (!CheckIsInitializer(initializers, node_unit, node_unit.Inputs()[1].node_arg.Name(), "starts")) { return false; } - if (!CheckIsInitializer(initializers, node, 2, "ends")) { + if (!CheckIsInitializer(initializers, node_unit, node_unit.Inputs()[2].node_arg.Name(), "ends")) { return false; } - const auto& input_defs = node.InputDefs(); - if (input_defs.size() > 3) { - if (!CheckIsInitializer(initializers, node, 3, "axes")) { + const auto& inputs = node_unit.Inputs(); + if (inputs.size() > 3) { + if (!CheckIsInitializer(initializers, node_unit, node_unit.Inputs()[3].node_arg.Name(), "axes")) { return false; } - if (input_defs.size() > 4) { - if (!CheckIsInitializer(initializers, node, 4, "steps")) { + if (inputs.size() > 4) { + if (!CheckIsInitializer(initializers, node_unit, node_unit.Inputs()[4].node_arg.Name(), "steps")) { return false; } } From edff0b4a2b00f31be6b66242554c1278b7bacc1b Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 10 Jan 2022 21:19:55 -0800 Subject: [PATCH 17/23] remove function postfix --- .../nnapi/nnapi_builtin/builders/helper.cc | 4 +- .../nnapi/nnapi_builtin/builders/helper.h | 4 +- 
.../nnapi_builtin/builders/model_builder.cc | 6 +- .../nnapi_builtin/builders/model_builder.h | 4 +- .../nnapi_builtin/builders/op_builder.cc | 90 +++++++++---------- .../builders/op_support_checker.cc | 2 +- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 3cf271ae317b8..b6bb93a420b8f 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -68,7 +68,7 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node) { return QLinearOpType::Unknown; } -ConvType GetConvType_nu(const NodeUnit& node_unit, const InitializedTensorSet& initializers) { +ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers) { NodeAttrHelper helper(node_unit); const auto group = helper.Get("group", 1); @@ -406,7 +406,7 @@ bool GetType(const NodeArg& node_arg, int32_t& type) { return true; } -void GetFlattenOutputShape_nu(const NodeUnit& node_unit, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2) { +void GetFlattenOutputShape(const NodeUnit& node_unit, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2) { int32_t rank = static_cast(input_shape.size()); NodeAttrHelper helper(node_unit); int32_t axis = helper.Get("axis", 1); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index b3e3ffe43f072..22e15130baae7 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -97,7 +97,7 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node); // Return the type of the conv ops, // This function assumes the input is a 2d conv node -ConvType GetConvType_nu(const NodeUnit& node_unit, const InitializedTensorSet& initializers); +ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers); // This qlinear op is an operator takes 2 inputs and produces 1 output // Such as QLinearConv, QLinearMatMul, QLinearAdd, ... 
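// A minimal sketch of the decision the (renamed) GetConvType makes. The helper
// name and the exact checks below are assumptions for illustration only; the
// authoritative logic lives in helper.cc and is untouched by this rename.
//
//   ConvType GetConvTypeSketch(int64_t group, const Shape& weight_shape) {
//     // a 2d conv weight is laid out as {C_out, C_in / group, kH, kW}
//     if (group == 1)
//       return ConvType::Regular;    // ordinary convolution
//     if (weight_shape[1] == 1)
//       return ConvType::Depthwise;  // every group consumes a single channel
//     return ConvType::Grouped;      // general grouped convolution
//   }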
@@ -130,7 +130,7 @@ bool GetShape(const NodeArg& node_arg, Shape& shape); bool GetType(const NodeArg& node_arg, int32_t& type); // Get the output shape of Flatten Op -void GetFlattenOutputShape_nu(const NodeUnit& node_unit, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2); +void GetFlattenOutputShape(const NodeUnit& node_unit, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2); // If a node is supported by NNAPI bool IsNodeSupported(const NodeUnit& node_unit, const GraphViewer& graph_viewer, const OpSupportCheckParams& params); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index 7b5ac1aa6b0fb..bea1cc09ff5fd 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -55,7 +55,7 @@ Status ModelBuilder::Prepare() { PreprocessNodeUnits(); GetAllQuantizedOpInputs(); PreprocessInitializers(); - PreprocessActivations_nu(); + PreprocessActivations(); ORT_RETURN_IF_ERROR(RegisterInitializers()); ORT_RETURN_IF_ERROR(RegisterModelInputs()); ORT_RETURN_IF_ERROR(AddOperations()); @@ -122,7 +122,7 @@ void ModelBuilder::PreprocessInitializers() { } } -void ModelBuilder::PreprocessActivations_nu() { +void ModelBuilder::PreprocessActivations() { for (const auto& node_unit : node_unit_holder_) { const auto& node = node_unit->GetNode(); const auto& op_type(node.OpType()); @@ -623,7 +623,7 @@ Status ModelBuilder::Compile(std::unique_ptr& model) { return Status::OK(); } -int32_t ModelBuilder::FindActivation_nu(const NodeUnit& node_unit, const NodeArg& output) { +int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit, const NodeArg& output) { (void)node_unit; int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE; if (node_unit.GetOutputNodes().size() != 1) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 45a05773b853c..81b016423b27d 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -48,7 +48,7 @@ class ModelBuilder { const std::vector& is_nhwc_vec) ORT_MUST_USE_RESULT; // Find if an output has a fuseable activation (Relu) - int32_t FindActivation_nu(const NodeUnit& node_unit, const NodeArg& output); + int32_t FindActivation(const NodeUnit& node_unit, const NodeArg& output); // Add an NNAPI scalar operand Status AddOperandFromScalar(bool value, uint32_t& index) ORT_MUST_USE_RESULT; @@ -184,7 +184,7 @@ class ModelBuilder { // If a NNAPI operation will use initializers directly, we will add the initializers to the skip list void PreprocessInitializers(); // Preprocess all the activation nodes (Relu/Relu1/Relu6) for easy query later - void PreprocessActivations_nu(); + void PreprocessActivations(); // Copy and process all the initializers to NNAPI model Status RegisterInitializers() ORT_MUST_USE_RESULT; Status RegisterModelInputs() ORT_MUST_USE_RESULT; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index da6e1e9841620..6d0db0ac6c002 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -128,7 +128,7 @@ Status 
TransposeNCHWToNHWC(ModelBuilder& model_builder, // Convert the input from nchw to nhwc // Caller should ensure input is currently in nchw format using ModelBuilder::IsOperandNHWC -Status GetNHWCInput_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, size_t input_index, std::string& nhwc_input) { +Status GetNHWCInput(ModelBuilder& model_builder, const NodeUnit& node_unit, size_t input_index, std::string& nhwc_input) { const auto& nchw_input = node_unit.Inputs()[input_index].node_arg.Name(); if (!model_builder.GetNHWCOperand(nchw_input, nhwc_input)) { nhwc_input = model_builder.GetUniqueName(nchw_input + "_nchw_to_nhwc"); @@ -139,7 +139,7 @@ Status GetNHWCInput_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, s // Convert the input from nhwc to nchw // Caller should ensure input is currently in nhwc format using ModelBuilder::IsOperandNHWC -Status GetNCHWInput_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, size_t input_index, std::string& nchw_input) { +Status GetNCHWInput(ModelBuilder& model_builder, const NodeUnit& node_unit, size_t input_index, std::string& nchw_input) { const auto& nhwc_input = node_unit.Inputs()[input_index].node_arg.Name(); if (!model_builder.GetNCHWOperand(nhwc_input, nchw_input)) { nchw_input = model_builder.GetUniqueName(nhwc_input + "_nhwc_to_nchw"); @@ -152,12 +152,12 @@ Status GetNCHWInput_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, s // and return the layout type of output tensor // If both inputs have same layout, the output will have the same layout // Otherwise we will need transpose the nhwc input back to nchw, and output will be nchw -Status TransposeBinaryOpInputLayout_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, - std::string& input1, std::string& input2, - bool& output_is_nhwc) ORT_MUST_USE_RESULT; -Status TransposeBinaryOpInputLayout_nu(ModelBuilder& model_builder, const NodeUnit& node_unit, - std::string& input1, std::string& input2, - bool& output_is_nhwc) { +Status TransposeBinaryOpInputLayout(ModelBuilder& model_builder, const NodeUnit& node_unit, + std::string& input1, std::string& input2, + bool& output_is_nhwc) ORT_MUST_USE_RESULT; +Status TransposeBinaryOpInputLayout(ModelBuilder& model_builder, const NodeUnit& node_unit, + std::string& input1, std::string& input2, + bool& output_is_nhwc) { bool input1_is_nhwc = model_builder.IsOperandNHWC(input1); bool input2_is_nhwc = model_builder.IsOperandNHWC(input2); output_is_nhwc = false; @@ -166,10 +166,10 @@ Status TransposeBinaryOpInputLayout_nu(ModelBuilder& model_builder, const NodeUn output_is_nhwc = input1_is_nhwc; } else if (input1_is_nhwc) { // need transpose input1 back to nchw - ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input1)); + ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node_unit, 0, input1)); } else { // input2_is_nhwc // need transpose input2 back to nchw - ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 1, input2)); + ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node_unit, 1, input2)); } return Status::OK(); @@ -511,11 +511,11 @@ static Status HandleAutoPad(const Shape& input_shape, // Get scales and zero points for the qlinear binary ops (which has 2 input and 1 output) // QLinearConv, QLinearMatmul, QLinearAdd // a, b are inputs, and y is output -static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( +static Status GetBinaryOpQuantizationScaleAndZeroPoint( const InitializedTensorSet& initializers, const NodeUnit& node_unit, float& a_scale, float& b_scale, float& y_scale, 
int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) ORT_MUST_USE_RESULT; -static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( +static Status GetBinaryOpQuantizationScaleAndZeroPoint( const InitializedTensorSet& initializers, const NodeUnit& node_unit, float& a_scale, float& b_scale, float& y_scale, int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) { @@ -538,12 +538,12 @@ static Status GetBinaryOpQuantizationScaleAndZeroPoint_nu( // If the Qlinear[Conv/MatMul] is using per-tensor u8s8, the weight/B tensor // will be convert to uint8 later, will return the same scale and 128 as zero point // Also will set is_per_tensor_u8s8 to true to be used later -static Status GetConvMatMulOpQuantizationScaleAndZeroPoint_nu( +static Status GetConvMatMulOpQuantizationScaleAndZeroPoint( const ModelBuilder& model_builder, const NodeUnit& node_unit, float& a_scale, float& w_scale, float& y_scale, int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point, optional>& w_scales, bool& is_per_tensor_u8s8) ORT_MUST_USE_RESULT; -static Status GetConvMatMulOpQuantizationScaleAndZeroPoint_nu( +static Status GetConvMatMulOpQuantizationScaleAndZeroPoint( const ModelBuilder& model_builder, const NodeUnit& node_unit, float& a_scale, float& w_scale, float& y_scale, int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point, @@ -553,9 +553,9 @@ static Status GetConvMatMulOpQuantizationScaleAndZeroPoint_nu( // Get scale and zero points // We will handle per-channel weight scale and zero point later ORT_RETURN_IF_ERROR( - GetBinaryOpQuantizationScaleAndZeroPoint_nu(initializers, node_unit, - a_scale, w_scale, y_scale, - a_zero_point, w_zero_point, y_zero_point)); + GetBinaryOpQuantizationScaleAndZeroPoint(initializers, node_unit, + a_scale, w_scale, y_scale, + a_zero_point, w_zero_point, y_zero_point)); const auto& inputs = node_unit.Inputs(); const auto& weight_tensor = *initializers.at(inputs[1].node_arg.Name()); @@ -786,7 +786,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const bool output_is_nhwc = false; ORT_RETURN_IF_ERROR( - TransposeBinaryOpInputLayout_nu(model_builder, node_unit, input1, input2, output_is_nhwc)); + TransposeBinaryOpInputLayout(model_builder, node_unit, input1, input2, output_is_nhwc)); float a_scale = 0.0f, b_scale = 0.0f, @@ -796,7 +796,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const y_zero_point = 0; if (op_is_qlinear) { - ORT_RETURN_IF_ERROR(GetBinaryOpQuantizationScaleAndZeroPoint_nu( + ORT_RETURN_IF_ERROR(GetBinaryOpQuantizationScaleAndZeroPoint( model_builder.GetInitializerTensors(), node_unit, a_scale, b_scale, y_scale, a_zero_point, b_zero_point, y_zero_point)); @@ -810,7 +810,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE; if (add_activation) { - fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[0].node_arg); + fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg); } return AddBinaryOperator(op_code, model_builder, @@ -1032,7 +1032,7 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons auto input = node_unit.Inputs()[0].node_arg.Name(); if (model_builder.IsOperandNHWC(input)) { // We want to transpose nhwc operand back to nchw before reshape - ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node_unit, 0, input)); } 
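// The GetNCHWInput/GetNHWCInput helpers used here (and in the pool, conv, LRN,
// resize, softmax, squeeze, concat and flatten builders below) are cached
// layout conversions: they first ask the ModelBuilder for an already-converted
// operand (GetNCHWOperand / GetNHWCOperand) and only on a miss create a
// uniquely named operand, insert a Transpose, and record the mapping (via the
// Set*OperandMap calls declared in model_builder.h) so later uses of the same
// tensor reuse the converted operand instead of adding a second Transpose.
// A condensed restatement of that pattern, matching the helpers defined
// earlier in this file:
//
//   std::string nchw_input;
//   if (!model_builder.GetNCHWOperand(nhwc_input, nchw_input)) {
//     nchw_input = model_builder.GetUniqueName(nhwc_input + "_nhwc_to_nchw");
//     ORT_RETURN_IF_ERROR(TransposeNHWCToNCHW(model_builder, nhwc_input, nchw_input));
//   }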
const auto& shape_tensor = *initializers.at(node_unit.Inputs()[1].node_arg.Name()); @@ -1153,7 +1153,7 @@ Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_bu output_is_nhwc)); // Add - int32_t fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[0].node_arg); + int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg); ORT_RETURN_IF_ERROR(AddBinaryOperator(ANEURALNETWORKS_ADD, model_builder, tensor_imm_product_name, tensor_b_name, @@ -1221,7 +1221,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } else { output_is_nhwc = true; if (!input_is_nhwc) { - ORT_RETURN_IF_ERROR(GetNHWCInput_nu(model_builder, node_unit, 0, input)); + ORT_RETURN_IF_ERROR(GetNHWCInput(model_builder, node_unit, 0, input)); } } @@ -1266,7 +1266,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } } - int32_t fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[0].node_arg); + int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg); // Get output scale and zero point if this is QLinearAveragePool // Otherwise we will use the scale and zero point of the input @@ -1393,13 +1393,13 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } else { output_is_nhwc = true; if (!input_is_nhwc) { - ORT_RETURN_IF_ERROR(GetNHWCInput_nu(model_builder, node_unit, 0, input)); + ORT_RETURN_IF_ERROR(GetNHWCInput(model_builder, node_unit, 0, input)); } } const auto& weight = inputs[1].node_arg.Name(); const auto& weight_tensor = *initializers.at(weight); - auto conv_type = GetConvType_nu(node_unit, model_builder.GetInitializerTensors()); + auto conv_type = GetConvType(node_unit, model_builder.GetInitializerTensors()); bool conv_2d = (conv_type == ConvType::Regular), depthwise_conv_2d = (conv_type == ConvType::Depthwise), grouped_conv_2d = (conv_type == ConvType::Grouped); @@ -1415,10 +1415,10 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N optional> w_scales; bool is_per_tensor_u8s8 = false; if (is_qlinear_conv) { - ORT_RETURN_IF_ERROR(GetConvMatMulOpQuantizationScaleAndZeroPoint_nu(model_builder, node_unit, - x_scale, w_scale, y_scale, - x_zero_point, w_zero_point, y_zero_point, - w_scales, is_per_tensor_u8s8)); + ORT_RETURN_IF_ERROR(GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node_unit, + x_scale, w_scale, y_scale, + x_zero_point, w_zero_point, y_zero_point, + w_scales, is_per_tensor_u8s8)); } Shape onnx_weight_shape; @@ -1548,7 +1548,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N } } - int32_t fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[1].node_arg); + int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[1].node_arg); ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code); if (model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) { @@ -1651,7 +1651,7 @@ Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons if (model_builder.IsOperandNHWC(input)) { output_is_nhwc = false; // We want to transpose nhwc operand back to nchw before softmax - ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node_unit, 0, input)); } } @@ -1785,10 +1785,10 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N if 
(is_qlinear_matmul) { optional> w_scales; ORT_RETURN_IF_ERROR( - GetConvMatMulOpQuantizationScaleAndZeroPoint_nu(model_builder, node_unit, - a_scale, b_scale, y_scale, - a_zero_point, b_zero_point, y_zero_point, - w_scales, is_per_tensor_u8s8)); + GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node_unit, + a_scale, b_scale, y_scale, + a_zero_point, b_zero_point, y_zero_point, + w_scales, is_per_tensor_u8s8)); } uint32_t input_2_idx; @@ -1859,7 +1859,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N input_indices.push_back(operand_indices.at(input1)); // A input_indices.push_back(input_2_idx); // B input_indices.push_back(bias_idx); // C - int32_t fuse_code = model_builder.FindActivation_nu(node_unit, node_unit.Outputs()[0].node_arg); + int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg); ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code); ORT_RETURN_IF_ERROR(shaper.FC(input1, input2, output)); @@ -2040,7 +2040,7 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const for (size_t i = 0; i < node_input_size; i++) { auto input = inputs[i].node_arg.Name(); if (model_builder.IsOperandNHWC(input)) { - ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, i, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node_unit, i, input)); } input_indices.push_back(operand_indices.at(input)); input_names.push_back(input); @@ -2117,7 +2117,7 @@ Status SqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons auto input = node_unit.Inputs()[0].node_arg.Name(); if (model_builder.IsOperandNHWC(input)) { // We want to transpose nhwc operand back to nchw before squeeze - ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node_unit, 0, input)); } std::vector axes; @@ -2229,7 +2229,7 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const No // on android api level 28, we need to transpose the nchw input to nhwc output_is_nhwc = true; if (!model_builder.IsOperandNHWC(input)) { - ORT_RETURN_IF_ERROR(GetNHWCInput_nu(model_builder, node_unit, 0, input)); + ORT_RETURN_IF_ERROR(GetNHWCInput(model_builder, node_unit, 0, input)); } } @@ -2367,7 +2367,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const } else { output_is_nhwc = true; if (!input_is_nhwc) { - ORT_RETURN_IF_ERROR(GetNHWCInput_nu(model_builder, node_unit, 0, input)); + ORT_RETURN_IF_ERROR(GetNHWCInput(model_builder, node_unit, 0, input)); } } @@ -2445,7 +2445,7 @@ Status FlattenOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons auto input = node_unit.Inputs()[0].node_arg.Name(); if (model_builder.IsOperandNHWC(input)) { // We want to transpose nhwc operand back to nchw before reshape - ORT_RETURN_IF_ERROR(GetNCHWInput_nu(model_builder, node_unit, 0, input)); + ORT_RETURN_IF_ERROR(GetNCHWInput(model_builder, node_unit, 0, input)); } // Flatten is basically a reshape to 2d tensor @@ -2454,7 +2454,7 @@ Status FlattenOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons GetShape(node_unit.Inputs()[0].node_arg, input_shape); int32_t dim_1 = 1; int32_t dim_2 = 1; - GetFlattenOutputShape_nu(node_unit, input_shape, dim_1, dim_2); + GetFlattenOutputShape(node_unit, input_shape, dim_1, dim_2); // If the input is of dynamic shape, replace 0 (dynamic) dimension with -1 // We cannot have dim_1 and dim_2 both be 0 here, it was checked in IsOpSupportedImpl 
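// How GetFlattenOutputShape arrives at dim_1/dim_2, as a condensed sketch (the
// actual helper lives in helper.cc): Flatten reshapes its input to 2d around
// the `axis` attribute, i.e.
//   dim_1 = input_shape[0] * ... * input_shape[axis - 1]
//   dim_2 = input_shape[axis] * ... * input_shape[rank - 1]
// so a dynamic (0) dimension drives the corresponding product to 0, which is
// why the lines right below replace a 0 result with -1 before the 2d Reshape
// is built.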
dim_1 = dim_1 == 0 ? -1 : dim_1; @@ -2523,8 +2523,8 @@ Status MinMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const std::string input1 = inputs[0].node_arg.Name(); std::string input2 = inputs[1].node_arg.Name(); bool output_is_nhwc = false; - ORT_RETURN_IF_ERROR(TransposeBinaryOpInputLayout_nu(model_builder, node_unit, - input1, input2, output_is_nhwc)); + ORT_RETURN_IF_ERROR(TransposeBinaryOpInputLayout(model_builder, node_unit, + input1, input2, output_is_nhwc)); return AddMinMaxOperator(model_builder, node_unit, input1, input2, output_is_nhwc); } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc index 30854ca06cb52..015cba644047b 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc @@ -1621,7 +1621,7 @@ bool FlattenOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* i int32_t dim_1 = 1; int32_t dim_2 = 1; - GetFlattenOutputShape_nu(node_unit, input_shape, dim_1, dim_2); + GetFlattenOutputShape(node_unit, input_shape, dim_1, dim_2); if (dim_1 == 0 && dim_2 == 0) { LOGS_DEFAULT(VERBOSE) << "The dynamical input shape " << Shape2String(input_shape) From a551aff180fb22347b0546912d4c5bdad735e097 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Tue, 11 Jan 2022 00:12:47 -0800 Subject: [PATCH 18/23] fix NNAPI CI failure --- .../nnapi/nnapi_builtin/builders/op_support_checker.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc index 015cba644047b..452eec62a80c7 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc @@ -1268,15 +1268,15 @@ bool SqueezeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& init if (!GetShape(inputs[0].node_arg, input_shape)) return false; - const auto input_size = inputs.size(); - if (input_size > 4 || input_size == 0) { + const auto input_dim = input_shape.size(); + if (input_dim > 4 || input_dim == 0) { LOGS_DEFAULT(VERBOSE) << "Squeeze only supports 1-4d shape, input is " - << input_size << "d shape"; + << input_dim << "d shape"; return false; } // Squeeze opset 13 use input 1 as axes, if we have input 1 then it need to be an initializer - if (node_unit.SinceVersion() > 12 && input_size > 1) { + if (node_unit.SinceVersion() > 12 && inputs.size() > 1) { const auto& axes_name = inputs[1].node_arg.Name(); if (!Contains(initializers, axes_name)) { LOGS_DEFAULT(VERBOSE) << "Input axes of Squeeze must be known"; From 718e6b844dbb6c4f90f2e89c97b894563c62196b Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Wed, 12 Jan 2022 14:08:22 -0800 Subject: [PATCH 19/23] address CR comments --- .../nnapi/nnapi_builtin/builders/helper.cc | 5 +- .../nnapi/nnapi_builtin/builders/helper.h | 4 +- .../nnapi_builtin/builders/model_builder.cc | 4 +- .../nnapi_builtin/builders/model_builder.h | 3 - .../nnapi_builtin/builders/op_builder.cc | 57 ++++++++++--------- .../core/providers/shared/utils/utils.h | 4 +- 6 files changed, 37 insertions(+), 40 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 
b6bb93a420b8f..cab24bb93d51e 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -333,7 +333,6 @@ common::Status GetQuantizationScaleAndZeroPoint( const auto unpack_tensor = [&model_path](const InitializedTensorSet& initializers, const std::string& name, std::vector& unpacked_tensor) { - unpacked_tensor.clear(); const auto& tensor = *initializers.at(name); ORT_RETURN_IF_ERROR( onnxruntime::utils::UnpackInitializerData(tensor, model_path, unpacked_tensor)); @@ -535,9 +534,9 @@ std::string Shape2String(const std::vector& shape) { } bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit, - const std::string& input_name, const char* input_string) { + const std::string& input_name, const char* input_description) { if (!Contains(initializers, input_name)) { - LOGS_DEFAULT(VERBOSE) << input_string << " of " << node_unit.Name() << "of type [" + LOGS_DEFAULT(VERBOSE) << input_description << " of " << node_unit.Name() << "of type [" << node_unit.OpType() << "] must be an initializer tensor"; return false; } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 22e15130baae7..88fadf2a135ed 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -152,9 +152,9 @@ std::string Shape2String(const std::vector& shape); // Check the given input is an initializer tensor // input_name is the name of the initializer -// input_string is the string describing the input in the output message (if nay) +// input_description is the string describing the input in the output message (if any) bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit, - const std::string& input_name, const char* input_string) ORT_MUST_USE_RESULT; + const std::string& input_name, const char* input_description) ORT_MUST_USE_RESULT; } // namespace nnapi } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index bea1cc09ff5fd..b666125b86312 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -143,7 +143,8 @@ void ModelBuilder::PreprocessActivations() { } const NodeUnit& ModelBuilder::GetNodeUnit(const Node* node) const { - // Do we want to throw here if the node is not in the map? 
+ // In theory, if node_unit_map_ is generated correctly, see PreprocessNodeUnits(), a NodeUnit can be + // found for any single node in the graph_viewer_, unless the given node is not from graph_viewer_ return *node_unit_map_.at(node); } @@ -624,7 +625,6 @@ Status ModelBuilder::Compile(std::unique_ptr& model) { } int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit, const NodeArg& output) { - (void)node_unit; int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE; if (node_unit.GetOutputNodes().size() != 1) return fuse_code; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 81b016423b27d..7b188d1677b78 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -139,9 +139,6 @@ class ModelBuilder { std::unordered_set skipped_initializers_; - // All activation nodes (Relu, Relu1, Relu6) as a map - std::unordered_map activation_nodes_; - // All activation nodes (Relu, Relu1, Relu6) as a map std::unordered_map activation_node_units_; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 6d0db0ac6c002..1faaffaee30bb 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -651,13 +651,14 @@ static Status IsValidConvWeightQuantizedType(const ModelBuilder& model_builder, return Status::OK(); } -static void AddQuantizationScaleAndZeroPointToSkip(ModelBuilder& model_builder, const NodeUnitIODef& io_def) { +static void AddQuantizationScaleAndZeroPointToSkip(ModelBuilder& model_builder, + const NodeUnitIODef::QuantParam& quant_param) { // If we reach here, we assume the io_def has quant_param - model_builder.AddInitializerToSkip(io_def.quant_param->scale.Name()); // scale - LOGS_DEFAULT(VERBOSE) << io_def.quant_param->scale.Name() << "is skipped"; - if (io_def.quant_param->zero_point) { - model_builder.AddInitializerToSkip(io_def.quant_param->zero_point->Name()); // zero_point - LOGS_DEFAULT(VERBOSE) << io_def.quant_param->zero_point->Name() << "is skipped"; + model_builder.AddInitializerToSkip(quant_param.scale.Name()); // scale + LOGS_DEFAULT(VERBOSE) << quant_param.scale.Name() << "is skipped"; + if (quant_param.zero_point) { + model_builder.AddInitializerToSkip(quant_param.zero_point->Name()); // zero_point + LOGS_DEFAULT(VERBOSE) << quant_param.zero_point->Name() << "is skipped"; } } @@ -666,7 +667,7 @@ static void AddQuantizationScaleAndZeroPointToSkip(ModelBuilder& model_builder, static void AddInputToSkip(ModelBuilder& model_builder, const NodeUnitIODef& io_def) { model_builder.AddInitializerToSkip(io_def.node_arg.Name()); // main input if (io_def.quant_param) - AddQuantizationScaleAndZeroPointToSkip(model_builder, io_def); + AddQuantizationScaleAndZeroPointToSkip(model_builder, *io_def.quant_param); } template @@ -725,7 +726,7 @@ class BinaryOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: - static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder + static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder Status 
AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; @@ -739,9 +740,9 @@ void BinaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const N return; const auto& inputs = node_unit.Inputs(); - AddQuantizationScaleAndZeroPointToSkip(model_builder, inputs[0]); // a_scale, a_zp - AddQuantizationScaleAndZeroPointToSkip(model_builder, inputs[1]); // b_scale, b_zp - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[0].quant_param); // a_scale, a_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[1].quant_param); // b_scale, b_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp } /* static */ void BinaryOpBuilder::CreateSharedOpBuilder( @@ -1174,7 +1175,7 @@ class PoolOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: - static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder + static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; @@ -1188,8 +1189,8 @@ void PoolOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod return; // skip input/output scales and zeropoints - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Inputs()[0]); // x_scale, x_zp - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Inputs()[0].quant_param); // x_scale, x_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp } /* static */ void PoolOpBuilder::CreateSharedOpBuilder( @@ -1328,7 +1329,7 @@ class ConvOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: - static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder + static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; @@ -1352,9 +1353,9 @@ void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod const auto& inputs = node_unit.Inputs(); // skip the weight for conv as we need to transpose if (IsQuantizedOp(node_unit)) { - AddQuantizationScaleAndZeroPointToSkip(model_builder, inputs[0]); // x_scale, x_zp - AddInputToSkip(model_builder, inputs[1]); // w, w_scale, w_zp - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[0].quant_param); // x_scale, x_zp + AddInputToSkip(model_builder, inputs[1]); // w, w_scale, w_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp if (inputs.size() > 2) AddInputToSkip(model_builder, inputs[2]); // B, B_scale, B_zp } else { @@ -1548,7 +1549,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& 
model_builder, const N } } - int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[1].node_arg); + int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg); ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code); if (model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) { @@ -1718,7 +1719,7 @@ class GemmOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: - static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder + static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; @@ -1741,9 +1742,9 @@ class GemmOpBuilder : public BaseOpBuilder { void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { const auto& inputs = node_unit.Inputs(); if (IsQuantizedOp(node_unit)) { - AddQuantizationScaleAndZeroPointToSkip(model_builder, inputs[0]); // b_scale, b_zp - AddInputToSkip(model_builder, inputs[1]); // b, b_scale, b_zp - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *inputs[0].quant_param); // b_scale, b_zp + AddInputToSkip(model_builder, inputs[1]); // b, b_scale, b_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp } else { const auto& op = node_unit.OpType(); const auto& inputs = node_unit.Inputs(); @@ -1879,7 +1880,7 @@ class UnaryOpBuilder : public BaseOpBuilder { static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); private: - static bool IsQuantizedOp(const NodeUnit& node_unit); // TODO, see if we want to move this to BaseOpBuilder + static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT; }; @@ -1892,8 +1893,8 @@ void UnaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const No if (!IsQuantizedOp(node_unit)) return; - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Inputs()[0]); // x_scale, x_zp - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Inputs()[0].quant_param); // x_scale, x_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp } /* static */ void UnaryOpBuilder::CreateSharedOpBuilder( @@ -2138,7 +2139,7 @@ class QuantizeLinearOpBuilder : public BaseOpBuilder { }; void QuantizeLinearOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Outputs()[0]); // y_scale, y_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param); // y_scale, y_zp } Status QuantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { @@ -2177,7 +2178,7 @@ class DequantizeLinearOpBuilder : public BaseOpBuilder { }; void 
DequantizeLinearOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const { - AddQuantizationScaleAndZeroPointToSkip(model_builder, node_unit.Inputs()[0]); // x_scale, x_zp + AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Inputs()[0].quant_param); // x_scale, x_zp } Status DequantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const { diff --git a/onnxruntime/core/providers/shared/utils/utils.h b/onnxruntime/core/providers/shared/utils/utils.h index b6884f53d1c2e..26898aa95e893 100644 --- a/onnxruntime/core/providers/shared/utils/utils.h +++ b/onnxruntime/core/providers/shared/utils/utils.h @@ -35,10 +35,10 @@ bool GetType(const NodeArg& node_arg, int32_t& type, const logging::Logger& logg */ class NodeAttrHelper { public: - NodeAttrHelper(const Node& node); + explicit NodeAttrHelper(const Node& node); // Get the attributes from the target node of the node_unit - NodeAttrHelper(const NodeUnit& node_unit); + explicit NodeAttrHelper(const NodeUnit& node_unit); float Get(const std::string& key, float def_val) const; From 8d0692b75e53c44de9812e501cd324c170d9dbe2 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Wed, 12 Jan 2022 16:30:48 -0800 Subject: [PATCH 20/23] remove redudant ORT_MUST_USE_RESULT, and clear includes --- .../nnapi/nnapi_builtin/builders/helper.cc | 14 +- .../nnapi/nnapi_builtin/builders/helper.h | 2 +- .../nnapi_builtin/builders/model_builder.cc | 15 +- .../nnapi_builtin/builders/model_builder.h | 62 +++--- .../nnapi_builtin/builders/op_builder.cc | 141 ++++--------- .../nnapi/nnapi_builtin/builders/op_builder.h | 6 +- .../builders/op_support_checker.cc | 10 +- .../nnapi/nnapi_builtin/builders/shaper.cc | 2 +- .../nnapi/nnapi_builtin/builders/shaper.h | 187 +++++++++--------- .../providers/nnapi/nnapi_builtin/model.cc | 4 +- .../providers/nnapi/nnapi_builtin/model.h | 15 +- .../nnapi_builtin/nnapi_execution_provider.cc | 8 - 12 files changed, 189 insertions(+), 277 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index cab24bb93d51e..a4097d6716026 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -5,16 +5,16 @@ #include #include -#include -#include -#include -#include -#include -#include +#include "helper.h" +#include "core/common/safeint.h" +#include "core/common/logging/logging.h" +#include "core/framework/tensorprotoutils.h" +#include "core/graph/graph.h" +#include "core/graph/graph_viewer.h" +#include "core/providers/common.h" #include "core/providers/shared/node_unit/node_unit.h" #include "core/providers/shared/utils/utils.h" -#include "helper.h" #include "op_support_checker.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 88fadf2a135ed..c3729fb1c8f10 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -154,7 +154,7 @@ std::string Shape2String(const std::vector& shape); // input_name is the name of the initializer // input_description is the string describing the input in the output message (if any) bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit, - const std::string& input_name, const 
char* input_description) ORT_MUST_USE_RESULT; + const std::string& input_name, const char* input_description); } // namespace nnapi } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index b666125b86312..56be9aa755ff7 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -1,16 +1,19 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include -#include -#include +#include "model_builder.h" +#include "core/common/logging/logging.h" +#include "core/common/safeint.h" +#include "core/common/status.h" +#include "core/framework/tensorprotoutils.h" +#include "core/graph/graph_viewer.h" #include "core/providers/common.h" #include "core/providers/shared/node_unit/node_unit.h" #include "core/providers/shared/utils/utils.h" #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h" + #include "helper.h" -#include "model_builder.h" #include "op_builder.h" #include "op_support_checker.h" @@ -689,6 +692,10 @@ std::string ModelBuilder::GetUniqueName(const std::string& base_name) { return unique_name; } +const InitializedTensorSet& ModelBuilder::GetInitializerTensors() const { + return graph_viewer_.GetAllInitializedTensors(); +} + void ModelBuilder::RegisterNHWCOperand(const std::string& name) { nhwc_operands_.insert(name); } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 7b188d1677b78..0fbeec7dc5d98 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -5,19 +5,22 @@ #include #include -#include +#include "core/graph/basic_types.h" #include "core/providers/nnapi/nnapi_builtin/model.h" #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h" -#include "op_support_checker.h" #include "shaper.h" namespace onnxruntime { +class GraphViewer; class NodeUnit; +class Node; +class NodeArg; namespace nnapi { class IOpBuilder; +class IOpSupportChecker; class ModelBuilder { public: @@ -37,28 +40,28 @@ class ModelBuilder { ModelBuilder(const GraphViewer& graph_viewer); - Status Compile(std::unique_ptr& model) ORT_MUST_USE_RESULT; + common::Status Compile(std::unique_ptr& model); int32_t GetNNAPIFeatureLevel() const; // Add an NNAPI operation (operator) - Status AddOperation(int op, const std::vector& input_indices, - const std::vector& output_names, - const std::vector& types, - const std::vector& is_nhwc_vec) ORT_MUST_USE_RESULT; + common::Status AddOperation(int op, const std::vector& input_indices, + const std::vector& output_names, + const std::vector& types, + const std::vector& is_nhwc_vec); // Find if an output has a fuseable activation (Relu) int32_t FindActivation(const NodeUnit& node_unit, const NodeArg& output); // Add an NNAPI scalar operand - Status AddOperandFromScalar(bool value, uint32_t& index) ORT_MUST_USE_RESULT; - Status AddOperandFromScalar(float value, uint32_t& index) ORT_MUST_USE_RESULT; - Status AddOperandFromScalar(int32_t value, uint32_t& index) ORT_MUST_USE_RESULT; + common::Status AddOperandFromScalar(bool value, uint32_t& index); + common::Status AddOperandFromScalar(float value, uint32_t& index); + common::Status 
AddOperandFromScalar(int32_t value, uint32_t& index);
 
   // Add an NNAPI tensor operand (and allocate persist buffer)
-  Status AddOperandFromPersistMemoryBuffer(
+  common::Status AddOperandFromPersistMemoryBuffer(
       const std::string& name, const void* buffer,
-      const android::nn::wrapper::OperandType& operand_type) ORT_MUST_USE_RESULT;
+      const android::nn::wrapper::OperandType& operand_type);
 
   // The initializer will be processed separately, skip it as an initializer
   void AddInitializerToSkip(const std::string& tensor_name);
@@ -98,7 +101,7 @@ class ModelBuilder {
 
   const std::unordered_set& GetFusedActivations() const { return fused_activations_; }
 
-  const InitializedTensorSet& GetInitializerTensors() const { return graph_viewer_.GetAllInitializedTensors(); }
+  const InitializedTensorSet& GetInitializerTensors() const;
 
   const GraphViewer& GetGraphViewer() const { return graph_viewer_; }
 
@@ -112,10 +115,10 @@ class ModelBuilder {
   // Get the NodeUnit which contains the given node
   const NodeUnit& GetNodeUnit(const Node* node) const;
 
-  Status SetNHWCToNCHWOperandMap(const std::string& nhwc_name,
-                                 const std::string& nchw_name) ORT_MUST_USE_RESULT;
-  Status SetNCHWToNHWCOperandMap(const std::string& nchw_name,
-                                 const std::string& nhwc_name) ORT_MUST_USE_RESULT;
+  common::Status SetNHWCToNCHWOperandMap(const std::string& nhwc_name,
+                                         const std::string& nchw_name);
+  common::Status SetNCHWToNHWCOperandMap(const std::string& nchw_name,
+                                         const std::string& nhwc_name);
 
  private:
   const NnApi* nnapi_{nullptr};
@@ -174,19 +177,19 @@ class ModelBuilder {
   uint32_t next_index_ = 0;
 
   // Convert the onnx model to ANeuralNetworksModel
-  Status Prepare() ORT_MUST_USE_RESULT;
+  common::Status Prepare();
 
-  Status GetTargetDevices() ORT_MUST_USE_RESULT;
+  common::Status GetTargetDevices();
 
   // If a NNAPI operation will use initializers directly, we will add the initializers to the skip list
   void PreprocessInitializers();
   // Preprocess all the activation nodes (Relu/Relu1/Relu6) for easy query later
   void PreprocessActivations();
   // Copy and process all the initializers to NNAPI model
-  Status RegisterInitializers() ORT_MUST_USE_RESULT;
-  Status RegisterModelInputs() ORT_MUST_USE_RESULT;
-  Status AddOperations() ORT_MUST_USE_RESULT;
-  Status RegisterModelOutputs() ORT_MUST_USE_RESULT;
+  common::Status RegisterInitializers();
+  common::Status RegisterModelInputs();
+  common::Status AddOperations();
+  common::Status RegisterModelOutputs();
 
   // After constructing the NNAPI model, will set the shape inferencing record to the Model
   void RegisterModelShaper();
@@ -197,14 +200,13 @@ class ModelBuilder {
   // using the result of PreprocessNodeUnits, this need to run early in the Prepare()
   void PreprocessNodeUnits();
 
-  Status SetOperandValue(uint32_t index, Model::NNMemory* memory,
-                         size_t size, size_t offset) ORT_MUST_USE_RESULT;
+  common::Status SetOperandValue(uint32_t index, Model::NNMemory* memory, size_t size, size_t offset);
 
-  Status AddNewNNAPIOperand(const android::nn::wrapper::OperandType& type, uint32_t& index) ORT_MUST_USE_RESULT;
-  Status AddNewOperand(const std::string& name,
-                       const android::nn::wrapper::OperandType& operand_type,
-                       bool is_nhwc,
-                       uint32_t& index) ORT_MUST_USE_RESULT;
+  common::Status AddNewNNAPIOperand(const android::nn::wrapper::OperandType& type, uint32_t& index);
+  common::Status AddNewOperand(const std::string& name,
+                               const android::nn::wrapper::OperandType& operand_type,
+                               bool is_nhwc,
+                               uint32_t& index);
 
   static const IOpBuilder* GetOpBuilder(const NodeUnit& node_unit);
 };
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
index 1faaffaee30bb..9689f72156eba 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
@@ -3,12 +3,13 @@
 
 #include "op_builder.h"
 
-#include
-#include
-#include
-#include
 #include
 
+#include "core/common/logging/logging.h"
+#include "core/common/safeint.h"
+#include "core/framework/tensorprotoutils.h"
+#include "core/graph/graph_viewer.h"
+#include "core/providers/common.h"
 #include "core/providers/shared/utils/utils.h"
 #include "core/providers/shared/node_unit/node_unit.h"
 #include "core/providers/cpu/tensor/slice_helper.h"
@@ -35,12 +36,6 @@ struct OpBuilderRegistrations {
     input_indices.push_back(_index);                      \
   }
 
-Status AddTransposeOperator(ModelBuilder& model_builder,
-                            const std::string& input,
-                            const std::string& perm_name,
-                            std::vector perm,
-                            const std::string& output,
-                            bool output_is_nhwc) ORT_MUST_USE_RESULT;
 Status AddTransposeOperator(ModelBuilder& model_builder,
                             const std::string& input,
                             const std::string& perm_name,
@@ -67,10 +62,6 @@ Status AddTransposeOperator(ModelBuilder& model_builder,
                                                  {output_operand_type}, {output_is_nhwc});
 }
 
-Status TransposeBetweenNCHWAndNHWC(ModelBuilder& model_builder,
-                                   const std::string& input,
-                                   const std::string& output,
-                                   bool nchw_to_nhwc) ORT_MUST_USE_RESULT;
 Status TransposeBetweenNCHWAndNHWC(ModelBuilder& model_builder,
                                    const std::string& input,
                                    const std::string& output,
@@ -108,18 +99,12 @@ Status TransposeBetweenNCHWAndNHWC(ModelBuilder& model_builder,
   return Status::OK();
 }
 
-Status TransposeNHWCToNCHW(ModelBuilder& model_builder,
-                           const std::string& input,
-                           const std::string& output) ORT_MUST_USE_RESULT;
 Status TransposeNHWCToNCHW(ModelBuilder& model_builder,
                            const std::string& input,
                            const std::string& output) {
   return TransposeBetweenNCHWAndNHWC(model_builder, input, output, false /* nchw_to_nhwc */);
 }
 
-Status TransposeNCHWToNHWC(ModelBuilder& model_builder,
-                           const std::string& input,
-                           const std::string& output) ORT_MUST_USE_RESULT;
 Status TransposeNCHWToNHWC(ModelBuilder& model_builder,
                            const std::string& input,
                            const std::string& output) {
@@ -152,9 +137,6 @@ Status GetNCHWInput(ModelBuilder& model_builder, const NodeUnit& node_unit, size
 // and return the layout type of output tensor
 // If both inputs have same layout, the output will have the same layout
 // Otherwise we will need transpose the nhwc input back to nchw, and output will be nchw
-Status TransposeBinaryOpInputLayout(ModelBuilder& model_builder, const NodeUnit& node_unit,
-                                    std::string& input1, std::string& input2,
-                                    bool& output_is_nhwc) ORT_MUST_USE_RESULT;
 Status TransposeBinaryOpInputLayout(ModelBuilder& model_builder, const NodeUnit& node_unit,
                                     std::string& input1, std::string& input2,
                                     bool& output_is_nhwc) {
@@ -184,17 +166,7 @@ static Status AddBinaryOperator(int32_t op_type,
                                 const std::string& output,
                                 bool output_is_nhwc,
                                 float output_scale = 0.0f,
-                                int32_t output_zero_point = 0) ORT_MUST_USE_RESULT;
-static Status AddBinaryOperator(int32_t op_type,
-                                ModelBuilder& model_builder,
-                                const std::string& input1,
-                                const std::string& input2,
-                                bool add_activation,
-                                int32_t fuse_code,
-                                const std::string& output,
-                                bool output_is_nhwc,
-                                float output_scale,
-                                int32_t output_zero_point) {
+                                int32_t output_zero_point = 0) {
   auto& shaper(model_builder.GetShaper());
   const auto& operand_indices(model_builder.GetOperandIndices());
   const auto& operand_types(model_builder.GetOperandTypes());
@@ -215,10 +187,6 @@ static Status AddBinaryOperator(int32_t op_type,
   return Status::OK();
 }
 
-static Status AddSqueezeOp(ModelBuilder& model_builder,
-                           const std::string& node_name,
-                           const std::string& input, const std::string& output,
-                           std::vector axes) ORT_MUST_USE_RESULT;
 static Status AddSqueezeOp(ModelBuilder& model_builder,
                            const std::string& node_name,
                            const std::string& input, const std::string& output,
@@ -279,11 +247,6 @@ enum DataLayout {
 // since NNAPI requires X and W to be same type for per-tensor quantization,
 // the initializer tensor W will be converted from int8 to uint8 by flip each byte by XOR 0x80
 // byte ^ 0x80 == byte + 128
-static Status AddInitializerInNewLayout(ModelBuilder& model_builder,
-                                        const std::string& name,
-                                        const OperandType& source_operand_type,
-                                        DataLayout new_layout,
-                                        bool is_per_tensor_u8s8) ORT_MUST_USE_RESULT;
 static Status AddInitializerInNewLayout(ModelBuilder& model_builder,
                                         const std::string& name,
                                         const OperandType& source_operand_type,
@@ -369,10 +332,6 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder,
 // and input B is signed int8), in this case, since NNAPI requires A and B to be same type,
 // the initializer tensor B will be converted from int8 to uint8 by flip each byte by XOR 0x80
 // byte ^ 0x80 == byte + 128
-static Status AddInitializerTransposed(ModelBuilder& model_builder,
-                                       const OperandType& source_operand_type,
-                                       const std::string& name,
-                                       bool is_per_tensor_u8s8) ORT_MUST_USE_RESULT;
 static Status AddInitializerTransposed(ModelBuilder& model_builder,
                                        const OperandType& source_operand_type,
                                        const std::string& name,
@@ -421,12 +380,6 @@ static Status AddInitializerTransposed(ModelBuilder& model_builder,
   return model_builder.AddOperandFromPersistMemoryBuffer(name, &buffer[0], operand_type);
 }
 
-static Status ComputeConvPads(
-    const Shape& input_dimen,
-    const uint32_t weight_size_y, const uint32_t weight_size_x,
-    const std::vector& onnx_pads, const std::vector& onnx_strides, const std::vector& onnx_dilations,
-    AutoPadType auto_pad_type, bool nchw,
-    std::vector& pads_out) ORT_MUST_USE_RESULT;
 static Status ComputeConvPads(
     const Shape& input_dimen,
     const uint32_t weight_size_y, const uint32_t weight_size_x,
@@ -460,16 +413,6 @@ static Status ComputeConvPads(
   return Status::OK();
 }
 
-static Status HandleAutoPad(const Shape& input_shape,
-                            const uint32_t weight_size_y,
-                            const uint32_t weight_size_x,
-                            const std::vector& onnx_strides,
-                            const std::vector& onnx_dilations,
-                            AutoPadType auto_pad_type,
-                            bool use_nchw,
-                            std::vector& onnx_pads,
-                            int32_t& nnapi_padding_code,
-                            bool& use_auto_pad) ORT_MUST_USE_RESULT;
 static Status HandleAutoPad(const Shape& input_shape,
                             const uint32_t weight_size_y,
                             const uint32_t weight_size_x,
@@ -511,10 +454,6 @@ static Status HandleAutoPad(const Shape& input_shape,
 // Get scales and zero points for the qlinear binary ops (which has 2 input and 1 output)
 // QLinearConv, QLinearMatmul, QLinearAdd
 // a, b are inputs, and y is output
-static Status GetBinaryOpQuantizationScaleAndZeroPoint(
-    const InitializedTensorSet& initializers, const NodeUnit& node_unit,
-    float& a_scale, float& b_scale, float& y_scale,
-    int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) ORT_MUST_USE_RESULT;
 static Status GetBinaryOpQuantizationScaleAndZeroPoint(
     const InitializedTensorSet& initializers, const NodeUnit& node_unit,
     float& a_scale, float& b_scale, float& y_scale,
@@ -538,11 +477,6 @@ static Status GetBinaryOpQuantizationScaleAndZeroPoint(
 // If the Qlinear[Conv/MatMul] is using per-tensor u8s8, the weight/B tensor
 // will be convert to uint8 later, will return the same scale and 128 as zero point
 // Also will set is_per_tensor_u8s8 to true to be used later
-static Status GetConvMatMulOpQuantizationScaleAndZeroPoint(
-    const ModelBuilder& model_builder, const NodeUnit& node_unit,
-    float& a_scale, float& w_scale, float& y_scale,
-    int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point,
-    optional>& w_scales, bool& is_per_tensor_u8s8) ORT_MUST_USE_RESULT;
 static Status GetConvMatMulOpQuantizationScaleAndZeroPoint(
     const ModelBuilder& model_builder, const NodeUnit& node_unit,
     float& a_scale, float& w_scale, float& y_scale,
@@ -596,10 +530,6 @@ static Status GetConvMatMulOpQuantizationScaleAndZeroPoint(
 // NNAPI has the quantization scale and zero point embedded in the ANeuralNetworksOperandType
 // ONNX has the quantization scale and zero point as the inputs of the qlinear operators
 // We want to verify the scale and zeropoint of the ONNX inputs matches the values embedded in the NNAPI inputs
-static Status IsValidInputQuantizedType(const ModelBuilder& model_builder,
-                                        const std::string& input_name,
-                                        float scale,
-                                        int32_t zero_point) ORT_MUST_USE_RESULT;
 static Status IsValidInputQuantizedType(const ModelBuilder& model_builder,
                                         const std::string& input_name,
                                         float scale,
@@ -622,11 +552,6 @@ static Status IsValidInputQuantizedType(const ModelBuilder& model_builder,
   return Status::OK();
 }
 
-static Status IsValidConvWeightQuantizedType(const ModelBuilder& model_builder,
-                                             const std::string& input_name,
-                                             float scale,
-                                             int32_t zero_point,
-                                             const optional>& scales) ORT_MUST_USE_RESULT;
 static Status IsValidConvWeightQuantizedType(const ModelBuilder& model_builder,
                                              const std::string& input_name,
                                              float scale,
@@ -692,10 +617,10 @@ class BaseOpBuilder : public IOpBuilder {
  public:
   virtual ~BaseOpBuilder() = default;
   virtual void AddInitializersToSkip(ModelBuilder& /* model_builder */, const NodeUnit& /* node_unit */) const override {}
-  Status AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const override final ORT_MUST_USE_RESULT;
+  Status AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const override final;
 
  protected:
-  virtual Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const ORT_MUST_USE_RESULT = 0;
+  virtual Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const = 0;
   static bool IsOpSupported(const ModelBuilder& model_builder, const NodeUnit& node_unit) ORT_MUST_USE_RESULT;
 };
 
@@ -727,7 +652,7 @@ class BinaryOpBuilder : public BaseOpBuilder {
  private:
   static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 /* static */ bool BinaryOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
@@ -826,7 +751,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
 
 class ReluOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status ReluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -860,7 +785,7 @@ Status ReluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
 
 class TransposeOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -906,10 +831,10 @@ class ReshapeOpBuilder : public BaseOpBuilder {
  public:
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
   static Status AddReshapeOperator(ModelBuilder& model_builder, const NodeUnit& node_unit,
-                                   const std::string& input, const std::vector& shape) ORT_MUST_USE_RESULT;
+                                   const std::string& input, const std::vector& shape);
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
   static bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit,
                              size_t input_rank, size_t output_rank);
 };
@@ -1062,7 +987,7 @@ class BatchNormalizationOpBuilder : public BaseOpBuilder {
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 void BatchNormalizationOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -1176,7 +1101,7 @@ class PoolOpBuilder : public BaseOpBuilder {
  private:
   static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 /* static */ bool PoolOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
@@ -1330,7 +1255,7 @@ class ConvOpBuilder : public BaseOpBuilder {
  private:
   static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 /* static */ bool ConvOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
@@ -1595,7 +1520,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
 
 class CastOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status CastOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -1635,7 +1560,7 @@ Status CastOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
 
 class SoftMaxOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -1686,7 +1611,7 @@ Status SoftMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons
 
 class IdentityOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status IdentityOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -1720,7 +1645,7 @@ class GemmOpBuilder : public BaseOpBuilder {
  private:
   static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 /* static */ bool GemmOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
@@ -1881,7 +1806,7 @@ class UnaryOpBuilder : public BaseOpBuilder {
  private:
   static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT;  // TODO, see if we want to move this to BaseOpBuilder
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 /* static */ bool UnaryOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
@@ -1982,7 +1907,7 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
 
 class ConcatOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -2078,7 +2003,7 @@ class SqueezeOpBuilder : public BaseOpBuilder {
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
   static Status GetAxes(ModelBuilder& model_builder, const NodeUnit& node_unit, std::vector& axes);
 };
 
@@ -2135,7 +2060,7 @@ class QuantizeLinearOpBuilder : public BaseOpBuilder {
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 void QuantizeLinearOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -2174,7 +2099,7 @@ class DequantizeLinearOpBuilder : public BaseOpBuilder {
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 void DequantizeLinearOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -2213,7 +2138,7 @@ Status DequantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_buil
 
 class LRNOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -2274,7 +2199,7 @@ class ClipOpBuilder : public BaseOpBuilder {
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 void ClipOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -2333,7 +2258,7 @@ class ResizeOpBuilder : public BaseOpBuilder {
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -2439,7 +2364,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
 
 class FlattenOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status FlattenOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -2473,10 +2398,10 @@ class MinMaxOpBuilder : public BaseOpBuilder {
   static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
   static Status AddMinMaxOperator(ModelBuilder& model_builder, const NodeUnit& node_unit,
                                   const std::string& input1, const std::string& input2,
-                                  bool output_is_nhwc) ORT_MUST_USE_RESULT;
+                                  bool output_is_nhwc);
 };
 
 /* static */ void MinMaxOpBuilder::CreateSharedOpBuilder(
@@ -2536,7 +2461,7 @@ Status MinMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
 
 class EluOpBuilder : public BaseOpBuilder {
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 Status EluOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
@@ -2566,7 +2491,7 @@ class SliceOpBuilder : public BaseOpBuilder {
   void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 
  private:
-  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override ORT_MUST_USE_RESULT;
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const override;
 };
 
 void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.h
index f25a6591040bc..6483c432f1442 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.h
@@ -7,7 +7,6 @@
 #include
 
 #include "core/graph/basic_types.h"
-#include "core/session/onnxruntime_c_api.h"
 
 namespace onnxruntime {
 
@@ -31,7 +30,7 @@ class IOpBuilder {
   virtual void AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const = 0;
 
   // Add the operator to NNAPI model
-  virtual common::Status AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const ORT_MUST_USE_RESULT = 0;
+  virtual common::Status AddToModelBuilder(ModelBuilder& model_builder, const NodeUnit& node_unit) const = 0;
 };
 
 // Get the lookup table with IOpBuilder delegates for different onnx operators
@@ -40,8 +39,7 @@ class IOpBuilder {
 const std::unordered_map& GetOpBuilders();
 
 // Transpose the NHWC input to NCHW output
-common::Status TransposeNHWCToNCHW(ModelBuilder& model_builder, const std::string& input, const std::string& output)
-    ORT_MUST_USE_RESULT;
+common::Status TransposeNHWCToNCHW(ModelBuilder& model_builder, const std::string& input, const std::string& output);
 
 }  // namespace nnapi
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
index 452eec62a80c7..bc5a326bfe0c5 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@@ -1,16 +1,16 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include
-#include
-#include
-#include
+#include "op_support_checker.h"
+#include "core/common/logging/logging.h"
+#include "core/common/safeint.h"
+#include "core/framework/tensorprotoutils.h"
+#include "core/graph/graph.h"
 #include "core/providers/common.h"
 #include "core/providers/shared/node_unit/node_unit.h"
 #include "core/providers/shared/utils/utils.h"
 
 #include "helper.h"
-#include "op_support_checker.h"
 
 namespace onnxruntime {
 namespace nnapi {
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc
index 2bcc167622b3e..1652237622e71 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.cc
@@ -3,8 +3,8 @@
 
 #include "core/providers/common.h"
 
-#include "helper.h"
 #include "shaper.h"
+#include "helper.h"
 
 namespace onnxruntime {
 namespace nnapi {
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.h
index b9299454dce44..8656328804f46 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/shaper.h
@@ -6,7 +6,8 @@
 #include
 #include
 #include
-#include
+
+#include "core/common/status.h"
 
 namespace onnxruntime {
 namespace nnapi {
@@ -20,115 +21,103 @@ class Shaper {
     return shape_map_.at(key);
   }
 
-  Status Conv(const std::string& input_name,
-              const std::string& weight_name,
-              const std::vector& onnx_pads,
-              const std::vector& onnx_strides,
-              const std::vector& onnx_dilations,
-              bool nchw,
-              const std::string& output_name) ORT_MUST_USE_RESULT;
-
-  Status DepthwiseConv(const std::string& input_name,
-                       const std::string& weight_name,
-                       const std::vector& onnx_pads,
-                       const std::vector& onnx_strides,
-                       const std::vector& onnx_dilations,
-                       bool nchw,
-                       const std::string& output_name) ORT_MUST_USE_RESULT;
-
-  Status Pool(const std::string& input_name,
-              const std::vector& onnx_pads,
-              const std::vector& onnx_strides,
-              const std::vector& kernel_shape,
-              bool nchw,
-              const std::string& output_name) ORT_MUST_USE_RESULT;
-
-  Status Reshape(const std::string& input_name, const std::vector& shape, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-
-  Status Transpose(const std::string& input_name, const std::vector& perm, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-
-  Status Eltwise(const std::string& input1_name, const std::string& input2_name, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-
-  Status Identity(const std::string& input_name, const std::string& output_name) ORT_MUST_USE_RESULT;
-
-  Status FC(const std::string& input1_name, const std::string& input2_name, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-
-  Status Concat(const std::vector& input_names, const int32_t axis, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-
-  Status Squeeze(const std::string& input_name, const std::vector& axes, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-
-  Status ResizeUsingScales(const std::string& input_name,
-                           const float scale_h, const float scale_w,
-                           bool nchw,
-                           const std::string& output_name) ORT_MUST_USE_RESULT;
-  Status ResizeUsingOutputSizes(const std::string& input_name,
-                                const uint32_t output_h, const uint32_t output_w,
-                                bool nchw,
-                                const std::string& output_name) ORT_MUST_USE_RESULT;
+  common::Status Conv(const std::string& input_name,
+                      const std::string& weight_name,
+                      const std::vector& onnx_pads,
+                      const std::vector& onnx_strides,
+                      const std::vector& onnx_dilations,
+                      bool nchw,
+                      const std::string& output_name);
+
+  common::Status DepthwiseConv(const std::string& input_name,
+                               const std::string& weight_name,
+                               const std::vector& onnx_pads,
+                               const std::vector& onnx_strides,
+                               const std::vector& onnx_dilations,
+                               bool nchw,
+                               const std::string& output_name);
+
+  common::Status Pool(const std::string& input_name,
+                      const std::vector& onnx_pads,
+                      const std::vector& onnx_strides,
+                      const std::vector& kernel_shape,
+                      bool nchw,
+                      const std::string& output_name);
+
+  common::Status Reshape(const std::string& input_name, const std::vector& shape, const std::string& output_name);
+
+  common::Status Transpose(const std::string& input_name, const std::vector& perm, const std::string& output_name);
+
+  common::Status Eltwise(const std::string& input1_name, const std::string& input2_name, const std::string& output_name);
+
+  common::Status Identity(const std::string& input_name, const std::string& output_name);
+
+  common::Status FC(const std::string& input1_name, const std::string& input2_name, const std::string& output_name);
+
+  common::Status Concat(const std::vector& input_names, const int32_t axis, const std::string& output_name);
+
+  common::Status Squeeze(const std::string& input_name, const std::vector& axes, const std::string& output_name);
+
+  common::Status ResizeUsingScales(const std::string& input_name,
+                                   const float scale_h, const float scale_w,
+                                   bool nchw,
+                                   const std::string& output_name);
+  common::Status ResizeUsingOutputSizes(const std::string& input_name,
+                                        const uint32_t output_h, const uint32_t output_w,
+                                        bool nchw,
+                                        const std::string& output_name);
 
   // If the shape of certain input is dynamic
   // Use the following 2 functions to update the particular shape
   // and calculate the new output shape
   // Only perform this when the NNAPI model is finalized!
-  Status UpdateShape(const std::string& name, const Shape& new_shape) ORT_MUST_USE_RESULT;
-  Status UpdateDynamicDimensions() ORT_MUST_USE_RESULT;
+  common::Status UpdateShape(const std::string& name, const Shape& new_shape);
+  common::Status UpdateDynamicDimensions();
 
   void Clear();
 
  private:
-  Status ConvImpl(const std::string& input_name,
-                  const std::string& weight_name,
-                  const std::vector& onnx_pads,
-                  const std::vector& onnx_strides,
-                  const std::vector& onnx_dilations,
-                  bool nchw,
-                  const std::string& output_name) ORT_MUST_USE_RESULT;
-
-  Status DepthwiseConvImpl(const std::string& input_name,
-                           const std::string& weight_name,
-                           const std::vector& onnx_pads,
-                           const std::vector& onnx_strides,
-                           const std::vector& onnx_dilations,
-                           bool nchw,
-                           const std::string& output_name) ORT_MUST_USE_RESULT;
-
-  Status PoolImpl(const std::string& input_name,
-                  const std::vector& onnx_pads,
-                  const std::vector& onnx_strides,
-                  const std::vector& kernel_shape,
-                  bool nchw,
-                  const std::string& output_name) ORT_MUST_USE_RESULT;
-
-  Status ReshapeImpl(const std::string& input_name, const std::vector& shape, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-  Status TransposeImpl(const std::string& input_name, const std::vector& perm, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-  Status EltwiseImpl(const std::string& input1_name, const std::string& input2_name, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-  Status IdentityImpl(const std::string& input_name, const std::string& output_name) ORT_MUST_USE_RESULT;
-  Status FCImpl(const std::string& input1_name, const std::string& input2_name, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-  Status ConcatImpl(const std::vector& input_names, const int32_t axis, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-  Status SqueezeImpl(const std::string& input_names, const std::vector& axes, const std::string& output_name)
-      ORT_MUST_USE_RESULT;
-  Status ResizeUsingScalesImpl(const std::string& input_name,
-                               const float scale_h, const float scale_w,
-                               bool nchw,
-                               const std::string& output_name) ORT_MUST_USE_RESULT;
-  Status ResizeUsingOutputSizesImpl(const std::string& input_name,
-                                    const uint32_t output_h, const uint32_t output_w,
-                                    bool nchw,
-                                    const std::string& output_name) ORT_MUST_USE_RESULT;
+  common::Status ConvImpl(const std::string& input_name,
+                          const std::string& weight_name,
+                          const std::vector& onnx_pads,
+                          const std::vector& onnx_strides,
+                          const std::vector& onnx_dilations,
+                          bool nchw,
+                          const std::string& output_name);
+
+  common::Status DepthwiseConvImpl(const std::string& input_name,
+                                   const std::string& weight_name,
+                                   const std::vector& onnx_pads,
+                                   const std::vector& onnx_strides,
+                                   const std::vector& onnx_dilations,
+                                   bool nchw,
+                                   const std::string& output_name);
+
+  common::Status PoolImpl(const std::string& input_name,
+                          const std::vector& onnx_pads,
+                          const std::vector& onnx_strides,
+                          const std::vector& kernel_shape,
+                          bool nchw,
+                          const std::string& output_name);
+
+  common::Status ReshapeImpl(const std::string& input_name, const std::vector& shape, const std::string& output_name);
+  common::Status TransposeImpl(const std::string& input_name, const std::vector& perm, const std::string& output_name);
+  common::Status EltwiseImpl(const std::string& input1_name, const std::string& input2_name, const std::string& output_name);
+  common::Status IdentityImpl(const std::string& input_name, const std::string& output_name);
+  common::Status FCImpl(const std::string& input1_name, const std::string& input2_name, const std::string& output_name);
+  common::Status ConcatImpl(const std::vector& input_names, const int32_t axis, const std::string& output_name);
+  common::Status SqueezeImpl(const std::string& input_names, const std::vector& axes, const std::string& output_name);
+  common::Status ResizeUsingScalesImpl(const std::string& input_name,
+                                       const float scale_h, const float scale_w,
+                                       bool nchw,
+                                       const std::string& output_name);
+  common::Status ResizeUsingOutputSizesImpl(const std::string& input_name,
+                                            const uint32_t output_h, const uint32_t output_w,
+                                            bool nchw,
+                                            const std::string& output_name);
 
   std::unordered_map shape_map_;
-  std::vector> shape_ops_;
+  std::vector> shape_ops_;
 };
 
 }  // namespace nnapi
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
index 7a2036252ae8f..887384e6bd3bc 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.cc
@@ -1,9 +1,9 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include
-
 #include "model.h"
+
+#include "core/common/logging/logging.h"
 #include "core/providers/common.h"
 #include "core/providers/nnapi/nnapi_builtin/builders/helper.h"
 #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
index 8ce72538affc4..6326e60cf9797 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/model.h
@@ -103,7 +103,7 @@ class Model {
   // this output may need special handling
   bool IsScalarOutput(const std::string& output_name) const;
 
-  Status PrepareForExecution(std::unique_ptr& execution) ORT_MUST_USE_RESULT;
+  common::Status PrepareForExecution(std::unique_ptr& execution);
 
  private:
   const NnApi* nnapi_{nullptr};
@@ -143,7 +143,7 @@ class Model {
 
   void AddScalarOutput(const std::string& output_name);
 
-  void SetShaper(const Shaper shaper) { shaper_ = shaper; }
+  void SetShaper(const Shaper& shaper) { shaper_ = shaper; }
 
   int32_t GetNNAPIFeatureLevel() const;
 };
@@ -172,17 +172,16 @@ class Execution {
 
   // Set the input/output data buffers
   // These need to be called before calling Predict()
-  Status SetInputBuffers(const std::vector& inputs) ORT_MUST_USE_RESULT;
-  Status SetOutputBuffers(const std::vector& outputs) ORT_MUST_USE_RESULT;
+  common::Status SetInputBuffers(const std::vector& inputs);
+  common::Status SetOutputBuffers(const std::vector& outputs);
 
   // Execute the NNAPI model
   // if there is dynamic output shape, will output the actual output shapes
-  Status Predict(const std::vector& dynamic_outputs, std::vector& dynamic_output_shapes)
-      ORT_MUST_USE_RESULT;
+  common::Status Predict(const std::vector& dynamic_outputs, std::vector& dynamic_output_shapes);
 
  private:
-  Status SetInputBuffer(const int32_t index, const InputBuffer& input) ORT_MUST_USE_RESULT;
-  Status SetOutputBuffer(const int32_t index, const OutputBuffer& output) ORT_MUST_USE_RESULT;
+  common::Status SetInputBuffer(const int32_t index, const InputBuffer& input);
+  common::Status SetOutputBuffer(const int32_t index, const OutputBuffer& output);
 
   const NnApi* nnapi_{nullptr};
   ANeuralNetworksExecution* execution_;
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
index 150db8f593e11..85a0cf3ad13fc 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
@@ -187,14 +187,6 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
 }
 
 #ifdef __ANDROID__
-static Status GetOutputBuffer(Ort::CustomOpApi& ort,
-                              OrtKernelContext* context,
-                              const nnapi::Model& model,
-                              const std::string& output_name,
-                              const std::vector& output_shape,
-                              const android::nn::wrapper::Type output_type,
-                              void** output_buffer) ORT_MUST_USE_RESULT;
-
 static Status GetOutputBuffer(Ort::CustomOpApi& ort,
                               OrtKernelContext* context,
                               const nnapi::Model& model,
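A minimal illustration of the per-tensor u8s8 weight handling described above in AddInitializerInNewLayout / AddInitializerTransposed ("byte ^ 0x80 == byte + 128"): flipping the top bit of each int8 weight byte reinterprets it as uint8 with the zero point shifted by 128 while the scale stays unchanged. This is an editorial sketch for clarity only, not part of the patch.

#include <cassert>
#include <cstdint>

int main() {
  const int8_t w = -3;                                     // a quantized int8 weight value
  const uint8_t flipped = static_cast<uint8_t>(w) ^ 0x80;  // flip the sign bit: int8 -> uint8
  assert(flipped == static_cast<int>(w) + 128);            // -3 maps to 125, zero point moves by +128
  return 0;
}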
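The sweep above drops the per-declaration ORT_MUST_USE_RESULT annotations in favor of plain common::Status return types. That remains warning-safe only if the status type itself carries a nodiscard attribute, which this patch assumes but does not show. The stand-in class below (not onnxruntime's real common::Status) sketches how marking the type once makes every function returning it warn when a caller ignores the result.

#include <string>

class [[nodiscard]] Status {  // stand-in for a nodiscard-annotated status type
 public:
  static Status OK() { return Status{}; }
  bool IsOK() const { return message_.empty(); }

 private:
  std::string message_;
};

Status DoWork() { return Status::OK(); }

int main() {
  DoWork();             // compiler warning: discarded return value of nodiscard type
  Status s = DoWork();  // fine: the result is consumed
  return s.IsOK() ? 0 : 1;
}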
From e075eaac338cf10b073be13d9c6c39847bfacee7 Mon Sep 17 00:00:00 2001
From: Guoyu Wang
Date: Wed, 12 Jan 2022 17:02:12 -0800
Subject: [PATCH 21/23] Simplify FindActivation

---
 .../nnapi_builtin/builders/model_builder.cc  | 20 ++++++++++++++++---
 .../nnapi_builtin/builders/model_builder.h   |  5 +++--
 .../nnapi_builtin/builders/op_builder.cc     | 10 +++++-----
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
index 56be9aa755ff7..12beec0f9419c 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@@ -627,12 +627,26 @@ Status ModelBuilder::Compile(std::unique_ptr& model) {
   return Status::OK();
 }
 
-int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit, const NodeArg& output) {
+int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit) {
   int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE;
-  if (node_unit.GetOutputNodes().size() != 1)
+  const auto& output_nodes = node_unit.GetOutputNodes();
+  if (node_unit.GetOutputNodes().size() != 1) {
+    LOGS_DEFAULT(VERBOSE) << "FindActivation does not support, NodeUnit [" << node_unit.Name()
+                          << "] type [" << node_unit.OpType()
+                          << "], with " << output_nodes.size() << " output nodes";
     return fuse_code;
+  }
+
+  const auto& outputs = node_unit.Outputs();
+  if (outputs.size() != 1) {
+    LOGS_DEFAULT(VERBOSE) << "FindActivation does not support, NodeUnit [" << node_unit.Name()
+                          << "] type [" << node_unit.OpType()
+                          << "], with " << outputs.size() << " outputs";
+    return fuse_code;
+  }
 
-  const auto& output_node = *node_unit.GetOutputNodes()[0];
+  const NodeArg& output = outputs[0].node_arg;
+  const auto& output_node = *output_nodes[0];
 
   // TODO, add support of activation fusion for quantized node group (qdq or qlinear)
   // We do not support activation fusion for quantized operators for now
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
index 0fbeec7dc5d98..2269c986f60ea 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h
@@ -50,8 +50,9 @@ class ModelBuilder {
                                   const std::vector& types,
                                   const std::vector& is_nhwc_vec);
 
-  // Find if an output has a fuseable activation (Relu)
-  int32_t FindActivation(const NodeUnit& node_unit, const NodeArg& output);
+  // Find if the given node_unit has a fuseable activation (Relu/Relu1/Relu6)
+  // For now we only support node_unit with a single output
+  int32_t FindActivation(const NodeUnit& node_unit);
 
   // Add an NNAPI scalar operand
   common::Status AddOperandFromScalar(bool value, uint32_t& index);
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
index 9689f72156eba..acab410be1462 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
@@ -736,7 +736,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
 
   int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE;
   if (add_activation) {
-    fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg);
+    fuse_code = model_builder.FindActivation(node_unit);
   }
 
   return AddBinaryOperator(op_code, model_builder,
@@ -1079,7 +1079,7 @@ Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_bu
                                         output_is_nhwc));
 
   // Add
-  int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg);
+  int32_t fuse_code = model_builder.FindActivation(node_unit);
   ORT_RETURN_IF_ERROR(AddBinaryOperator(ANEURALNETWORKS_ADD,
                                         model_builder,
                                         tensor_imm_product_name, tensor_b_name,
@@ -1192,7 +1192,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
     }
   }
 
-  int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg);
+  int32_t fuse_code = model_builder.FindActivation(node_unit);
 
   // Get output scale and zero point if this is QLinearAveragePool
   // Otherwise we will use the scale and zero point of the input
@@ -1474,7 +1474,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
     }
   }
 
-  int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg);
+  int32_t fuse_code = model_builder.FindActivation(node_unit);
   ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);
 
   if (model_builder.GetNNAPIFeatureLevel() > ANEURALNETWORKS_FEATURE_LEVEL_2) {
@@ -1785,7 +1785,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   input_indices.push_back(operand_indices.at(input1));  // A
   input_indices.push_back(input_2_idx);                 // B
   input_indices.push_back(bias_idx);                    // C
-  int32_t fuse_code = model_builder.FindActivation(node_unit, node_unit.Outputs()[0].node_arg);
+  int32_t fuse_code = model_builder.FindActivation(node_unit);
   ADD_SCALAR_OPERAND(model_builder, input_indices, fuse_code);
 
   ORT_RETURN_IF_ERROR(shaper.FC(input1, input2, output));
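For reference, the fuse code that FindActivation hands back to the op builders corresponds to NNAPI's FuseCode values (ANEURALNETWORKS_FUSED_NONE/RELU/RELU1/RELU6). The helper below is a hypothetical, simplified mapping from the downstream activation's op type to those codes, with the constants inlined so the snippet is self-contained; the real lookup presumably works off the activation node list collected by PreprocessActivations, and "Relu1"/"Relu6" are shorthand for Clip nodes with [-1, 1]/[0, 6] bounds.

#include <cstdint>
#include <string>

// Values mirror NNAPI's FuseCode enum (NeuralNetworks.h).
constexpr int32_t kFusedNone = 0;   // ANEURALNETWORKS_FUSED_NONE
constexpr int32_t kFusedRelu = 1;   // ANEURALNETWORKS_FUSED_RELU
constexpr int32_t kFusedRelu1 = 2;  // ANEURALNETWORKS_FUSED_RELU1
constexpr int32_t kFusedRelu6 = 3;  // ANEURALNETWORKS_FUSED_RELU6

// Hypothetical helper: map the single downstream activation's op type to a fuse code.
int32_t FuseCodeFromActivation(const std::string& op_type) {
  if (op_type == "Relu") return kFusedRelu;
  if (op_type == "Relu1") return kFusedRelu1;
  if (op_type == "Relu6") return kFusedRelu6;
  return kFusedNone;
}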
From 613035cce12aa65ce007a036d583ebc753890fdd Mon Sep 17 00:00:00 2001
From: Guoyu Wang
Date: Fri, 14 Jan 2022 16:24:56 -0800
Subject: [PATCH 22/23] address CR comments

---
 .../nnapi/nnapi_builtin/builders/helper.cc               | 6 +++---
 .../nnapi/nnapi_builtin/builders/model_builder.cc        | 6 ++----
 .../nnapi/nnapi_builtin/builders/op_builder.cc           | 5 ++---
 .../nnapi/nnapi_builtin/builders/op_support_checker.cc   | 4 ++--
 onnxruntime/core/providers/shared/node_unit/node_unit.h  | 2 +-
 5 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
index a4097d6716026..af93017649e78 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@@ -153,9 +153,9 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
   const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
   for (const auto idx : indices) {
     if (idx >= io_defs.size()) {
-      LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationScales, "
-                            << (is_input ? "Input" : "Output") << " index,  " << idx
-                            << " >= size, " << io_defs.size();
+      LOGS_DEFAULT(VERBOSE) << (is_input ? "Input" : "Output") << " index,  " << idx
+                            << " >= size, " << io_defs.size()
+                            << " of NodeUnit: " << node_unit.Name();
       return false;
     }
 
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
index 12beec0f9419c..fe6eade431770 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@@ -173,16 +173,14 @@ void ModelBuilder::GetAllQuantizedOpInputs() {
     auto qlinear_op_type = GetQLinearOpType(node_unit->GetNode());
 
     // Not a qlinear op
+    // TODO, add handling for QDQ NodeUnit
     if (qlinear_op_type == QLinearOpType::Unknown)
       continue;
 
     const auto add_quantized_input =
         [&all_quantized_op_inputs = all_quantized_op_inputs_](const NodeUnit& node_unit, size_t input_idx) {
           const auto& input_name = node_unit.Inputs()[input_idx].node_arg.Name();
-          if (Contains(all_quantized_op_inputs, input_name))
-            all_quantized_op_inputs.at(input_name).push_back(&node_unit);
-          else
-            all_quantized_op_inputs.emplace(input_name, std::vector{&node_unit});
+          all_quantized_op_inputs[input_name].push_back(&node_unit);
         };
 
     // All qlinear ops EXCEPT QuantizeLinear has quantized input
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
index acab410be1462..bff260cb6741c 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
@@ -791,7 +791,7 @@ class TransposeOpBuilder : public BaseOpBuilder {
 Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
   auto& shaper(model_builder.GetShaper());
 
-  auto input = node_unit.Inputs()[0].node_arg.Name();
+  const auto& input = node_unit.Inputs()[0].node_arg.Name();
   const auto& output = node_unit.Outputs()[0].node_arg.Name();
   NodeAttrHelper helper(node_unit);
   std::vector perm = helper.Get("perm", std::vector());
@@ -1672,7 +1672,6 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
     AddQuantizationScaleAndZeroPointToSkip(model_builder, *node_unit.Outputs()[0].quant_param);  // y_scale, y_zp
   } else {
     const auto& op = node_unit.OpType();
-    const auto& inputs = node_unit.Inputs();
     if (op == "MatMul") {
       model_builder.AddInitializerToSkip(inputs[1].node_arg.Name());
     } else if (op == "Gemm") {
@@ -1956,7 +1955,7 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   output_is_nhwc = model_builder.IsOperandNHWC(input0);
 
   for (size_t i = 0; i < node_input_size; i++) {
-    auto input = inputs[i].node_arg.Name();
+    const auto& input = inputs[i].node_arg.Name();
     input_indices.push_back(operand_indices.at(input));
     input_names.push_back(input);
   }
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
index bc5a326bfe0c5..9bd25abc62a77 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@@ -46,7 +46,7 @@ bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node
       return true;
 
     if (!input.quant_param)
-      return false;
+      continue;
 
     if (is_ext_initializer(input.quant_param->scale))
       return true;
@@ -314,7 +314,7 @@ bool BinaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
   if (op_is_qlinear) {
     // For QLinearAdd, we only support uint8 output now
     int32_t output_type;
-    if (!GetType(inputs[0].node_arg, output_type))
+    if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
       return false;
 
     if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.h b/onnxruntime/core/providers/shared/node_unit/node_unit.h
index 73fcca032a013..bea1e476a72da 100644
--- a/onnxruntime/core/providers/shared/node_unit/node_unit.h
+++ b/onnxruntime/core/providers/shared/node_unit/node_unit.h
@@ -25,7 +25,7 @@ struct NodeGroup;
 // If the optional quant_param is present, then this is a quantized input,
 // otherwise this is a regular input
 struct NodeUnitIODef {
-  // The quantization parmeter, scale is manadatory, and zero_point is optional
+  // The quantization parameter, scale is mandatory, and zero_point is optional
   struct QuantParam {
     const NodeArg& scale;
    const NodeArg* zero_point{nullptr};
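One of the review comments addressed above replaces the Contains()/emplace() branch in GetAllQuantizedOpInputs with the single statement all_quantized_op_inputs[input_name].push_back(&node_unit). The snippet below (a self-contained illustration, not the provider code) shows why that is equivalent: std::unordered_map::operator[] value-initializes the mapped vector the first time a key is seen, so the push_back works for both new and existing keys.

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  std::unordered_map<std::string, std::vector<int>> quantized_inputs;
  quantized_inputs["scale_0"].push_back(1);  // key absent: an empty vector is created first
  quantized_inputs["scale_0"].push_back(2);  // key present: appends to the existing vector
  assert(quantized_inputs.at("scale_0").size() == 2);
  return 0;
}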
From e0191e59d542f1d379da4ed2a3cb095745fad5fc Mon Sep 17 00:00:00 2001
From: Guoyu Wang
Date: Fri, 14 Jan 2022 17:44:18 -0800
Subject: [PATCH 23/23] address CR comments

---
 .../builders/op_support_checker.cc            |  6 ++---
 .../providers/shared/node_unit/node_unit.cc   | 27 +++++++++----------
 .../providers/shared/node_unit/node_unit.h    |  6 ++---
 3 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
index 9bd25abc62a77..75eab4c837d00 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@@ -1268,10 +1268,10 @@ bool SqueezeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& init
   if (!GetShape(inputs[0].node_arg, input_shape))
     return false;
 
-  const auto input_dim = input_shape.size();
-  if (input_dim > 4 || input_dim == 0) {
+  const auto input_rank = input_shape.size();
+  if (input_rank > 4 || input_rank == 0) {
     LOGS_DEFAULT(VERBOSE) << "Squeeze only supports 1-4d shape, input is "
-                          << input_dim << "d shape";
+                          << input_rank << "d shape";
     return false;
   }
 
diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.cc b/onnxruntime/core/providers/shared/node_unit/node_unit.cc
index 3c5a829a4d497..d443fe858f36b 100644
--- a/onnxruntime/core/providers/shared/node_unit/node_unit.cc
+++ b/onnxruntime/core/providers/shared/node_unit/node_unit.cc
@@ -82,27 +82,24 @@ bool IsVariadicQLinearOp(QLinearOpType type) {
 }  // namespace
 
 NodeUnit::NodeUnit(const Node& node)
-    : nodes_{&node},
-      output_nodes_{&node},
-      node_(node),
+    : output_nodes_{&node},
+      target_node_(node),
       type_(Type::SingleNode) {
   InitForNode();
 }
 
-const std::string& NodeUnit::Domain() const noexcept { return node_.Domain(); }
-const std::string& NodeUnit::OpType() const noexcept { return node_.OpType(); }
-const std::string& NodeUnit::Name() const noexcept { return node_.Name(); }
-int NodeUnit::SinceVersion() const noexcept { return node_.SinceVersion(); }
-NodeIndex NodeUnit::Index() const noexcept { return node_.Index(); }
-const Path& NodeUnit::ModelPath() const noexcept { return node_.ModelPath(); }
-ProviderType NodeUnit::GetExecutionProviderType() const noexcept { return node_.GetExecutionProviderType(); }
+const std::string& NodeUnit::Domain() const noexcept { return target_node_.Domain(); }
+const std::string& NodeUnit::OpType() const noexcept { return target_node_.OpType(); }
+const std::string& NodeUnit::Name() const noexcept { return target_node_.Name(); }
+int NodeUnit::SinceVersion() const noexcept { return target_node_.SinceVersion(); }
+NodeIndex NodeUnit::Index() const noexcept { return target_node_.Index(); }
+const Path& NodeUnit::ModelPath() const noexcept { return target_node_.ModelPath(); }
+ProviderType NodeUnit::GetExecutionProviderType() const noexcept { return target_node_.GetExecutionProviderType(); }
 
 void NodeUnit::InitForNode() {
-  const auto& input_defs = node_.InputDefs();
-  const auto& output_defs = node_.OutputDefs();
-  // The 1st step is to hookup the NodeUnit with the NNAPI builder interface
-  // So we are not handling quantization here now
-  auto qlinear_type = GetQLinearOpType(node_);
+  const auto& input_defs = target_node_.InputDefs();
+  const auto& output_defs = target_node_.OutputDefs();
+  auto qlinear_type = GetQLinearOpType(target_node_);
   if (qlinear_type == QLinearOpType::Unknown ||
       IsVariadicQLinearOp(qlinear_type)) {  // TODO, add variadic support
     // Not a Qlinear op, add all inputs / outputs
diff --git a/onnxruntime/core/providers/shared/node_unit/node_unit.h b/onnxruntime/core/providers/shared/node_unit/node_unit.h
index bea1e476a72da..e109703c9316f 100644
--- a/onnxruntime/core/providers/shared/node_unit/node_unit.h
+++ b/onnxruntime/core/providers/shared/node_unit/node_unit.h
@@ -63,17 +63,15 @@ class NodeUnit {
   const Path& ModelPath() const noexcept;
   ProviderType GetExecutionProviderType() const noexcept;
 
-  const Node& GetNode() const noexcept { return node_; }
+  const Node& GetNode() const noexcept { return target_node_; }
   const std::vector GetOutputNodes() const noexcept { return output_nodes_; }
-  const std::vector GetAllNodes() const noexcept { return nodes_; }
 
  private:
   std::vector inputs_;
   std::vector outputs_;
 
-  const std::vector nodes_;         // all nodes in this NodeUnit
   const std::vector output_nodes_;  // all the nodes producing outputs for this NodeUnit
-  const Node& node_;                // target Node
+  const Node& target_node_;
 
   Type type_;
 
   void InitForNode();  // Initializing for single Node