Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QDQ] Move NNAPI EP to use NodeUnitIODef for non-QDQ ops #10237

Merged
merged 28 commits into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5331711
clear using in code
guoyu-wang Dec 17, 2021
e0f844c
add qlinear iodef generation
guoyu-wang Dec 17, 2021
72f0324
Move iodef out of nodeunit
guoyu-wang Dec 17, 2021
b55527b
Move AddInitializersToSkip to use iodef
guoyu-wang Dec 18, 2021
5252da7
minor fix
guoyu-wang Dec 19, 2021
24eb8fd
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 3, 2022
6189bb1
Move get quantized inputs to NodeUnit
guoyu-wang Jan 3, 2022
2baac12
move isvalidzp to nodeunit, fix some minor bugs
guoyu-wang Jan 4, 2022
35b6565
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 4, 2022
5504358
Let HasValidQuantizationZeroPoints handle output def of node_unit
guoyu-wang Jan 4, 2022
87c1204
move op_support_checked to node_unit
guoyu-wang Jan 6, 2022
1367e84
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 6, 2022
7ee1ecb
minor update
guoyu-wang Jan 7, 2022
221776b
add activation handling for node_unit
guoyu-wang Jan 7, 2022
35292cb
add bin and relu support
guoyu-wang Jan 8, 2022
21d0555
move more ops to node_unit
guoyu-wang Jan 9, 2022
61b7e7e
move conv to node_unit
guoyu-wang Jan 10, 2022
7aa4f9d
move gemm/cast to node_unit
guoyu-wang Jan 10, 2022
cc6d468
remove redundant functions
guoyu-wang Jan 11, 2022
edff0b4
remove function postfix
guoyu-wang Jan 11, 2022
09dbdb5
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 11, 2022
a551aff
fix NNAPI CI failure
guoyu-wang Jan 11, 2022
718e6b8
address CR comments
guoyu-wang Jan 12, 2022
8d0692b
remove redundant ORT_MUST_USE_RESULT, and clear includes
guoyu-wang Jan 13, 2022
e075eaa
Simplify FindActivation
guoyu-wang Jan 13, 2022
613035c
address CR comments
guoyu-wang Jan 15, 2022
e0191e5
address CR comments
guoyu-wang Jan 15, 2022
80caa9b
Merge remote-tracking branch 'origin/master' into gwang-msft/use_node…
guoyu-wang Jan 15, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 126 additions & 83 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc

Large diffs are not rendered by default.

39 changes: 24 additions & 15 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ namespace onnxruntime {
using Shape = std::vector<uint32_t>;
using InitializerMap = std::unordered_map<std::string, const ONNX_NAMESPACE::TensorProto&>;

class GraphViewer;
class Node;
class NodeArg;
class NodeUnit;
class GraphViewer;
class Path;

struct NodeUnitIODef;

namespace nnapi {

Expand Down Expand Up @@ -94,36 +97,40 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node);

// Return the type of the conv ops,
// This function assumes the input is a 2d conv node
ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& initializers);
ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers);

// This qlinear op is an operator takes 2 inputs and produces 1 output
// Such as QLinearConv, QLinearMatMul, QLinearAdd, ...
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type);

// Check if a qlinear unary op has valid inputs, Qlinear[Sigmoid/AveragePool]
bool HasValidUnaryOpQuantizedInputs(const Node& node);
bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit);
// Check if a qlinear binary op has valid inputs, Qlinear[Conv/MatMul/Add]
bool HasValidBinaryOpQuantizedInputs(const Node& node);
bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit);

// Check if a qlinear op has valid scales for given indices
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices, const OpSupportCheckParams& params);
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input);

// Check if a qlinear op has valid zero points for given indices
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices);
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, bool is_input);

common::Status GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node,
size_t idx, float& scale);
common::Status GetQuantizationScaleAndZeroPoint(
const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path,
float& scale, int32_t& zero_point);

common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers,
const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT;
common::Status GetQuantizationScaleAndZeroPoint(
const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::string& name,
float& scale, int32_t& zero_point, bool is_input = true);

// Get Shape/Type of a NodeArg
// TODO, move to shared_utils
bool GetShape(const NodeArg& node_arg, Shape& shape);
bool GetType(const NodeArg& node_arg, int32_t& type);

// Get the output shape of Flatten Op
void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2);
void GetFlattenOutputShape(const NodeUnit& node_unit, const Shape& input_shape, int32_t& dim_1, int32_t& dim_2);

// If a node is supported by NNAPI
bool IsNodeSupported(const NodeUnit& node_unit, const GraphViewer& graph_viewer, const OpSupportCheckParams& params);
Expand All @@ -144,8 +151,10 @@ bool IsValidSupportedNodeGroup(const std::vector<const Node*>& supported_node_gr
std::string Shape2String(const std::vector<uint32_t>& shape);

// Check the given input is an initializer tensor
bool CheckIsInitializer(const InitializedTensorSet& initializers, const Node& node,
size_t index, const char* input_name) ORT_MUST_USE_RESULT;
// input_name is the name of the initializer
// input_description is the string describing the input in the output message (if any)
bool CheckIsInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::string& input_name, const char* input_description);

} // namespace nnapi
} // namespace onnxruntime
177 changes: 112 additions & 65 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <core/common/logging/logging.h>
#include <core/common/safeint.h>
#include <core/framework/tensorprotoutils.h>
#include "model_builder.h"

#include "core/common/logging/logging.h"
#include "core/common/safeint.h"
#include "core/common/status.h"
#include "core/framework/tensorprotoutils.h"
#include "core/graph/graph_viewer.h"
#include "core/providers/common.h"
#include "core/providers/shared/node_unit/node_unit.h"
#include "core/providers/shared/utils/utils.h"
#include "core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"

#include "helper.h"
#include "model_builder.h"
#include "op_builder.h"
#include "op_support_checker.h"

using onnxruntime::NodeUnit;
using namespace android::nn::wrapper;
using std::vector;

namespace onnxruntime {
namespace nnapi {
Expand All @@ -31,7 +32,7 @@ int32_t ModelBuilder::GetNNAPIFeatureLevel() const {
// Scalar operand is copied into the model, no need to persist
#define DEFINE_ADD_OPERAND_FROM_SCALAR(scalar_type, op_type) \
Status ModelBuilder::AddOperandFromScalar(scalar_type value, uint32_t& index) { \
OperandType operandType(Type::op_type, vector<uint32_t>{}); \
OperandType operandType(Type::op_type, std::vector<uint32_t>{}); \
ORT_RETURN_IF_ERROR(AddNewNNAPIOperand(operandType, index)); \
RETURN_STATUS_ON_ERROR_WITH_NOTE( \
nnapi_->ANeuralNetworksModel_setOperandValue( \
Expand All @@ -50,13 +51,12 @@ void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) {
skipped_initializers_.insert(tensor_name);
}

static std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer);

Status ModelBuilder::Prepare() {
nnapi_model_ = std::unique_ptr<Model>(new Model());
RETURN_STATUS_ON_ERROR(nnapi_->ANeuralNetworksModel_create(&nnapi_model_->model_));
ORT_RETURN_IF_ERROR(GetTargetDevices());
all_quantized_op_inputs_ = GetAllQuantizedOpInputs(graph_viewer_);
PreprocessNodeUnits();
GetAllQuantizedOpInputs();
PreprocessInitializers();
PreprocessActivations();
ORT_RETURN_IF_ERROR(RegisterInitializers());
Expand Down Expand Up @@ -118,74 +118,89 @@ Status ModelBuilder::GetTargetDevices() {
}

void ModelBuilder::PreprocessInitializers() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer_.GetNode(node_indices[i]));
if (const auto* op_builder = GetOpBuilder(*node)) {
const NodeUnit node_unit(*node);
op_builder->AddInitializersToSkip(*this, node_unit);
for (const auto& node_unit : node_unit_holder_) {
if (const auto* op_builder = GetOpBuilder(*node_unit)) {
op_builder->AddInitializersToSkip(*this, *node_unit);
}
}
}

void ModelBuilder::PreprocessActivations() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer_.GetNode(node_indices[i]));
const auto& op_type(node->OpType());

for (const auto& node_unit : node_unit_holder_) {
const auto& node = node_unit->GetNode();
const auto& op_type(node.OpType());
if (op_type == "Relu") {
activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU);
activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU);
} else if (op_type == "Clip") { // Relu1 or Relu6
float min, max;
if (!GetClipMinMax(GetInitializerTensors(), *node, min, max, logging::LoggingManager::DefaultLogger()))
if (!GetClipMinMax(GetInitializerTensors(), node, min, max, logging::LoggingManager::DefaultLogger()))
continue;

if (min == -1.0f && max == 1.0f) {
activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU1);
activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU1);
} else if (min == 0.0f && max == 6.0f) {
activation_nodes_.emplace(node->Index(), ANEURALNETWORKS_FUSED_RELU6);
activation_node_units_.emplace(node_unit.get(), ANEURALNETWORKS_FUSED_RELU6);
}
}
}
}

// Help to get all quantized operators' input and the node(s) using the input
static std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(const GraphViewer& graph_viewer) {
std::unordered_map<std::string, vector<const Node*>> all_quantized_op_inputs;
const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
for (const auto& node_idx : node_indices) {
const auto* node(graph_viewer.GetNode(node_idx));
auto qlinear_op_type = GetQLinearOpType(*node);
const NodeUnit& ModelBuilder::GetNodeUnit(const Node* node) const {
// In theory, if node_unit_map_ is generated correctly, see PreprocessNodeUnits(), a NodeUnit can be
// found for any single node in the graph_viewer_, unless the given node is not from graph_viewer_
return *node_unit_map_.at(node);
}

void ModelBuilder::PreprocessNodeUnits() {
// TODO, hookup shared QDQ selectors here to identify all the qdq NodeUnit in the graph
Copy link
Contributor

@YUNQIUGUO YUNQIUGUO Jan 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// TODO, hookup shared QDQ selectors here to identify all the qdq NodeUnit in the graph

a quick question regarding this:
Since the shared GetQDQselection() method now returns a std::vector<NodeGroup> . and if I understand correctly in order to make NodeUnits here from a NodeGroup type, we need the QDQGroup type NodeUnit to be implemented first before adding the hookup right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we will update the NodeUnit to take NodeGroup, for now the NodeUnit can only handle single node
This will be part of task 5

const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (size_t i = 0; i < node_indices.size(); i++) {
const auto node_idx = node_indices[i];
// TODO, check if the node is already part of a qdq group
const auto* node(graph_viewer_.GetNode(node_idx));
auto node_unit = std::make_unique<NodeUnit>(*node);
node_unit_map_.insert({node, node_unit.get()});
node_unit_holder_.push_back(std::move(node_unit));
}
}

// Help to get all quantized operators' input and the NodeUnit(s) using the input
void ModelBuilder::GetAllQuantizedOpInputs() {
for (const auto& node_unit : node_unit_holder_) {
// TODO, hookup getting quantized inputs with QDQ NodeUnits and remove the ORT_ENFORCE
ORT_ENFORCE(node_unit->UnitType() == NodeUnit::Type::SingleNode, "QDQ NodeUnit is not yet implemented");

auto qlinear_op_type = GetQLinearOpType(node_unit->GetNode());

// Not a qlinear op
if (qlinear_op_type == QLinearOpType::Unknown)
continue;

const auto add_quantized_input =
[&all_quantized_op_inputs = all_quantized_op_inputs_](const NodeUnit& node_unit, size_t input_idx) {
const auto& input_name = node_unit.Inputs()[input_idx].node_arg.Name();
if (Contains(all_quantized_op_inputs, input_name))
all_quantized_op_inputs.at(input_name).push_back(&node_unit);
else
all_quantized_op_inputs.emplace(input_name, std::vector<const NodeUnit*>{&node_unit});
guoyu-wang marked this conversation as resolved.
Show resolved Hide resolved
};

// All qlinear ops EXCEPT QuantizeLinear has quantized input
if (qlinear_op_type != QLinearOpType::QuantizeLinear) {
const auto& input_name = node->InputDefs()[0]->Name();
if (Contains(all_quantized_op_inputs, input_name))
all_quantized_op_inputs.at(input_name).push_back(node);
else
all_quantized_op_inputs.emplace(input_name, vector<const Node*>{node});
add_quantized_input(*node_unit, 0);
}

if (IsQLinearBinaryOp(qlinear_op_type)) {
const auto& input_name = node->InputDefs()[3]->Name();
if (Contains(all_quantized_op_inputs, input_name))
all_quantized_op_inputs.at(input_name).push_back(node);
else
all_quantized_op_inputs.emplace(input_name, vector<const Node*>{node});
add_quantized_input(*node_unit, 1);
}
}

return all_quantized_op_inputs;
// TODO, add handling for variadic nodes such as QLinearConcat
}
}

static Status GetInputDataType(
const InitializedTensorSet& initializers,
const std::unordered_map<std::string, std::vector<const Node*>>& all_quantized_op_inputs,
const std::unordered_map<std::string, std::vector<const NodeUnit*>>& all_quantized_op_inputs,
const std::string& name, int32_t data_type, const Shape& shape,
OperandType& operand_type) {
Type type = Type::TENSOR_FLOAT32;
Expand All @@ -208,10 +223,9 @@ static Status GetInputDataType(
}

// TODO, verify the scale and zero point match if there are multiple op using same input
const auto* node = all_quantized_op_inputs.at(name)[0];
const NodeUnit node_unit(*node);
ORT_RETURN_IF_ERROR(GetQuantizedInputScaleAndZeroPoint(
initializers, node_unit, name, scale, zero_point));
const auto* node_unit = all_quantized_op_inputs.at(name)[0];
ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
initializers, *node_unit, name, scale, zero_point, true /* is_input */));
break;
}
// case ONNX_NAMESPACE::TensorProto_DataType_INT8:
Expand Down Expand Up @@ -491,15 +505,23 @@ Status ModelBuilder::AddOperandFromPersistMemoryBuffer(

Status ModelBuilder::AddOperations() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
std::unordered_set<const NodeUnit*> processed_node_units;
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer_.GetNode(node_indices[i]));
if (const auto* op_builder = GetOpBuilder(*node)) {
const NodeUnit node_unit(*node);
const NodeUnit& node_unit = GetNodeUnit(node);

// Since a NodeUnit may contain multiple nodes, avoid processing the same NodeUnit multiple times
if (Contains(processed_node_units, &node_unit))
continue;

if (const auto* op_builder = GetOpBuilder(node_unit)) {
ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, node_unit));
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Node [", node->Name(), "], type [", node->OpType(), "] is not supported");
"Node [", node_unit.Name(), "], type [", node_unit.OpType(), "] is not supported");
}

processed_node_units.insert(&node_unit);
}

return Status::OK();
Expand Down Expand Up @@ -605,20 +627,40 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
return Status::OK();
}

int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) {
int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit) {
int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE;
const auto& output_nodes = node_unit.GetOutputNodes();
if (node_unit.GetOutputNodes().size() != 1) {
LOGS_DEFAULT(VERBOSE) << "FindActivation does not support, NodeUnit [" << node_unit.Name()
<< "] type [" << node_unit.OpType()
<< "], with " << output_nodes.size() << " output nodes";
return fuse_code;
}

const auto& outputs = node_unit.Outputs();
if (outputs.size() != 1) {
LOGS_DEFAULT(VERBOSE) << "FindActivation does not support, NodeUnit [" << node_unit.Name()
<< "] type [" << node_unit.OpType()
<< "], with " << outputs.size() << " outputs";
return fuse_code;
}

const NodeArg& output = outputs[0].node_arg;
const auto& output_node = *output_nodes[0];

// TODO, add support of activation fusion for quantized node group (qdq or qlinear)
// We do not support activation fusion for quantized operators for now
auto qlinear_op_type = GetQLinearOpType(node);
auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode());
if (qlinear_op_type != QLinearOpType::Unknown)
return fuse_code;

for (auto it = node.OutputEdgesBegin(), end = node.OutputEdgesEnd(); it != end; ++it) {
for (auto it = output_node.OutputEdgesBegin(), end = output_node.OutputEdgesEnd(); it != end; ++it) {
const auto& dst_node = it->GetNode();
const auto* dst_input = dst_node.InputDefs()[it->GetDstArgIndex()];
if (Contains(activation_nodes_, dst_node.Index())) {
const auto& dst_node_unit = GetNodeUnit(&dst_node);
if (Contains(activation_node_units_, &dst_node_unit)) {
if (&output == dst_input) {
fuse_code = activation_nodes_.at(dst_node.Index());
fuse_code = activation_node_units_.at(&dst_node_unit);
}
} else {
// if there is any other non-relu node using the output
Expand All @@ -628,14 +670,14 @@ int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) {
}
}

// if output is a graph output, will add relu separately
// if output is a graph output, will add activation separately
if (fuse_code != ANEURALNETWORKS_FUSED_NONE) {
for (const auto* graph_output : graph_viewer_.GetOutputs()) {
if (&output == graph_output)
return ANEURALNETWORKS_FUSED_NONE;
const auto& graph_outputs = graph_viewer_.GetOutputs();
if (std::find(graph_outputs.cbegin(), graph_outputs.cend(), &output) != graph_outputs.cend()) {
return ANEURALNETWORKS_FUSED_NONE;
}

LOGS_DEFAULT(VERBOSE) << "Node [" << node.Name() << "] type [" << node.OpType()
LOGS_DEFAULT(VERBOSE) << "Node [" << node_unit.Name() << "] type [" << node_unit.OpType()
<< "], fused the output [" << output.Name() << "]";

fused_activations_.insert(output.Name());
Expand All @@ -644,12 +686,13 @@ int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) {
return fuse_code;
}

/* static */ const IOpBuilder* ModelBuilder::GetOpBuilder(const Node& node) {
/* static */ const IOpBuilder* ModelBuilder::GetOpBuilder(const NodeUnit& node_unit) {
const auto& op_builders = GetOpBuilders();
if (!Contains(op_builders, node.OpType()))
const auto& op_type = node_unit.GetNode().OpType();
if (!Contains(op_builders, op_type))
return nullptr;

return op_builders.at(node.OpType());
return op_builders.at(op_type);
}

std::string ModelBuilder::GetUniqueName(const std::string& base_name) {
Expand All @@ -663,6 +706,10 @@ std::string ModelBuilder::GetUniqueName(const std::string& base_name) {
return unique_name;
}

const InitializedTensorSet& ModelBuilder::GetInitializerTensors() const {
return graph_viewer_.GetAllInitializedTensors();
}

void ModelBuilder::RegisterNHWCOperand(const std::string& name) {
nhwc_operands_.insert(name);
}
Expand Down
Loading