[NNAPI QDQ] Add QDQ Conv support #10418

Merged: 4 commits, Jan 31, 2022
Changes from 3 commits
86 changes: 49 additions & 37 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@@ -48,24 +48,31 @@ std::string GetErrorCause(int error_code) {
}
}

QLinearOpType GetQLinearOpType(const onnxruntime::Node& node) {
const auto& op_type = node.OpType();
if (op_type == "DequantizeLinear")
return QLinearOpType::DequantizeLinear;
else if (op_type == "QuantizeLinear")
return QLinearOpType::QuantizeLinear;
else if (op_type == "QLinearConv")
return QLinearOpType::QLinearConv;
else if (op_type == "QLinearMatMul")
return QLinearOpType::QLinearMatMul;
else if (op_type == "QLinearAdd")
return QLinearOpType::QLinearAdd;
else if (op_type == "QLinearSigmoid")
return QLinearOpType::QLinearSigmoid;
else if (op_type == "QLinearAveragePool")
return QLinearOpType::QLinearAveragePool;

return QLinearOpType::Unknown;
QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
const auto& op_type = node_unit.OpType();
if (node_unit.UnitType() == NodeUnit::Type::SingleNode) {
if (op_type == "DequantizeLinear")
return QuantizedOpType::DequantizeLinear;
else if (op_type == "QuantizeLinear")
return QuantizedOpType::QuantizeLinear;
else if (op_type == "QLinearConv")
return QuantizedOpType::QLinearConv;
else if (op_type == "QLinearMatMul")
return QuantizedOpType::QLinearMatMul;
else if (op_type == "QLinearAdd")
return QuantizedOpType::QLinearAdd;
else if (op_type == "QLinearSigmoid")
return QuantizedOpType::QLinearSigmoid;
else if (op_type == "QLinearAveragePool")
return QuantizedOpType::QLinearAveragePool;
} else if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
if (op_type == "Conv")
return QuantizedOpType::QDQConv;
} else {
// throw?
}

return QuantizedOpType::Unknown;
}

ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers) {
@@ -89,10 +96,15 @@ ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& init
return ConvType::Grouped;
}

bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type) {
return qlinear_op_type == QLinearOpType::QLinearConv ||
qlinear_op_type == QLinearOpType::QLinearMatMul ||
qlinear_op_type == QLinearOpType::QLinearAdd;
bool IsQuantizedConv(QuantizedOpType quant_op_type) {
return (quant_op_type == QuantizedOpType::QLinearConv) ||
(quant_op_type == QuantizedOpType::QDQConv);
}

bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type) {
return quant_op_type == QuantizedOpType::QLinearMatMul ||
quant_op_type == QuantizedOpType::QLinearAdd ||
IsQuantizedConv(quant_op_type);
}

bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit) {
@@ -111,9 +123,9 @@ bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit) {
}

bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {
auto op_type = GetQLinearOpType(node_unit.GetNode());
auto quant_op_type = GetQuantizedOpType(node_unit);
int32_t a_input_type, b_input_type;
if (!IsQLinearBinaryOp(op_type)) {
if (!IsQuantizedBinaryOp(quant_op_type)) {
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] is not a binary qlinear op";
return false;
}
@@ -126,14 +138,14 @@ bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {

// QlinearConv supports u8u8 or u8s8
// QLinearMatMul/Add only support u8u8
bool is_qlinear_conv = op_type == QLinearOpType::QLinearConv;
bool is_quant_conv = IsQuantizedConv(quant_op_type);
bool has_valid_qlinear_conv_weight =
(b_input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
b_input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8);

if (a_input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
(!is_qlinear_conv && a_input_type != b_input_type) ||
(is_qlinear_conv && !has_valid_qlinear_conv_weight)) {
(!is_quant_conv && a_input_type != b_input_type) ||
(is_quant_conv && !has_valid_qlinear_conv_weight)) {
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType()
<< "] A Input type: [" << a_input_type
<< "] B Input type: [" << b_input_type
@@ -147,9 +159,9 @@ bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input) {
const auto& op_type = node_unit.OpType();
auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode());
bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv);
bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul);
auto quant_op_type = GetQuantizedOpType(node_unit);
bool is_quant_conv = IsQuantizedConv(quant_op_type);
bool is_quant_matmul = (quant_op_type == QuantizedOpType::QLinearMatMul);
const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
for (const auto idx : indices) {
if (idx >= io_defs.size()) {
@@ -174,7 +186,7 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
}

// If this op is Qlinear[Conv/MatMul], we want to check u8s8 support for weight tensor (or B tensor for QlinearMatMul)
bool is_conv_matmul_weight = is_input && (is_qlinear_conv || is_qlinear_matmul) && idx == 1;
bool is_conv_matmul_weight = is_input && (is_quant_conv || is_quant_matmul) && idx == 1;
bool is_conv_matmul_u8s8_weight = false;

if (is_conv_matmul_weight) {
@@ -194,7 +206,7 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
// For u8s8 Qlinear[Conv/MatMul], we support
// 1. Per-tensor, the weight will be transformed to uint8 later
// 2. Per-channel, only from Android API level 29
if (is_qlinear_matmul) {
if (is_quant_matmul) {
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
return false;
}
@@ -221,9 +233,9 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, bool is_input) {
const auto& op_type = node_unit.OpType();
auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode());
bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv);
bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul);
auto quant_op_type = GetQuantizedOpType(node_unit);
bool is_quant_conv = IsQuantizedConv(quant_op_type);
bool is_quant_matmul = (quant_op_type == QuantizedOpType::QLinearMatMul);

const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
for (const auto idx : indices) {
@@ -251,7 +263,7 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
return false;
}

bool is_conv_matmul_weight = is_input && (is_qlinear_conv || is_qlinear_matmul) && idx == 1;
bool is_conv_matmul_weight = is_input && (is_quant_conv || is_quant_matmul) && idx == 1;
bool is_conv_matmul_u8s8_weight = false;

if (is_conv_matmul_weight) {
@@ -279,7 +291,7 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
}

if (zero_dim != 1) {
if (is_qlinear_matmul) {
if (is_quant_matmul) {
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
return false;
}
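In short, HasValidBinaryOpQuantizedInputs enforces: input A must be u8, a quantized Conv (QLinearConv or QDQConv) may take a u8 or s8 weight, and QLinearMatMul/QLinearAdd require u8u8. A minimal standalone sketch of that rule (a hypothetical helper, not part of this PR; the ONNX TensorProto enum values UINT8 = 2 and INT8 = 3 are hard-coded for illustration):

```cpp
#include <cstdint>

// Minimal sketch (hypothetical helper) of the type rule enforced by
// HasValidBinaryOpQuantizedInputs above.
bool InputTypesSupported(bool is_quant_conv, int32_t a_input_type, int32_t b_input_type) {
  constexpr int32_t kUint8 = 2;  // ONNX_NAMESPACE::TensorProto_DataType_UINT8
  constexpr int32_t kInt8 = 3;   // ONNX_NAMESPACE::TensorProto_DataType_INT8
  if (a_input_type != kUint8)
    return false;  // input A must always be u8
  if (is_quant_conv)  // QLinearConv/QDQConv: weight may be u8 or s8
    return b_input_type == kUint8 || b_input_type == kInt8;
  return b_input_type == a_input_type;  // QLinearMatMul/Add: u8u8 only
}
```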
17 changes: 11 additions & 6 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
@@ -73,8 +73,8 @@ struct OpSupportCheckParams;

std::string GetErrorCause(int error_code);

enum class QLinearOpType : uint8_t {
Unknown, // Unknown or not a linear quantized op
enum class QuantizedOpType : uint8_t {
Unknown, // Unknown or not a quantized NodeUnit
DequantizeLinear,
QuantizeLinear,
QLinearConv,
@@ -85,6 +85,8 @@ enum class QLinearOpType : uint8_t {
// Not yet supported
// QLinearMul,
// QLinearReduceMean,
QDQConv,
// TODO, add other QDQ NodeUnit types
};

enum class ConvType : uint8_t {
@@ -93,15 +95,18 @@ enum class ConvType : uint8_t {
Grouped,
};

QLinearOpType GetQLinearOpType(const onnxruntime::Node& node);
QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit);

// Return the type of the conv ops,
// This function assumes the input is a 2d conv node
ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers);

// This qlinear op is an operator takes 2 inputs and produces 1 output
// Such as QLinearConv, QLinearMatMul, QLinearAdd, ...
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type);
// If this is a quantized Conv (QLinearConv or QDQConv)
bool IsQuantizedConv(QuantizedOpType quant_op_type);

// A quantized binary op is an operator or QDQ node unit that takes 2 inputs and produces 1 output,
// such as QLinearConv, QLinearMatMul, QLinearAdd, QDQConv, ...
bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type);

// Check if a qlinear unary op has valid inputs, Qlinear[Sigmoid/AveragePool]
bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit);
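A quick sketch of how an op support checker might consume the reworked helpers (CheckQuantizedNodeUnit is hypothetical; the other names are the declarations above):

```cpp
// Hypothetical op-support check built on the helpers declared above:
// classify the NodeUnit once, then branch on the quantized op kind.
bool CheckQuantizedNodeUnit(const NodeUnit& node_unit) {
  const QuantizedOpType quant_op_type = GetQuantizedOpType(node_unit);
  if (quant_op_type == QuantizedOpType::Unknown)
    return false;  // neither a quantized single node nor a QDQ group

  // QLinearConv and QDQConv share one validation path after this PR.
  if (IsQuantizedConv(quant_op_type))
    return HasValidBinaryOpQuantizedInputs(node_unit);

  return true;  // other quantized ops: further checks done elsewhere
}
```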
@@ -158,14 +158,10 @@ void ModelBuilder::PreprocessNodeUnits() {
// Help to get all quantized operators' input and the NodeUnit(s) using the input
void ModelBuilder::GetAllQuantizedOpInputs() {
for (const auto& node_unit : node_unit_holder_) {
// TODO, hookup getting quantized inputs with QDQ NodeUnits and remove the ORT_ENFORCE
ORT_ENFORCE(node_unit->UnitType() == NodeUnit::Type::SingleNode, "QDQ NodeUnit is not yet implemented");
auto quant_op_type = GetQuantizedOpType(*node_unit);

auto qlinear_op_type = GetQLinearOpType(node_unit->GetNode());

// Not a qlinear op
// TODO, add handling for QDQ NodeUnit
if (qlinear_op_type == QLinearOpType::Unknown)
// Not a qlinear op or qdq node group
if (quant_op_type == QuantizedOpType::Unknown)
continue;

const auto add_quantized_input =
@@ -174,12 +170,12 @@ void ModelBuilder::GetAllQuantizedOpInputs() {
all_quantized_op_inputs[input_name].push_back(&node_unit);
};

// All qlinear ops EXCEPT QuantizeLinear has quantized input
if (qlinear_op_type != QLinearOpType::QuantizeLinear) {
// All quantized ops EXCEPT QuantizeLinear have a quantized input
if (quant_op_type != QuantizedOpType::QuantizeLinear) {
add_quantized_input(*node_unit, 0);
}

if (IsQLinearBinaryOp(qlinear_op_type)) {
if (IsQuantizedBinaryOp(quant_op_type)) {
add_quantized_input(*node_unit, 1);
}

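The two conditionals above amount to a simple rule: every quantized op except QuantizeLinear has a quantized input 0, and binary ops also have a quantized input 1. A hypothetical summary helper, assuming the QuantizedOpType enum and IsQuantizedBinaryOp from helper.h:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical summary of the collection logic above: which input indices
// of a quantized NodeUnit carry quantized data.
std::vector<size_t> QuantizedInputIndices(QuantizedOpType quant_op_type) {
  std::vector<size_t> indices;
  if (quant_op_type == QuantizedOpType::Unknown)
    return indices;  // not a qlinear op or QDQ node group
  if (quant_op_type != QuantizedOpType::QuantizeLinear)
    indices.push_back(0);  // all quantized ops except QuantizeLinear
  if (IsQuantizedBinaryOp(quant_op_type))
    indices.push_back(1);  // binary ops also have a quantized input 1
  return indices;
}
```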
@@ -494,14 +490,29 @@ Status ModelBuilder::AddOperandFromPersistMemoryBuffer(

Status ModelBuilder::AddOperations() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
std::unordered_set<const NodeUnit*> processed_node_units;
processed_node_units.reserve(node_unit_holder_.size());
for (size_t i = 0; i < node_indices.size(); i++) {
const auto* node(graph_viewer_.GetNode(node_indices[i]));
for (const auto node_idx : node_indices) {
LOGS_DEFAULT(VERBOSE) << "Adding node [" << node_idx << "]";
const auto* node(graph_viewer_.GetNode(node_idx));
const NodeUnit& node_unit = GetNodeUnit(node);

// Since a NodeUnit may contain multiple nodes, avoid processing the same NodeUnit multiple times
if (Contains(processed_node_units, &node_unit))
// A NodeUnit may contain multiple nodes. Inserting a NodeUnit at the first
// occurrence of any of its nodes in topological order can break the
// topological order of the inserted NodeUnits. For example:
//      Q1
//      |
//  DQ1 DQ2
//    \  |
//     CONV
//      |
//      Q2
// In the above graph we have 2 NodeUnits, NU1 [Q1] and NU2 [DQ1, DQ2, CONV, Q2].
// Q1 and DQ2 have the same topological order. If we insert NU2 when we first
// visit DQ2, the input produced by Q1 and required by NU2 is not yet inserted,
// and finding the inputs for NU2 will fail.
//
// So we only insert the NodeUnit when we hit its target node, which preserves
// the topological order of the NodeUnits.
if (node != &node_unit.GetNode())
continue;

if (const auto* op_builder = GetOpBuilder(node_unit)) {
@@ -510,8 +521,6 @@ Status ModelBuilder::AddOperations() {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Node [", node_unit.Name(), "], type [", node_unit.OpType(), "] is not supported");
}

processed_node_units.insert(&node_unit);
}

return Status::OK();
@@ -535,6 +544,8 @@ Status ModelBuilder::AddOperation(int op, const std::vector<uint32_t>& input_ind
"op = " + std::to_string(op));

num_nnapi_ops_++;

LOGS_DEFAULT(VERBOSE) << "Added NNAPI Operation Type [" << op << "]";
return Status::OK();
}

@@ -640,8 +651,9 @@ int32_t ModelBuilder::FindActivation(const NodeUnit& node_unit) {

// TODO, add support of activation fusion for quantized node group (qdq or qlinear)
// We do not support activation fusion for quantized operators for now
auto qlinear_op_type = GetQLinearOpType(node_unit.GetNode());
if (qlinear_op_type != QLinearOpType::Unknown)
// (usually the activations are fused already in the quantization)
auto quant_op_type = GetQuantizedOpType(node_unit);
if (quant_op_type != QuantizedOpType::Unknown)
return fuse_code;

for (auto it = output_node.OutputEdgesBegin(), end = output_node.OutputEdgesEnd(); it != end; ++it) {
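The NodeUnit ordering rule from the AddOperations comment, distilled: walk the nodes in topological order, but materialize a NodeUnit only when the walk reaches its target node. A sketch with simplified stand-in types (Node, NodeUnit, and ProcessInTopologicalOrder here are illustrative, not the real ORT types):

```cpp
#include <functional>
#include <vector>

// Stand-in for onnxruntime::Node.
struct Node {};

// Stand-in for the real NodeUnit: possibly several nodes, one target node.
struct NodeUnit {
  const Node* target_node;  // e.g. the Conv inside [DQ, DQ, Conv, Q]
  const Node& GetNode() const { return *target_node; }
};

void ProcessInTopologicalOrder(
    const std::vector<const Node*>& topo_order,
    const std::function<const NodeUnit&(const Node*)>& get_node_unit,
    const std::function<void(const NodeUnit&)>& process) {
  for (const Node* node : topo_order) {
    const NodeUnit& node_unit = get_node_unit(node);
    if (node != &node_unit.GetNode())
      continue;  // skip the unit's auxiliary Q/DQ nodes
    process(node_unit);  // each NodeUnit is processed exactly once
  }
}
```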
@@ -1260,8 +1260,7 @@ class ConvOpBuilder : public BaseOpBuilder {
};

/* static */ bool ConvOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
// TODO, add support for QDQ NodeUnit
return node_unit.OpType() == "QLinearConv";
return IsQuantizedConv(GetQuantizedOpType(node_unit));
}

/* static */ void
@@ -1296,7 +1295,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
const auto& initializers(model_builder.GetInitializerTensors());
NodeAttrHelper helper(node_unit);
const auto inputs = node_unit.Inputs();
bool is_qlinear_conv = IsQuantizedOp(node_unit);
bool is_quant_conv = IsQuantizedOp(node_unit);

// onnx strides are in the order height, width
// while nnapi strides are in the order width, height
@@ -1341,7 +1340,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
// this is for per-channel quantization weights
optional<std::vector<float>> w_scales;
bool is_per_tensor_u8s8 = false;
if (is_qlinear_conv) {
if (is_quant_conv) {
ORT_RETURN_IF_ERROR(GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node_unit,
x_scale, w_scale, y_scale,
x_zero_point, w_zero_point, y_zero_point,
@@ -1379,7 +1378,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
// Get weight operand type
// Per-channel quantized weight is handled differently
OperandType onnx_weight_operand_type =
(is_qlinear_conv && w_scales.has_value())
(is_quant_conv && w_scales.has_value())
? OperandType{onnx_weight_type, onnx_weight_shape,
SymmPerChannelQuantParams{w_scales.value(),
depthwise_conv_2d ? 3u : 0u}} // channelDim is 3 for depthwise-conv
@@ -1392,7 +1391,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
ORT_RETURN_IF_ERROR(AddInitializerInNewLayout(model_builder, weight, onnx_weight_operand_type, L_1230, is_per_tensor_u8s8));
}

if (is_qlinear_conv) {
if (is_quant_conv) {
// Verify that the scale and zero point match between the ONNX input/weight and the NNAPI input/weight
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
ORT_RETURN_IF_ERROR(IsValidConvWeightQuantizedType(model_builder, weight, w_scale, w_zero_point, w_scales));
@@ -1420,7 +1419,7 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unknown weight type ", TypeToStr(weight_type));
}
} else if (is_qlinear_conv) {
} else if (is_quant_conv) {
// QLinearConv's bias type needs special handling to add the scale for the quantized input
const auto& bias_tensor = *model_builder.GetInitializerTensors().at(bias);
ORT_RETURN_IF_NOT(bias_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32,
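For context on the bias handling above: NNAPI expects the int32 bias of a quantized conv to use scale = x_scale * w_scale with zero point 0 (assumed from the NNAPI documentation; this diff only shows the surrounding plumbing). A tiny sketch:

```cpp
#include <cstdint>

// Sketch of the bias quantization parameters referenced above (assumption
// based on NNAPI's documented requirement for quantized conv bias).
struct QuantParams {
  float scale;
  int32_t zero_point;
};

QuantParams MakeQuantConvBiasParams(float x_scale, float w_scale) {
  return QuantParams{x_scale * w_scale, /*zero_point=*/0};
}
```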