Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QNN EP] Improve QDQ model accuracy tests #16916

Merged
merged 35 commits into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
caed9a1
Add tests
adrianlizarraga Jul 25, 2023
4f22f43
Fix merge conflicts from main
adrianlizarraga Jul 25, 2023
afbe467
Rework new Asin, Sin, and Sign tests to use float inputs/outputs
adrianlizarraga Jul 25, 2023
59948ed
Add 3-way comparison between Cpu(f32), Cpu(QDQ), QNN(QDQ)
adrianlizarraga Jul 27, 2023
35b91ee
Clean up
adrianlizarraga Jul 27, 2023
eee35db
Switch argmin/argmax tests to new QDQ accuracy testing
adrianlizarraga Jul 27, 2023
fa5dcb4
Convert averagepool tests (need to disable inaccurate tests)
adrianlizarraga Jul 28, 2023
aeba185
Add explicit inputs to AveragePool tests
adrianlizarraga Jul 28, 2023
9fcba0b
Use new QDQ acc testing for BatchNorm. Found issues.
adrianlizarraga Jul 29, 2023
b6123fd
Update conv tests to new accuracy testing
adrianlizarraga Jul 29, 2023
ec41d97
Update comments for QDQ accuracy testing func
adrianlizarraga Jul 29, 2023
d89e0b3
Fix linter errors. Add include for math
adrianlizarraga Jul 29, 2023
40ac48a
More linter fixes. Use different cmath funcs
adrianlizarraga Jul 29, 2023
a610a6e
Update shape for And/Or tests
adrianlizarraga Jul 29, 2023
1eed952
More lint error
adrianlizarraga Jul 29, 2023
9c6149f
Update Gather op tests. Need to support testing scalar indices
adrianlizarraga Jul 30, 2023
46f4793
Simplify accuracy computation
adrianlizarraga Jul 30, 2023
7de0d07
Convert InstanceNorm to new testing approach
adrianlizarraga Jul 30, 2023
974bc77
Update LeakyRelu tests to new accuracy testing approach
adrianlizarraga Jul 30, 2023
f91aea1
Update LRN tests
adrianlizarraga Jul 31, 2023
d48980f
Update MatMul tests
adrianlizarraga Jul 31, 2023
2a05554
Fix merge conflicts with main
adrianlizarraga Jul 31, 2023
f82fb22
Reuse GetDataQuantParams function
adrianlizarraga Jul 31, 2023
e513d63
Update MaxPool tests
adrianlizarraga Jul 31, 2023
fd01af3
Update ReduceOp cpu tests
adrianlizarraga Jul 31, 2023
56fa59b
Update QDQ Reduce op tests
adrianlizarraga Jul 31, 2023
7af9410
Explicitly handle quant params when data only has a single 0
adrianlizarraga Jul 31, 2023
962bca6
Update Resize tests to use new accuracy measuring approach
adrianlizarraga Jul 31, 2023
7917a07
Add disabled inaccurate tests
adrianlizarraga Jul 31, 2023
f2216ee
Add a way to override a test input's range
adrianlizarraga Jul 31, 2023
32ea966
Move all reduce op tests into 1 file and rename it
adrianlizarraga Jul 31, 2023
0742fd3
Merge latest commits from main
adrianlizarraga Aug 2, 2023
20edf00
Merge latest commits from main
adrianlizarraga Aug 3, 2023
3878e79
Revert change in input shape
adrianlizarraga Aug 3, 2023
a21251e
Merge latest commits from main
adrianlizarraga Aug 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
{"Atan", {}},
{"Asin", {}},
{"Sin", {}},
{"Cos", {}},
{"Sign", {}},
{"Tanh", {}},
{"Exp", {}},
{"LRN", {}}};
{"LRN", {}},
{"Ceil", {}},
{"Abs", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() {
return {{"Add", {}},
Expand Down
22 changes: 8 additions & 14 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,34 +145,28 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
if (it != node_unit_supported_result.cend()) {
return it->second;
} else {
// quantized required, filter out the non-quantized nodes, filter in the QDQ nodes
auto IsQdqNode = [](const NodeUnit& node_unit) {
if ("QuantizeLinear" == node_unit.OpType() || "DequantizeLinear" == node_unit.OpType()) {
return true;
} else {
return false;
}
};
const std::string& op_type = node_unit.OpType();
const bool is_qdq_node = op_type == "QuantizeLinear" || op_type == "DequantizeLinear";

// Is NPU backend, is single node, case by case
// Q/DQ nodes -- supported
// Transpose nodes -- supported
// Cast nodes -- need to call CastOpBuilder::IsOpSupported
if (is_npu_backend && NodeUnit::Type::SingleNode == node_unit.UnitType()) {
if (IsQdqNode(node_unit)) { // Qnn has Quantize & Dequantize Op
if (is_qdq_node) { // Qnn has Quantize & Dequantize Op
LOGS(logger, VERBOSE) << "Single Q/DQ node is supported for NPU backend. Node name: " << node_unit.Name();
return true;
}

// Tranpose only changes the data layout. NPU still supports it.
if ("Transpose" == node_unit.OpType()) {
if ("Transpose" == op_type) {
LOGS(logger, VERBOSE) << "Single Transpose node is supported for NPU backend. Node name: " << node_unit.Name();
return true;
}

// For Cast, need to call IsOpSupported (below) to validate input and output types.
// For Cast, And, and Or, we need to call IsOpSupported (below) to validate input and output types.
// For other single non-qdq nodes, immediately return not supported.
if (node_unit.OpType() != "Cast") {
if (op_type != "Cast" && op_type != "And" && op_type != "Or") {
adrianlizarraga marked this conversation as resolved.
Show resolved Hide resolved
LOGS(logger, WARNING) << "Non-QDQ " << node_unit.OpType()
<< " operators are not supported on HTP or DSP backends. " << node_unit.OpType()
<< " node `" << node_unit.Name() << " will not be assigned to QNN EP.";
Expand All @@ -181,14 +175,14 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
}

// Non-NPU backend, quantized model not supported, but a QDQ node encountered
if (!is_npu_backend && IsQdqNode(node_unit)) {
if (!is_npu_backend && is_qdq_node) {
LOGS(logger, ERROR) << "QDQ models are only supported on HTP or DSP backends. "
<< node_unit.OpType() << " node `" << node_unit.Name() << "` will not be assigned to QNN EP.";
return false;
}

bool supported = false;
const auto* op_builder = qnn::GetOpBuilder(node_unit.OpType());
const auto* op_builder = qnn::GetOpBuilder(op_type);
if (op_builder == nullptr) {
LOGS(logger, WARNING) << "Operators of type `" << node_unit.OpType() << "` are not supported by QNN EP."
<< node_unit.OpType() << " node `" << node_unit.Name()
Expand Down
9 changes: 9 additions & 0 deletions onnxruntime/test/optimizer/graph_transform_test_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,15 @@ class ModelTestBuilder {
return &graph_.GetOrCreateNodeArg(name, nullptr);
}

// Creates a boolean initializer of the given shape, filled with random
// true/false values (drawn as uniform 0/1 bytes and converted to bool).
NodeArg* MakeRandInitializerBool(const std::vector<int64_t>& shape) {
  const std::vector<uint8_t> raw_values = rand_gen_.Uniform<uint8_t>(shape, 0, 1);
  std::vector<bool> bool_values;
  bool_values.reserve(raw_values.size());
  for (const uint8_t raw : raw_values) {
    bool_values.push_back(raw != 0);
  }
  return MakeInitializerBool(shape, bool_values);
}

template <typename T>
NodeArg* MakeInitializer(const std::vector<int64_t>& shape, T min, T max) {
return MakeInitializer<T>(shape, rand_gen_.Uniform<T>(shape, min, max));
Expand Down
96 changes: 0 additions & 96 deletions onnxruntime/test/optimizer/qdq_test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,102 +91,6 @@ GetQDQTestCaseFn BuildQDQConvTransposeTestCase(const std::vector<int64_t>& input
};
}

// Creates the following graph:
//                                              _______________________
//                                             |                       |
//                input (f32) -> Q -> DQ ->    |        Gather         | -> Q -> DQ -> output (f32)
//  indices (IndicesType, initializer) ->      |_______________________|
//
// Builds a QDQ Gather test model: the float input is quantized/dequantized
// (scale .003f, zero-point 1), gathered along `axis` using a constant indices
// tensor of shape `indices_shape`, then re-quantized with the same parameters
// before the final dequantized output.
// NOTE(review): the Gather second input is `indices`, not `axes`; its element
// type is the IndicesType template parameter, not necessarily int32.
template <typename QuantType, typename IndicesType>
GetQDQTestCaseFn BuildQDQGatherOpTestCase(const std::vector<int64_t>& input_shape,
                                          const std::vector<IndicesType> indices,
                                          const std::vector<int64_t>& indices_shape,
                                          int64_t axis) {
  return [input_shape, indices, indices_shape, axis](ModelTestBuilder& builder) {
    auto* input_data = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
    auto* final_output = builder.MakeOutput();

    // input_data -> Q/DQ ->
    auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, .003f, 1);

    // Constant indices initializer (integral type; not quantized).
    auto* indices_input = builder.MakeInitializer<IndicesType>(indices_shape, indices);

    auto* gather_output = builder.MakeIntermediate();
    Node& gather_node = builder.AddNode("Gather", {input_qdq_output, indices_input}, {gather_output});
    gather_node.AddAttribute("axis", axis);

    // -> Q/DQ -> final_output
    auto* q_output = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<QuantType>(gather_output, .003f, 1,
                                             q_output);

    builder.AddDequantizeLinearNode<QuantType>(q_output, .003f, 1,
                                               final_output);
  };
}

// Creates the following graph (Gather whose indices input is a scalar initializer):
//                                           _______________________
//                                          |                       |
//             input (f32) -> Q -> DQ ->    |        Gather         | -> Q -> DQ -> output (f32)
//  index (IndicesType, scalar init) ->     |_______________________|
//
// The float input is quantized/dequantized (scale .003f, zero-point 1), gathered
// along `axis` with a single scalar index, then re-quantized at the output.
template <typename QuantType, typename IndicesType>
GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector<int64_t>& input_shape,
                                                       const IndicesType indices,
                                                       int64_t axis) {
  return [input_shape, indices, axis](ModelTestBuilder& builder) {
    auto* float_input = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
    auto* model_output = builder.MakeOutput();

    // Quantize/dequantize pair on the float input (scale .003f, zero-point 1).
    auto* dq_input = AddQDQNodePair<QuantType>(builder, float_input, .003f, 1);

    // Scalar constant used as the Gather indices.
    auto* scalar_index = builder.MakeScalarInitializer<IndicesType>(indices);

    // Gather along the requested axis.
    auto* gather_out = builder.MakeIntermediate();
    Node& gather = builder.AddNode("Gather", {dq_input, scalar_index}, {gather_out});
    gather.AddAttribute("axis", axis);

    // Re-quantize the Gather result, then dequantize into the model output.
    auto* requantized = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<QuantType>(gather_out, .003f, 1, requantized);
    builder.AddDequantizeLinearNode<QuantType>(requantized, .003f, 1, model_output);
  };
}

// Creates the following graph:
//                             _______________________
//                            |                       |
//  input (f32) -> Q -> DQ -> |       LeakyRelu       | -> Q -> DQ -> output (f32)
//                            |_______________________|
//
// The input Q/DQ pair uses scale 0.0473f / zero-point 137; the output pair uses
// scale 0.02696f / zero-point 48. LeakyRelu runs with alpha = 0.2.
template <typename QuantType>
GetQDQTestCaseFn BuildQDQLeakyReluOpTestCase(const std::vector<int64_t>& input_shape) {
  return [input_shape](ModelTestBuilder& builder) {
    auto* float_input = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
    auto* model_output = builder.MakeOutput();

    // Quantize/dequantize pair on the input (scale 0.0473f, zero-point 137).
    auto* dq_input = AddQDQNodePair<QuantType>(builder, float_input, 0.0473f, 137);

    // LeakyRelu with alpha = 0.2.
    auto* leakyrelu_out = builder.MakeIntermediate();
    Node& leakyrelu = builder.AddNode("LeakyRelu", {dq_input}, {leakyrelu_out});
    leakyrelu.AddAttribute("alpha", 0.2f);

    // Re-quantize (scale 0.02696f, zero-point 48) and dequantize into the output.
    auto* requantized = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<QuantType>(leakyrelu_out, 0.02696f, 48, requantized);
    builder.AddDequantizeLinearNode<QuantType>(requantized, 0.02696f, 48, model_output);
  };
}

template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
return [input_shape, weights_shape](ModelTestBuilder& builder) {
Expand Down
32 changes: 21 additions & 11 deletions onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,29 @@ static GetTestModelFn BuildArgMxxTestCase(const std::string& op_type, TestInputD
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder) {
auto* input = MakeTestInput(builder, input_def);
auto* output = builder.MakeOutput();

Node& argm_node = builder.AddNode(op_type, {input}, {output});
auto* argm_output = builder.MakeIntermediate();
Node& argm_node = builder.AddNode(op_type, {input}, {argm_output});
for (const auto& attr : attrs) {
argm_node.AddAttributeProto(attr);
}

// Add cast to uint32
auto* output = builder.MakeOutput();
Node& cast_node = builder.AddNode("Cast", {argm_output}, {output});
const auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT32;
cast_node.AddAttribute("to", static_cast<int64_t>(dst_type));
};
}

// Builds a QDQ model with ArgMin/ArgMax and a Cast to uint32. The quantization parameters are computed from the provided
// input definition.
template <typename QType = uint8_t>
static GetTestModelFn BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder) {
static GetTestQDQModelFn<QType> BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder,
std::vector<QuantParams<QType>>& output_qparams) {
ORT_UNUSED_PARAMETER(output_qparams);
QuantParams<QType> input_qparams = GetTestInputQuantParams(input_def);

auto* input = MakeTestInput(builder, input_def);
Expand Down Expand Up @@ -75,8 +83,8 @@ static void RunCPUArgMxxOpTest(const std::string& op_type, TestInputDef<float> i
expected_ep_assignment);
}

// Runs an ArgMax/ArgMin model on the QNN CPU backend. Checks the graph node assignment, and that inference
// outputs for QNN EP and CPU EP match.
// Runs a QDQ ArgMax/ArgMin model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment, and that inference
// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (when compared to the baseline float32 model).
template <typename QType = uint8_t>
static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
Expand All @@ -90,10 +98,12 @@ static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef<float> i
provider_options["backend_path"] = "libQnnHtp.so";
#endif

RunQnnModelTest(BuildQDQArgMxxTestCase(op_type, input_def, attrs),
provider_options,
opset,
expected_ep_assignment);
TestQDQModelAccuracy(BuildArgMxxTestCase(op_type, input_def, attrs), // baseline float32 model
BuildQDQArgMxxTestCase<QType>(op_type, input_def, attrs), // QDQ model
provider_options,
opset,
expected_ep_assignment,
1e-5f);
adrianlizarraga marked this conversation as resolved.
Show resolved Hide resolved
}

//
Expand Down
Loading