[QNN EP] Improve QDQ model accuracy tests (microsoft#16916)
### Description
- Improves how unit tests measure the accuracy of QDQ models on QNN EP.
- Adds tests for ops: Add, Mul, Abs<sup>1</sup>, And<sup>1</sup>,
Or<sup>1</sup>, Ceil<sup>1</sup>, Cos<sup>1</sup>

<sup>1</sup>: Not previously supported due to missing node unit
handling.

### Motivation and Context
The new approach for testing QDQ operator accuracy requires running 3
inferences:

1. float model on CPU EP (baseline)
2. qdq model on CPU EP
3. qdq model on QNN EP

The unit tests check that running the QDQ model on QNN EP (3) is at least as accurate (within a small tolerance) as running the QDQ model on CPU EP (2). Accuracy is measured by comparing each result against the float32 baseline (1).

This is essentially what we care about: is QNN EP as accurate as CPU EP? If not, the difference is worth investigating as a potential bug.
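
The check boils down to the comparison sketched below (a simplified illustration, not the actual `TestQDQModelAccuracy` implementation; the helper names and the use of mean absolute error as the metric are assumptions):

```cpp
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Mean absolute error of a result against the float32 baseline (1).
static float MeanAbsError(const std::vector<float>& baseline, const std::vector<float>& actual) {
  assert(baseline.size() == actual.size());
  float total = 0.0f;
  for (size_t i = 0; i < baseline.size(); ++i) {
    total += std::fabs(baseline[i] - actual[i]);
  }
  return baseline.empty() ? 0.0f : total / static_cast<float>(baseline.size());
}

// Returns true if the QDQ model on QNN EP (3) is at least as accurate as the
// QDQ model on CPU EP (2), within a small tolerance.
static bool QnnIsAtLeastAsAccurate(const std::vector<float>& cpu_f32_output,  // (1) baseline
                                   const std::vector<float>& cpu_qdq_output,  // (2) QDQ on CPU EP
                                   const std::vector<float>& qnn_qdq_output,  // (3) QDQ on QNN EP
                                   float tolerance = 1e-5f) {
  const float cpu_err = MeanAbsError(cpu_f32_output, cpu_qdq_output);
  const float qnn_err = MeanAbsError(cpu_f32_output, qnn_qdq_output);
  return qnn_err <= cpu_err + tolerance;
}
```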
adrianlizarraga authored and kleiti committed Mar 22, 2024
1 parent 51e216f commit 97a9bd2
Showing 24 changed files with 2,234 additions and 1,589 deletions.
@@ -64,10 +64,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
{"Atan", {}},
{"Asin", {}},
{"Sin", {}},
{"Cos", {}},
{"Sign", {}},
{"Tanh", {}},
{"Exp", {}},
{"LRN", {}}};
{"LRN", {}},
{"Ceil", {}},
{"Abs", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() {
return {{"Add", {}},
22 changes: 8 additions & 14 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -145,34 +145,28 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
if (it != node_unit_supported_result.cend()) {
return it->second;
} else {
// quantized required, filter out the non-quantized nodes, filter in the QDQ nodes
auto IsQdqNode = [](const NodeUnit& node_unit) {
if ("QuantizeLinear" == node_unit.OpType() || "DequantizeLinear" == node_unit.OpType()) {
return true;
} else {
return false;
}
};
const std::string& op_type = node_unit.OpType();
const bool is_qdq_node = op_type == "QuantizeLinear" || op_type == "DequantizeLinear";

// Is NPU backend, is single node, case by case
// Q/DQ nodes -- supported
// Transpose nodes -- supported
// Cast nodes -- need to call CastOpBuilder::IsOpSupported
if (is_npu_backend && NodeUnit::Type::SingleNode == node_unit.UnitType()) {
if (IsQdqNode(node_unit)) { // Qnn has Quantize & Dequantize Op
if (is_qdq_node) { // Qnn has Quantize & Dequantize Op
LOGS(logger, VERBOSE) << "Single Q/DQ node is supported for NPU backend. Node name: " << node_unit.Name();
return true;
}

// Tranpose only changes the data layout. NPU still supports it.
if ("Transpose" == node_unit.OpType()) {
if ("Transpose" == op_type) {
LOGS(logger, VERBOSE) << "Single Transpose node is supported for NPU backend. Node name: " << node_unit.Name();
return true;
}

// For Cast, need to call IsOpSupported (below) to validate input and output types.
// For Cast, And, and Or, we need to call IsOpSupported (below) to validate input and output types.
// For other single non-qdq nodes, immediately return not supported.
if (node_unit.OpType() != "Cast") {
if (op_type != "Cast" && op_type != "And" && op_type != "Or") {
LOGS(logger, WARNING) << "Non-QDQ " << node_unit.OpType()
<< " operators are not supported on HTP or DSP backends. " << node_unit.OpType()
<< " node `" << node_unit.Name() << " will not be assigned to QNN EP.";
@@ -181,14 +175,14 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
}

// Non-NPU backend, quantized model not supported, but a QDQ node encountered
if (!is_npu_backend && IsQdqNode(node_unit)) {
if (!is_npu_backend && is_qdq_node) {
LOGS(logger, ERROR) << "QDQ models are only supported on HTP or DSP backends. "
<< node_unit.OpType() << " node `" << node_unit.Name() << "` will not be assigned to QNN EP.";
return false;
}

bool supported = false;
const auto* op_builder = qnn::GetOpBuilder(node_unit.OpType());
const auto* op_builder = qnn::GetOpBuilder(op_type);
if (op_builder == nullptr) {
LOGS(logger, WARNING) << "Operators of type `" << node_unit.OpType() << "` are not supported by QNN EP."
<< node_unit.OpType() << " node `" << node_unit.Name()
9 changes: 9 additions & 0 deletions onnxruntime/test/optimizer/graph_transform_test_builder.h
@@ -219,6 +219,15 @@ class ModelTestBuilder {
return &graph_.GetOrCreateNodeArg(name, nullptr);
}

NodeArg* MakeRandInitializerBool(const std::vector<int64_t>& shape) {
std::vector<uint8_t> data_uint8 = rand_gen_.Uniform<uint8_t>(shape, 0, 1);
std::vector<bool> data;
for (uint8_t x : data_uint8) {
data.push_back(x != 0);
}
return MakeInitializerBool(shape, data);
}

template <typename T>
NodeArg* MakeInitializer(const std::vector<int64_t>& shape, T min, T max) {
return MakeInitializer<T>(shape, rand_gen_.Uniform<T>(shape, min, max));
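For reference, here is a hypothetical sketch of how the new `MakeRandInitializerBool` helper could feed a Boolean op test. Only builder methods that appear in this diff are used; the exact wiring is illustrative and is not the code added by this commit.

```cpp
// Build a tiny graph with a single "And" node whose inputs are random
// Boolean initializers created by the new helper.
auto build_bool_and_case = [](ModelTestBuilder& builder) {
  auto* input_a = builder.MakeRandInitializerBool({1, 2, 2});  // random {0, 1} values as bool
  auto* input_b = builder.MakeRandInitializerBool({1, 2, 2});
  auto* output = builder.MakeOutput();
  builder.AddNode("And", {input_a, input_b}, {output});
};
```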
96 changes: 0 additions & 96 deletions onnxruntime/test/optimizer/qdq_test_utils.h
@@ -91,102 +91,6 @@ GetQDQTestCaseFn BuildQDQConvTransposeTestCase(const std::vector<int64_t>& input
};
}

// Creates the following graph:
// _______________________
// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32)
// axes (int32, initializer) -> | Gather |
// |_______________________|
//
template <typename QuantType, typename IndicesType>
GetQDQTestCaseFn BuildQDQGatherOpTestCase(const std::vector<int64_t>& input_shape,
const std::vector<IndicesType> indices,
const std::vector<int64_t>& indices_shape,
int64_t axis) {
return [input_shape, indices, indices_shape, axis](ModelTestBuilder& builder) {
auto* input_data = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
auto* final_output = builder.MakeOutput();

// input_data -> Q/DQ ->
auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, .003f, 1);

auto* indices_input = builder.MakeInitializer<IndicesType>(indices_shape, indices);

auto* gather_output = builder.MakeIntermediate();
Node& gather_node = builder.AddNode("Gather", {input_qdq_output, indices_input}, {gather_output});
gather_node.AddAttribute("axis", axis);

// -> Q/DQ -> final_output
auto* q_output = builder.MakeIntermediate();
builder.AddQuantizeLinearNode<QuantType>(gather_output, .003f, 1,
q_output);

builder.AddDequantizeLinearNode<QuantType>(q_output, .003f, 1,
final_output);
};
}

// Creates the following graph:
// _______________________
// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32)
// axes (int32, initializer) -> | Gather |
// |_______________________|
//
template <typename QuantType, typename IndicesType>
GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector<int64_t>& input_shape,
const IndicesType indices,
int64_t axis) {
return [input_shape, indices, axis](ModelTestBuilder& builder) {
auto* input_data = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
auto* final_output = builder.MakeOutput();

// input_data -> Q/DQ ->
auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, .003f, 1);

auto* indices_input = builder.MakeScalarInitializer<IndicesType>(indices);

auto* gather_output = builder.MakeIntermediate();
Node& gather_node = builder.AddNode("Gather", {input_qdq_output, indices_input}, {gather_output});
gather_node.AddAttribute("axis", axis);

// -> Q/DQ -> final_output
auto* q_output = builder.MakeIntermediate();
builder.AddQuantizeLinearNode<QuantType>(gather_output, .003f, 1,
q_output);

builder.AddDequantizeLinearNode<QuantType>(q_output, .003f, 1,
final_output);
};
}

// Creates the following graph:
// _______________________
// | |
// input (f32) -> Q -> DQ -> | LeakyRelu | -> Q -> DQ -> output (f32)
// |_______________________|
//
template <typename QuantType>
GetQDQTestCaseFn BuildQDQLeakyReluOpTestCase(const std::vector<int64_t>& input_shape) {
return [input_shape](ModelTestBuilder& builder) {
auto* input_data = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
auto* final_output = builder.MakeOutput();

// input_data -> Q/DQ ->
auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, 0.0473f, 137);

auto* leakyrelu_output = builder.MakeIntermediate();
Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input_qdq_output}, {leakyrelu_output});
leakyrelu_node.AddAttribute("alpha", 0.2f);

// -> Q/DQ -> final_output
auto* q_output = builder.MakeIntermediate();
builder.AddQuantizeLinearNode<QuantType>(leakyrelu_output, 0.02696f, 48,
q_output);

builder.AddDequantizeLinearNode<QuantType>(q_output, 0.02696f, 48,
final_output);
};
}

template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
return [input_shape, weights_shape](ModelTestBuilder& builder) {
32 changes: 21 additions & 11 deletions onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
@@ -20,21 +20,29 @@ static GetTestModelFn BuildArgMxxTestCase(const std::string& op_type, TestInputD
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder) {
auto* input = MakeTestInput(builder, input_def);
auto* output = builder.MakeOutput();

Node& argm_node = builder.AddNode(op_type, {input}, {output});
auto* argm_output = builder.MakeIntermediate();
Node& argm_node = builder.AddNode(op_type, {input}, {argm_output});
for (const auto& attr : attrs) {
argm_node.AddAttributeProto(attr);
}

// Add cast to uint32
auto* output = builder.MakeOutput();
Node& cast_node = builder.AddNode("Cast", {argm_output}, {output});
const auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT32;
cast_node.AddAttribute("to", static_cast<int64_t>(dst_type));
};
}

// Builds a QDQ model with ArgMin/ArgMax and a Cast to uint32. The quantization parameters are computed from the provided
// input definition.
template <typename QType = uint8_t>
static GetTestModelFn BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder) {
static GetTestQDQModelFn<QType> BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder,
std::vector<QuantParams<QType>>& output_qparams) {
ORT_UNUSED_PARAMETER(output_qparams);
QuantParams<QType> input_qparams = GetTestInputQuantParams(input_def);

auto* input = MakeTestInput(builder, input_def);
@@ -75,8 +83,8 @@ static void RunCPUArgMxxOpTest(const std::string& op_type, TestInputDef<float> i
expected_ep_assignment);
}

// Runs an ArgMax/ArgMin model on the QNN CPU backend. Checks the graph node assignment, and that inference
// outputs for QNN EP and CPU EP match.
// Runs a QDQ ArgMax/ArgMin model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment, and that inference
// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (when compared to the baseline float32 model).
template <typename QType = uint8_t>
static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
@@ -90,10 +98,12 @@ static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef<float> i
provider_options["backend_path"] = "libQnnHtp.so";
#endif

RunQnnModelTest(BuildQDQArgMxxTestCase(op_type, input_def, attrs),
provider_options,
opset,
expected_ep_assignment);
TestQDQModelAccuracy(BuildArgMxxTestCase(op_type, input_def, attrs), // baseline float32 model
BuildQDQArgMxxTestCase<QType>(op_type, input_def, attrs), // QDQ model
provider_options,
opset,
expected_ep_assignment,
1e-5f);
}

//