From 97a9bd2a825f64efb2cb786a6baf28bc2204c7f7 Mon Sep 17 00:00:00 2001 From: Adrian Lizarraga Date: Fri, 4 Aug 2023 12:15:27 -0700 Subject: [PATCH] [QNN EP] Improve QDQ model accuracy tests (#16916) ### Description - Improves how unit tests measure the accuracy of QDQ models on QNN EP. - Adds tests for ops: Add, Mul, Abs1, And1, Or1, Ceil1, Cos1 1: Not previously supported due to missing node unit handling. ### Motivation and Context The new approach for testing QDQ operator accuracy requires running 3 inferences: 1. float model on CPU EP (baseline) 2. qdq model on CPU EP 3. qdq model on QNN EP The unit tests check that running the QDQ model on QNN EP (3) is at least as accurate (+- small tolerance) as running the QDQ model on CPU EP (2). We measure accuracy by comparing to the baseline (1). This is essentially what we care about: is qnn ep as accurate as cpu ep. If not, it is worth investigating as a potential bug. --- .../selectors_actions/shared/utils.cc | 5 +- .../providers/qnn/qnn_execution_provider.cc | 22 +- .../optimizer/graph_transform_test_builder.h | 9 + onnxruntime/test/optimizer/qdq_test_utils.h | 96 --- .../test/providers/qnn/argmaxmin_op_test.cc | 32 +- .../test/providers/qnn/average_pool_test.cc | 170 +++-- .../test/providers/qnn/batch_norm_htp_test.cc | 199 ++++-- onnxruntime/test/providers/qnn/conv_test.cc | 183 +++--- .../test/providers/qnn/gather_op_htp_test.cc | 130 ++-- .../providers/qnn/instance_norm_htp_test.cc | 122 ++-- .../test/providers/qnn/layer_norm_test.cc | 6 +- .../providers/qnn/leakyrelu_op_htp_test.cc | 71 +- onnxruntime/test/providers/qnn/lrn_op_test.cc | 97 +-- .../test/providers/qnn/matmul_test.cpp | 159 ++--- .../test/providers/qnn/max_pool_test.cpp | 343 ++++------ .../test/providers/qnn/qnn_test_utils.cc | 78 +++ .../test/providers/qnn/qnn_test_utils.h | 286 +++++++- .../test/providers/qnn/reduce_op_cpu_test.cc | 225 ------- .../test/providers/qnn/reduce_op_htp_test.cc | 256 -------- .../test/providers/qnn/reduce_op_test.cc 
| 618 ++++++++++++++++++ onnxruntime/test/providers/qnn/resize_test.cc | 213 +++--- .../test/providers/qnn/simple_op_htp_test.cc | 419 ++++++++---- onnxruntime/test/util/include/test_utils.h | 6 + onnxruntime/test/util/test_utils.cc | 78 ++- 24 files changed, 2234 insertions(+), 1589 deletions(-) delete mode 100644 onnxruntime/test/providers/qnn/reduce_op_cpu_test.cc delete mode 100644 onnxruntime/test/providers/qnn/reduce_op_htp_test.cc create mode 100644 onnxruntime/test/providers/qnn/reduce_op_test.cc diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc index 4f24fa26d8896..5dcf27c9b5d2e 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc @@ -64,10 +64,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() { {"Atan", {}}, {"Asin", {}}, {"Sin", {}}, + {"Cos", {}}, {"Sign", {}}, {"Tanh", {}}, {"Exp", {}}, - {"LRN", {}}}; + {"LRN", {}}, + {"Ceil", {}}, + {"Abs", {}}}; } static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() { return {{"Add", {}}, diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 43998084618c0..d80594d8f72c7 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -145,34 +145,28 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp if (it != node_unit_supported_result.cend()) { return it->second; } else { - // quantized required, filter out the non-quantized nodes, filter in the QDQ nodes - auto IsQdqNode = [](const NodeUnit& node_unit) { - if ("QuantizeLinear" == node_unit.OpType() || "DequantizeLinear" == node_unit.OpType()) { - return true; - } else { - return false; - } - }; + 
const std::string& op_type = node_unit.OpType(); + const bool is_qdq_node = op_type == "QuantizeLinear" || op_type == "DequantizeLinear"; // Is NPU backend, is single node, case by case // Q/DQ nodes -- supported // Transpose nodes -- supported // Cast nodes -- need to call CastOpBuilder::IsOpSupported if (is_npu_backend && NodeUnit::Type::SingleNode == node_unit.UnitType()) { - if (IsQdqNode(node_unit)) { // Qnn has Quantize & Dequantize Op + if (is_qdq_node) { // Qnn has Quantize & Dequantize Op LOGS(logger, VERBOSE) << "Single Q/DQ node is supported for NPU backend. Node name: " << node_unit.Name(); return true; } // Tranpose only changes the data layout. NPU still supports it. - if ("Transpose" == node_unit.OpType()) { + if ("Transpose" == op_type) { LOGS(logger, VERBOSE) << "Single Transpose node is supported for NPU backend. Node name: " << node_unit.Name(); return true; } - // For Cast, need to call IsOpSupported (below) to validate input and output types. + // For Cast, And, and Or, we need to call IsOpSupported (below) to validate input and output types. // For other single non-qdq nodes, immediately return not supported. - if (node_unit.OpType() != "Cast") { + if (op_type != "Cast" && op_type != "And" && op_type != "Or") { LOGS(logger, WARNING) << "Non-QDQ " << node_unit.OpType() << " operators are not supported on HTP or DSP backends. " << node_unit.OpType() << " node `" << node_unit.Name() << " will not be assigned to QNN EP."; @@ -181,14 +175,14 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp } // Non-NPU backend, quantized model not supported, but a QDQ node encountered - if (!is_npu_backend && IsQdqNode(node_unit)) { + if (!is_npu_backend && is_qdq_node) { LOGS(logger, ERROR) << "QDQ models are only supported on HTP or DSP backends. 
" << node_unit.OpType() << " node `" << node_unit.Name() << "` will not be assigned to QNN EP."; return false; } bool supported = false; - const auto* op_builder = qnn::GetOpBuilder(node_unit.OpType()); + const auto* op_builder = qnn::GetOpBuilder(op_type); if (op_builder == nullptr) { LOGS(logger, WARNING) << "Operators of type `" << node_unit.OpType() << "` are not supported by QNN EP." << node_unit.OpType() << " node `" << node_unit.Name() diff --git a/onnxruntime/test/optimizer/graph_transform_test_builder.h b/onnxruntime/test/optimizer/graph_transform_test_builder.h index d0be5aa201671..361903c386dd5 100644 --- a/onnxruntime/test/optimizer/graph_transform_test_builder.h +++ b/onnxruntime/test/optimizer/graph_transform_test_builder.h @@ -219,6 +219,15 @@ class ModelTestBuilder { return &graph_.GetOrCreateNodeArg(name, nullptr); } + NodeArg* MakeRandInitializerBool(const std::vector& shape) { + std::vector data_uint8 = rand_gen_.Uniform(shape, 0, 1); + std::vector data; + for (uint8_t x : data_uint8) { + data.push_back(x != 0); + } + return MakeInitializerBool(shape, data); + } + template NodeArg* MakeInitializer(const std::vector& shape, T min, T max) { return MakeInitializer(shape, rand_gen_.Uniform(shape, min, max)); diff --git a/onnxruntime/test/optimizer/qdq_test_utils.h b/onnxruntime/test/optimizer/qdq_test_utils.h index 62dd322f292f2..7f6865a89e6e6 100644 --- a/onnxruntime/test/optimizer/qdq_test_utils.h +++ b/onnxruntime/test/optimizer/qdq_test_utils.h @@ -91,102 +91,6 @@ GetQDQTestCaseFn BuildQDQConvTransposeTestCase(const std::vector& input }; } -// Creates the following graph: -// _______________________ -// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) -// axes (int32, initializer) -> | Gather | -// |_______________________| -// -template -GetQDQTestCaseFn BuildQDQGatherOpTestCase(const std::vector& input_shape, - const std::vector indices, - const std::vector& indices_shape, - int64_t axis) { - return [input_shape, indices, indices_shape, 
axis](ModelTestBuilder& builder) { - auto* input_data = builder.MakeInput(input_shape, -1.0f, 1.0f); - auto* final_output = builder.MakeOutput(); - - // input_data -> Q/DQ -> - auto* input_qdq_output = AddQDQNodePair(builder, input_data, .003f, 1); - - auto* indices_input = builder.MakeInitializer(indices_shape, indices); - - auto* gather_output = builder.MakeIntermediate(); - Node& gather_node = builder.AddNode("Gather", {input_qdq_output, indices_input}, {gather_output}); - gather_node.AddAttribute("axis", axis); - - // -> Q/DQ -> final_output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(gather_output, .003f, 1, - q_output); - - builder.AddDequantizeLinearNode(q_output, .003f, 1, - final_output); - }; -} - -// Creates the following graph: -// _______________________ -// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) -// axes (int32, initializer) -> | Gather | -// |_______________________| -// -template -GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector& input_shape, - const IndicesType indices, - int64_t axis) { - return [input_shape, indices, axis](ModelTestBuilder& builder) { - auto* input_data = builder.MakeInput(input_shape, -1.0f, 1.0f); - auto* final_output = builder.MakeOutput(); - - // input_data -> Q/DQ -> - auto* input_qdq_output = AddQDQNodePair(builder, input_data, .003f, 1); - - auto* indices_input = builder.MakeScalarInitializer(indices); - - auto* gather_output = builder.MakeIntermediate(); - Node& gather_node = builder.AddNode("Gather", {input_qdq_output, indices_input}, {gather_output}); - gather_node.AddAttribute("axis", axis); - - // -> Q/DQ -> final_output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(gather_output, .003f, 1, - q_output); - - builder.AddDequantizeLinearNode(q_output, .003f, 1, - final_output); - }; -} - -// Creates the following graph: -// _______________________ -// | | -// input (f32) -> Q -> DQ -> | LeakyRelu | -> Q -> DQ -> 
output (f32) -// |_______________________| -// -template -GetQDQTestCaseFn BuildQDQLeakyReluOpTestCase(const std::vector& input_shape) { - return [input_shape](ModelTestBuilder& builder) { - auto* input_data = builder.MakeInput(input_shape, -1.0f, 1.0f); - auto* final_output = builder.MakeOutput(); - - // input_data -> Q/DQ -> - auto* input_qdq_output = AddQDQNodePair(builder, input_data, 0.0473f, 137); - - auto* leakyrelu_output = builder.MakeIntermediate(); - Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input_qdq_output}, {leakyrelu_output}); - leakyrelu_node.AddAttribute("alpha", 0.2f); - - // -> Q/DQ -> final_output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(leakyrelu_output, 0.02696f, 48, - q_output); - - builder.AddDequantizeLinearNode(q_output, 0.02696f, 48, - final_output); - }; -} - template GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector& input_shape, const std::vector& weights_shape) { return [input_shape, weights_shape](ModelTestBuilder& builder) { diff --git a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc index 66b53109d7f05..e579e3274e699 100644 --- a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc +++ b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc @@ -20,21 +20,29 @@ static GetTestModelFn BuildArgMxxTestCase(const std::string& op_type, TestInputD const std::vector& attrs) { return [op_type, input_def, attrs](ModelTestBuilder& builder) { auto* input = MakeTestInput(builder, input_def); - auto* output = builder.MakeOutput(); - Node& argm_node = builder.AddNode(op_type, {input}, {output}); + auto* argm_output = builder.MakeIntermediate(); + Node& argm_node = builder.AddNode(op_type, {input}, {argm_output}); for (const auto& attr : attrs) { argm_node.AddAttributeProto(attr); } + + // Add cast to uint32 + auto* output = builder.MakeOutput(); + Node& cast_node = builder.AddNode("Cast", {argm_output}, {output}); + const auto dst_type = 
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT32; + cast_node.AddAttribute("to", static_cast(dst_type)); }; } // Builds a QDQ model with ArgMin/ArgMax and a Cast to uint32. The quantization parameters are computed from the provided // input definition. template -static GetTestModelFn BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef input_def, - const std::vector& attrs) { - return [op_type, input_def, attrs](ModelTestBuilder& builder) { +static GetTestQDQModelFn BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef input_def, + const std::vector& attrs) { + return [op_type, input_def, attrs](ModelTestBuilder& builder, + std::vector>& output_qparams) { + ORT_UNUSED_PARAMETER(output_qparams); QuantParams input_qparams = GetTestInputQuantParams(input_def); auto* input = MakeTestInput(builder, input_def); @@ -75,8 +83,8 @@ static void RunCPUArgMxxOpTest(const std::string& op_type, TestInputDef i expected_ep_assignment); } -// Runs an ArgMax/ArgMin model on the QNN CPU backend. Checks the graph node assignment, and that inference -// outputs for QNN EP and CPU EP match. +// Runs a QDQ ArgMax/ArgMin model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment, and that inference +// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (when compared to the baseline float32 model). 
template static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef input_def, const std::vector& attrs, @@ -90,10 +98,12 @@ static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef i provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQArgMxxTestCase(op_type, input_def, attrs), - provider_options, - opset, - expected_ep_assignment); + TestQDQModelAccuracy(BuildArgMxxTestCase(op_type, input_def, attrs), // baseline float32 model + BuildQDQArgMxxTestCase(op_type, input_def, attrs), // QDQ model + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // diff --git a/onnxruntime/test/providers/qnn/average_pool_test.cc b/onnxruntime/test/providers/qnn/average_pool_test.cc index c501622b5bb61..114802d56cfd3 100644 --- a/onnxruntime/test/providers/qnn/average_pool_test.cc +++ b/onnxruntime/test/providers/qnn/average_pool_test.cc @@ -17,16 +17,15 @@ namespace onnxruntime { namespace test { // Returns a function that creates a graph with a single AveragePool operator. -static GetTestModelFn BuildAveragePoolTestCase(const std::vector& shape, +static GetTestModelFn BuildAveragePoolTestCase(const TestInputDef& input_def, const std::vector& kernel_shape, const std::vector& strides, const std::vector& pads, int64_t count_include_pad, const std::string& auto_pad = "NOTSET") { - return [shape, kernel_shape, strides, pads, + return [input_def, kernel_shape, strides, pads, count_include_pad, auto_pad](ModelTestBuilder& builder) { - // Random input data - auto input = builder.MakeInput(shape, 0.0f, 10.0f); + auto* input = MakeTestInput(builder, input_def); auto* output = builder.MakeOutput(); Node& pool_node = builder.AddNode("AveragePool", {input}, {output}); @@ -51,26 +50,20 @@ static GetTestModelFn BuildAveragePoolTestCase(const std::vector& shape // Returns a function that creates a graph with a QDQ AveragePool operator. 
template -GetQDQTestCaseFn BuildAveragePoolQDQTestCase(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - int64_t count_include_pad, - const std::string& auto_pad = "NOTSET") { - return [shape, kernel_shape, strides, pads, - count_include_pad, auto_pad](ModelTestBuilder& builder) { - float dq_scale = 0.0038f; - float pool_output_scale = 0.0038f; - float q_scale = 0.0038f; - QuantType dq_zp = std::numeric_limits::max() / 2; - QuantType pool_output_zp = std::numeric_limits::max() / 2; - QuantType q_zp = std::numeric_limits::max() / 2; - - auto* input_arg = builder.MakeInput(shape, -1.0f, 1.0f); - auto* output_arg = builder.MakeOutput(); +GetTestQDQModelFn BuildAveragePoolQDQTestCase(const TestInputDef& input_def, + const std::vector& kernel_shape, + const std::vector& strides, + const std::vector& pads, + int64_t count_include_pad, + const std::string& auto_pad = "NOTSET") { + return [input_def, kernel_shape, strides, pads, + count_include_pad, auto_pad](ModelTestBuilder& builder, + std::vector>& output_qparams) { + auto* input_arg = MakeTestInput(builder, input_def); // add QDQ + AveragePool - auto* dq_output = AddQDQNodePair(builder, input_arg, dq_scale, dq_zp); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* dq_output = AddQDQNodePair(builder, input_arg, input_qparams.scale, input_qparams.zero_point); auto* averagepool_output = builder.MakeIntermediate(); Node& pool_node = builder.AddNode("AveragePool", {dq_output}, {averagepool_output}); @@ -90,22 +83,15 @@ GetQDQTestCaseFn BuildAveragePoolQDQTestCase(const std::vector& shape, pool_node.AddAttribute("count_include_pad", count_include_pad); } - // add QDQ output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(averagepool_output, - pool_output_scale, - pool_output_zp, - q_output); - builder.AddDequantizeLinearNode(q_output, - q_scale, - q_zp, - output_arg); + // op_output -> Q -> DQ -> 
output + AddQDQNodePairWithOutputAsGraphOutput(builder, averagepool_output, + output_qparams[0].scale, output_qparams[0].zero_point); }; } // Runs an AveragePool model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. -static void RunAveragePoolOpTest(const std::vector& shape, +static void RunAveragePoolOpTest(const TestInputDef& input_def, const std::vector& kernel_shape, const std::vector& strides, const std::vector& pads, @@ -120,16 +106,16 @@ static void RunAveragePoolOpTest(const std::vector& shape, provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildAveragePoolTestCase(shape, kernel_shape, strides, pads, count_include_pad, auto_pad), + RunQnnModelTest(BuildAveragePoolTestCase(input_def, kernel_shape, strides, pads, count_include_pad, auto_pad), provider_options, opset, expected_ep_assignment); } -// Runs a QDQ AveragePool model on the QNN HTP backend. Checks the graph node assignment, and that inference -// outputs for QNN and CPU match. +// Runs a QDQ AveragePool model on the QNN HTP backend. Checks the graph node assignment, and that accuracy +// on QNN EP is at least as good as on CPU EP. 
template -static void RunQDQAveragePoolOpTest(const std::vector& shape, +static void RunQDQAveragePoolOpTest(const TestInputDef& input_def, const std::vector& kernel_shape, const std::vector& strides, const std::vector& pads, @@ -144,12 +130,13 @@ static void RunQDQAveragePoolOpTest(const std::vector& shape, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildAveragePoolQDQTestCase(shape, kernel_shape, strides, pads, count_include_pad, - auto_pad), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(BuildAveragePoolTestCase(input_def, kernel_shape, strides, pads, count_include_pad, auto_pad), + BuildAveragePoolQDQTestCase(input_def, kernel_shape, strides, pads, count_include_pad, + auto_pad), + provider_options, + opset, + expected_ep_assignment, + fp32_abs_err); } // @@ -157,45 +144,45 @@ static void RunQDQAveragePoolOpTest(const std::vector& shape, // // AveragePool with kernel size equal to the spatial dimension of input tensor. -TEST_F(QnnCPUBackendTests, TestAveragePool_Global) { - RunAveragePoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - 0, // count_include_pad +TEST_F(QnnCPUBackendTests, AveragePool_Global) { + RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input + {3, 3}, // kernel_shape + {3, 3}, // strides + {0, 0, 0, 0}, // pads + 0, // count_include_pad "NOTSET", ExpectedEPNodeAssignment::All); } // AveragePool that counts padding. 
-TEST_F(QnnCPUBackendTests, TestAveragePool_CountIncludePad) { - RunAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {0, 0, 0, 0}, // pads - 1, // count_include_pad +TEST_F(QnnCPUBackendTests, AveragePool_CountIncludePad) { + RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input + {1, 1}, // kernel_shape + {1, 1}, // strides + {0, 0, 0, 0}, // pads + 1, // count_include_pad "NOTSET", ExpectedEPNodeAssignment::All); } // AveragePool that use auto_pad 'SAME_UPPER'. -TEST_F(QnnCPUBackendTests, TestAveragePool_AutopadSameUpper) { - RunAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 1, // count_include_pad +TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameUpper) { + RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input + {1, 1}, // kernel_shape + {1, 1}, // strides + {}, // pads + 1, // count_include_pad "SAME_UPPER", ExpectedEPNodeAssignment::All); } // AveragePool that use auto_pad 'SAME_LOWER'. -TEST_F(QnnCPUBackendTests, TestAveragePool_AutopadSameLower) { - RunAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 1, // count_include_pad +TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameLower) { + RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input + {1, 1}, // kernel_shape + {1, 1}, // strides + {}, // pads + 1, // count_include_pad "SAME_LOWER", ExpectedEPNodeAssignment::All); } @@ -206,8 +193,10 @@ TEST_F(QnnCPUBackendTests, TestAveragePool_AutopadSameLower) { // // QDQ AveragePool with kernel size equal to the spatial dimension of input tensor. 
-TEST_F(QnnHTPBackendTests, TestAveragePool_Global_HTP_u8) { - RunQDQAveragePoolOpTest({1, 2, 3, 3}, // shape +TEST_F(QnnHTPBackendTests, AveragePool_Global_HTP) { + std::vector input = {32.1289f, -59.981f, -17.2799f, 62.7263f, 33.6205f, -19.3515f, -54.0113f, 37.5648f, 61.5357f, + -52.5769f, 27.3637f, -9.01382f, -65.5612f, 19.9497f, -47.9228f, 26.9813f, 83.064f, 0.362503f}; + RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), {3, 3}, // kernel_shape {3, 3}, // strides {0, 0, 0, 0}, // pads @@ -217,39 +206,48 @@ TEST_F(QnnHTPBackendTests, TestAveragePool_Global_HTP_u8) { } // QDQ AveragePool that counts padding. -TEST_F(QnnHTPBackendTests, TestAveragePool_CountIncludePad_HTP_u8) { - RunQDQAveragePoolOpTest({1, 2, 3, 3}, // shape +TEST_F(QnnHTPBackendTests, AveragePool_CountIncludePad_HTP_u8) { + std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + + RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), {1, 1}, // kernel_shape {1, 1}, // strides {0, 0, 0, 0}, // pads 1, // count_include_pad "NOTSET", ExpectedEPNodeAssignment::All, - 18, 0.00381f); + 18); } // QDQ AveragePool that use auto_pad 'SAME_UPPER'. -TEST_F(QnnHTPBackendTests, TestAveragePool_AutopadSameUpper_HTP_u8) { - RunQDQAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 0, // count_include_pad +TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameUpper_HTP_u8) { + std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + + RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), + {1, 1}, // kernel_shape + {1, 1}, // strides + {}, // pads + 0, // count_include_pad "SAME_UPPER", ExpectedEPNodeAssignment::All, - 18, 0.00381f); + 18); } // QDQ AveragePool that use auto_pad 'SAME_LOWER'. 
-TEST_F(QnnHTPBackendTests, TestAveragePool_AutopadSameLower_HTP_u8) { - RunQDQAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 0, // count_include_pad +TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameLower_HTP_u8) { + std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + + RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), + {1, 1}, // kernel_shape + {1, 1}, // strides + {}, // pads + 0, // count_include_pad "SAME_LOWER", ExpectedEPNodeAssignment::All, - 18, 0.00381f); + 18); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc index d69ce53d41b5c..9a4021c5563c8 100644 --- a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc +++ b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc @@ -15,53 +15,133 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -// Creates the graph: -// _______________________ -// input_u8 -> DQ -> | | -// scale_u8 (initializer) -> DQ -> | | -// bias_u8 (initializer) -> DQ -> | BatchNormalization | -> Q -> output_u8 -// mean_u8 (initializer) -> DQ -> | | -// var_u8 (initializer) -> DQ -> |_______________________| -// -// Currently used to test QNN EP. +// Computes the mean and variance of inputs within a channel. 
+// Requires an input with rank >= 3 +static void ComputeChannelMeanAndVar(const std::vector& input_data, const std::vector& input_shape, + std::vector& mean_vals, std::vector& var_vals) { + const size_t input_rank = input_shape.size(); + const size_t num_batches = input_shape[0]; + const size_t num_channels = input_shape[1]; + + size_t batch_stride = 1; + for (size_t i = 1; i < input_rank; i++) { + batch_stride *= input_shape[i]; + } + const size_t channel_stride = batch_stride / num_channels; + + assert(mean_vals.size() == num_channels); + assert(var_vals.size() == num_channels); + for (size_t i = 0; i < num_channels; i++) { + mean_vals[i] = 0.0f; + var_vals[i] = 0.0f; + } + + // Compute running sum of elements within each channel. The running sum is stored in the mean_vals array directly. + for (size_t b = 0; b < num_batches; b++) { + const size_t batch_start = b * batch_stride; + + for (size_t c = 0; c < num_channels; c++) { + const size_t chan_start = batch_start + (c * channel_stride); + + for (size_t i = chan_start; i < chan_start + channel_stride; i++) { + mean_vals[c] += input_data[i]; + } + } + } + + // Divide sums by the number of elements in a channel to get the mean. + for (size_t c = 0; c < num_channels; c++) { + mean_vals[c] /= static_cast(num_batches * channel_stride); + } + + // Compute running sum of deviations from mean within each channel. The running sum is stored in the var_vals array directly. + for (size_t b = 0; b < num_batches; b++) { + const size_t batch_start = b * batch_stride; + + for (size_t c = 0; c < num_channels; c++) { + const size_t chan_start = batch_start + (c * channel_stride); + + for (size_t i = chan_start; i < chan_start + channel_stride; i++) { + const float deviation = input_data[i] - mean_vals[c]; + var_vals[c] += (deviation * deviation); + } + } + } + + // Divide sums by the number of elements in a channel to get the variance. 
+ for (size_t c = 0; c < num_channels; c++) { + var_vals[c] /= static_cast(num_batches * channel_stride); + } +} + +static GetTestModelFn BuildBatchNormTestCase(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def) { + ORT_ENFORCE(input_def.IsRawData()); // Need raw data to compute mean and variance inputs. + ORT_ENFORCE(input_def.GetShape().size() > 2); // Need at least rank 3 data for convenience. + + return [input_def, scale_def, bias_def](ModelTestBuilder& builder) { + const auto& input_shape = input_def.GetShape(); + const auto& input_data = input_def.GetRawData(); + const int64_t num_channels = input_shape[1]; + + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* scale = MakeTestInput(builder, scale_def); + NodeArg* bias = MakeTestInput(builder, bias_def); + + std::vector mean_vals(num_channels); + std::vector var_vals(num_channels); + ComputeChannelMeanAndVar(input_data, input_shape, mean_vals, var_vals); + + NodeArg* mean = builder.MakeInitializer({num_channels}, mean_vals); + NodeArg* var = builder.MakeInitializer({num_channels}, var_vals); + NodeArg* output = builder.MakeOutput(); + builder.AddNode("BatchNormalization", {input, scale, bias, mean, var}, {output}); + }; +} + template -GetQDQTestCaseFn BuildQDQBatchNormTestCase(const std::vector& input_shape) { - return [input_shape](ModelTestBuilder& builder) { +GetTestQDQModelFn BuildQDQBatchNormTestCase(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def) { + ORT_ENFORCE(input_def.IsRawData()); // Need raw data to compute mean and variance inputs. + ORT_ENFORCE(input_def.GetShape().size() > 2); // Need at least rank 3 data for convenience. 
+ + return [input_def, scale_def, bias_def](ModelTestBuilder& builder, + std::vector>& output_qparams) { + const auto& input_shape = input_def.GetShape(); + const auto& input_data = input_def.GetRawData(); const int64_t num_channels = input_shape[1]; - const InputQType quant_zero_point = 0; - const float quant_scale = 1.0f; - auto* input = builder.MakeInput(input_shape, std::numeric_limits::min(), - std::numeric_limits::max()); - auto* dq_input = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(input, 0.0039f, quant_zero_point, dq_input); + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); - auto* dq_scale_output = builder.MakeIntermediate(); - auto* scale = builder.MakeInitializer({num_channels}, static_cast(1), static_cast(127)); - builder.AddDequantizeLinearNode(scale, 0.0028f, quant_zero_point, dq_scale_output); + NodeArg* scale = MakeTestInput(builder, scale_def); + QuantParams scale_qparams = GetTestInputQuantParams(scale_def); + NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point); - auto* dq_bias_output = builder.MakeIntermediate(); - auto* bias = builder.MakeInitializer({num_channels}, std::vector(num_channels)); - builder.AddDequantizeLinearNode(bias, quant_scale, quant_zero_point, dq_bias_output); + NodeArg* bias = MakeTestInput(builder, bias_def); + QuantParams bias_qparams = GetTestInputQuantParams(bias_def); + NodeArg* bias_qdq = AddQDQNodePair(builder, bias, bias_qparams.scale, bias_qparams.zero_point); - auto* dq_mean_output = builder.MakeIntermediate(); - auto* mean = builder.MakeInitializer({num_channels}, std::vector(num_channels)); - builder.AddDequantizeLinearNode(mean, quant_scale, quant_zero_point, dq_mean_output); + std::vector mean_vals(num_channels); + std::vector var_vals(num_channels); + 
ComputeChannelMeanAndVar(input_data, input_shape, mean_vals, var_vals); - auto* dq_var_output = builder.MakeIntermediate(); - auto* var = builder.MakeInitializer({num_channels}, std::vector(num_channels, 255)); - builder.AddDequantizeLinearNode(var, 0.003921f, 0, dq_var_output); + NodeArg* mean = builder.MakeInitializer({num_channels}, mean_vals); + QuantParams mean_qparams = GetDataQuantParams(mean_vals); + NodeArg* mean_qdq = AddQDQNodePair(builder, mean, mean_qparams.scale, mean_qparams.zero_point); - auto* batchnorm_output = builder.MakeIntermediate(); - builder.AddNode("BatchNormalization", {dq_input, dq_scale_output, dq_bias_output, dq_mean_output, dq_var_output}, {batchnorm_output}); + NodeArg* var = builder.MakeInitializer({num_channels}, var_vals); + QuantParams var_qparams = GetDataQuantParams(var_vals); + NodeArg* var_qdq = AddQDQNodePair(builder, var, var_qparams.scale, var_qparams.zero_point); - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(batchnorm_output, 0.00377f, quant_zero_point, q_output); + auto* batchnorm_output = builder.MakeIntermediate(); + builder.AddNode("BatchNormalization", {input_qdq, scale_qdq, bias_qdq, mean_qdq, var_qdq}, + {batchnorm_output}); - auto* final_output = builder.MakeOutput(); - builder.AddDequantizeLinearNode(q_output, 0.00377f, - quant_zero_point, - final_output); + AddQDQNodePairWithOutputAsGraphOutput(builder, batchnorm_output, output_qparams[0].scale, output_qparams[0].zero_point); }; } @@ -72,7 +152,9 @@ GetQDQTestCaseFn BuildQDQBatchNormTestCase(const std::vector& input_sha * \param input_shape The input's shape. * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). 
*/ -static void RunBatchNormQDQTest(const std::vector& input_shape, +static void RunBatchNormQDQTest(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def, ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) @@ -82,28 +164,49 @@ static void RunBatchNormQDQTest(const std::vector& input_shape, #endif // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs. - RunQnnModelTest(BuildQDQBatchNormTestCase(input_shape), - provider_options, - 11, - expected_ep_assignment); + TestQDQModelAccuracy(BuildBatchNormTestCase(input_def, scale_def, bias_def), + BuildQDQBatchNormTestCase(input_def, scale_def, bias_def), + provider_options, + 11, + expected_ep_assignment, + 1e-5f); } +// TODO: FIX TRANSLATION!!! // Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQBatchNorm1D) { - RunBatchNormQDQTest({1, 2, 3}, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm1D) { + constexpr int64_t num_channels = 2; + + RunBatchNormQDQTest(TestInputDef({1, num_channels, 3}, false, {-5.0f, -4.0f, -3.0f, 0.0f, 2.0f, 5.0f}), // Input data + TestInputDef({num_channels}, true, {1.0f, 2.0f}), // Scale initializer + TestInputDef({num_channels}, true, {1.1f, 2.1f}), // Bias initializer + ExpectedEPNodeAssignment::All); } +// TODO: FIX TRANSLATION!!! // Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit. // Use an input of rank 4. 
-TEST_F(QnnHTPBackendTests, TestQDQBatchNorm2D) { - RunBatchNormQDQTest({2, 3, 4, 5}, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm2D) { + constexpr int64_t num_channels = 2; + std::vector input_data = {-8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 1.1f, 3.3f, 8.0f, + -7.0f, -5.0f, -3.0f, -1.0f, 0.0f, 2.1f, 4.3f, 7.0f}; + + RunBatchNormQDQTest(TestInputDef({2, num_channels, 2, 2}, false, input_data), // Input data + TestInputDef({num_channels}, true, {1.0f, 2.0f}), // Scale initializer + TestInputDef({num_channels}, true, {1.1f, 2.1f}), // Bias initializer + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit. // Use an input of rank 5. QNN BatchNormalization doesn't support 5D on HTP -TEST_F(QnnHTPBackendTests, TestQDQBatchNorm3D) { - RunBatchNormQDQTest({1, 2, 3, 4, 5}, ExpectedEPNodeAssignment::None); +TEST_F(QnnHTPBackendTests, BatchNorm3D) { + constexpr int64_t num_channels = 2; + constexpr int64_t num_elems = 1 * num_channels * 3 * 4 * 5; + RunBatchNormQDQTest(TestInputDef({1, num_channels, 3, 4, 5}, false, std::vector(num_elems)), // Input data (all zeros) + TestInputDef({num_channels}, true, {1.0f, 2.0f}), // Scale initializer + TestInputDef({num_channels}, true, {1.1f, 2.1f}), // Bias initializer + ExpectedEPNodeAssignment::None); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc index ddaf7bbf59ad7..147c1dda13e66 100644 --- a/onnxruntime/test/providers/qnn/conv_test.cc +++ b/onnxruntime/test/providers/qnn/conv_test.cc @@ -142,65 +142,36 @@ static void RunCPUConvOpTest(const std::string& conv_op_type, const TestInputDef // Creates a graph with a single Q/DQ Conv operator. Used for testing HTP backend. 
template -static GetTestModelFn BuildQDQConvTestCase(const std::string& conv_op_type, const TestInputDef& input_def, - const TestInputDef& weights_def, - const TestInputDef& bias_def, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - const std::string& auto_pad = "NOTSET") { - return [conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations, auto_pad](ModelTestBuilder& builder) { - auto* output = builder.MakeOutput(); - - using InputQLimits = std::numeric_limits; - - const float input_scale = 0.004f; - const float weight_scale = 0.004f; - const InputQType io_zp = (InputQLimits::min() + InputQLimits::max()) / 2 + 1; - +static GetTestQDQModelFn BuildQDQConvTestCase(const std::string& conv_op_type, const TestInputDef& input_def, + const TestInputDef& weights_def, + const TestInputDef& bias_def, + const std::vector& strides, + const std::vector& pads, + const std::vector& dilations, + const std::string& auto_pad = "NOTSET") { + return [conv_op_type, input_def, weights_def, bias_def, strides, pads, + dilations, auto_pad](ModelTestBuilder& builder, + std::vector>& output_qparams) { std::vector conv_inputs; // input -> Q/DQ -> auto* input = MakeTestInput(builder, input_def); - auto* input_qdq = AddQDQNodePair(builder, input, input_scale, io_zp); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); conv_inputs.push_back(input_qdq); // weights -> Q/DQ -> auto* weights = MakeTestInput(builder, weights_def); - auto* weights_qdq = AddQDQNodePair(builder, weights, weight_scale, io_zp); + QuantParams weights_qparams = GetTestInputQuantParams(weights_def); + auto* weights_qdq = AddQDQNodePair(builder, weights, weights_qparams.scale, weights_qparams.zero_point); conv_inputs.push_back(weights_qdq); // bias -> if (!bias_def.GetShape().empty()) { - NodeArg* bias_int32 = nullptr; - const float bias_scale = input_scale 
* weight_scale; // Taken from python quantization tool: onnx_quantizer.py::quantize_bias_static() - - // Bias must be int32 to be detected as a QDQ node unit. - // We must quantize the data. - if (bias_def.IsRandomData()) { - // Create random initializer def that is quantized to int32 - const auto& rand_info = bias_def.GetRandomDataInfo(); - TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), static_cast(rand_info.min / bias_scale), - static_cast(rand_info.max / bias_scale)); - bias_int32 = MakeTestInput(builder, bias_int32_def); - } else { - assert(bias_def.IsRawData()); - // Create raw data initializer def that is quantized to int32 - const auto& bias_f32_raw = bias_def.GetRawData(); - const size_t num_elems = bias_f32_raw.size(); - - std::vector bias_int32_raw(num_elems); - for (size_t i = 0; i < num_elems; i++) { - bias_int32_raw[i] = static_cast(bias_f32_raw[i] / bias_scale); - } - - TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), bias_int32_raw); - bias_int32 = MakeTestInput(builder, bias_int32_def); - } - - auto* bias = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(bias_int32, bias_scale, 0, bias); - conv_inputs.push_back(bias); + // Bias requirement taken from python quantization tool: onnx_quantizer.py::quantize_bias_static() + const float bias_scale = input_qparams.scale * weights_qparams.scale; + + conv_inputs.push_back(MakeTestQDQBiasInput(builder, bias_def, bias_scale)); } auto* conv_output = builder.MakeIntermediate(); @@ -218,9 +189,7 @@ static GetTestModelFn BuildQDQConvTestCase(const std::string& conv_op_type, cons conv_node.AddAttribute("dilations", dilations); } - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(conv_output, input_scale, io_zp, q_output); - builder.AddDequantizeLinearNode(q_output, input_scale, io_zp, output); + AddQDQNodePairWithOutputAsGraphOutput(builder, conv_output, output_qparams[0].scale, output_qparams[0].zero_point); }; } @@ 
-245,18 +214,19 @@ static void RunHTPConvOpTest(const std::string& conv_op_type, const TestInputDef provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQConvTestCase(conv_op_type, input_def, weights_def, bias_def, - strides, pads, dilations, auto_pad), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(BuildF32ConvTestCase(conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations, auto_pad), + BuildQDQConvTestCase(conv_op_type, input_def, weights_def, bias_def, + strides, pads, dilations, auto_pad), + provider_options, + opset, + expected_ep_assignment, + fp32_abs_err); } // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as a dynamic input. // TODO: Segfaults when calling graphFinalize(). -TEST_F(QnnCPUBackendTests, DISABLED_TestCPUConvf32_dynamic_bias) { +TEST_F(QnnCPUBackendTests, DISABLED_Convf32_dynamic_bias) { RunCPUConvOpTest("Conv", TestInputDef({1, 1, 3, 3}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({2, 1, 2, 2}, true, 0.0f, 1.0f), // Random static weights @@ -270,7 +240,7 @@ TEST_F(QnnCPUBackendTests, DISABLED_TestCPUConvf32_dynamic_bias) { // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as an initializer. -TEST_F(QnnCPUBackendTests, TestCPUConvf32_bias_initializer) { +TEST_F(QnnCPUBackendTests, Convf32_bias_initializer) { RunCPUConvOpTest("Conv", TestInputDef({1, 1, 3, 3}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({2, 1, 2, 2}, true, 0.0f, 1.0f), // Random static weights @@ -283,7 +253,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_bias_initializer) { } // Tests Conv's auto_pad value "SAME_UPPER" (compares to CPU EP). 
-TEST_F(QnnCPUBackendTests, TestCPUConvf32_AutoPadUpper) { +TEST_F(QnnCPUBackendTests, Convf32_AutoPadUpper) { RunCPUConvOpTest("Conv", TestInputDef({1, 1, 3, 3}, false, -3.0f, 3.0f), // Random dynamic input TestInputDef({2, 1, 2, 2}, true, -1.0f, 1.0f), // Random static weights @@ -296,7 +266,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_AutoPadUpper) { } // Tests ConvTranspose's auto_pad value "SAME_UPPER" (compares to CPU EP). -TEST_F(QnnCPUBackendTests, TestCPUConvTransposef32_AutoPadUpper) { +TEST_F(QnnCPUBackendTests, ConvTransposef32_AutoPadUpper) { RunCPUConvOpTest("ConvTranspose", TestInputDef({1, 1, 3, 3}, false, -3.0f, 3.0f), // Random dynamic input TestInputDef({1, 2, 2, 2}, true, -1.0f, 1.0f), // Random static weights @@ -309,7 +279,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvTransposef32_AutoPadUpper) { } // Tests Conv's auto_pad value "SAME_LOWER" (compares to CPU EP). -TEST_F(QnnCPUBackendTests, TestCPUConvf32_AutoPadLower) { +TEST_F(QnnCPUBackendTests, Convf32_AutoPadLower) { RunCPUConvOpTest("Conv", TestInputDef({1, 1, 3, 3}, false, -3.0f, 3.0f), // Random dynamic input TestInputDef({2, 1, 2, 2}, false, -1.0f, 1.0f), // Random dynamic weights @@ -322,7 +292,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_AutoPadLower) { } // Tests ConvTranspose's auto_pad value "SAME_LOWER" (compares to CPU EP). 
-TEST_F(QnnCPUBackendTests, TestCPUConvTransposef32_AutoPadLower) { +TEST_F(QnnCPUBackendTests, ConvTransposef32_AutoPadLower) { RunCPUConvOpTest("ConvTranspose", TestInputDef({1, 1, 3, 3}, false, -3.0f, 3.0f), // Random dynamic input TestInputDef({1, 2, 2, 2}, false, -1.0f, 1.0f), // Random dynamic weights @@ -335,7 +305,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvTransposef32_AutoPadLower) { } // large input,output, pads -TEST_F(QnnCPUBackendTests, TestCPUConvf32_large_input1_pad_bias_initializer) { +TEST_F(QnnCPUBackendTests, Convf32_large_input1_pad_bias_initializer) { RunCPUConvOpTest("Conv", TestInputDef({1, 3, 60, 452}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({16, 3, 3, 3}, true, 0.0f, 1.0f), // Random dynamic weights @@ -349,7 +319,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_large_input1_pad_bias_initializer) { 1e-4f); } -TEST_F(QnnCPUBackendTests, TestCPUConvf32_large_input2_nopad_bias_initializer) { +TEST_F(QnnCPUBackendTests, Convf32_large_input2_nopad_bias_initializer) { #if defined(_WIN32) // Tolerance needs to be > 1.52588e-05 on Windows x64 // TODO: Investigate why @@ -372,7 +342,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_large_input2_nopad_bias_initializer) { } // Test 1D Conv with static weights (implemented in QNN EP as 2D convolution with height of 1). -TEST_F(QnnCPUBackendTests, TestCPUConv1Df32_StaticWeights_DefaultBias) { +TEST_F(QnnCPUBackendTests, Conv1Df32_StaticWeights_DefaultBias) { RunCPUConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input TestInputDef({1, 2, 2}, true, {1.0f, 2.0f, 3.0f, 4.0f}), // Static weights @@ -385,7 +355,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConv1Df32_StaticWeights_DefaultBias) { } // Test 1D Conv with dynamic weights (implemented in QNN EP as 2D convolution with height of 1). 
-TEST_F(QnnCPUBackendTests, TestCPUConv1Df32_DynamicWeights_DefaultBias) { +TEST_F(QnnCPUBackendTests, Conv1Df32_DynamicWeights_DefaultBias) { RunCPUConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input TestInputDef({1, 2, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f}), // Dynamic weights @@ -398,7 +368,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConv1Df32_DynamicWeights_DefaultBias) { } // Test 1D ConvTranspose with static weights (implemented in QNN EP as 2D convolution with height of 1). -TEST_F(QnnCPUBackendTests, TestCPUConvTranspose1Df32_StaticWeights_DefaultBias) { +TEST_F(QnnCPUBackendTests, ConvTranspose1Df32_StaticWeights_DefaultBias) { RunCPUConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input TestInputDef({2, 1, 2}, true, {1.0f, 2.0f, 3.0f, 4.0f}), // Static weights @@ -411,7 +381,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvTranspose1Df32_StaticWeights_DefaultBias) } // Test 1D ConvTranspose with dynamic weights (implemented in QNN EP as 2D convolution with height of 1). -TEST_F(QnnCPUBackendTests, TestCPUConvTranspose1Df32_DynamicWeights_DefaultBias) { +TEST_F(QnnCPUBackendTests, ConvTranspose1Df32_DynamicWeights_DefaultBias) { RunCPUConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input TestInputDef({2, 1, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f}), // Dynamic weights @@ -427,7 +397,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvTranspose1Df32_DynamicWeights_DefaultBias) // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as a dynamic input. 
-TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_bias_dynamic_input) { +TEST_F(QnnHTPBackendTests, ConvU8S32_bias_dynamic_input) { RunHTPConvOpTest("Conv", TestInputDef({1, 1, 5, 5}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({1, 1, 3, 3}, true, -10.0f, 10.0f), // Random static input @@ -441,35 +411,35 @@ TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_bias_dynamic_input) { // Test that dynamic weights with default bias works for Conv. This was previously not working // on older versions of QNN sdk. -TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_DynamicWeight_NoBias) { +TEST_F(QnnHTPBackendTests, ConvU8S32_DynamicWeight_NoBias) { RunHTPConvOpTest("Conv", - TestInputDef({1, 3, 32, 32}, false, 0.0f, 10.0f), // Random dynamic input - TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights - TestInputDef(), // Default bias - {1, 1}, // Strides - {0, 0, 0, 0}, // Pads - {1, 1}, // Dilations + TestInputDef({1, 3, 32, 32}, false, -10.0f, 10.0f), // Random dynamic input + TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights + TestInputDef(), // Default bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations "NOTSET", ExpectedEPNodeAssignment::All); } // Test that dynamic weights with default bias works for ConvTranspose. This was previously not working // on older versions of QNN sdk. 
-TEST_F(QnnHTPBackendTests, TestQDQConvTransposeU8S32_DynamicWeight_NoBias) { +TEST_F(QnnHTPBackendTests, ConvTransposeU8S32_DynamicWeight_NoBias) { RunHTPConvOpTest("ConvTranspose", - TestInputDef({1, 3, 32, 32}, false, 0.0f, 100.0f), // Random dynamic input - TestInputDef({3, 1, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights - TestInputDef(), // Default bias - {1, 1}, // Strides - {0, 0, 0, 0}, // Pads - {1, 1}, // Dilations + TestInputDef({1, 3, 32, 32}, false, -10.0f, 10.0f), // Random dynamic input + TestInputDef({3, 1, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights + TestInputDef(), // Default bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations "NOTSET", ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as an initializer. -TEST_F(QnnHTPBackendTests, TestQDQConvU8U8S32_bias_initializer) { +TEST_F(QnnHTPBackendTests, ConvU8U8S32_bias_initializer) { RunHTPConvOpTest("Conv", TestInputDef({1, 1, 5, 5}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({1, 1, 3, 3}, true, -10.0f, 10.0f), // Random static weight @@ -482,7 +452,7 @@ TEST_F(QnnHTPBackendTests, TestQDQConvU8U8S32_bias_initializer) { } // Tests 1D Conv with bias as an initializer. -TEST_F(QnnHTPBackendTests, TestQDQConv1DU8S32_bias_initializer) { +TEST_F(QnnHTPBackendTests, Conv1DU8S32_bias_initializer) { RunHTPConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -495,7 +465,7 @@ TEST_F(QnnHTPBackendTests, TestQDQConv1DU8S32_bias_initializer) { } // Tests 1D ConvTranspose with bias as an initializer. 
-TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8S32_bias_initializer) { +TEST_F(QnnHTPBackendTests, ConvTranspose1DU8S32_bias_initializer) { RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -508,7 +478,7 @@ TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8S32_bias_initializer) { } // Tests auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_AutoPadUpper) { +TEST_F(QnnHTPBackendTests, ConvU8S32_AutoPadUpper) { RunHTPConvOpTest("Conv", TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights @@ -518,12 +488,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_AutoPadUpper) { {1, 1}, // dilations "SAME_UPPER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests Conv1d auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConv1DU8U8S32_AutoPadUpper) { +TEST_F(QnnHTPBackendTests, Conv1DU8U8S32_AutoPadUpper) { RunHTPConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -533,12 +502,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConv1DU8U8S32_AutoPadUpper) { {1}, // dilations "SAME_UPPER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests TransposeConv1d auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). 
-TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8U8S32_AutoPadUpper) { +TEST_F(QnnHTPBackendTests, ConvTranspose1DU8U8S32_AutoPadUpper) { RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -548,12 +516,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8U8S32_AutoPadUpper) { {1}, // dilations "SAME_UPPER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests Conv's auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConvU8U8S32_AutoPadLower) { +TEST_F(QnnHTPBackendTests, ConvU8U8S32_AutoPadLower) { RunHTPConvOpTest("Conv", TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights @@ -563,12 +530,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvU8U8S32_AutoPadLower) { {1, 1}, // dilations "SAME_LOWER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests ConvTranspose's auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConvTransposeU8U8S32_AutoPadLower) { +TEST_F(QnnHTPBackendTests, ConvTransposeU8U8S32_AutoPadLower) { RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights @@ -578,12 +544,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvTransposeU8U8S32_AutoPadLower) { {1, 1}, // dilations "SAME_LOWER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests Conv1d auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). 
-TEST_F(QnnHTPBackendTests, TestQDQConv1DU8U8S32_AutoPadLower) { +TEST_F(QnnHTPBackendTests, Conv1DU8U8S32_AutoPadLower) { RunHTPConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -593,12 +558,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConv1DU8U8S32_AutoPadLower) { {1}, // dilations "SAME_LOWER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests ConvTranspose 1d auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8U8S32_AutoPadLower) { +TEST_F(QnnHTPBackendTests, ConvTranspose1DU8U8S32_AutoPadLower) { RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -608,12 +572,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8U8S32_AutoPadLower) { {1}, // dilations "SAME_LOWER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // TODO: re-enable tests once HTP issues are resolved -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8U8S32_large_input1_padding_bias_initializer) { +TEST_F(QnnHTPBackendTests, DISABLED_ConvU8U8S32_large_input1_padding_bias_initializer) { RunHTPConvOpTest("Conv", TestInputDef({1, 3, 60, 452}, false, 0.f, 10.f), // Dynamic input TestInputDef({16, 3, 3, 3}, true, -1.f, 1.f), // Static weights @@ -625,7 +588,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8U8S32_large_input1_padding_bias ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8S32_large_input2_bias_initializer) { +TEST_F(QnnHTPBackendTests, DISABLED_ConvU8S32_large_input2_bias_initializer) { RunHTPConvOpTest("Conv", TestInputDef({1, 128, 8, 56}, false, 0.f, 10.f), // Dynamic input TestInputDef({32, 128, 1, 1}, true, -1.f, 1.f), // Random static weights @@ -638,7 +601,7 @@ 
TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8S32_large_input2_bias_initializ } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8U8S32_LargeInput_Dilations_Pads) { +TEST_F(QnnHTPBackendTests, DISABLED_ConvU8U8S32_LargeInput_Dilations_Pads) { RunHTPConvOpTest("Conv", TestInputDef({1, 3, 768, 1152}, false, 0.f, 10.f), // Dynamic input TestInputDef({64, 3, 7, 7}, true, -1.f, 1.f), // Random static weights diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc index 3571cdff9b6cc..d2ca9d8ff71e0 100644 --- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc @@ -6,7 +6,6 @@ #include #include "core/graph/graph.h" -#include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" #include "gtest/gtest.h" @@ -15,17 +14,47 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -/** - * Runs a Gather op model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param opset The opset version. - * \param scalar_indices whether the incidices input is scalar or not. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) - */ +// Function that builds a float model with a Gather op. 
+template
+static GetTestModelFn BuildGatherOpTestCase(const TestInputDef& input_def,
+                                            const TestInputDef& indices_def,
+                                            int64_t axis = 0) {
+  return [input_def, indices_def, axis](ModelTestBuilder& builder) {
+    NodeArg* input = MakeTestInput(builder, input_def);
+    NodeArg* indices = MakeTestInput(builder, indices_def);
+    NodeArg* output = builder.MakeOutput();
+
+    Node& gather_node = builder.AddNode("Gather", {input, indices}, {output});
+    gather_node.AddAttribute("axis", axis);
+  };
+}
+
+// Function that builds a QDQ model with a Gather op.
+template
+static GetTestQDQModelFn BuildQDQGatherOpTestCase(const TestInputDef& input_def,
+                                                  const TestInputDef& indices_def,
+                                                  int64_t axis = 0) {
+  return [input_def, indices_def, axis](ModelTestBuilder& builder,
+                                        std::vector>& output_qparams) {
+    NodeArg* input = MakeTestInput(builder, input_def);
+    QuantParams input_qparams = GetTestInputQuantParams(input_def);
+    NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point);
+
+    NodeArg* indices = MakeTestInput(builder, indices_def);
+
+    NodeArg* gather_output = builder.MakeIntermediate();
+    Node& gather_node = builder.AddNode("Gather", {input_qdq, indices}, {gather_output});
+    gather_node.AddAttribute("axis", axis);
+
+    AddQDQNodePairWithOutputAsGraphOutput(builder, gather_output, output_qparams[0].scale, output_qparams[0].zero_point);
+  };
+}
+
+// Test the accuracy of a QDQ Gather model on QNN EP. Checks if the QDQ model on QNN EP is as accurate as the QDQ model on CPU EP
+// (compared to float32 model).
template -static void RunGatherOpQDQTest(int opset, bool scalar_indices = false, - ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All) { +static void RunQDQGatherOpTest(const TestInputDef& input_def, const TestInputDef& indices_def, + int64_t axis, int opset, ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -33,54 +62,69 @@ static void RunGatherOpQDQTest(int opset, bool scalar_indices = false, provider_options["backend_path"] = "libQnnHtp.so"; #endif - if (scalar_indices) { - RunQnnModelTest(BuildQDQGatherOpScalarIndicesTestCase({2, 3, 4}, // input shape - 1, // indices - 1), // axis - provider_options, - opset, - expected_ep_assignment); - } else { - RunQnnModelTest(BuildQDQGatherOpTestCase({2, 3, 4}, // input shape - std::vector{1}, // indices - {1}, // indices_shape - 1), // axis - provider_options, - opset, - expected_ep_assignment); - } + TestQDQModelAccuracy(BuildGatherOpTestCase(input_def, indices_def, axis), + BuildQDQGatherOpTestCase(input_def, indices_def, axis), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // -// - Uses uint8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQGatherOpU8) { - RunGatherOpQDQTest(11); +// Static int64 indices with default axis. +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt64_Axis0) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, true, {0, 1, 1, 2}), + 0, + 13, + ExpectedEPNodeAssignment::All); +} + +// Tests that dynamic int64 indices are not supported on HTP backend. 
+TEST_F(QnnHTPBackendTests, GatherOp_IndicesDynamicInt64_Axis0) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, false, {0, 1, 1, 2}), + 0, + 13, + ExpectedEPNodeAssignment::None); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // -// - Uses int8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQGatherOpI8) { - RunGatherOpQDQTest(11); +// Static int32 indices with default axis. +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt32_Axis0) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, true, {0, 1, 1, 2}), + 0, + 13, + ExpectedEPNodeAssignment::All); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // -// - Uses uint8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQGatherOpScalarIndicesU8) { - RunGatherOpQDQTest(11, true); +// Dynamic int32 indices with default axis. +TEST_F(QnnHTPBackendTests, GatherOp_IndicesDynamicInt32_Axis0) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, false, {0, 1, 1, 2}), + 0, + 13, + ExpectedEPNodeAssignment::All); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // -// - Uses int8 as the quantization type. 
-TEST_F(QnnHTPBackendTests, TestQDQGatherOpScalarIndicesI8) { - RunGatherOpQDQTest(11, true); +// Static int32 indices with axis = 1 +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt32_Axis1) { + RunQDQGatherOpTest(TestInputDef({3, 3}, false, {1.0f, 1.2f, 1.9f, 2.3f, 3.4f, 3.9f, 4.5f, 5.7f, 5.9f}), + TestInputDef({1, 2}, true, {0, 2}), + 1, + 13, + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc index 3846a2868a895..683c4d49fa99d 100644 --- a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc +++ b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc @@ -16,47 +16,56 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -// Creates the graph: -// _______________________ -// input_u8 -> DQ -> | | -> Q -> output_u8 -// scale_u8 (initializer) -> DQ -> | InstanceNormalization | -// bias_u8 (initializer) -> DQ -> |_______________________| -// -// Currently used to test QNN EP. -template -GetQDQTestCaseFn BuildQDQInstanceNormTestCase(const TestInputDef& input_def, - const TestInputDef& scale_def, - const TestInputDef& bias_def, - const std::vector& attrs) { +// Function that builds a float32 model with an InstanceNormalization operator. 
+GetTestModelFn BuildInstanceNormTestCase(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def, + const std::vector& attrs) { return [input_def, scale_def, bias_def, attrs](ModelTestBuilder& builder) { - const QuantType quant_zero_point = 0; - const float quant_scale = 1.0f; - - auto* dq_scale_output = builder.MakeIntermediate(); - auto* scale = MakeTestInput(builder, scale_def); - builder.AddDequantizeLinearNode(scale, quant_scale, quant_zero_point, dq_scale_output); + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* scale = MakeTestInput(builder, scale_def); + NodeArg* bias = MakeTestInput(builder, bias_def); - // Add bias (initializer) -> DQ -> - auto* dq_bias_output = builder.MakeIntermediate(); - auto* bias = MakeTestInput(builder, bias_def); - builder.AddDequantizeLinearNode(bias, 1.0f, 0, dq_bias_output); + NodeArg* output = builder.MakeOutput(); + Node& op_node = builder.AddNode("InstanceNormalization", {input, scale, bias}, {output}); - // Add input_u8 -> DQ -> - auto* input_u8 = MakeTestInput(builder, input_def); - auto* dq_input_output = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(input_u8, quant_scale, quant_zero_point, dq_input_output); + for (const auto& attr : attrs) { + op_node.AddAttributeProto(attr); + } + }; +} - // Add dq_input_output -> InstanceNormalization -> +// Function that builds a QDQ model with an InstanceNormalization operator. 
+template +static GetTestQDQModelFn BuildQDQInstanceNormTestCase(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def, + const std::vector& attrs) { + return [input_def, scale_def, bias_def, attrs](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input => Q => DQ => + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // scale => Q => DQ => + NodeArg* scale = MakeTestInput(builder, scale_def); + QuantParams scale_qparams = GetTestInputQuantParams(scale_def); + NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point); + + // bias (as int32) => DQ => + NodeArg* bias_qdq = MakeTestQDQBiasInput(builder, bias_def, input_qparams.scale * scale_qparams.scale); + + // InstanceNormalization operator. auto* instance_norm_output = builder.MakeIntermediate(); - Node& inst_norm_node = builder.AddNode("InstanceNormalization", {dq_input_output, dq_scale_output, dq_bias_output}, + Node& inst_norm_node = builder.AddNode("InstanceNormalization", {input_qdq, scale_qdq, bias_qdq}, {instance_norm_output}); for (const auto& attr : attrs) { inst_norm_node.AddAttributeProto(attr); } // Add instance_norm_output -> Q -> output_u8 - auto* output_u8 = builder.MakeOutput(); - builder.AddQuantizeLinearNode(instance_norm_output, quant_scale, quant_zero_point, output_u8); + AddQDQNodePairWithOutputAsGraphOutput(builder, instance_norm_output, output_qparams[0].scale, output_qparams[0].zero_point); }; } @@ -71,9 +80,9 @@ GetQDQTestCaseFn BuildQDQInstanceNormTestCase(const TestInputDef& inp * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). 
*/ template -static void RunInstanceNormQDQTest(const TestInputDef& input_def, - const TestInputDef& scale_def, - const TestInputDef& bias_def, +static void RunInstanceNormQDQTest(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def, const std::vector& attrs, ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; @@ -84,50 +93,39 @@ static void RunInstanceNormQDQTest(const TestInputDef& input_def, #endif // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs. - RunQnnModelTest(BuildQDQInstanceNormTestCase(input_def, scale_def, bias_def, attrs), - provider_options, - 18, - expected_ep_assignment); + TestQDQModelAccuracy(BuildInstanceNormTestCase(input_def, scale_def, bias_def, attrs), + BuildQDQInstanceNormTestCase(input_def, scale_def, bias_def, attrs), + provider_options, + 18, + expected_ep_assignment, + 1e-5f); } // Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit. // Use an input of rank 4. -TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8) { - RunInstanceNormQDQTest(TestInputDef({1, 2, 3, 3}, false, 0, 255), - TestInputDef({2}, true, 0, 127), - TestInputDef({2}, true, 0, 10), +TEST_F(QnnHTPBackendTests, InstanceNormU8) { + RunInstanceNormQDQTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), + TestInputDef({2}, true, -2.0f, 2.0f), + TestInputDef({2}, true, -3.0f, 3.0f), {}, ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8Rank3) { - RunInstanceNormQDQTest(TestInputDef({1, 2, 3}, false, {6, 4, 2, 6, 8, 2}), - TestInputDef({2}, true, {1, 2}), - TestInputDef({2}, true, {1, 3}), - {}, - ExpectedEPNodeAssignment::All); -} - -// TODO: This test now fails in QNN SDK version 2.12.0 (windows arm64 and linux x86_64). -// This worked in QNN SDK version 2.10.0. 
Need to determine the severity of this inaccuracy. -// -// Exepcted output: 2 6 2 42 42 0 -// Actual output: 2 6 2 43 43 0 -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQInstanceNormU8Rank3_QnnSdk_2_12_Regression) { - RunInstanceNormQDQTest(TestInputDef({1, 2, 3}, false, {3, 4, 3, 9, 9, 8}), - TestInputDef({2}, true, {2, 57}), - TestInputDef({2}, true, {3, 2}), +TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3) { + RunInstanceNormQDQTest(TestInputDef({1, 2, 3}, false, {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f}), + TestInputDef({2}, true, {1.0f, 2.0f}), + TestInputDef({2}, true, {1.0f, 3.0f}), {}, ExpectedEPNodeAssignment::All); } // Check that QNN InstanceNorm operator does not handle inputs with rank > 4. -TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8Rank5) { - RunInstanceNormQDQTest(TestInputDef({1, 2, 3, 3, 3}, false, 0, 255), - TestInputDef({2}, true, 0, 127), - TestInputDef({2}, true, 0, 10), +TEST_F(QnnHTPBackendTests, InstanceNormU8Rank5) { + RunInstanceNormQDQTest(TestInputDef({1, 2, 3, 3, 3}, false, -10.0f, 10.0f), + TestInputDef({2}, true, -2.0f, 2.0f), + TestInputDef({2}, true, -3.0f, 3.0f), {}, ExpectedEPNodeAssignment::None); } diff --git a/onnxruntime/test/providers/qnn/layer_norm_test.cc b/onnxruntime/test/providers/qnn/layer_norm_test.cc index d9512d16a1f28..3b73a6bf800a3 100644 --- a/onnxruntime/test/providers/qnn/layer_norm_test.cc +++ b/onnxruntime/test/providers/qnn/layer_norm_test.cc @@ -113,6 +113,7 @@ static void RunLayerNormQDQTest(const std::vector& input_shape, #endif // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs. + // TODO: Use new QDQ accuracy testing approach (see TestQDQModelAccuracy) RunQnnModelTest(BuildQDQLayerNormTestCase(input_shape, scale_shape, axis_value), provider_options, 11, @@ -122,11 +123,14 @@ static void RunLayerNormQDQTest(const std::vector& input_shape, // Check that QNN compiles DQ -> LayerNormalization -> Q as a single unit. // Use an input of rank 3. 
// Failed QNN op validation: QnnDsp Param[0] has incorrect Value 3 +// TODO: Use new QDQ accuracy testing approach (see TestQDQModelAccuracy) TEST_F(QnnHTPBackendTests, TestQDQLayerNorm1DAxis0) { RunLayerNormQDQTest({1, 2, 3}, {1, 2, 3}, ExpectedEPNodeAssignment::None); } // Failed QNN FinalizeGraphs: QnnDsp Failed to finalize graph (id: 1) with err 1002 +// +// TODO: Use new QDQ accuracy testing approach (see TestQDQModelAccuracy) TEST_F(QnnHTPBackendTests, DISABLED_TestQDQLayerNorm1DAxis2) { RunLayerNormQDQTest({1, 2, 3}, {3}, ExpectedEPNodeAssignment::All, -1); } @@ -136,4 +140,4 @@ TEST_F(QnnHTPBackendTests, DISABLED_TestQDQLayerNorm1DAxis2) { } // namespace test } // namespace onnxruntime -#endif \ No newline at end of file +#endif diff --git a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc index 489ac1924eb8e..772476cb0d245 100644 --- a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc @@ -15,17 +15,44 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -/** - * Runs a LeakyRelu op model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param op_type The LeakyRelu op type (e.g., ReduceSum). - * \param opset The opset version. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) - */ +// Creates a function that builds a model with a LeakyRelu operator. 
+static GetTestModelFn BuildLeakyReluOpTestCase(const TestInputDef& input_def, float alpha) { + return [input_def, alpha](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* output = builder.MakeOutput(); + Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input}, {output}); + leakyrelu_node.AddAttribute("alpha", alpha); + }; +} + +// Creates a function that builds a QDQ model with a LeakyRelu operator. +template +static GetTestQDQModelFn BuildQDQLeakyReluOpTestCase(const TestInputDef& input_def, + float alpha) { + return [input_def, alpha](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input => Q => DQ => + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // LeakryRelu + auto* leakyrelu_output = builder.MakeIntermediate(); + Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input_qdq}, {leakyrelu_output}); + leakyrelu_node.AddAttribute("alpha", alpha); + + // => Q => DQ -> final output + AddQDQNodePairWithOutputAsGraphOutput(builder, leakyrelu_output, output_qparams[0].scale, + output_qparams[0].zero_point); + }; +} + +// Checks the accuracy of a QDQ LeakyRelu model by comparing to ORT CPU EP. 
template -static void RunLeakyReluOpQDQTest(int opset, - ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All) { +static void RunLeakyReluOpQDQTest(const TestInputDef& input_def, + float alpha, + int opset, + ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -33,26 +60,34 @@ static void RunLeakyReluOpQDQTest(int opset, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQLeakyReluOpTestCase({2, 3, 4}), - provider_options, - opset, - expected_ep_assignment); + TestQDQModelAccuracy(BuildLeakyReluOpTestCase(input_def, alpha), + BuildQDQLeakyReluOpTestCase(input_def, alpha), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all // nodes are supported by the QNN EP, and that the inference results match the CPU EP results. // // - Uses uint8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet15) { - RunLeakyReluOpQDQTest(15); +TEST_F(QnnHTPBackendTests, LeakyReluOpSet15) { + RunLeakyReluOpQDQTest(TestInputDef({1, 2, 3}, false, {-40.0f, -20.0f, 0.0f, 10.0f, 30.0f, 40.0f}), + 0.2f, + 15, + ExpectedEPNodeAssignment::All); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all // nodes are supported by the QNN EP, and that the inference results match the CPU EP results. // // - Uses uint8 as the quantization type. 
-TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet16) { - RunLeakyReluOpQDQTest(16); +TEST_F(QnnHTPBackendTests, LeakyReluOpSet16) { + RunLeakyReluOpQDQTest(TestInputDef({1, 2, 3}, false, {-40.0f, -20.0f, 0.0f, 10.0f, 30.0f, 40.0f}), + 0.2f, + 16, + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/lrn_op_test.cc b/onnxruntime/test/providers/qnn/lrn_op_test.cc index 3b28678bcb0a7..82f7b246aa5e4 100644 --- a/onnxruntime/test/providers/qnn/lrn_op_test.cc +++ b/onnxruntime/test/providers/qnn/lrn_op_test.cc @@ -17,10 +17,10 @@ namespace onnxruntime { namespace test { // Creates a graph with a single LRN operator. Used for testing CPU backend. -static GetTestModelFn BuildLRNTestCase(const std::vector& shape, int64_t size, +static GetTestModelFn BuildLRNTestCase(const TestInputDef& input_def, int64_t size, float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f) { - return [shape, size, alpha, beta, bias](ModelTestBuilder& builder) { - auto* input = builder.MakeInput(shape, 0.0f, 20.0f); + return [input_def, size, alpha, beta, bias](ModelTestBuilder& builder) { + auto* input = MakeTestInput(builder, input_def); auto* output = builder.MakeOutput(); Node& lrn_node = builder.AddNode("LRN", {input}, {output}); @@ -31,40 +31,34 @@ static GetTestModelFn BuildLRNTestCase(const std::vector& shape, int64_ }; } -// Q/DQ scaled used to build Q/DQ test model. This is a global constant -// because results from HTP backend are off by exactly this amount. -static constexpr float qdq_scale = 0.0038f; - // Creates a graph with a single Q/DQ LRN operator. Used for testing HTP backend. 
template -static GetTestModelFn BuildQDQLRNTestCase(const std::vector& shape, int64_t size, - float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f) { - return [shape, size, alpha, beta, bias](ModelTestBuilder& builder) { - const InputQType zero_point = std::numeric_limits::max() / 2; - - auto* input = builder.MakeInput(shape, -1.0f, 1.0f); - auto* output = builder.MakeOutput(); - - // input -> Q -> DQ -> LRN - auto* qdq_output = AddQDQNodePair(builder, input, qdq_scale, zero_point); - auto* lrn_output = builder.MakeIntermediate(); - - Node& lrn_node = builder.AddNode("LRN", {qdq_output}, {lrn_output}); +static GetTestQDQModelFn BuildQDQLRNTestCase(const TestInputDef& input_def, int64_t size, + float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f) { + return [input_def, size, alpha, beta, bias](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input -> Q -> DQ -> + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // LRN + NodeArg* lrn_output = builder.MakeIntermediate(); + Node& lrn_node = builder.AddNode("LRN", {input_qdq}, {lrn_output}); lrn_node.AddAttribute("size", size); lrn_node.AddAttribute("alpha", alpha); lrn_node.AddAttribute("beta", beta); lrn_node.AddAttribute("bias", bias); - // -> Q -> DQ -> output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(lrn_output, qdq_scale, zero_point, q_output); - builder.AddDequantizeLinearNode(q_output, qdq_scale, zero_point, output); + // LRN output -> Q -> DQ -> final output + AddQDQNodePairWithOutputAsGraphOutput(builder, lrn_output, output_qparams[0].scale, + output_qparams[0].zero_point); }; } // Runs an LRN model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN EP and CPU EP match. 
-static void RunCPULRNOpTest(const std::vector& shape, int64_t size, +static void RunCPULRNOpTest(const TestInputDef& input_def, int64_t size, ExpectedEPNodeAssignment expected_ep_assignment, float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f, int opset = 13) { ProviderOptions provider_options; @@ -77,7 +71,7 @@ static void RunCPULRNOpTest(const std::vector& shape, int64_t size, fp32_abs_err = 1.5e-5f; // On linux we need slightly larger tolerance. #endif - RunQnnModelTest(BuildLRNTestCase(shape, size, alpha, beta, bias), + RunQnnModelTest(BuildLRNTestCase(input_def, size, alpha, beta, bias), provider_options, opset, expected_ep_assignment, @@ -87,10 +81,10 @@ static void RunCPULRNOpTest(const std::vector& shape, int64_t size, // Runs an LRN model on the QNN HTP backend. Checks the graph node assignment, and that inference // outputs for QNN EP and CPU EP match. template -static void RunQDQLRNOpTest(const std::vector& shape, int64_t size, +static void RunQDQLRNOpTest(const TestInputDef& input_def, int64_t size, ExpectedEPNodeAssignment expected_ep_assignment, float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f, - int opset = 13, float fp32_abs_err = qdq_scale) { + int opset = 13) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -98,27 +92,34 @@ static void RunQDQLRNOpTest(const std::vector& shape, int64_t size, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQLRNTestCase(shape, size, alpha, beta, bias), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err + 0.0001f); + TestQDQModelAccuracy(BuildLRNTestCase(input_def, size, alpha, beta, bias), + BuildQDQLRNTestCase(input_def, size, alpha, beta, bias), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // // CPU tests: // -TEST_F(QnnCPUBackendTests, TestCPULRNSize3) { - RunCPULRNOpTest({1, 128, 4, 5}, 3, ExpectedEPNodeAssignment::All); +TEST_F(QnnCPUBackendTests, LRNSize3) 
{ + RunCPULRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 3, // Size + ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestCPULRNSize5) { - RunCPULRNOpTest({1, 128, 4, 5}, 5, ExpectedEPNodeAssignment::All); +TEST_F(QnnCPUBackendTests, LRNSize5) { + RunCPULRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 5, // Size + ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestCPULRN_size_larger_than_channel) { - RunCPULRNOpTest({1, 128, 4, 5}, 255, ExpectedEPNodeAssignment::All); +TEST_F(QnnCPUBackendTests, LRN_size_larger_than_channel) { + RunCPULRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 255, // Size + ExpectedEPNodeAssignment::All); } #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) @@ -126,16 +127,22 @@ TEST_F(QnnCPUBackendTests, TestCPULRN_size_larger_than_channel) { // HTP tests: // -TEST_F(QnnHTPBackendTests, TestHTPLRNSize3) { - RunQDQLRNOpTest({1, 128, 4, 5}, 3, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, LRNSize3) { + RunQDQLRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 3, // Size + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestHTPLRNSize5) { - RunQDQLRNOpTest({1, 128, 4, 5}, 5, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, LRNSize5) { + RunQDQLRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 5, // Size + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestHTPLRN_size_larger_than_channel) { - RunQDQLRNOpTest({1, 128, 4, 5}, 255, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, LRN_size_larger_than_channel) { + RunQDQLRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 255, // Size + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp index 5c7a08ae06080..421bdfdaf1bb6 100644 --- 
a/onnxruntime/test/providers/qnn/matmul_test.cpp +++ b/onnxruntime/test/providers/qnn/matmul_test.cpp @@ -6,7 +6,6 @@ #include #include -#include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" #include "onnx/onnx_pb.h" @@ -17,74 +16,46 @@ namespace onnxruntime { namespace test { // Returns a function that creates a graph with MatMul operator. -static GetTestModelFn BuildMatMulOpTestCase(const std::vector& input1_shape, - const std::vector& input2_shape) { - return [input1_shape, input2_shape](ModelTestBuilder& builder) { - // Random input data - auto input1 = builder.MakeInput(input1_shape, 0.0f, 10.0f); - auto input2 = builder.MakeInput(input2_shape, 0.0f, 10.0f); - - auto* output = builder.MakeOutput(); +static GetTestModelFn BuildMatMulOpTestCase(const TestInputDef& input1_def, + const TestInputDef& input2_def) { + return [input1_def, input2_def](ModelTestBuilder& builder) { + NodeArg* input1 = MakeTestInput(builder, input1_def); + NodeArg* input2 = MakeTestInput(builder, input2_def); + NodeArg* output = builder.MakeOutput(); builder.AddNode("MatMul", {input1, input2}, {output}); }; } -// Returns a function that creates a graph with a QDQ AveragePool operator. +// Returns a function that creates a graph with a QDQ MatMul operator. 
template -GetQDQTestCaseFn BuildMatMulOpQDQTestCase(const std::vector& input1_shape, - const std::vector& input2_shape) { - return [input1_shape, input2_shape](ModelTestBuilder& builder) { - float pool_output_scale = 0.0038f; - float q_scale = 0.0038f; - QuantType pool_output_zp = std::numeric_limits::max() / 2; - QuantType q_zp = std::numeric_limits::max() / 2; - - auto* input_arg = builder.MakeInput(input1_shape, -1.f, 1.f); - auto* output_arg = builder.MakeOutput(); - - using InputLimits = std::numeric_limits; - - // add QDQ input - auto* q1_output = builder.MakeIntermediate(); - auto* dq1_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(input_arg, - pool_output_scale, - pool_output_zp, - q1_output); - builder.AddDequantizeLinearNode(q1_output, - q_scale, - q_zp, - dq1_output); - - // add input b initializer (NNAPI only supports case of MatMul A*B - B is an initializer) - auto* dq_2_output = builder.MakeIntermediate(); - auto* input_b = builder.MakeInitializer(input2_shape, InputLimits::min(), InputLimits::max()); - builder.AddDequantizeLinearNode(input_b, - q_scale, - q_zp, - dq_2_output); - - // add MatMul operator - auto* matmul_op_output = builder.MakeIntermediate(); - builder.AddNode("MatMul", {dq1_output, dq_2_output}, {matmul_op_output}); - - // add QDQ output - auto* q3_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(matmul_op_output, - pool_output_scale, - pool_output_zp, - q3_output); - builder.AddDequantizeLinearNode(q3_output, - q_scale, - q_zp, - output_arg); +static GetTestQDQModelFn BuildMatMulOpQDQTestCase(const TestInputDef& input1_def, + const TestInputDef& input2_def) { + return [input1_def, input2_def](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input1 -> Q -> DQ -> + NodeArg* input1 = MakeTestInput(builder, input1_def); + QuantParams input1_qparams = GetTestInputQuantParams(input1_def); + auto* input1_qdq = AddQDQNodePair(builder, input1, input1_qparams.scale, 
input1_qparams.zero_point); + + // input2 -> Q -> DQ -> + NodeArg* input2 = MakeTestInput(builder, input2_def); + QuantParams input2_qparams = GetTestInputQuantParams(input2_def); + auto* input2_qdq = AddQDQNodePair(builder, input2, input2_qparams.scale, input2_qparams.zero_point); + + // MatMul + auto* op_output = builder.MakeIntermediate(); + builder.AddNode("MatMul", {input1_qdq, input2_qdq}, {op_output}); + + // op_output -> Q -> DQ -> output + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, + output_qparams[0].zero_point); }; } -// Runs an AveragePool model on the QNN CPU backend. Checks the graph node assignment, and that inference +// Runs an MatMul model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. -static void RunMatMulOpOpTest(const std::vector& input1_shape, - const std::vector& input2_shape, +static void RunMatMulOpOpTest(const TestInputDef& input1_def, + const TestInputDef& input2_def, ExpectedEPNodeAssignment expected_ep_assignment, int opset = 13) { ProviderOptions provider_options; @@ -94,19 +65,20 @@ static void RunMatMulOpOpTest(const std::vector& input1_shape, provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildMatMulOpTestCase(input1_shape, input2_shape), + RunQnnModelTest(BuildMatMulOpTestCase(input1_def, input2_def), provider_options, opset, - expected_ep_assignment); + expected_ep_assignment, + 2e-4f); } -// Runs a QDQ AveragePool model on the QNN HTP backend. Checks the graph node assignment, and that inference -// outputs for QNN and CPU match. +// Runs a QDQ MatMul model on the QNN HTP backend. Checks the graph node assignment, and that the +// QDQ model is accurate on QNN EP (compared to CPU EP). 
template -static void RunQDQMatMulOpOpTest(const std::vector& input1_shape, - const std::vector& input2_shape, +static void RunQDQMatMulOpOpTest(const TestInputDef& input1_def, + const TestInputDef& input2_def, ExpectedEPNodeAssignment expected_ep_assignment, - int opset = 18, float fp32_abs_err = 1e-5f) { + int opset = 18) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -114,27 +86,28 @@ static void RunQDQMatMulOpOpTest(const std::vector& input1_shape, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildMatMulOpQDQTestCase(input1_shape, input2_shape), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(BuildMatMulOpTestCase(input1_def, input2_def), + BuildMatMulOpQDQTestCase(input1_def, input2_def), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // // CPU tests: // -TEST_F(QnnCPUBackendTests, TestMatMulOp) { - RunMatMulOpOpTest({2, 2} /* input_shape1 */, - {2, 2} /* input_shape2 */, +TEST_F(QnnCPUBackendTests, MatMulOp) { + RunMatMulOpOpTest(TestInputDef({2, 3}, false, {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}), + TestInputDef({3, 2}, false, {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}), ExpectedEPNodeAssignment::All, 18); } -// QNN broadcast issue -TEST_F(QnnCPUBackendTests, DISABLED_TestMatMulOp2) { - RunMatMulOpOpTest({28, 1, 64} /* input_shape1 */, - {64, 32} /* input_shape2 */, +// Test MatMul broadcasting +TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) { + RunMatMulOpOpTest(TestInputDef({28, 1, 64}, false, -10.0f, 10.0f), + TestInputDef({64, 32}, false, -10.0f, 10.0f), ExpectedEPNodeAssignment::All, 18); } @@ -143,27 +116,17 @@ TEST_F(QnnCPUBackendTests, DISABLED_TestMatMulOp2) { // HTP tests: // -TEST_F(QnnHTPBackendTests, TestMatMulOp_HTP_u8) { - RunQDQMatMulOpOpTest({2, 2} /* input_shape1 */, - {2, 2} /* input_shape2 */, - ExpectedEPNodeAssignment::All, - 18, 0.00381f); -} - -// QNN broadcast issue 
-TEST_F(QnnHTPBackendTests, DISABLED_TestMatMulOp2_HTP_u8) { - RunQDQMatMulOpOpTest({28, 1, 64} /* input_shape1 */, - {64, 32} /* input_shape2 */, - ExpectedEPNodeAssignment::All, - 18, 0.00381f); +TEST_F(QnnHTPBackendTests, MatMulOp_HTP_u8) { + RunQDQMatMulOpOpTest(TestInputDef({2, 3}, false, {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}), + TestInputDef({3, 2}, false, {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}), + ExpectedEPNodeAssignment::All, 18); } -// QNN broadcast issue -TEST_F(QnnHTPBackendTests, DISABLED_TestMatMulOp3_HTP_u8) { - RunQDQMatMulOpOpTest({28, 1, 32} /* input_shape1 */, - {32, 2} /* input_shape2 */, - ExpectedEPNodeAssignment::All, - 18, 0.00381f); +// Test MatMul broadcasting +TEST_F(QnnHTPBackendTests, MatMulOp_Broadcast) { + RunQDQMatMulOpOpTest(TestInputDef({28, 1, 64}, false, -10.0f, 10.0f), + TestInputDef({64, 32}, false, -10.0f, 10.0f), + ExpectedEPNodeAssignment::All, 18); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/max_pool_test.cpp b/onnxruntime/test/providers/qnn/max_pool_test.cpp index 1beac1d326ccd..f574948f02c17 100644 --- a/onnxruntime/test/providers/qnn/max_pool_test.cpp +++ b/onnxruntime/test/providers/qnn/max_pool_test.cpp @@ -6,6 +6,7 @@ #include #include +#include "core/graph/node_attr_utils.h" #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" @@ -17,122 +18,50 @@ namespace onnxruntime { namespace test { // Returns a function that creates a graph with a single MaxPool operator. 
-static GetTestModelFn BuildMaxPoolTestCase(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - int64_t ceil_mode, - int64_t storage_order, - const std::string& auto_pad = "NOTSET") { - return [shape, kernel_shape, strides, pads, dilations, - ceil_mode, storage_order, auto_pad](ModelTestBuilder& builder) { - // Random input data - auto input = builder.MakeInput(shape, 0.0f, 10.0f); - - auto* output = builder.MakeOutput(); +static GetTestModelFn BuildMaxPoolTestCase(const TestInputDef& input_def, + const std::vector& attrs) { + return [input_def, attrs](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* output = builder.MakeOutput(); Node& pool_node = builder.AddNode("MaxPool", {input}, {output}); - pool_node.AddAttribute("kernel_shape", kernel_shape); - - if (!strides.empty()) { - pool_node.AddAttribute("strides", strides); - } - - if (!dilations.empty()) { - pool_node.AddAttribute("dilations", dilations); - } - - pool_node.AddAttribute("auto_pad", auto_pad); - - if (!pads.empty() && auto_pad == "NOTSET") { - pool_node.AddAttribute("pads", pads); - } - - if (ceil_mode > 0) { - pool_node.AddAttribute("ceil_mode", ceil_mode); - } - - if (storage_order > 0) { - pool_node.AddAttribute("storage_order", storage_order); + for (const auto& attr : attrs) { + pool_node.AddAttributeProto(attr); } }; } // Returns a function that creates a graph with a QDQ MaxPool operator. 
template -GetQDQTestCaseFn BuildMaxPoolQDQTestCase(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - int64_t ceil_mode, - int64_t storage_order, - const std::string& auto_pad = "NOTSET") { - return [shape, kernel_shape, strides, pads, dilations, - ceil_mode, storage_order, auto_pad](ModelTestBuilder& builder) { - float dq_scale = 0.0038f; - float pool_output_scale = 0.0038f; - float q_scale = 0.0038f; - QuantType dq_zp = std::numeric_limits::max() / 2; - QuantType pool_output_zp = std::numeric_limits::max() / 2; - QuantType q_zp = std::numeric_limits::max() / 2; - - auto* input_arg = builder.MakeInput(shape, -1.0f, 1.0f); - auto* output_arg = builder.MakeOutput(); - - // add QDQ + MaxPool - auto* dq_output = AddQDQNodePair(builder, input_arg, dq_scale, dq_zp); - auto* MaxPool_output = builder.MakeIntermediate(); - Node& pool_node = builder.AddNode("MaxPool", {dq_output}, {MaxPool_output}); - - pool_node.AddAttribute("kernel_shape", kernel_shape); - - if (!strides.empty()) { - pool_node.AddAttribute("strides", strides); - } - - if (!dilations.empty()) { - pool_node.AddAttribute("dilations", dilations); +GetTestQDQModelFn BuildMaxPoolQDQTestCase(const TestInputDef& input_def, + const std::vector& attrs) { + return [input_def, attrs](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input -> Q -> DQ -> + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // MaxPool + NodeArg* pool_output = builder.MakeIntermediate(); + Node& pool_node = builder.AddNode("MaxPool", {input_qdq}, {pool_output}); + + for (const auto& attr : attrs) { + pool_node.AddAttributeProto(attr); } - pool_node.AddAttribute("auto_pad", auto_pad); - - if (!pads.empty() && auto_pad == "NOTSET") { - 
pool_node.AddAttribute("pads", pads); - } - - if (ceil_mode > 0) { - pool_node.AddAttribute("ceil_mode", ceil_mode); - } - - if (storage_order > 0) { - pool_node.AddAttribute("storage_order", storage_order); - } - - // add QDQ output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(MaxPool_output, - pool_output_scale, - pool_output_zp, - q_output); - builder.AddDequantizeLinearNode(q_output, - q_scale, - q_zp, - output_arg); + // op_output -> Q -> DQ -> output + // NOTE: Input and output quantization parameters must be equal for MaxPool. + output_qparams[0] = input_qparams; // Overwrite! + AddQDQNodePairWithOutputAsGraphOutput(builder, pool_output, input_qparams.scale, + input_qparams.zero_point); }; } // Runs an MaxPool model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. -static void RunMaxPoolOpTest(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - int64_t ceil_mode, - int64_t storage_order, - const std::string& auto_pad, +static void RunMaxPoolOpTest(const TestInputDef& input_def, + const std::vector& attrs, ExpectedEPNodeAssignment expected_ep_assignment, int opset = 18) { ProviderOptions provider_options; @@ -142,7 +71,7 @@ static void RunMaxPoolOpTest(const std::vector& shape, provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildMaxPoolTestCase(shape, kernel_shape, strides, pads, dilations, ceil_mode, storage_order, auto_pad), + RunQnnModelTest(BuildMaxPoolTestCase(input_def, attrs), provider_options, opset, expected_ep_assignment); @@ -151,16 +80,10 @@ static void RunMaxPoolOpTest(const std::vector& shape, // Runs a QDQ MaxPool model on the QNN HTP backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. 
template -static void RunQDQMaxPoolOpTest(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - int64_t ceil_mode, - int64_t storage_order, - const std::string& auto_pad, +static void RunQDQMaxPoolOpTest(const TestInputDef& input_def, + const std::vector& attrs, ExpectedEPNodeAssignment expected_ep_assignment, - int opset = 18, float fp32_abs_err = 1e-5f) { + int opset = 18) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -168,11 +91,12 @@ static void RunQDQMaxPoolOpTest(const std::vector& shape, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildMaxPoolQDQTestCase(shape, kernel_shape, strides, pads, dilations, ceil_mode, storage_order, auto_pad), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(BuildMaxPoolTestCase(input_def, attrs), + BuildMaxPoolQDQTestCase(input_def, attrs), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // @@ -180,65 +104,53 @@ static void RunQDQMaxPoolOpTest(const std::vector& shape, // // MaxPool with kernel size equal to the spatial dimension of input tensor. 
-TEST_F(QnnCPUBackendTests, TestMaxPool_Global) { - RunMaxPoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad - ExpectedEPNodeAssignment::All); -} - -TEST_F(QnnCPUBackendTests, TestMaxPool_Large_Input) { - RunMaxPoolOpTest({1, 125, 8, 56}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnCPUBackendTests, MaxPool_Global) { + RunMaxPoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestMaxPool_Large_Input2) { - RunMaxPoolOpTest({1, 128, 16, 113}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnCPUBackendTests, MaxPool_Large_Input) { + RunMaxPoolOpTest(TestInputDef({1, 125, 8, 56}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{2, 2}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes 
cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). -TEST_F(QnnCPUBackendTests, DISABLED_TestMaxPool_Ceil) { - RunMaxPoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 1, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnCPUBackendTests, DISABLED_MaxPool_Ceil) { + RunMaxPoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(1)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). 
-TEST_F(QnnCPUBackendTests, DISABLED_TestMaxPool_Large_Input2_Ceil) { - RunMaxPoolOpTest({1, 128, 16, 113}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 1, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnCPUBackendTests, DISABLED_MaxPool_Large_Input2_Ceil) { + RunMaxPoolOpTest(TestInputDef({1, 128, 16, 113}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{2, 2}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(1)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } @@ -247,79 +159,66 @@ TEST_F(QnnCPUBackendTests, DISABLED_TestMaxPool_Large_Input2_Ceil) { // HTP tests: // // QDQ MaxPool with kernel size equal to the spatial dimension of input tensor. -TEST_F(QnnHTPBackendTests, TestMaxPool_Global_HTP_u8) { - RunQDQMaxPoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad - ExpectedEPNodeAssignment::All); -} - -// TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). 
-TEST_F(QnnHTPBackendTests, DISABLED_TestMaxPool_Large_Input_HTP_u8) { - RunQDQMaxPoolOpTest({1, 125, 8, 56}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, MaxPool_Global_HTP_u8) { + RunQDQMaxPoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). -TEST_F(QnnHTPBackendTests, DISABLED_TestMaxPool_Large_Input2_HTP_u8) { - RunQDQMaxPoolOpTest({1, 128, 16, 113}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, DISABLED_MaxPool_Large_Input_HTP_u8) { + RunQDQMaxPoolOpTest(TestInputDef({1, 125, 8, 56}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{2, 2}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestMaxPool_Ceil_HTP_u8) { - RunQDQMaxPoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // 
strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 1, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, MaxPool_Ceil_HTP_u8) { + RunQDQMaxPoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(1)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). -TEST_F(QnnHTPBackendTests, DISABLED_TestMaxPool_Large_Input2_Ceil_HTP_u8) { - RunQDQMaxPoolOpTest({1, 128, 16, 113}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 1, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, DISABLED_MaxPool_Large_Input2_Ceil_HTP_u8) { + RunQDQMaxPoolOpTest(TestInputDef({1, 128, 16, 113}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{2, 2}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(1)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). 
-TEST_F(QnnHTPBackendTests, DISABLED_TestMaxPool_LargeInput_1Pads) { - RunQDQMaxPoolOpTest({1, 64, 384, 576}, // shape - {3, 3}, // kernel_shape - {2, 2}, // strides - {1, 1, 1, 1}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, DISABLED_MaxPool_LargeInput_1Pads) { + RunQDQMaxPoolOpTest(TestInputDef({1, 64, 384, 576}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{1, 1, 1, 1}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc index 14f2a351d414c..6a6dc6d84af2f 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc @@ -43,6 +43,84 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, const ProviderOption helper.feeds_, verification_params); } +void InferenceModel(const std::string& model_data, const char* log_id, + std::unique_ptr execution_provider, + ExpectedEPNodeAssignment expected_ep_assignment, const NameMLValMap& feeds, + std::vector& output_names, std::vector& output_vals) { + SessionOptions so; + so.session_logid = log_id; + RunOptions run_options; + run_options.run_tag = so.session_logid; + + InferenceSessionWrapper session_object{so, GetEnvironment()}; + + std::string provider_type = kCpuExecutionProvider; + if (execution_provider) { + provider_type = execution_provider->Type(); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::move(execution_provider))); + } + ASSERT_STATUS_OK(session_object.Load(model_data.data(), 
static_cast(model_data.size()))); + ASSERT_STATUS_OK(session_object.Initialize()); + + const auto& graph = session_object.GetGraph(); + + auto ep_nodes = CountAssignedNodes(graph, provider_type); + if (expected_ep_assignment == ExpectedEPNodeAssignment::All) { + // Verify the entire graph is assigned to the EP + ASSERT_EQ(ep_nodes, graph.NumberOfNodes()) << "Not all nodes were assigned to " << provider_type; + } else if (expected_ep_assignment == ExpectedEPNodeAssignment::None) { + ASSERT_EQ(ep_nodes, 0) << "No nodes are supposed to be assigned to " << provider_type; + } else { + ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type; + } + + const auto& outputs = graph.GetOutputs(); + + // fetch all outputs if necessary. + if (output_names.empty()) { + output_names.reserve(outputs.size()); + for (const auto* node_arg : outputs) { + if (node_arg->Exists()) { + output_names.push_back(node_arg->Name()); + } + } + } + + ASSERT_STATUS_OK(session_object.Run(run_options, feeds, output_names, &output_vals)); +} + +NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float bias_scale) { + NodeArg* bias_int32 = nullptr; + + // Bias must be int32 to be detected as a QDQ node unit. + // We must quantize the data. 
+ if (bias_def.IsRandomData()) { + // Create random initializer def that is quantized to int32 + const auto& rand_info = bias_def.GetRandomDataInfo(); + TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), static_cast(rand_info.min / bias_scale), + static_cast(rand_info.max / bias_scale)); + bias_int32 = MakeTestInput(builder, bias_int32_def); + } else { + assert(bias_def.IsRawData()); + // Create raw data initializer def that is quantized to int32 + const auto& bias_f32_raw = bias_def.GetRawData(); + const size_t num_elems = bias_f32_raw.size(); + + std::vector bias_int32_raw(num_elems); + for (size_t i = 0; i < num_elems; i++) { + bias_int32_raw[i] = static_cast(bias_f32_raw[i] / bias_scale); + } + + TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), bias_int32_raw); + bias_int32 = MakeTestInput(builder, bias_int32_def); + } + + auto* bias = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(bias_int32, bias_scale, 0, bias); + + return bias; +} + // Mock IKernelLookup class passed to QNN EP's GetCapability() function in order to // determine if the HTP backend is supported on specific platforms (e.g., Windows ARM64). // TODO: Remove once HTP can be emulated on Windows ARM64. diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h index 21d34136c7c85..b091177b24ee2 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.h +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h @@ -5,19 +5,76 @@ #if !defined(ORT_MINIMAL_BUILD) #include +#include #include #include "core/framework/provider_options.h" #include "test/optimizer/qdq_test_utils.h" #include "test/util/include/test_utils.h" +#include "test/util/include/test/test_environment.h" +#include "test/util/include/default_providers.h" #include "gtest/gtest.h" namespace onnxruntime { namespace test { +// Signature for function that builds a float32 model. 
using GetTestModelFn = std::function; +// Class that stores quantization params (scale, zero point). +// Has a static function that computes quantization parameters from a floating-point range. +template +struct QuantParams { + float scale; + QType zero_point; + + static QuantParams Compute(float rmin, float rmax) { + if (rmin == 0.0f && rmax == 0.0f) { // Quantizing a single zero. + return QuantParams{1.0f, 0}; + } + + if (rmin == rmax) { // One data-point (x) to quantize. + if (rmin < 0) { // new range is [-x , 0.0f] + rmax = 0.0f; + } else { // new range is [0.0f, x] + rmin = 0.0f; + } + } + + constexpr float qmin = static_cast(std::numeric_limits::min()); + constexpr float qmax = static_cast(std::numeric_limits::max()); + + const float scale = (rmax - rmin) / (qmax - qmin); + const QType zero_point = static_cast(std::roundf((qmin - rmin) / scale)); + + return QuantParams{scale, zero_point}; + } +}; + +// Signature for function that builds a QDQ model. +// The parameter `output_qparams` contains quantization parameters that *can* be used for the QDQ model output. +// These output quantization parameters are computed by first running the float32 model and determining the +// range of output values. Note that the function is able to overwrite the output_qparams parameter if necessary +// (Example: MaxPool must have identical input and output quantization params). +template +using GetTestQDQModelFn = std::function>& output_qparams)>; + +// Computes quantization parameters for an array of floating-point values. +template +inline QuantParams GetDataQuantParams(gsl::span data) { + // Get min/max of raw data. + float min_val = std::numeric_limits::max(); + float max_val = std::numeric_limits::min(); + + for (auto val : data) { + min_val = std::min(min_val, val); + max_val = std::max(max_val, val); + } + + return QuantParams::Compute(min_val, max_val); +} + // Class that defines an input that can be created with ModelTestBuilder. 
// Defines whether the input is an initializer and if the data should be randomized or if // set to an explicit value. @@ -39,14 +96,18 @@ struct TestInputDef { TestInputDef(std::vector shape, bool is_initializer, T rand_min, T rand_max) : shape_(std::move(shape)), data_info_(RandomData{rand_min, rand_max}), - is_initializer_(is_initializer) {} + is_initializer_(is_initializer), + has_range_override_(false), + range_override_() {} // Create an input definition with explicit data. Specify its shape, whether it's an initializer, // and the raw data. TestInputDef(std::vector shape, bool is_initializer, std::vector data) : shape_(std::move(shape)), data_info_(RawData{std::move(data)}), - is_initializer_(is_initializer) {} + is_initializer_(is_initializer), + has_range_override_(false), + range_override_() {} TestInputDef(TestInputDef&& other) = default; TestInputDef(const TestInputDef& other) = default; @@ -54,6 +115,18 @@ struct TestInputDef { TestInputDef& operator=(const TestInputDef& other) = default; TestInputDef& operator=(TestInputDef&& other) = default; + // Overrides the range of input values reported by TestInputDef::GetRange(). + // This is useful when you want to quantize over a range that is larger or smaller + // than the actual range of the data. + // + // Returns a reference to this object to allow chaining. + TestInputDef& OverrideValueRange(T range_min, T range_max) { + range_override_.first = range_min; + range_override_.second = range_max; + has_range_override_ = true; + return *this; + } + const std::vector& GetShape() const { return shape_; } @@ -78,7 +151,15 @@ struct TestInputDef { return std::get(data_info_).data; } + // Get the range of values represented by this input, which is necessary for computing quantization parameters. + // For raw data, we return [min, max] of the elements. + // For random data, we return [rand_min, rand_max]. + // Optionally, the user can override this range by using OverrideValueRange(). 
std::pair GetRange() const { + if (has_range_override_) { + return range_override_; + } + auto which_type = data_info_.index(); std::pair range; @@ -105,28 +186,169 @@ struct TestInputDef { std::vector shape_; std::variant data_info_; bool is_initializer_; + bool has_range_override_; + std::pair range_override_; }; template -struct QuantParams { - float scale; - QType zero_point; +inline QuantParams GetTestInputQuantParams(const TestInputDef& input_def) { + const std::pair frange = input_def.GetRange(); + return QuantParams::Compute(frange.first, frange.second); +} - static QuantParams Compute(float rmin, float rmax) { - constexpr float qmin = static_cast(std::numeric_limits::min()); - constexpr float qmax = static_cast(std::numeric_limits::max()); +/** + * Inferences a given serialized model. Returns output values via an out-param. + * + * \param model_data The serialized ONNX model to inference. + * \param log_id The logger ID. + * \param execution_provider The EP on which to run the model. Set to nullptr for CPU EP. + * \param expected_ep_assignment Describes "which nodes" should be assigned to the EP. + * \param feeds The input feeds. + * \param output_names If empty, the function will write the output names. + * \param output_vals Initialized to the inference results. + */ +void InferenceModel(const std::string& model_data, const char* log_id, + std::unique_ptr execution_provider, + ExpectedEPNodeAssignment expected_ep_assignment, const NameMLValMap& feeds, + std::vector& output_names, std::vector& output_vals); - const float scale = (rmax - rmin) / (qmax - qmin); - const QType zero_point = static_cast((qmin - rmin) / scale); +/** + * Tests the accuracy of a QDQ model on QNN EP by runnning 3 inferences: + * + * 1. float model on CPU EP (baseline) + * 2. QDQ model on CPU EP + * 3. QDQ model on QNN EP + * + * This function checks that running the QDQ model on QNN EP (#3) is at least as accurate (+- small tolerance) + * as running the QDQ model on CPU EP (#2). 
We primarily measure accuracy by comparing to the baseline (#1). + * + * \param f32_model_fn Function that builds the float model (baseline for comparison). + * \param qdq_model_fn Function that builds the QDQ model (run by CPU EP and QNN EP). + * \param qnn_options QNN EP provider options. + * \param opset_version The opset version. + * \param expected_ep_assignment Describes "which nodes" should be assigned to the EP. + * \param fp32_abs_err Small tolerance used for floating-point comparisons. + * \param log_severity The logger's severity setting. + */ +template +inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTestQDQModelFn& qdq_model_fn, + const ProviderOptions& qnn_options, int opset_version, + ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err, + logging::Severity log_severity = logging::Severity::kERROR) { + // Add kMSDomain to cover contrib op like Gelu + const std::unordered_map domain_to_version = {{"", opset_version}, {kMSDomain, 1}}; + + auto& logging_manager = DefaultLoggingManager(); + logging_manager.SetDefaultLoggerSeverity(log_severity); + + // Create float model and serialize it to a string. + onnxruntime::Model f32_model("f32_model", false, ModelMetaData(), PathString(), + IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, {}, + logging_manager.DefaultLogger()); + ModelTestBuilder f32_helper(f32_model.MainGraph()); + std::string f32_model_data; + f32_model_fn(f32_helper); + f32_helper.SetGraphOutputs(); + ASSERT_STATUS_OK(f32_model.MainGraph().Resolve()); + f32_model.ToProto().SerializeToString(&f32_model_data); + + // Run f32 model on CPU EP and collect outputs. + std::vector cpu_f32_outputs; + std::vector output_names; + InferenceModel(f32_model_data, "f32_model_logger", nullptr, ExpectedEPNodeAssignment::All, + f32_helper.feeds_, output_names, cpu_f32_outputs); + const size_t num_outputs = cpu_f32_outputs.size(); + + // Compute output range(s) and quantization params. 
+ std::vector> output_qparams; + std::vector> output_vals; + std::vector output_types; + output_qparams.resize(num_outputs); + output_vals.resize(num_outputs); + output_types.resize(num_outputs); + + for (size_t i = 0; i < num_outputs; i++) { + auto& tensor = cpu_f32_outputs[i].Get(); + int32_t elem_type = tensor.GetElementType(); + + if (elem_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + output_vals[i] = tensor.DataAsSpan(); + output_qparams[i] = GetDataQuantParams(output_vals[i]); + } - return QuantParams{scale, zero_point}; + output_types[i] = elem_type; } -}; -template -inline QuantParams GetTestInputQuantParams(const TestInputDef& input_def) { - const std::pair frange = input_def.GetRange(); - return QuantParams::Compute(frange.first, frange.second); + // Create QDQ model and serialize it to a string. + onnxruntime::Model qdq_model("qdq_model", false, ModelMetaData(), PathString(), + IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, {}, + logging_manager.DefaultLogger()); + ModelTestBuilder qdq_helper(qdq_model.MainGraph()); + std::string qdq_model_data; + qdq_model_fn(qdq_helper, output_qparams); + qdq_helper.SetGraphOutputs(); + ASSERT_STATUS_OK(qdq_model.MainGraph().Resolve()); + qdq_model.ToProto().SerializeToString(&qdq_model_data); + + // Run QDQ model on QNN EP and collect outputs. + std::vector qnn_qdq_outputs; + InferenceModel(qdq_model_data, "qdq_model_logger", QnnExecutionProviderWithOptions(qnn_options), + expected_ep_assignment, qdq_helper.feeds_, output_names, qnn_qdq_outputs); + + if (expected_ep_assignment != ExpectedEPNodeAssignment::None) { + // Run QDQ model on CPU EP and collect outputs. + std::vector cpu_qdq_outputs; + InferenceModel(qdq_model_data, "qdq_model_logger", nullptr, ExpectedEPNodeAssignment::All, + qdq_helper.feeds_, output_names, cpu_qdq_outputs); + ASSERT_EQ(cpu_qdq_outputs.size(), num_outputs); + ASSERT_EQ(qnn_qdq_outputs.size(), num_outputs); + + // Compare accuracy of QDQ results with float model. 
+ // QNN EP must be at least as accurate as CPU EP when running the QDQ model. + for (size_t i = 0; i < num_outputs; i++) { + auto& cpu_qdq_tensor = cpu_qdq_outputs[i].Get(); + auto& qnn_qdq_tensor = qnn_qdq_outputs[i].Get(); + + ASSERT_EQ(cpu_qdq_tensor.GetElementType(), output_types[i]); + ASSERT_EQ(qnn_qdq_tensor.GetElementType(), output_types[i]); + + if (output_types[i] == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + const size_t num_vals = output_vals[i].size(); + gsl::span cpu_f32_vals = output_vals[i]; + gsl::span cpu_qdq_vals = cpu_qdq_tensor.DataAsSpan(); + gsl::span qnn_qdq_vals = qnn_qdq_tensor.DataAsSpan(); + + ASSERT_EQ(num_vals, cpu_qdq_vals.size()); + ASSERT_EQ(num_vals, qnn_qdq_vals.size()); + + for (size_t j = 0; j < num_vals; j++) { + const float expected_val = cpu_f32_vals[j]; // "ground-truth" + const float qnn_qdq_val = qnn_qdq_vals[j]; + const float cpu_qdq_val = cpu_qdq_vals[j]; + const float cpu_err = std::fabs(expected_val - cpu_qdq_val); + const float qnn_err = std::fabs(expected_val - qnn_qdq_val); + + // Case 1 (qnn_err <= cpu_err): QNN EP is *more* accurate, which makes (qnn_err - cpu_err) zero or + // a negative value. + // Case 2 (qnn_err > cpu_err): QNN EP is less accurate, but the error difference is within 1 + // quantization unit (i.e., scale). This can occur due to rounding differences. 
+ const bool is_as_accurate_as_cpu_qdq = (qnn_err - cpu_err) <= (output_qparams[i].scale + fp32_abs_err); + + EXPECT_TRUE(is_as_accurate_as_cpu_qdq) + << "Inaccuracy detected for output '" + << output_names[i] + << "', element " << j + << ".\nOutput quant params: scale=" << output_qparams[i].scale + << ", zero_point=" << static_cast(output_qparams[i].zero_point) + << ".\nExpected val: " << expected_val << "\n" + << "QNN QDQ val: " << qnn_qdq_val << " (err " << qnn_err << ")\n" + << "CPU QDQ val: " << cpu_qdq_val << " (err " << cpu_err << ")"; + } + } else { + VerifyOutput(output_names[i], cpu_f32_outputs[i].Get(), qnn_qdq_tensor, fp32_abs_err); + } + } + } } /** @@ -164,6 +386,38 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef& return input; } +template <> +inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef& input_def) { + NodeArg* input = nullptr; + const auto& shape = input_def.GetShape(); + const bool is_initializer = input_def.IsInitializer(); + + if (input_def.IsRawData()) { // Raw data. + const std::vector& raw_data = input_def.GetRawData(); + + if (is_initializer) { + input = builder.MakeInitializerBool(shape, raw_data); + } else { + input = builder.MakeInput(shape, raw_data); + } + } else { // Random data + if (is_initializer) { + input = builder.MakeRandInitializerBool(shape); + } else { + input = builder.MakeInputBool(shape); + } + } + + return input; +} + +// ONNX spec does not allow quantizing float to int32. However, this function will create an int32 input (divide by scale) +// and then return the output of DequantizeLinear. Note that bias_scale should be generally be equal +// to input_scale * weights_scale. 
See quantization tool: onnx_quantizer.py::quantize_bias_static() +// +// i.e., initial bias => manual quantization (int32) => DQ => final float bias +NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float bias_scale); + /** * Runs a test model on the QNN EP. Checks the graph node assignment, and that inference * outputs for QNN and CPU match. diff --git a/onnxruntime/test/providers/qnn/reduce_op_cpu_test.cc b/onnxruntime/test/providers/qnn/reduce_op_cpu_test.cc deleted file mode 100644 index c854d2e5dc5e7..0000000000000 --- a/onnxruntime/test/providers/qnn/reduce_op_cpu_test.cc +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#if !defined(ORT_MINIMAL_BUILD) - -#include -#include - -#include "test/optimizer/qdq_test_utils.h" -#include "test/providers/qnn/qnn_test_utils.h" - -#include "gtest/gtest.h" - -namespace onnxruntime { -namespace test { - -/** - * Creates a graph with a single reduce operator (e.g., ReduceSum, ReduceMin, etc.). Reduce operators take the - * axes of reduction as either a node attribute or an optional input (depending on opset). - * - * \param reduce_op_type The string denoting the reduce operator's type (e.g., "ReduceSum"). - * \param input_shape The shape of the input. Input data is randomly generated with this shape. - * \param axes_as_input True if the "axes" are specified as a node input. - * \param axes The axes of reduction. - * \param keepdims True if the output's rank should match the input. This is a node attribute that defaults to true. - * \param noop_with_empty_axes True if empty axes should force the node to act as a NoOp (no operation). - * This is a node attribute that defaults to false. - * \param domain The domain to assign to the graph node. - * - * \return A function that builds the graph with the provided builder. 
- */ -template -static GetTestModelFn BuildReduceOpTestCase(const std::string& reduce_op_type, - const std::vector& input_shape, - bool axes_as_input, std::vector axes, bool keepdims, - bool noop_with_empty_axes) { - return [reduce_op_type, input_shape, axes_as_input, axes, keepdims, - noop_with_empty_axes](ModelTestBuilder& builder) { - std::vector input_args; - - // Input data arg - input_args.push_back(builder.MakeInput(input_shape, static_cast(0), - static_cast(20))); - - // Axes input (initializer) for newer opsets. - if (axes_as_input) { - input_args.push_back(builder.MakeInitializer({static_cast(axes.size())}, axes)); - } - - auto* reduce_sum_output = builder.MakeOutput(); - Node& reduce_sum_node = builder.AddNode(reduce_op_type, input_args, {reduce_sum_output}); - reduce_sum_node.AddAttribute("keepdims", static_cast(keepdims)); - - // Older opsets have "axes" as a node attribute. - if (!axes_as_input) { - reduce_sum_node.AddAttribute("axes", axes); - } else { - reduce_sum_node.AddAttribute("noop_with_empty_axes", static_cast(noop_with_empty_axes)); - } - }; -} - -/** - * Runs a ReduceOp model on the QNN CPU backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param op_type The ReduceOp type (e.g., ReduceSum). - * \param opset The opset version. Some opset versions have "axes" as an attribute or input. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) - * \param keepdims Common attribute for all reduce operations. 
- */ -template -static void RunReduceOpCpuTest(const std::string& op_type, int opset, - ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All, - bool keepdims = true) { - ProviderOptions provider_options; -#if defined(_WIN32) - provider_options["backend_path"] = "QnnCpu.dll"; -#else - provider_options["backend_path"] = "libQnnCpu.so"; -#endif - - RunQnnModelTest(BuildReduceOpTestCase(op_type, - {2, 2}, // input shape - ReduceOpHasAxesInput(op_type, opset), - {0, 1}, // axes - keepdims, - false), // noop_with_empty_axes - provider_options, - opset, - expected_ep_assignment); -} - -// -// ReduceSum -// - -// Test creates a graph with a ReduceSum node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is int32. -// - Uses opset 13, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestInt32ReduceSumOpset13) { - RunReduceOpCpuTest("ReduceSum", 13); -} - -// Test creates a graph with a ReduceSum node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is int32. -// - Uses opset 11, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestInt32ReduceSumOpset11) { - RunReduceOpCpuTest("ReduceSum", 11); -} - -// Test creates a graph with a ReduceSum node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestFloatReduceSumOpset13) { - RunReduceOpCpuTest("ReduceSum", 13); -} - -// Test creates a graph with a ReduceSum node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. 
-// -// - The input and output data type is float. -// - Uses opset 11, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestFloatReduceSumOpset11) { - RunReduceOpCpuTest("ReduceSum", 11); -} - -// -// ReduceProd -// - -// Test creates a graph with a ReduceProd node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestReduceProdOpset18) { - RunReduceOpCpuTest("ReduceProd", 18); -} - -// Test creates a graph with a ReduceProd node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestReduceProdOpset13) { - RunReduceOpCpuTest("ReduceProd", 13); -} - -// -// ReduceMax -// - -// Test creates a graph with a ReduceMax node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestReduceMaxOpset18) { - RunReduceOpCpuTest("ReduceMax", 18); -} - -// Test creates a graph with a ReduceMax node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestReduceMaxOpset13) { - RunReduceOpCpuTest("ReduceMax", 13); -} - -// -// ReduceMin -// - -// Test creates a graph with a ReduceMin node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. 
-// -// - The input and output data type is float. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestReduceMinOpset18) { - RunReduceOpCpuTest("ReduceMin", 18); -} - -// Test creates a graph with a ReduceMin node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestReduceMinOpset13) { - RunReduceOpCpuTest("ReduceMin", 13); -} - -// -// ReduceMean -// - -// Test creates a graph with a ReduceMean node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestReduceMeanOpset18) { - RunReduceOpCpuTest("ReduceMean", 18); -} - -// Test creates a graph with a ReduceMean node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestReduceMeanOpset13) { - RunReduceOpCpuTest("ReduceMean", 13); -} - -} // namespace test -} // namespace onnxruntime - -#endif // !defined(ORT_MINIMAL_BUILD) \ No newline at end of file diff --git a/onnxruntime/test/providers/qnn/reduce_op_htp_test.cc b/onnxruntime/test/providers/qnn/reduce_op_htp_test.cc deleted file mode 100644 index 86b319eea0b14..0000000000000 --- a/onnxruntime/test/providers/qnn/reduce_op_htp_test.cc +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#if !defined(ORT_MINIMAL_BUILD) - -#include -#include "core/graph/graph.h" - -#include "test/optimizer/qdq_test_utils.h" -#include "test/providers/qnn/qnn_test_utils.h" - -#include "gtest/gtest.h" - -namespace onnxruntime { -namespace test { -#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) - -// Creates the following graph if axes is an input (newer opsets): -// _______________________ -// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) -// axes (int32, initializer) -> | Reduce___ | -// |_______________________| -// -// Creates the following graph if axes is an attribute (older opsets): -// _______________________ -// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) -// | Reduce___ | -// |_______________________| -// -template -GetTestModelFn BuildQDQReduceOpTestCase(const std::string& reduce_op_type, const std::vector& input_shape, - bool axes_as_input, const std::vector& axes, bool keepdims, - bool noop_with_empty_axes) { - return [reduce_op_type, input_shape, axes_as_input, axes, keepdims, - noop_with_empty_axes](ModelTestBuilder& builder) { - using QuantTypeLimits = std::numeric_limits; - QuantType input_quant_min_value = QuantTypeLimits::min(); - QuantType input_quant_max_value = QuantTypeLimits::max(); - - auto* input_data = builder.MakeInput(input_shape, -100.0f, 100.0f); - auto* final_output = builder.MakeOutput(); - - // input_data -> Q/DQ -> - auto* input_qdq_output = AddQDQNodePair(builder, input_data, .04f, - (input_quant_min_value + input_quant_max_value) / 2 + 1); - - // -> ReduceOp (e.g., ReduceSum) -> - std::vector reduce_op_inputs; - reduce_op_inputs.push_back(input_qdq_output); - - if (axes_as_input) { - reduce_op_inputs.push_back(builder.MakeInitializer({static_cast(axes.size())}, axes)); - } - - auto* reduce_sum_output = builder.MakeIntermediate(); - Node& reduce_sum_node = builder.AddNode(reduce_op_type, reduce_op_inputs, {reduce_sum_output}); - reduce_sum_node.AddAttribute("keepdims", 
static_cast(keepdims)); - - if (axes_as_input) { - reduce_sum_node.AddAttribute("noop_with_empty_axes", static_cast(noop_with_empty_axes)); - } else { - reduce_sum_node.AddAttribute("axes", axes); - } - - // -> Q/DQ -> final_output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(reduce_sum_output, .039f, - (QuantTypeLimits::min() + QuantTypeLimits::max()) / 2 + 1, - q_output); - - builder.AddDequantizeLinearNode(q_output, .039f, - (QuantTypeLimits::min() + QuantTypeLimits::max()) / 2 + 1, - final_output); - }; -} - -/** - * Runs a ReduceOp model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param op_type The ReduceOp type (e.g., ReduceSum). - * \param opset The opset version. Some opset versions have "axes" as an attribute or input. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) - * \param keepdims Common attribute for all reduce operations. - */ -template -static void RunReduceOpQDQTest(const std::string& op_type, int opset, const std::vector& input_shape, - const std::vector& axes, - ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All, - bool keepdims = true) { - ProviderOptions provider_options; -#if defined(_WIN32) - provider_options["backend_path"] = "QnnHtp.dll"; -#else - provider_options["backend_path"] = "libQnnHtp.so"; -#endif - - // If QNN EP can support all ops, then we expect a single fused node in the graph. - // Otherwise, we'll get a graph with 5 individual nodes handled by CPU EP. - constexpr bool noop_with_empty_axes = false; - RunQnnModelTest(BuildQDQReduceOpTestCase(op_type, - input_shape, - ReduceOpHasAxesInput(op_type, opset), // New opset changed axes to input. 
- axes, - keepdims, - noop_with_empty_axes), - provider_options, - opset, - expected_ep_assignment); -} - -// -// ReduceSum -// - -// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 13, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumU8Opset13) { - RunReduceOpQDQTest("ReduceSum", 13, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 11, which has "axes" as an attribute. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumU8Opset11) { - RunReduceOpQDQTest("ReduceSum", 11, {1, 3, 4, 4}, {0, 1, 2, 3}); -} - -// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses int8 as the quantization type. -// - Uses opset 13, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumS8Opset13) { - RunReduceOpQDQTest("ReduceSum", 13, {2, 2}, {0, 1}); -} - -// Tests that keepdims = false generates expected results. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumS8Opset13_NoKeepDims) { - RunReduceOpQDQTest("ReduceSum", 13, {2, 2}, {1}, ExpectedEPNodeAssignment::All, false); -} - -// Test that we don't support rank 5 Reduce ops. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumS8Opset13_Rank5Unsupported) { - RunReduceOpQDQTest("ReduceSum", 13, {1, 3, 4, 4, 2}, {0, 1, 2, 3, 4}, ExpectedEPNodeAssignment::None); -} - -// -// ReduceMax -// - -// ReduceMax on Linux's HTP emulator is always off by an amount equal to the final DQ.scale -// Works fine on windows arm64. 
-#if !defined(__linux__) -// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceMaxU8Opset18) { - RunReduceOpQDQTest("ReduceMax", 18, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnHTPBackendTests, TestQDQReduceMaxU8Opset13) { - RunReduceOpQDQTest("ReduceMax", 13, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses int8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceMaxS8Opset18) { - RunReduceOpQDQTest("ReduceMax", 18, {2, 2}, {0, 1}); -} -#endif // !defined(__linux__) - -// -// ReduceMin -// -// ReduceMin on Linux's HTP emulator is always off by an amount equal to the final DQ.scale -// Works fine on windows arm64. -#if !defined(__linux__) -// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceMinU8Opset18) { - RunReduceOpQDQTest("ReduceMin", 18, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. 
-// -// - Uses uint8 as the quantization type. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnHTPBackendTests, TestQDQReduceMinU8Opset13) { - RunReduceOpQDQTest("ReduceMin", 13, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// Uses int8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQReduceMinS8Opset18) { - RunReduceOpQDQTest("ReduceMin", 18, {2, 2}, {0, 1}); -} -#endif // !defined(__linux__) - -// -// ReduceMean -// - -// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceMeanU8Opset18) { - RunReduceOpQDQTest("ReduceMean", 18, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnHTPBackendTests, TestQDQReduceMeanU8Opset13) { - RunReduceOpQDQTest("ReduceMean", 13, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses int8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. 
-TEST_F(QnnHTPBackendTests, TestQDQReduceMeanS8Opset18) { - RunReduceOpQDQTest("ReduceMean", 18, {2, 2}, {0, 1}); -} - -#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -} // namespace test -} // namespace onnxruntime - -#endif \ No newline at end of file diff --git a/onnxruntime/test/providers/qnn/reduce_op_test.cc b/onnxruntime/test/providers/qnn/reduce_op_test.cc new file mode 100644 index 0000000000000..e0357de3e52f1 --- /dev/null +++ b/onnxruntime/test/providers/qnn/reduce_op_test.cc @@ -0,0 +1,618 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#if !defined(ORT_MINIMAL_BUILD) + +#include +#include "core/graph/graph.h" + +#include "test/optimizer/qdq_test_utils.h" +#include "test/providers/qnn/qnn_test_utils.h" + +#include "gtest/gtest.h" + +namespace onnxruntime { +namespace test { + +/** + * Creates a graph with a single reduce operator (e.g., ReduceSum, ReduceMin, etc.). Reduce operators take the + * axes of reduction as either a node attribute or an optional input (depending on opset). + * + * \param reduce_op_type The string denoting the reduce operator's type (e.g., "ReduceSum"). + * \param input_def The input definition (shape, data, etc.) + * \param axes_as_input True if the "axes" are specified as a node input. + * \param axes The axes of reduction. + * \param keepdims True if the output's rank should match the input. This is a node attribute that defaults to true. + * \param noop_with_empty_axes True if empty axes should force the node to act as a NoOp (no operation). + * This is a node attribute that defaults to false. + * \param domain The domain to assign to the graph node. + * + * \return A function that builds the graph with the provided builder. 
+ */ +template +static GetTestModelFn BuildReduceOpTestCase(const std::string& reduce_op_type, + const TestInputDef& input_def, + bool axes_as_input, std::vector axes, bool keepdims, + bool noop_with_empty_axes) { + return [reduce_op_type, input_def, axes_as_input, axes, keepdims, + noop_with_empty_axes](ModelTestBuilder& builder) { + std::vector input_args; + + // Input data arg + input_args.push_back(MakeTestInput(builder, input_def)); + + // Axes input (initializer) for newer opsets. + if (axes_as_input) { + input_args.push_back(builder.MakeInitializer({static_cast(axes.size())}, axes)); + } + + auto* reduce_sum_output = builder.MakeOutput(); + Node& reduce_sum_node = builder.AddNode(reduce_op_type, input_args, {reduce_sum_output}); + reduce_sum_node.AddAttribute("keepdims", static_cast(keepdims)); + + // Older opsets have "axes" as a node attribute. + if (!axes_as_input) { + reduce_sum_node.AddAttribute("axes", axes); + } else { + reduce_sum_node.AddAttribute("noop_with_empty_axes", static_cast(noop_with_empty_axes)); + } + }; +} + +/** + * Runs a ReduceOp model on the QNN CPU backend. Checks the graph node assignment, and that inference + * outputs for QNN and CPU match. + * + * \param op_type The ReduceOp type (e.g., ReduceSum). + * \param input_def The input definition (shape, data, etc.) + * \param axes The axes of reduction. + * \param opset The opset version. Some opset versions have "axes" as an attribute or input. + * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) + * \param keepdims Common attribute for all reduce operations. 
+ */ +template +static void RunReduceOpCpuTest(const std::string& op_type, + const TestInputDef& input_def, + const std::vector& axes, + bool keepdims, + int opset, + ExpectedEPNodeAssignment expected_ep_assignment) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnCpu.dll"; +#else + provider_options["backend_path"] = "libQnnCpu.so"; +#endif + + RunQnnModelTest(BuildReduceOpTestCase(op_type, + input_def, //{2, 2}, // input shape + ReduceOpHasAxesInput(op_type, opset), + axes, //{0, 1}, // axes + keepdims, + false), // noop_with_empty_axes + provider_options, + opset, + expected_ep_assignment); +} + +// +// ReduceSum +// + +// Test creates a graph with a ReduceSum node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is int32. +// - Uses opset 13, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceSumOpset13_Int32) { + RunReduceOpCpuTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceSum node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is int32. +// - Uses opset 11, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceSumOpset11_Int32) { + RunReduceOpCpuTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 11, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceSum node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an input. 
+TEST_F(QnnCPUBackendTests, ReduceSumOpset13_Float) { + RunReduceOpCpuTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceSum node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 11, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceSumOpset11_Float) { + RunReduceOpCpuTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 11, + ExpectedEPNodeAssignment::All); +} + +// +// ReduceProd +// + +// Test creates a graph with a ReduceProd node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceProdOpset18) { + RunReduceOpCpuTest("ReduceProd", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 18, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceProd node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceProdOpset13) { + RunReduceOpCpuTest("ReduceProd", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMax +// + +// Test creates a graph with a ReduceMax node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. 
+// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceMaxOpset18) { + RunReduceOpCpuTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 18, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceMax node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceMaxOpset13) { + RunReduceOpCpuTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMin +// + +// Test creates a graph with a ReduceMin node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceMinOpset18) { + RunReduceOpCpuTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 18, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceMin node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceMinOpset13) { + RunReduceOpCpuTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMean +// + +// Test creates a graph with a ReduceMean node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. 
+// +// - The input and output data type is float. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceMeanOpset18) { + RunReduceOpCpuTest("ReduceMean", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 18, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceMean node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceMeanOpset13) { + RunReduceOpCpuTest("ReduceMean", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) + +// Creates the following graph if axes is an input (newer opsets): +// _______________________ +// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) +// axes (int32, initializer) -> | Reduce___ | +// |_______________________| +// +// Creates the following graph if axes is an attribute (older opsets): +// _______________________ +// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) +// | Reduce___ | +// |_______________________| +// +template +GetTestQDQModelFn BuildQDQReduceOpTestCase(const std::string& reduce_op_type, + const TestInputDef& input_def, + bool axes_as_input, const std::vector& axes, bool keepdims, + bool noop_with_empty_axes) { + return [reduce_op_type, input_def, axes_as_input, axes, keepdims, + noop_with_empty_axes](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input -> Q -> DQ -> + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // -> ReduceOp (e.g., ReduceSum) -> + 
std::vector reduce_op_inputs; + reduce_op_inputs.push_back(input_qdq); + + if (axes_as_input) { + reduce_op_inputs.push_back(builder.MakeInitializer({static_cast(axes.size())}, axes)); + } + + auto* op_output = builder.MakeIntermediate(); + Node& reduce_sum_node = builder.AddNode(reduce_op_type, reduce_op_inputs, {op_output}); + reduce_sum_node.AddAttribute("keepdims", static_cast(keepdims)); + + if (axes_as_input) { + reduce_sum_node.AddAttribute("noop_with_empty_axes", static_cast(noop_with_empty_axes)); + } else { + reduce_sum_node.AddAttribute("axes", axes); + } + + // -> Q -> DQ -> final output + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, output_qparams[0].zero_point); + }; +} + +/** + * Runs a ReduceOp model on the QNN HTP backend. Checks the graph node assignment, and that inference + * outputs for QNN and CPU match. + * + * \param op_type The ReduceOp type (e.g., ReduceSum). + * \param input_def The input definition (shape, data, etc.). + * \param axes The axes input (or attribute). + * \param keepdims Common attribute for all reduce operations. + * \param opset The opset version. Some opset versions have "axes" as an attribute or input. + * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) + */ +template +static void RunReduceOpQDQTest(const std::string& op_type, + const TestInputDef& input_def, + const std::vector& axes, + bool keepdims, + int opset, + ExpectedEPNodeAssignment expected_ep_assignment) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + constexpr bool noop_with_empty_axes = false; + const bool axes_as_input = ReduceOpHasAxesInput(op_type, opset); // Later opsets have "axes" as an input. 
+ + TestQDQModelAccuracy(BuildReduceOpTestCase(op_type, input_def, axes_as_input, axes, keepdims, + noop_with_empty_axes), + BuildQDQReduceOpTestCase(op_type, input_def, axes_as_input, axes, keepdims, + noop_with_empty_axes), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); +} + +// +// ReduceSum +// + +// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 13, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceSumU8Opset13) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// TODO: Investigate inaccuracy +// Input values: 3.21289 -5.9981 -1.72799 6.27263 +// Input quantization params [-10, 10]: scale=0.0784313753, zero_point=127 +// +// Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0068997270427644253, zero_point=0. +// Expected val: 1.7594304084777832 +// QNN QDQ val: 1.731831431388855 (err 0.027598977088928223) +// CPU QDQ val: 1.7594304084777832 (err 0) +TEST_F(QnnHTPBackendTests, DISABLED_ReduceSumU8Opset13_Inaccurate) { + const std::vector input_data = {3.21289f, -5.9981f, -1.72799f, 6.27263f}; + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, input_data).OverrideValueRange(-10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} +// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 11, which has "axes" as an attribute. 
+TEST_F(QnnHTPBackendTests, ReduceSumU8Opset11) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}), + {0, 1}, // axes + true, // keepdims + 11, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses int8 as the quantization type. +// - Uses opset 13, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Tests that keepdims = false generates expected results. +TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13_NoKeepDims) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {1}, // axes + false, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Test that we don't support rank 5 Reduce ops. +TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13_Rank5Unsupported) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({1, 3, 4, 4, 2}, false, -10.0f, 10.0f), + {0, 1, 2, 3, 4}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::None); +} + +// +// ReduceMax +// + +// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. 
+TEST_F(QnnHTPBackendTests, ReduceMaxU8Opset18) { + RunReduceOpQDQTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnHTPBackendTests, ReduceMaxU8Opset13) { + RunReduceOpQDQTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses int8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceMaxS8Opset18) { + RunReduceOpQDQTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMin +// + +// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceMinU8Opset18) { + RunReduceOpQDQTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. 
+// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnHTPBackendTests, ReduceMinU8Opset13) { + RunReduceOpQDQTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// Uses int8 as the quantization type. +TEST_F(QnnHTPBackendTests, ReduceMinS8Opset18) { + RunReduceOpQDQTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMean +// + +// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceMeanU8Opset18) { + RunReduceOpQDQTest("ReduceMean", + TestInputDef({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// TODO: Investigate inaccuracy +// Input values: 3.21289 -5.9981 -1.72799 6.27263 +// Input quantization params [-10, 10]: scale=0.0784313753, zero_point=127 +// +// Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0017249317606911063, zero_point=0. 
+// Expected val: 0.4398576021194458 +// QNN QDQ val: 0.43295785784721375 (err 0.0068997442722320557) +// CPU QDQ val: 0.4398576021194458 (err 0) +TEST_F(QnnHTPBackendTests, DISABLED_ReduceMeanU8Opset18_Inaccurate) { + const std::vector input_data = {3.21289f, -5.9981f, -1.72799f, 6.27263f}; + RunReduceOpQDQTest("ReduceMean", + TestInputDef({2, 2}, false, input_data).OverrideValueRange(-10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnHTPBackendTests, ReduceMeanU8Opset13) { + RunReduceOpQDQTest("ReduceMean", + TestInputDef({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses int8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. 
+TEST_F(QnnHTPBackendTests, ReduceMeanS8Opset18) { + RunReduceOpQDQTest("ReduceMean", + TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {0, 1, 2, 3}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +} // namespace test +} // namespace onnxruntime + +#endif \ No newline at end of file diff --git a/onnxruntime/test/providers/qnn/resize_test.cc b/onnxruntime/test/providers/qnn/resize_test.cc index c5913ad3db5b8..1d900a41b1331 100644 --- a/onnxruntime/test/providers/qnn/resize_test.cc +++ b/onnxruntime/test/providers/qnn/resize_test.cc @@ -27,18 +27,18 @@ namespace test { * * \return A function that builds the graph with the provided builder. */ -static GetTestModelFn BuildResizeTestCase(const std::vector& shape, - const std::vector& sizes_data, - const std::string& mode = "nearest", - const std::string& coordinate_transformation_mode = "half_pixel", - const std::string& nearest_mode = "round_prefer_floor") { - return [shape, sizes_data, mode, coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder) { - auto* input = builder.MakeInput(shape, 0.0f, 20.0f); - auto* roi = builder.MakeInitializer({0}, {}); - auto* scales = builder.MakeInitializer({0}, {}); - auto* sizes = builder.Make1DInitializer(sizes_data); - - auto* output = builder.MakeOutput(); +static GetTestModelFn GetResizeModelBuilder(const TestInputDef& input_def, + const std::vector& sizes_data, + const std::string& mode = "nearest", + const std::string& coordinate_transformation_mode = "half_pixel", + const std::string& nearest_mode = "round_prefer_floor") { + return [input_def, sizes_data, mode, coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* roi = builder.MakeInitializer({0}, {}); + NodeArg* scales = builder.MakeInitializer({0}, {}); + NodeArg* sizes = builder.Make1DInitializer(sizes_data); 
+ + NodeArg* output = builder.MakeOutput(); Node& resize_node = builder.AddNode("Resize", {input, roi, scales, sizes}, {output}); resize_node.AddAttribute("mode", mode); resize_node.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode); @@ -49,17 +49,17 @@ static GetTestModelFn BuildResizeTestCase(const std::vector& shape, }; } -static GetTestModelFn BuildResizeTestCaseWithScales(const std::vector& shape, - const std::vector& scales_data, - const std::string& mode = "nearest", - const std::string& coordinate_transformation_mode = "half_pixel", - const std::string& nearest_mode = "round_prefer_floor") { - return [shape, scales_data, mode, coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder) { - auto* input = builder.MakeInput(shape, 0.0f, 20.0f); - auto* roi = builder.MakeInitializer({0}, {}); - auto* scales = builder.Make1DInitializer(scales_data); - - auto* output = builder.MakeOutput(); +static GetTestModelFn GetResizeModelBuilderWithScales(const TestInputDef& input_def, + const std::vector& scales_data, + const std::string& mode = "nearest", + const std::string& coordinate_transformation_mode = "half_pixel", + const std::string& nearest_mode = "round_prefer_floor") { + return [input_def, scales_data, mode, coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* roi = builder.MakeInitializer({0}, {}); + NodeArg* scales = builder.Make1DInitializer(scales_data); + + NodeArg* output = builder.MakeOutput(); Node& resize_node = builder.AddNode("Resize", {input, roi, scales}, {output}); resize_node.AddAttribute("mode", mode); resize_node.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode); @@ -70,11 +70,45 @@ static GetTestModelFn BuildResizeTestCaseWithScales(const std::vector& }; } +template +static GetTestQDQModelFn GetQDQResizeModelBuilder(const TestInputDef& input_def, + const std::vector& sizes_data, + const 
std::string& mode = "nearest", + const std::string& coordinate_transformation_mode = "half_pixel", + const std::string& nearest_mode = "round_prefer_floor") { + return [input_def, sizes_data, mode, + coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input -> Q -> DQ -> + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + NodeArg* roi = builder.MakeInitializer({0}, {}); + NodeArg* scales = builder.MakeInitializer({0}, {}); + NodeArg* sizes = builder.Make1DInitializer(sizes_data); + + NodeArg* resize_output = builder.MakeIntermediate(); + Node& resize_node = builder.AddNode("Resize", {input_qdq, roi, scales, sizes}, {resize_output}); + resize_node.AddAttribute("mode", mode); + resize_node.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode); + + if (mode == "nearest") { + resize_node.AddAttribute("nearest_mode", nearest_mode); + } + + // Resize requires the output quantization parameters to match the input. + output_qparams[0] = input_qparams; + AddQDQNodePairWithOutputAsGraphOutput(builder, resize_output, output_qparams[0].scale, + output_qparams[0].zero_point); + }; +} + /** * Runs a Resize model on the QNN CPU backend. Checks the graph node assignment, and that inference * outputs for QNN and CPU match. * - * \param shape The shape of the input and output. Input data is randomly generated with this shape. + * \param input_def The input definition (shape, data, etc). * \param sizes_data The sizes input which determines the output shape. * \param mode The resize mode (e.g., nearest, linear). * \param coordinate_transformation_mode The coordinate transformation mode (e.g., half_pixel, pytorch_half_pixel). 
@@ -82,7 +116,7 @@ static GetTestModelFn BuildResizeTestCaseWithScales(const std::vector& * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). * \param opset The opset version to use. */ -static void RunCPUResizeOpTest(const std::vector& shape, const std::vector& sizes_data, +static void RunCPUResizeOpTest(const TestInputDef& input_def, const std::vector& sizes_data, const std::string& mode, const std::string& coordinate_transformation_mode, const std::string& nearest_mode, ExpectedEPNodeAssignment expected_ep_assignment, @@ -94,13 +128,13 @@ static void RunCPUResizeOpTest(const std::vector& shape, const std::vec provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildResizeTestCase(shape, sizes_data, mode, coordinate_transformation_mode, nearest_mode), + RunQnnModelTest(GetResizeModelBuilder(input_def, sizes_data, mode, coordinate_transformation_mode, nearest_mode), provider_options, opset, expected_ep_assignment); } -static void RunCPUResizeOpTestWithScales(const std::vector& shape, const std::vector& scales_data, +static void RunCPUResizeOpTestWithScales(const TestInputDef& input_def, const std::vector& scales_data, const std::string& mode, const std::string& coordinate_transformation_mode, const std::string& nearest_mode, ExpectedEPNodeAssignment expected_ep_assignment, @@ -112,17 +146,18 @@ static void RunCPUResizeOpTestWithScales(const std::vector& shape, cons provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildResizeTestCaseWithScales(shape, scales_data, mode, coordinate_transformation_mode, nearest_mode), + RunQnnModelTest(GetResizeModelBuilderWithScales(input_def, scales_data, mode, coordinate_transformation_mode, nearest_mode), provider_options, opset, expected_ep_assignment); } template -static void RunQDQResizeOpTest(const std::vector& shape, const std::vector& sizes_data, +static void RunQDQResizeOpTest(const TestInputDef& input_def, + const 
std::vector& sizes_data, const std::string& mode, const std::string& coordinate_transformation_mode, const std::string& nearest_mode, - ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err) { + ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -130,12 +165,13 @@ static void RunQDQResizeOpTest(const std::vector& shape, const std::vec provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQResizeTestCase(shape, sizes_data, mode, coordinate_transformation_mode, - nearest_mode, true), - provider_options, - 18, // opset - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(GetResizeModelBuilder(input_def, sizes_data, mode, coordinate_transformation_mode, nearest_mode), + GetQDQResizeModelBuilder(input_def, sizes_data, mode, coordinate_transformation_mode, + nearest_mode), + provider_options, + 18, // opset + expected_ep_assignment, + 1e-5f); } // @@ -152,57 +188,68 @@ static void RunQDQResizeOpTest(const std::vector& shape, const std::vec // Upsample that uses "round_prefer_floor" as the "nearest_mode". // coordinate_transformation_mode: "half_pixel" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeUpsampleNearestHalfPixel_rpf) { - RunCPUResizeOpTest({1, 2, 7, 5}, {1, 2, 21, 10}, "nearest", "half_pixel", "round_prefer_floor", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestHalfPixel_rpf) { + RunCPUResizeOpTest(TestInputDef({1, 2, 7, 5}, false, -10.0f, 10.0f), // Random input w/ range [-10, 10] + {1, 2, 21, 10}, // Sizes + "nearest", + "half_pixel", + "round_prefer_floor", ExpectedEPNodeAssignment::All); } // Upsample that uses "round_prefer_ceil" as the "nearest_mode". 
// coordinate_transformation_mode: "half_pixel" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeUpsampleNearestHalfPixel_rpc) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 7, 5}, "nearest", "half_pixel", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestHalfPixel_rpc) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 7, 5}, "nearest", "half_pixel", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Downsample that uses "round_prefer_ceil" as the "nearest_mode". // coordinate_transformation_mode: "half_pixel" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestHalfPixel_rpc) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 1, 3}, "nearest", "half_pixel", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestHalfPixel_rpc) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 1, 3}, "nearest", "half_pixel", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Downsample that uses "round_prefer_floor" as the "nearest_mode". // coordinate_transformation_mode: "half_pixel" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestHalfPixel_rpf) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 1, 2}, "nearest", "half_pixel", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestHalfPixel_rpf) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 1, 2}, "nearest", "half_pixel", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Upsample that uses "round_prefer_floor" as the "nearest_mode". 
// coordinate_transformation_mode: "align_corners" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeUpsampleNearestAlignCorners_rpf) { - RunCPUResizeOpTest({1, 2, 7, 5}, {1, 2, 21, 10}, "nearest", "align_corners", "round_prefer_floor", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestAlignCorners_rpf) { + RunCPUResizeOpTest(TestInputDef({1, 2, 7, 5}, false, -10.0f, 10.0f), + {1, 2, 21, 10}, "nearest", "align_corners", "round_prefer_floor", ExpectedEPNodeAssignment::All); } // Upsample that uses "round_prefer_ceil" as the "nearest_mode". // coordinate_transformation_mode: "align_corners" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeUpsampleNearestAlignCorners_rpc) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 7, 5}, "nearest", "align_corners", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestAlignCorners_rpc) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 7, 5}, "nearest", "align_corners", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Downsample that uses "round_prefer_ceil" as the "nearest_mode". // coordinate_transformation_mode: "align_corners" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestAlignCorners_rpc) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 1, 3}, "nearest", "align_corners", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestAlignCorners_rpc) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 1, 3}, "nearest", "align_corners", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Downsample that uses "round_prefer_floor" as the "nearest_mode". 
// coordinate_transformation_mode: "align_corners" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestAlignCorners_rpf) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 1, 2}, "nearest", "align_corners", "round_prefer_floor", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestAlignCorners_rpf) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 1, 2}, "nearest", "align_corners", "round_prefer_floor", ExpectedEPNodeAssignment::All); } @@ -210,23 +257,27 @@ TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestAlignCorners_rpf) // Cpu tests that use the "linear" mode. // -TEST_F(QnnCPUBackendTests, TestResize2xLinearHalfPixel) { - RunCPUResizeOpTest({1, 3, 4, 5}, {1, 3, 8, 10}, "linear", "half_pixel", "", +TEST_F(QnnCPUBackendTests, Resize2xLinearHalfPixel) { + RunCPUResizeOpTest(TestInputDef({1, 3, 4, 5}, false, -10.0f, 10.0f), + {1, 3, 8, 10}, "linear", "half_pixel", "", ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestResize2xLinearHalfPixel_scales) { - RunCPUResizeOpTestWithScales({1, 3, 4, 5}, {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "half_pixel", "", +TEST_F(QnnCPUBackendTests, Resize2xLinearHalfPixel_scales) { + RunCPUResizeOpTestWithScales(TestInputDef({1, 3, 4, 5}, false, -10.0f, 10.0f), + {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "half_pixel", "", ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestResize2xLinearAlignCorners) { - RunCPUResizeOpTest({1, 3, 4, 5}, {1, 3, 8, 10}, "linear", "align_corners", "", +TEST_F(QnnCPUBackendTests, Resize2xLinearAlignCorners) { + RunCPUResizeOpTest(TestInputDef({1, 3, 4, 5}, false, -10.0f, 10.0f), + {1, 3, 8, 10}, "linear", "align_corners", "", ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestResize2xLinearAlignCorners_scales) { - RunCPUResizeOpTestWithScales({1, 3, 4, 5}, {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "align_corners", "", +TEST_F(QnnCPUBackendTests, Resize2xLinearAlignCorners_scales) { + 
RunCPUResizeOpTestWithScales(TestInputDef({1, 3, 4, 5}, false, -10.0f, 10.0f), + {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "align_corners", "", ExpectedEPNodeAssignment::All); } @@ -235,19 +286,22 @@ TEST_F(QnnCPUBackendTests, TestResize2xLinearAlignCorners_scales) { // HTP tests: // -TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xLinearPytorchHalfPixel) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 8, 8}, "linear", "pytorch_half_pixel", "", - ExpectedEPNodeAssignment::All, 0.0031f); +TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearPytorchHalfPixel) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 8, 8}, "linear", "pytorch_half_pixel", "", + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xNearestHalfPixelRoundPreferFloor) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 8, 8}, "nearest", "half_pixel", "round_prefer_floor", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestHalfPixelRoundPreferFloor) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 8, 8}, "nearest", "half_pixel", "round_prefer_floor", + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xNearestAsymmetricFloor) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 8, 8}, "nearest", "asymmetric", "floor", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestAsymmetricFloor) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 8, 8}, "nearest", "asymmetric", "floor", + ExpectedEPNodeAssignment::All); } // TODO: Investigate with Qualcomm. 
The qnn-onnx-converter tool translates ONNX Resize [nearest, asymmetric, ceil] to @@ -259,19 +313,22 @@ TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xNearestAsymmetricFloor) { // are an almost-equal pair // Actual : 16 - byte object, // where the value pair(0.15, 0.501) at index #1 don't match, which is 0.351 from 0.15 -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQU8Resize2xNearestAsymmetricCeil) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 8, 8}, "nearest", "asymmetric", "ceil", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, DISABLED_ResizeU8_2xNearestAsymmetricCeil) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 8, 8}, "nearest", "asymmetric", "ceil", + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestQDQU8Resize3xNearestAsymmetricFloor) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 12, 12}, "nearest", "asymmetric", "floor", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, ResizeU8_3xNearestAsymmetricFloor) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 12, 12}, "nearest", "asymmetric", "floor", + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestQDQU8ResizeHalfNearestAsymmetricFloor) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 2, 2}, "nearest", "asymmetric", "floor", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, ResizeU8_HalfNearestAsymmetricFloor) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 2, 2}, "nearest", "asymmetric", "floor", + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc index 93bd96e9549e8..5b4049d52c16f 100644 --- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc @@ -20,6 +20,21 @@ namespace test { using 
UInt8Limits = std::numeric_limits; +template +static GetTestModelFn BuildUnaryOpTestCase(const std::string& op_type, const TestInputDef& input0_def, + const std::vector& attrs, + const std::string& domain = kOnnxDomain) { + return [op_type, input0_def, attrs, domain](ModelTestBuilder& builder) { + NodeArg* input0 = MakeTestInput(builder, input0_def); + + auto* output = builder.MakeOutput(); + auto& op_node = builder.AddNode(op_type, {input0}, {output}, domain); + for (const auto& attr : attrs) { + op_node.AddAttributeProto(attr); + } + }; +} + // Creates the graph: // _______________________ // | | @@ -28,60 +43,100 @@ using UInt8Limits = std::numeric_limits; // // Currently used to test QNN EP. template -GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const TestInputDef& input_def, - const std::string& op_type, - const std::vector& attrs = {}, - const std::string& domain = kOnnxDomain) { - return [input_def, op_type, attrs, domain](ModelTestBuilder& builder) { - const InputQType quant_zero_point = 0; - const float quant_scale = 1.0f; - - auto* input = MakeTestInput(builder, input_def); - auto* dq_input = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(input, quant_scale, quant_zero_point, dq_input); +GetTestQDQModelFn BuildQDQUnaryOpTestCase(const TestInputDef& input_def, + const std::string& op_type, + const std::vector& attrs, + const std::string& domain = kOnnxDomain) { + return [input_def, op_type, attrs, domain](ModelTestBuilder& builder, + std::vector>& output_qparams) { + auto* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); auto* op_output = builder.MakeIntermediate(); - auto& op_node = builder.AddNode(op_type, {dq_input}, {op_output}, domain); + auto& op_node = builder.AddNode(op_type, {input_qdq}, {op_output}, domain); for (const auto& attr : attrs) { op_node.AddAttributeProto(attr); 
} - auto* q_output = builder.MakeOutput(); - builder.AddQuantizeLinearNode(op_output, quant_scale, quant_zero_point, q_output); + // op_output -> Q -> DQ -> output + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, output_qparams[0].zero_point); }; } -template -static GetTestModelFn BuildQDQBinaryOpTestCase(const std::string& op_type, const TestInputDef& input0_def, - const TestInputDef& input1_def) { +/** + * Runs an Simple Op model on the QNN HTP backend. Checks the graph node assignment, and that inference + * outputs for QNN and CPU match. + * + * \param input_shape The input's shape. + * \param test_description Description of the test for error reporting. + * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). + * \param num_modes_in_graph The number of expected nodes in the graph. + */ +template +static void RunQDQUnaryOpTest(const TestInputDef& input_def, const std::string& op_type, + const std::vector& attrs, + int opset_version, + ExpectedEPNodeAssignment expected_ep_assignment, + const std::string& domain = kOnnxDomain) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + // Runs model with DQ-> Op -> Q and compares the outputs of the CPU and QNN EPs. 
+ TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, attrs, domain), + BuildQDQUnaryOpTestCase(input_def, op_type, attrs, domain), + provider_options, + opset_version, + expected_ep_assignment, + 1e-5f); +} + +template +static GetTestModelFn BuildBinaryOpTestCase(const std::string& op_type, const TestInputDef& input0_def, + const TestInputDef& input1_def) { return [op_type, input0_def, input1_def](ModelTestBuilder& builder) { - const InputQType zero_point = std::numeric_limits::max() / 2; - constexpr float qdq_scale = 0.0004f; + NodeArg* input0 = MakeTestInput(builder, input0_def); + NodeArg* input1 = MakeTestInput(builder, input1_def); + auto* output = builder.MakeOutput(); + builder.AddNode(op_type, {input0, input1}, {output}); + }; +} + +template +static GetTestQDQModelFn BuildQDQBinaryOpTestCase(const std::string& op_type, + const TestInputDef& input0_def, + const TestInputDef& input1_def) { + return [op_type, input0_def, input1_def](ModelTestBuilder& builder, + std::vector>& output_qparams) { NodeArg* input0 = MakeTestInput(builder, input0_def); NodeArg* input1 = MakeTestInput(builder, input1_def); - NodeArg* output = builder.MakeOutput(); // input -> Q -> DQ -> Op - auto* qdq0_output = AddQDQNodePair(builder, input0, qdq_scale, zero_point); - auto* qdq1_output = AddQDQNodePair(builder, input1, qdq_scale, zero_point); + QuantParams input0_qparams = GetTestInputQuantParams(input0_def); + auto* qdq0_output = AddQDQNodePair(builder, input0, input0_qparams.scale, input0_qparams.zero_point); + + QuantParams input1_qparams = GetTestInputQuantParams(input1_def); + auto* qdq1_output = AddQDQNodePair(builder, input1, input1_qparams.scale, input1_qparams.zero_point); // Op -> op_output auto* op_output = builder.MakeIntermediate(); builder.AddNode(op_type, {qdq0_output, qdq1_output}, {op_output}); // op_output -> Q -> DQ -> output - auto* op_q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(op_output, qdq_scale, zero_point, op_q_output); - 
builder.AddDequantizeLinearNode(op_q_output, qdq_scale, zero_point, output); + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, + output_qparams[0].zero_point); }; } -template -static void RunQDQBinaryOpTest(const std::string& op_type, const TestInputDef& input0_def, - const TestInputDef& input1_def, - int opset_version, +template +static void RunQDQBinaryOpTest(const std::string& op_type, const TestInputDef& input0_def, + const TestInputDef& input1_def, int opset_version, ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) @@ -90,28 +145,18 @@ static void RunQDQBinaryOpTest(const std::string& op_type, const TestInputDef(op_type, input0_def, input1_def), - provider_options, - opset_version, - expected_ep_assignment); + TestQDQModelAccuracy(BuildBinaryOpTestCase(op_type, input0_def, input1_def), + BuildQDQBinaryOpTestCase(op_type, input0_def, input1_def), + provider_options, + opset_version, + expected_ep_assignment, + 1e-5f); } -/** - * Runs an Simple Op model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param input_shape The input's shape. - * \param test_description Description of the test for error reporting. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). - * \param num_modes_in_graph The number of expected nodes in the graph. 
- */ -template -static void RunQDQSingleInputOpTest(const TestInputDef& input_def, const std::string& op_type, - const std::vector& attrs, - int opset_version, - ExpectedEPNodeAssignment expected_ep_assignment, - const std::string& domain = kOnnxDomain) { +template +static void RunBinaryOpTest(const std::string& op_type, const TestInputDef& input0_def, + const TestInputDef& input1_def, int opset_version, + ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -119,8 +164,8 @@ static void RunQDQSingleInputOpTest(const TestInputDef& input_def, c provider_options["backend_path"] = "libQnnHtp.so"; #endif - // Runs model with DQ-> Op -> Q and compares the outputs of the CPU and QNN EPs. - RunQnnModelTest(BuildQDQSingleInputOpTestCase(input_def, op_type, attrs, domain), + // Runs model with a Q/DQ binary op and compares the outputs of the CPU and QNN EPs. + RunQnnModelTest(BuildBinaryOpTestCase(op_type, input0_def, input1_def), provider_options, opset_version, expected_ep_assignment); @@ -128,87 +173,143 @@ static void RunQDQSingleInputOpTest(const TestInputDef& input_def, c // Check that QNN compiles DQ -> Gelu -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQGeluTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Gelu", {}, 11, ExpectedEPNodeAssignment::All, kMSDomain); +TEST_F(QnnHTPBackendTests, UnaryOp_Gelu) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] + "Gelu", + {}, + 11, + ExpectedEPNodeAssignment::All, + kMSDomain); // GeLu is a contrib op. } // Check that QNN compiles DQ -> Elu -> Q as a single unit. // Use an input of rank 3. 
-TEST_F(QnnHTPBackendTests, TestQDQEluTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Elu", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Elu) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] + "Elu", + {}, + 11, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> HardSwish -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQHardSwishTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "HardSwish", {}, 14, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_HardSwish) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] + "HardSwish", + {}, + 14, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Atan -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQAtanTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Atan", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Atan) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] + "Atan", + {}, + 14, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Asin -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQAsinTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, 0, 1), // input range 0 ~ 1 - "Asin", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Asin) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -0.5f, 0.5f), // input range -0.5 to 0.5 + "Asin", {}, + 13, ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Sign -> Q as a single unit. // Use an input of rank 3. 
-TEST_F(QnnHTPBackendTests, TestQDQSignTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Sign", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Sign) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), + "Sign", {}, + 13, ExpectedEPNodeAssignment::All); } -// Check that QNN compiles DQ -> Sign -> Q as a single unit. +// Check that QNN compiles DQ -> Sin -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQSinTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Sin", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Sin) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -3.14159f, 3.14159f), + "Sin", {}, + 11, ExpectedEPNodeAssignment::All); +} + +// Check that QNN compiles DQ -> Cos -> Q as a single unit. +// Use an input of rank 3. +TEST_F(QnnHTPBackendTests, UnaryOp_Cos) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, {-3.14159f, -1.5f, -0.5f, 0.0f, 1.5, 3.14159f}), + "Cos", {}, + 11, ExpectedEPNodeAssignment::All); +} + +// TODO: Inaccuracy when computing cos(-1.88436) +// +// cos(-1.88436f) fp32 cpu ep = -0.308450460 +// cos(-1.88436f) qdq cpu ep = -0.298039228 +// cos(-1.88436f) qdq QNN ep = -0.321568638 +// +// QNN error: 0.013118177652359009, CPU error: 0.010411232709884644 +// +// input quant params: scale=0.0246399231, zero_point=127 +// output quant params: scale=0.00784313772, zero_point=127 +TEST_F(QnnHTPBackendTests, DISABLED_UnaryOp_Cos_Inaccurate) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, {-3.14159f, -1.88436f, -0.542863f, 0.0f, 1.05622f, 3.14159f}), + "Cos", {}, + 11, ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that the default axis (-1) for SoftMax opset 13 works. 
-TEST_F(QnnHTPBackendTests, TestQDQSoftmax13_DefaultAxis) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Softmax", - {}, // Uses default axis of -1 for opset 13 - 13, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), + "Softmax", + {}, // Uses default axis of -1 for opset 13 + 13, ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that an axis != -1 is not supported. -TEST_F(QnnHTPBackendTests, TestQDQSoftmax13_UnsupportedAxis) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Softmax", - {utils::MakeAttribute("axis", static_cast(1))}, - 13, ExpectedEPNodeAssignment::None); +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), + "Softmax", + {utils::MakeAttribute("axis", static_cast(1))}, + 13, ExpectedEPNodeAssignment::None); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that the default axis (1) for SoftMax opset < 13 does not work. -TEST_F(QnnHTPBackendTests, TestQDQSoftmax11_DefaultAxisFails) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Softmax", - {}, // Uses default axis of 1 for opset < 13. - 11, ExpectedEPNodeAssignment::None); +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax11_DefaultAxisFails) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), + "Softmax", + {}, // Uses default axis of 1 for opset < 13. + 11, ExpectedEPNodeAssignment::None); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that setting an axis value of -1 works for Softmax opset < 13. 
-TEST_F(QnnHTPBackendTests, TestQDQSoftmax11_SetValidAxis) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Softmax", - {utils::MakeAttribute("axis", static_cast(-1))}, - 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax11_SetValidAxis) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), + "Softmax", + {utils::MakeAttribute("axis", static_cast(-1))}, + 11, ExpectedEPNodeAssignment::All); +} + +// Test QDQ Abs op. +TEST_F(QnnHTPBackendTests, UnaryOp_Abs) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), + "Abs", + {}, + 13, ExpectedEPNodeAssignment::All); +} + +// Test QDQ Ceil op. +TEST_F(QnnHTPBackendTests, UnaryOp_Ceil) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -100.0f, 100.0f), + "Ceil", + {}, + 13, ExpectedEPNodeAssignment::All); } // Run QDQ model on HTP twice @@ -225,68 +326,138 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCacheTest) { const std::string context_binary_file = "./qnn_context_binary_test.bin"; provider_options["qnn_context_cache_path"] = context_binary_file; - const TestInputDef input_def({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()); + const TestInputDef input_def({1, 2, 3}, false, -10.0f, 10.0f); + const std::string op_type = "Atan"; // Runs model with DQ-> Atan-> Q and compares the outputs of the CPU and QNN EPs. 
// 1st run will generate the Qnn context cache binary file - RunQnnModelTest(BuildQDQSingleInputOpTestCase(input_def, "Atan"), - provider_options, - 11, - ExpectedEPNodeAssignment::All); + TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, {}), + BuildQDQUnaryOpTestCase(input_def, op_type, {}), + provider_options, + 14, + ExpectedEPNodeAssignment::All, + 1e-5f); // Make sure the Qnn context cache binary file is generated EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str())); // 2nd run will load and run from Qnn context cache binary file - RunQnnModelTest(BuildQDQSingleInputOpTestCase(input_def, "Atan"), + TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, {}), + BuildQDQUnaryOpTestCase(input_def, op_type, {}), + provider_options, + 14, + ExpectedEPNodeAssignment::All, + 1e-5f); +} + +TEST_F(QnnHTPBackendTests, QuantAccuracyTest) { + ProviderOptions provider_options; + +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + // Note: a graph input -> Q -> DQ -> is optimized by Qnn to have a perfectly accurate output. + // ORT's CPU EP, on the otherhand, actually quantizes and dequantizes the input, which leads to different outputs. 
+ auto builder_func = [](ModelTestBuilder& builder) { + const TestInputDef input0_def({1, 2, 3}, false, {1.0f, 2.0f, 10.0f, 20.0f, 100.0f, 200.0f}); + + // input -> Q -> Transpose -> DQ -> output + NodeArg* input0 = MakeTestInput(builder, input0_def); + QuantParams qparams = GetTestInputQuantParams(input0_def); + + auto* quant_input = builder.MakeIntermediate(); + builder.AddQuantizeLinearNode(input0, qparams.scale, qparams.zero_point, quant_input); + + auto* op_output = builder.MakeIntermediate(); + builder.AddNode("Transpose", {quant_input}, {op_output}); + + NodeArg* output = builder.MakeOutput(); + builder.AddDequantizeLinearNode(op_output, qparams.scale, qparams.zero_point, output); + }; + + // Runs model with DQ-> Atan-> Q and compares the outputs of the CPU and QNN EPs. + // 1st run will generate the Qnn context cache binary file + RunQnnModelTest(builder_func, provider_options, - 11, + 13, ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestSub4D_SmallInputs) { - RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 8, 8}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 8, 8}, false, -1.0f, 1.0f), - 17, ExpectedEPNodeAssignment::All); +// Test QDQ Add +TEST_F(QnnHTPBackendTests, BinaryOp_Add4D) { + RunQDQBinaryOpTest("Add", TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + 17, ExpectedEPNodeAssignment::All); +} + +// Test QDQ Sub +TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D) { + RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f), + TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f), + 17, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). // Enable when this is fixed. 
-TEST_F(QnnHTPBackendTests, DISABLED_TestSub4D_LargeInputs) { - RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Sub4D_LargeInputs) { + RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + 17, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). // Enable when this is fixed. -TEST_F(QnnHTPBackendTests, DISABLED_TestSub4D_Broadcast) { - RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Sub4D_Broadcast) { + RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), + 17, ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestDiv4D_SmallInputs) { - RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 8, 8}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 8, 8}, false, -1.0f, 1.0f), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_SmallInputs) { + RunQDQBinaryOpTest("Div", + TestInputDef({1, 2, 2, 2}, false, {-10.0f, -8.0f, -1.0f, 0.0f, 1.0f, 2.1f, 8.0f, 10.0f}), + TestInputDef({1, 2, 2, 2}, false, {5.0f, 4.0f, 1.0f, 1.0f, 1.0f, 4.0f, 4.0f, 5.0f}), + 17, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). // Enable when this is fixed. 
-TEST_F(QnnHTPBackendTests, DISABLED_TestDiv4D_LargeInputs) { - RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Div4D_LargeInputs) { + RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + 17, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). // Enable when this is fixed. // Fails accuracy when input0 has dims [1,3,768,768] -TEST_F(QnnHTPBackendTests, DISABLED_TestDiv4D_Broadcast) { - RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Div4D_Broadcast) { + RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), + 17, ExpectedEPNodeAssignment::All); +} + +// Test QDQ Mul +TEST_F(QnnHTPBackendTests, BinaryOp_Mul4D) { + RunQDQBinaryOpTest("Mul", TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + 17, ExpectedEPNodeAssignment::All); +} +// Test QDQ And +TEST_F(QnnHTPBackendTests, BinaryOp_And4D) { + RunBinaryOpTest("And", TestInputDef({1, 4}, false, {false, false, true, true}), + TestInputDef({1, 4}, false, {false, true, false, true}), + 17, ExpectedEPNodeAssignment::All); +} + +// Test that Or is not yet supported on HTP backend. 
+TEST_F(QnnHTPBackendTests, BinaryOp_HTP_Or_Unsupported) { + RunBinaryOpTest("Or", TestInputDef({1, 4}, false, {false, false, true, true}), + TestInputDef({1, 4}, false, {false, true, false, true}), + 17, ExpectedEPNodeAssignment::None); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/util/include/test_utils.h b/onnxruntime/test/util/include/test_utils.h index 83eb4f59bd42c..19e244afdded3 100644 --- a/onnxruntime/test/util/include/test_utils.h +++ b/onnxruntime/test/util/include/test_utils.h @@ -40,6 +40,12 @@ struct EPVerificationParams { const std::function* graph_verifier{nullptr}; }; +// Verify equality of two output tensors. +void VerifyOutput(const std::string& output_name, + const Tensor& expected_tensor, + const Tensor& tensor, + float fp32_abs_err); + // Return number of nodes in the Graph and any subgraphs that are assigned to the specified execution provider int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type); diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc index 1d38aea91066d..bc2d9aec8599b 100644 --- a/onnxruntime/test/util/test_utils.cc +++ b/onnxruntime/test/util/test_utils.cc @@ -18,6 +18,48 @@ namespace onnxruntime { namespace test { +void VerifyOutput(const std::string& output_name, + const Tensor& expected_tensor, + const Tensor& tensor, + float fp32_abs_err) { + ASSERT_TRUE(SpanEq(expected_tensor.Shape().GetDims(), tensor.Shape().GetDims())); + ASSERT_EQ(expected_tensor.GetElementType(), tensor.GetElementType()); + auto element_type = expected_tensor.GetElementType(); + switch (element_type) { + case ONNX_NAMESPACE::TensorProto_DataType_UINT32: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_INT32: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + 
break; + case ONNX_NAMESPACE::TensorProto_DataType_INT64: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_UINT8: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_INT8: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_BOOL: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { + EXPECT_THAT(expected_tensor.DataAsSpan(), + ::testing::Pointwise(::testing::FloatNear(fp32_abs_err), tensor.DataAsSpan())); + break; + } + default: + ORT_THROW("Unhandled data type. Please add 'case' statement for ", element_type); + } +} + static void VerifyOutputs(const std::vector& output_names, const std::vector& expected_fetches, const std::vector& fetches, @@ -27,41 +69,7 @@ static void VerifyOutputs(const std::vector& output_names, for (size_t i = 0, end = expected_fetches.size(); i < end; ++i) { auto& ltensor = expected_fetches[i].Get(); auto& rtensor = fetches[i].Get(); - ASSERT_TRUE(SpanEq(ltensor.Shape().GetDims(), rtensor.Shape().GetDims())); - auto element_type = ltensor.GetElementType(); - switch (element_type) { - case ONNX_NAMESPACE::TensorProto_DataType_UINT32: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_INT32: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_INT64: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case 
ONNX_NAMESPACE::TensorProto_DataType_UINT8: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_INT8: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_BOOL: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { - EXPECT_THAT(ltensor.DataAsSpan(), - ::testing::Pointwise(::testing::FloatNear(params.fp32_abs_err), rtensor.DataAsSpan())); - break; - } - default: - ORT_THROW("Unhandled data type. Please add 'case' statement for ", element_type); - } + VerifyOutput(output_names[i], ltensor, rtensor, params.fp32_abs_err); } }