diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc index 4f24fa26d8896..5dcf27c9b5d2e 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc @@ -64,10 +64,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() { {"Atan", {}}, {"Asin", {}}, {"Sin", {}}, + {"Cos", {}}, {"Sign", {}}, {"Tanh", {}}, {"Exp", {}}, - {"LRN", {}}}; + {"LRN", {}}, + {"Ceil", {}}, + {"Abs", {}}}; } static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() { return {{"Add", {}}, diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 43998084618c0..d80594d8f72c7 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -145,34 +145,28 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp if (it != node_unit_supported_result.cend()) { return it->second; } else { - // quantized required, filter out the non-quantized nodes, filter in the QDQ nodes - auto IsQdqNode = [](const NodeUnit& node_unit) { - if ("QuantizeLinear" == node_unit.OpType() || "DequantizeLinear" == node_unit.OpType()) { - return true; - } else { - return false; - } - }; + const std::string& op_type = node_unit.OpType(); + const bool is_qdq_node = op_type == "QuantizeLinear" || op_type == "DequantizeLinear"; // Is NPU backend, is single node, case by case // Q/DQ nodes -- supported // Transpose nodes -- supported // Cast nodes -- need to call CastOpBuilder::IsOpSupported if (is_npu_backend && NodeUnit::Type::SingleNode == node_unit.UnitType()) { - if (IsQdqNode(node_unit)) { // Qnn has Quantize & Dequantize Op + if (is_qdq_node) { // Qnn has Quantize & Dequantize Op 
LOGS(logger, VERBOSE) << "Single Q/DQ node is supported for NPU backend. Node name: " << node_unit.Name(); return true; } // Tranpose only changes the data layout. NPU still supports it. - if ("Transpose" == node_unit.OpType()) { + if ("Transpose" == op_type) { LOGS(logger, VERBOSE) << "Single Transpose node is supported for NPU backend. Node name: " << node_unit.Name(); return true; } - // For Cast, need to call IsOpSupported (below) to validate input and output types. + // For Cast, And, and Or, we need to call IsOpSupported (below) to validate input and output types. // For other single non-qdq nodes, immediately return not supported. - if (node_unit.OpType() != "Cast") { + if (op_type != "Cast" && op_type != "And" && op_type != "Or") { LOGS(logger, WARNING) << "Non-QDQ " << node_unit.OpType() << " operators are not supported on HTP or DSP backends. " << node_unit.OpType() << " node `" << node_unit.Name() << " will not be assigned to QNN EP."; @@ -181,14 +175,14 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp } // Non-NPU backend, quantized model not supported, but a QDQ node encountered - if (!is_npu_backend && IsQdqNode(node_unit)) { + if (!is_npu_backend && is_qdq_node) { LOGS(logger, ERROR) << "QDQ models are only supported on HTP or DSP backends. " << node_unit.OpType() << " node `" << node_unit.Name() << "` will not be assigned to QNN EP."; return false; } bool supported = false; - const auto* op_builder = qnn::GetOpBuilder(node_unit.OpType()); + const auto* op_builder = qnn::GetOpBuilder(op_type); if (op_builder == nullptr) { LOGS(logger, WARNING) << "Operators of type `" << node_unit.OpType() << "` are not supported by QNN EP." 
<< node_unit.OpType() << " node `" << node_unit.Name() diff --git a/onnxruntime/test/optimizer/graph_transform_test_builder.h b/onnxruntime/test/optimizer/graph_transform_test_builder.h index d0be5aa201671..361903c386dd5 100644 --- a/onnxruntime/test/optimizer/graph_transform_test_builder.h +++ b/onnxruntime/test/optimizer/graph_transform_test_builder.h @@ -219,6 +219,15 @@ class ModelTestBuilder { return &graph_.GetOrCreateNodeArg(name, nullptr); } + NodeArg* MakeRandInitializerBool(const std::vector& shape) { + std::vector data_uint8 = rand_gen_.Uniform(shape, 0, 1); + std::vector data; + for (uint8_t x : data_uint8) { + data.push_back(x != 0); + } + return MakeInitializerBool(shape, data); + } + template NodeArg* MakeInitializer(const std::vector& shape, T min, T max) { return MakeInitializer(shape, rand_gen_.Uniform(shape, min, max)); diff --git a/onnxruntime/test/optimizer/qdq_test_utils.h b/onnxruntime/test/optimizer/qdq_test_utils.h index 62dd322f292f2..7f6865a89e6e6 100644 --- a/onnxruntime/test/optimizer/qdq_test_utils.h +++ b/onnxruntime/test/optimizer/qdq_test_utils.h @@ -91,102 +91,6 @@ GetQDQTestCaseFn BuildQDQConvTransposeTestCase(const std::vector& input }; } -// Creates the following graph: -// _______________________ -// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) -// axes (int32, initializer) -> | Gather | -// |_______________________| -// -template -GetQDQTestCaseFn BuildQDQGatherOpTestCase(const std::vector& input_shape, - const std::vector indices, - const std::vector& indices_shape, - int64_t axis) { - return [input_shape, indices, indices_shape, axis](ModelTestBuilder& builder) { - auto* input_data = builder.MakeInput(input_shape, -1.0f, 1.0f); - auto* final_output = builder.MakeOutput(); - - // input_data -> Q/DQ -> - auto* input_qdq_output = AddQDQNodePair(builder, input_data, .003f, 1); - - auto* indices_input = builder.MakeInitializer(indices_shape, indices); - - auto* gather_output = builder.MakeIntermediate(); - Node& 
gather_node = builder.AddNode("Gather", {input_qdq_output, indices_input}, {gather_output}); - gather_node.AddAttribute("axis", axis); - - // -> Q/DQ -> final_output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(gather_output, .003f, 1, - q_output); - - builder.AddDequantizeLinearNode(q_output, .003f, 1, - final_output); - }; -} - -// Creates the following graph: -// _______________________ -// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) -// axes (int32, initializer) -> | Gather | -// |_______________________| -// -template -GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector& input_shape, - const IndicesType indices, - int64_t axis) { - return [input_shape, indices, axis](ModelTestBuilder& builder) { - auto* input_data = builder.MakeInput(input_shape, -1.0f, 1.0f); - auto* final_output = builder.MakeOutput(); - - // input_data -> Q/DQ -> - auto* input_qdq_output = AddQDQNodePair(builder, input_data, .003f, 1); - - auto* indices_input = builder.MakeScalarInitializer(indices); - - auto* gather_output = builder.MakeIntermediate(); - Node& gather_node = builder.AddNode("Gather", {input_qdq_output, indices_input}, {gather_output}); - gather_node.AddAttribute("axis", axis); - - // -> Q/DQ -> final_output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(gather_output, .003f, 1, - q_output); - - builder.AddDequantizeLinearNode(q_output, .003f, 1, - final_output); - }; -} - -// Creates the following graph: -// _______________________ -// | | -// input (f32) -> Q -> DQ -> | LeakyRelu | -> Q -> DQ -> output (f32) -// |_______________________| -// -template -GetQDQTestCaseFn BuildQDQLeakyReluOpTestCase(const std::vector& input_shape) { - return [input_shape](ModelTestBuilder& builder) { - auto* input_data = builder.MakeInput(input_shape, -1.0f, 1.0f); - auto* final_output = builder.MakeOutput(); - - // input_data -> Q/DQ -> - auto* input_qdq_output = AddQDQNodePair(builder, 
input_data, 0.0473f, 137); - - auto* leakyrelu_output = builder.MakeIntermediate(); - Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input_qdq_output}, {leakyrelu_output}); - leakyrelu_node.AddAttribute("alpha", 0.2f); - - // -> Q/DQ -> final_output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(leakyrelu_output, 0.02696f, 48, - q_output); - - builder.AddDequantizeLinearNode(q_output, 0.02696f, 48, - final_output); - }; -} - template GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector& input_shape, const std::vector& weights_shape) { return [input_shape, weights_shape](ModelTestBuilder& builder) { diff --git a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc index 66b53109d7f05..e579e3274e699 100644 --- a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc +++ b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc @@ -20,21 +20,29 @@ static GetTestModelFn BuildArgMxxTestCase(const std::string& op_type, TestInputD const std::vector& attrs) { return [op_type, input_def, attrs](ModelTestBuilder& builder) { auto* input = MakeTestInput(builder, input_def); - auto* output = builder.MakeOutput(); - Node& argm_node = builder.AddNode(op_type, {input}, {output}); + auto* argm_output = builder.MakeIntermediate(); + Node& argm_node = builder.AddNode(op_type, {input}, {argm_output}); for (const auto& attr : attrs) { argm_node.AddAttributeProto(attr); } + + // Add cast to uint32 + auto* output = builder.MakeOutput(); + Node& cast_node = builder.AddNode("Cast", {argm_output}, {output}); + const auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT32; + cast_node.AddAttribute("to", static_cast(dst_type)); }; } // Builds a QDQ model with ArgMin/ArgMax and a Cast to uint32. The quantization parameters are computed from the provided // input definition. 
template -static GetTestModelFn BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef input_def, - const std::vector& attrs) { - return [op_type, input_def, attrs](ModelTestBuilder& builder) { +static GetTestQDQModelFn BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef input_def, + const std::vector& attrs) { + return [op_type, input_def, attrs](ModelTestBuilder& builder, + std::vector>& output_qparams) { + ORT_UNUSED_PARAMETER(output_qparams); QuantParams input_qparams = GetTestInputQuantParams(input_def); auto* input = MakeTestInput(builder, input_def); @@ -75,8 +83,8 @@ static void RunCPUArgMxxOpTest(const std::string& op_type, TestInputDef i expected_ep_assignment); } -// Runs an ArgMax/ArgMin model on the QNN CPU backend. Checks the graph node assignment, and that inference -// outputs for QNN EP and CPU EP match. +// Runs a QDQ ArgMax/ArgMin model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment, and that inference +// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (when compared to the baseline float32 model). 
template static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef input_def, const std::vector& attrs, @@ -90,10 +98,12 @@ static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef i provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQArgMxxTestCase(op_type, input_def, attrs), - provider_options, - opset, - expected_ep_assignment); + TestQDQModelAccuracy(BuildArgMxxTestCase(op_type, input_def, attrs), // baseline float32 model + BuildQDQArgMxxTestCase(op_type, input_def, attrs), // QDQ model + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // diff --git a/onnxruntime/test/providers/qnn/average_pool_test.cc b/onnxruntime/test/providers/qnn/average_pool_test.cc index c501622b5bb61..114802d56cfd3 100644 --- a/onnxruntime/test/providers/qnn/average_pool_test.cc +++ b/onnxruntime/test/providers/qnn/average_pool_test.cc @@ -17,16 +17,15 @@ namespace onnxruntime { namespace test { // Returns a function that creates a graph with a single AveragePool operator. -static GetTestModelFn BuildAveragePoolTestCase(const std::vector& shape, +static GetTestModelFn BuildAveragePoolTestCase(const TestInputDef& input_def, const std::vector& kernel_shape, const std::vector& strides, const std::vector& pads, int64_t count_include_pad, const std::string& auto_pad = "NOTSET") { - return [shape, kernel_shape, strides, pads, + return [input_def, kernel_shape, strides, pads, count_include_pad, auto_pad](ModelTestBuilder& builder) { - // Random input data - auto input = builder.MakeInput(shape, 0.0f, 10.0f); + auto* input = MakeTestInput(builder, input_def); auto* output = builder.MakeOutput(); Node& pool_node = builder.AddNode("AveragePool", {input}, {output}); @@ -51,26 +50,20 @@ static GetTestModelFn BuildAveragePoolTestCase(const std::vector& shape // Returns a function that creates a graph with a QDQ AveragePool operator. 
template -GetQDQTestCaseFn BuildAveragePoolQDQTestCase(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - int64_t count_include_pad, - const std::string& auto_pad = "NOTSET") { - return [shape, kernel_shape, strides, pads, - count_include_pad, auto_pad](ModelTestBuilder& builder) { - float dq_scale = 0.0038f; - float pool_output_scale = 0.0038f; - float q_scale = 0.0038f; - QuantType dq_zp = std::numeric_limits::max() / 2; - QuantType pool_output_zp = std::numeric_limits::max() / 2; - QuantType q_zp = std::numeric_limits::max() / 2; - - auto* input_arg = builder.MakeInput(shape, -1.0f, 1.0f); - auto* output_arg = builder.MakeOutput(); +GetTestQDQModelFn BuildAveragePoolQDQTestCase(const TestInputDef& input_def, + const std::vector& kernel_shape, + const std::vector& strides, + const std::vector& pads, + int64_t count_include_pad, + const std::string& auto_pad = "NOTSET") { + return [input_def, kernel_shape, strides, pads, + count_include_pad, auto_pad](ModelTestBuilder& builder, + std::vector>& output_qparams) { + auto* input_arg = MakeTestInput(builder, input_def); // add QDQ + AveragePool - auto* dq_output = AddQDQNodePair(builder, input_arg, dq_scale, dq_zp); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* dq_output = AddQDQNodePair(builder, input_arg, input_qparams.scale, input_qparams.zero_point); auto* averagepool_output = builder.MakeIntermediate(); Node& pool_node = builder.AddNode("AveragePool", {dq_output}, {averagepool_output}); @@ -90,22 +83,15 @@ GetQDQTestCaseFn BuildAveragePoolQDQTestCase(const std::vector& shape, pool_node.AddAttribute("count_include_pad", count_include_pad); } - // add QDQ output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(averagepool_output, - pool_output_scale, - pool_output_zp, - q_output); - builder.AddDequantizeLinearNode(q_output, - q_scale, - q_zp, - output_arg); + // op_output -> Q -> DQ -> 
output + AddQDQNodePairWithOutputAsGraphOutput(builder, averagepool_output, + output_qparams[0].scale, output_qparams[0].zero_point); }; } // Runs an AveragePool model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. -static void RunAveragePoolOpTest(const std::vector& shape, +static void RunAveragePoolOpTest(const TestInputDef& input_def, const std::vector& kernel_shape, const std::vector& strides, const std::vector& pads, @@ -120,16 +106,16 @@ static void RunAveragePoolOpTest(const std::vector& shape, provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildAveragePoolTestCase(shape, kernel_shape, strides, pads, count_include_pad, auto_pad), + RunQnnModelTest(BuildAveragePoolTestCase(input_def, kernel_shape, strides, pads, count_include_pad, auto_pad), provider_options, opset, expected_ep_assignment); } -// Runs a QDQ AveragePool model on the QNN HTP backend. Checks the graph node assignment, and that inference -// outputs for QNN and CPU match. +// Runs a QDQ AveragePool model on the QNN HTP backend. Checks the graph node assignment, and that accuracy +// on QNN EP is at least as good as on CPU EP. 
template -static void RunQDQAveragePoolOpTest(const std::vector& shape, +static void RunQDQAveragePoolOpTest(const TestInputDef& input_def, const std::vector& kernel_shape, const std::vector& strides, const std::vector& pads, @@ -144,12 +130,13 @@ static void RunQDQAveragePoolOpTest(const std::vector& shape, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildAveragePoolQDQTestCase(shape, kernel_shape, strides, pads, count_include_pad, - auto_pad), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(BuildAveragePoolTestCase(input_def, kernel_shape, strides, pads, count_include_pad, auto_pad), + BuildAveragePoolQDQTestCase(input_def, kernel_shape, strides, pads, count_include_pad, + auto_pad), + provider_options, + opset, + expected_ep_assignment, + fp32_abs_err); } // @@ -157,45 +144,45 @@ static void RunQDQAveragePoolOpTest(const std::vector& shape, // // AveragePool with kernel size equal to the spatial dimension of input tensor. -TEST_F(QnnCPUBackendTests, TestAveragePool_Global) { - RunAveragePoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - 0, // count_include_pad +TEST_F(QnnCPUBackendTests, AveragePool_Global) { + RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input + {3, 3}, // kernel_shape + {3, 3}, // strides + {0, 0, 0, 0}, // pads + 0, // count_include_pad "NOTSET", ExpectedEPNodeAssignment::All); } // AveragePool that counts padding. 
-TEST_F(QnnCPUBackendTests, TestAveragePool_CountIncludePad) { - RunAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {0, 0, 0, 0}, // pads - 1, // count_include_pad +TEST_F(QnnCPUBackendTests, AveragePool_CountIncludePad) { + RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input + {1, 1}, // kernel_shape + {1, 1}, // strides + {0, 0, 0, 0}, // pads + 1, // count_include_pad "NOTSET", ExpectedEPNodeAssignment::All); } // AveragePool that use auto_pad 'SAME_UPPER'. -TEST_F(QnnCPUBackendTests, TestAveragePool_AutopadSameUpper) { - RunAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 1, // count_include_pad +TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameUpper) { + RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input + {1, 1}, // kernel_shape + {1, 1}, // strides + {}, // pads + 1, // count_include_pad "SAME_UPPER", ExpectedEPNodeAssignment::All); } // AveragePool that use auto_pad 'SAME_LOWER'. -TEST_F(QnnCPUBackendTests, TestAveragePool_AutopadSameLower) { - RunAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 1, // count_include_pad +TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameLower) { + RunAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // random input + {1, 1}, // kernel_shape + {1, 1}, // strides + {}, // pads + 1, // count_include_pad "SAME_LOWER", ExpectedEPNodeAssignment::All); } @@ -206,8 +193,10 @@ TEST_F(QnnCPUBackendTests, TestAveragePool_AutopadSameLower) { // // QDQ AveragePool with kernel size equal to the spatial dimension of input tensor. 
-TEST_F(QnnHTPBackendTests, TestAveragePool_Global_HTP_u8) { - RunQDQAveragePoolOpTest({1, 2, 3, 3}, // shape +TEST_F(QnnHTPBackendTests, AveragePool_Global_HTP) { + std::vector input = {32.1289f, -59.981f, -17.2799f, 62.7263f, 33.6205f, -19.3515f, -54.0113f, 37.5648f, 61.5357f, + -52.5769f, 27.3637f, -9.01382f, -65.5612f, 19.9497f, -47.9228f, 26.9813f, 83.064f, 0.362503f}; + RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), {3, 3}, // kernel_shape {3, 3}, // strides {0, 0, 0, 0}, // pads @@ -217,39 +206,48 @@ TEST_F(QnnHTPBackendTests, TestAveragePool_Global_HTP_u8) { } // QDQ AveragePool that counts padding. -TEST_F(QnnHTPBackendTests, TestAveragePool_CountIncludePad_HTP_u8) { - RunQDQAveragePoolOpTest({1, 2, 3, 3}, // shape +TEST_F(QnnHTPBackendTests, AveragePool_CountIncludePad_HTP_u8) { + std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + + RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), {1, 1}, // kernel_shape {1, 1}, // strides {0, 0, 0, 0}, // pads 1, // count_include_pad "NOTSET", ExpectedEPNodeAssignment::All, - 18, 0.00381f); + 18); } // QDQ AveragePool that use auto_pad 'SAME_UPPER'. -TEST_F(QnnHTPBackendTests, TestAveragePool_AutopadSameUpper_HTP_u8) { - RunQDQAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 0, // count_include_pad +TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameUpper_HTP_u8) { + std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + + RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), + {1, 1}, // kernel_shape + {1, 1}, // strides + {}, // pads + 0, // count_include_pad "SAME_UPPER", ExpectedEPNodeAssignment::All, - 18, 0.00381f); + 18); } // QDQ AveragePool that use auto_pad 'SAME_LOWER'. 
-TEST_F(QnnHTPBackendTests, TestAveragePool_AutopadSameLower_HTP_u8) { - RunQDQAveragePoolOpTest({1, 2, 3, 3}, // shape - {1, 1}, // kernel_shape - {1, 1}, // strides - {}, // pads - 0, // count_include_pad +TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameLower_HTP_u8) { + std::vector input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + + RunQDQAveragePoolOpTest(TestInputDef({1, 2, 3, 3}, false, input), + {1, 1}, // kernel_shape + {1, 1}, // strides + {}, // pads + 0, // count_include_pad "SAME_LOWER", ExpectedEPNodeAssignment::All, - 18, 0.00381f); + 18); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc index d69ce53d41b5c..9a4021c5563c8 100644 --- a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc +++ b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc @@ -15,53 +15,133 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -// Creates the graph: -// _______________________ -// input_u8 -> DQ -> | | -// scale_u8 (initializer) -> DQ -> | | -// bias_u8 (initializer) -> DQ -> | BatchNormalization | -> Q -> output_u8 -// mean_u8 (initializer) -> DQ -> | | -// var_u8 (initializer) -> DQ -> |_______________________| -// -// Currently used to test QNN EP. +// Computes the mean and variance of inputs within a channel. 
+// Requires an input with rank >= 3 +static void ComputeChannelMeanAndVar(const std::vector& input_data, const std::vector& input_shape, + std::vector& mean_vals, std::vector& var_vals) { + const size_t input_rank = input_shape.size(); + const size_t num_batches = input_shape[0]; + const size_t num_channels = input_shape[1]; + + size_t batch_stride = 1; + for (size_t i = 1; i < input_rank; i++) { + batch_stride *= input_shape[i]; + } + const size_t channel_stride = batch_stride / num_channels; + + assert(mean_vals.size() == num_channels); + assert(var_vals.size() == num_channels); + for (size_t i = 0; i < num_channels; i++) { + mean_vals[i] = 0.0f; + var_vals[i] = 0.0f; + } + + // Compute running sum of elements within each channel. The running sum is stored in the mean_vals array directly. + for (size_t b = 0; b < num_batches; b++) { + const size_t batch_start = b * batch_stride; + + for (size_t c = 0; c < num_channels; c++) { + const size_t chan_start = batch_start + (c * channel_stride); + + for (size_t i = chan_start; i < chan_start + channel_stride; i++) { + mean_vals[c] += input_data[i]; + } + } + } + + // Divide sums by the number of elements in a channel to get the mean. + for (size_t c = 0; c < num_channels; c++) { + mean_vals[c] /= static_cast(num_batches * channel_stride); + } + + // Compute running sum of deviations from mean within each channel. The running sum is stored in the var_vals array directly. + for (size_t b = 0; b < num_batches; b++) { + const size_t batch_start = b * batch_stride; + + for (size_t c = 0; c < num_channels; c++) { + const size_t chan_start = batch_start + (c * channel_stride); + + for (size_t i = chan_start; i < chan_start + channel_stride; i++) { + const float deviation = input_data[i] - mean_vals[c]; + var_vals[c] += (deviation * deviation); + } + } + } + + // Divide sums by the number of elements in a channel to get the variance. 
+ for (size_t c = 0; c < num_channels; c++) { + var_vals[c] /= static_cast(num_batches * channel_stride); + } +} + +static GetTestModelFn BuildBatchNormTestCase(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def) { + ORT_ENFORCE(input_def.IsRawData()); // Need raw data to compute mean and variance inputs. + ORT_ENFORCE(input_def.GetShape().size() > 2); // Need at least rank 3 data for convenience. + + return [input_def, scale_def, bias_def](ModelTestBuilder& builder) { + const auto& input_shape = input_def.GetShape(); + const auto& input_data = input_def.GetRawData(); + const int64_t num_channels = input_shape[1]; + + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* scale = MakeTestInput(builder, scale_def); + NodeArg* bias = MakeTestInput(builder, bias_def); + + std::vector mean_vals(num_channels); + std::vector var_vals(num_channels); + ComputeChannelMeanAndVar(input_data, input_shape, mean_vals, var_vals); + + NodeArg* mean = builder.MakeInitializer({num_channels}, mean_vals); + NodeArg* var = builder.MakeInitializer({num_channels}, var_vals); + NodeArg* output = builder.MakeOutput(); + builder.AddNode("BatchNormalization", {input, scale, bias, mean, var}, {output}); + }; +} + template -GetQDQTestCaseFn BuildQDQBatchNormTestCase(const std::vector& input_shape) { - return [input_shape](ModelTestBuilder& builder) { +GetTestQDQModelFn BuildQDQBatchNormTestCase(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def) { + ORT_ENFORCE(input_def.IsRawData()); // Need raw data to compute mean and variance inputs. + ORT_ENFORCE(input_def.GetShape().size() > 2); // Need at least rank 3 data for convenience. 
+ + return [input_def, scale_def, bias_def](ModelTestBuilder& builder, + std::vector>& output_qparams) { + const auto& input_shape = input_def.GetShape(); + const auto& input_data = input_def.GetRawData(); const int64_t num_channels = input_shape[1]; - const InputQType quant_zero_point = 0; - const float quant_scale = 1.0f; - auto* input = builder.MakeInput(input_shape, std::numeric_limits::min(), - std::numeric_limits::max()); - auto* dq_input = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(input, 0.0039f, quant_zero_point, dq_input); + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); - auto* dq_scale_output = builder.MakeIntermediate(); - auto* scale = builder.MakeInitializer({num_channels}, static_cast(1), static_cast(127)); - builder.AddDequantizeLinearNode(scale, 0.0028f, quant_zero_point, dq_scale_output); + NodeArg* scale = MakeTestInput(builder, scale_def); + QuantParams scale_qparams = GetTestInputQuantParams(scale_def); + NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point); - auto* dq_bias_output = builder.MakeIntermediate(); - auto* bias = builder.MakeInitializer({num_channels}, std::vector(num_channels)); - builder.AddDequantizeLinearNode(bias, quant_scale, quant_zero_point, dq_bias_output); + NodeArg* bias = MakeTestInput(builder, bias_def); + QuantParams bias_qparams = GetTestInputQuantParams(bias_def); + NodeArg* bias_qdq = AddQDQNodePair(builder, bias, bias_qparams.scale, bias_qparams.zero_point); - auto* dq_mean_output = builder.MakeIntermediate(); - auto* mean = builder.MakeInitializer({num_channels}, std::vector(num_channels)); - builder.AddDequantizeLinearNode(mean, quant_scale, quant_zero_point, dq_mean_output); + std::vector mean_vals(num_channels); + std::vector var_vals(num_channels); + 
ComputeChannelMeanAndVar(input_data, input_shape, mean_vals, var_vals); - auto* dq_var_output = builder.MakeIntermediate(); - auto* var = builder.MakeInitializer({num_channels}, std::vector(num_channels, 255)); - builder.AddDequantizeLinearNode(var, 0.003921f, 0, dq_var_output); + NodeArg* mean = builder.MakeInitializer({num_channels}, mean_vals); + QuantParams mean_qparams = GetDataQuantParams(mean_vals); + NodeArg* mean_qdq = AddQDQNodePair(builder, mean, mean_qparams.scale, mean_qparams.zero_point); - auto* batchnorm_output = builder.MakeIntermediate(); - builder.AddNode("BatchNormalization", {dq_input, dq_scale_output, dq_bias_output, dq_mean_output, dq_var_output}, {batchnorm_output}); + NodeArg* var = builder.MakeInitializer({num_channels}, var_vals); + QuantParams var_qparams = GetDataQuantParams(var_vals); + NodeArg* var_qdq = AddQDQNodePair(builder, var, var_qparams.scale, var_qparams.zero_point); - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(batchnorm_output, 0.00377f, quant_zero_point, q_output); + auto* batchnorm_output = builder.MakeIntermediate(); + builder.AddNode("BatchNormalization", {input_qdq, scale_qdq, bias_qdq, mean_qdq, var_qdq}, + {batchnorm_output}); - auto* final_output = builder.MakeOutput(); - builder.AddDequantizeLinearNode(q_output, 0.00377f, - quant_zero_point, - final_output); + AddQDQNodePairWithOutputAsGraphOutput(builder, batchnorm_output, output_qparams[0].scale, output_qparams[0].zero_point); }; } @@ -72,7 +152,9 @@ GetQDQTestCaseFn BuildQDQBatchNormTestCase(const std::vector& input_sha * \param input_shape The input's shape. * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). 
*/ -static void RunBatchNormQDQTest(const std::vector& input_shape, +static void RunBatchNormQDQTest(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def, ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) @@ -82,28 +164,49 @@ static void RunBatchNormQDQTest(const std::vector& input_shape, #endif // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs. - RunQnnModelTest(BuildQDQBatchNormTestCase(input_shape), - provider_options, - 11, - expected_ep_assignment); + TestQDQModelAccuracy(BuildBatchNormTestCase(input_def, scale_def, bias_def), + BuildQDQBatchNormTestCase(input_def, scale_def, bias_def), + provider_options, + 11, + expected_ep_assignment, + 1e-5f); } +// TODO: FIX TRANSLATION!!! // Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQBatchNorm1D) { - RunBatchNormQDQTest({1, 2, 3}, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm1D) { + constexpr int64_t num_channels = 2; + + RunBatchNormQDQTest(TestInputDef({1, num_channels, 3}, false, {-5.0f, -4.0f, -3.0f, 0.0f, 2.0f, 5.0f}), // Input data + TestInputDef({num_channels}, true, {1.0f, 2.0f}), // Scale initializer + TestInputDef({num_channels}, true, {1.1f, 2.1f}), // Bias initializer + ExpectedEPNodeAssignment::All); } +// TODO: FIX TRANSLATION!!! // Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit. // Use an input of rank 4. 
-TEST_F(QnnHTPBackendTests, TestQDQBatchNorm2D) { - RunBatchNormQDQTest({2, 3, 4, 5}, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm2D) { + constexpr int64_t num_channels = 2; + std::vector input_data = {-8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 1.1f, 3.3f, 8.0f, + -7.0f, -5.0f, -3.0f, -1.0f, 0.0f, 2.1f, 4.3f, 7.0f}; + + RunBatchNormQDQTest(TestInputDef({2, num_channels, 2, 2}, false, input_data), // Input data + TestInputDef({num_channels}, true, {1.0f, 2.0f}), // Scale initializer + TestInputDef({num_channels}, true, {1.1f, 2.1f}), // Bias initializer + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit. // Use an input of rank 5. QNN BatchNormalization doesn't support 5D on HTP -TEST_F(QnnHTPBackendTests, TestQDQBatchNorm3D) { - RunBatchNormQDQTest({1, 2, 3, 4, 5}, ExpectedEPNodeAssignment::None); +TEST_F(QnnHTPBackendTests, BatchNorm3D) { + constexpr int64_t num_channels = 2; + constexpr int64_t num_elems = 1 * num_channels * 3 * 4 * 5; + RunBatchNormQDQTest(TestInputDef({1, num_channels, 3, 4, 5}, false, std::vector(num_elems)), // Input data (all zeros) + TestInputDef({num_channels}, true, {1.0f, 2.0f}), // Scale initializer + TestInputDef({num_channels}, true, {1.1f, 2.1f}), // Bias initializer + ExpectedEPNodeAssignment::None); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc index ddaf7bbf59ad7..147c1dda13e66 100644 --- a/onnxruntime/test/providers/qnn/conv_test.cc +++ b/onnxruntime/test/providers/qnn/conv_test.cc @@ -142,65 +142,36 @@ static void RunCPUConvOpTest(const std::string& conv_op_type, const TestInputDef // Creates a graph with a single Q/DQ Conv operator. Used for testing HTP backend. 
template -static GetTestModelFn BuildQDQConvTestCase(const std::string& conv_op_type, const TestInputDef& input_def, - const TestInputDef& weights_def, - const TestInputDef& bias_def, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - const std::string& auto_pad = "NOTSET") { - return [conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations, auto_pad](ModelTestBuilder& builder) { - auto* output = builder.MakeOutput(); - - using InputQLimits = std::numeric_limits; - - const float input_scale = 0.004f; - const float weight_scale = 0.004f; - const InputQType io_zp = (InputQLimits::min() + InputQLimits::max()) / 2 + 1; - +static GetTestQDQModelFn BuildQDQConvTestCase(const std::string& conv_op_type, const TestInputDef& input_def, + const TestInputDef& weights_def, + const TestInputDef& bias_def, + const std::vector& strides, + const std::vector& pads, + const std::vector& dilations, + const std::string& auto_pad = "NOTSET") { + return [conv_op_type, input_def, weights_def, bias_def, strides, pads, + dilations, auto_pad](ModelTestBuilder& builder, + std::vector>& output_qparams) { std::vector conv_inputs; // input -> Q/DQ -> auto* input = MakeTestInput(builder, input_def); - auto* input_qdq = AddQDQNodePair(builder, input, input_scale, io_zp); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); conv_inputs.push_back(input_qdq); // weights -> Q/DQ -> auto* weights = MakeTestInput(builder, weights_def); - auto* weights_qdq = AddQDQNodePair(builder, weights, weight_scale, io_zp); + QuantParams weights_qparams = GetTestInputQuantParams(weights_def); + auto* weights_qdq = AddQDQNodePair(builder, weights, weights_qparams.scale, weights_qparams.zero_point); conv_inputs.push_back(weights_qdq); // bias -> if (!bias_def.GetShape().empty()) { - NodeArg* bias_int32 = nullptr; - const float bias_scale = input_scale 
* weight_scale; // Taken from python quantization tool: onnx_quantizer.py::quantize_bias_static() - - // Bias must be int32 to be detected as a QDQ node unit. - // We must quantize the data. - if (bias_def.IsRandomData()) { - // Create random initializer def that is quantized to int32 - const auto& rand_info = bias_def.GetRandomDataInfo(); - TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), static_cast(rand_info.min / bias_scale), - static_cast(rand_info.max / bias_scale)); - bias_int32 = MakeTestInput(builder, bias_int32_def); - } else { - assert(bias_def.IsRawData()); - // Create raw data initializer def that is quantized to int32 - const auto& bias_f32_raw = bias_def.GetRawData(); - const size_t num_elems = bias_f32_raw.size(); - - std::vector bias_int32_raw(num_elems); - for (size_t i = 0; i < num_elems; i++) { - bias_int32_raw[i] = static_cast(bias_f32_raw[i] / bias_scale); - } - - TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), bias_int32_raw); - bias_int32 = MakeTestInput(builder, bias_int32_def); - } - - auto* bias = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(bias_int32, bias_scale, 0, bias); - conv_inputs.push_back(bias); + // Bias requirement taken from python quantization tool: onnx_quantizer.py::quantize_bias_static() + const float bias_scale = input_qparams.scale * weights_qparams.scale; + + conv_inputs.push_back(MakeTestQDQBiasInput(builder, bias_def, bias_scale)); } auto* conv_output = builder.MakeIntermediate(); @@ -218,9 +189,7 @@ static GetTestModelFn BuildQDQConvTestCase(const std::string& conv_op_type, cons conv_node.AddAttribute("dilations", dilations); } - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(conv_output, input_scale, io_zp, q_output); - builder.AddDequantizeLinearNode(q_output, input_scale, io_zp, output); + AddQDQNodePairWithOutputAsGraphOutput(builder, conv_output, output_qparams[0].scale, output_qparams[0].zero_point); }; } @@ 
-245,18 +214,19 @@ static void RunHTPConvOpTest(const std::string& conv_op_type, const TestInputDef provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQConvTestCase(conv_op_type, input_def, weights_def, bias_def, - strides, pads, dilations, auto_pad), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(BuildF32ConvTestCase(conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations, auto_pad), + BuildQDQConvTestCase(conv_op_type, input_def, weights_def, bias_def, + strides, pads, dilations, auto_pad), + provider_options, + opset, + expected_ep_assignment, + fp32_abs_err); } // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as a dynamic input. // TODO: Segfaults when calling graphFinalize(). -TEST_F(QnnCPUBackendTests, DISABLED_TestCPUConvf32_dynamic_bias) { +TEST_F(QnnCPUBackendTests, DISABLED_Convf32_dynamic_bias) { RunCPUConvOpTest("Conv", TestInputDef({1, 1, 3, 3}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({2, 1, 2, 2}, true, 0.0f, 1.0f), // Random static weights @@ -270,7 +240,7 @@ TEST_F(QnnCPUBackendTests, DISABLED_TestCPUConvf32_dynamic_bias) { // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as an initializer. -TEST_F(QnnCPUBackendTests, TestCPUConvf32_bias_initializer) { +TEST_F(QnnCPUBackendTests, Convf32_bias_initializer) { RunCPUConvOpTest("Conv", TestInputDef({1, 1, 3, 3}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({2, 1, 2, 2}, true, 0.0f, 1.0f), // Random static weights @@ -283,7 +253,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_bias_initializer) { } // Tests Conv's auto_pad value "SAME_UPPER" (compares to CPU EP). 
-TEST_F(QnnCPUBackendTests, TestCPUConvf32_AutoPadUpper) { +TEST_F(QnnCPUBackendTests, Convf32_AutoPadUpper) { RunCPUConvOpTest("Conv", TestInputDef({1, 1, 3, 3}, false, -3.0f, 3.0f), // Random dynamic input TestInputDef({2, 1, 2, 2}, true, -1.0f, 1.0f), // Random static weights @@ -296,7 +266,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_AutoPadUpper) { } // Tests ConvTranspose's auto_pad value "SAME_UPPER" (compares to CPU EP). -TEST_F(QnnCPUBackendTests, TestCPUConvTransposef32_AutoPadUpper) { +TEST_F(QnnCPUBackendTests, ConvTransposef32_AutoPadUpper) { RunCPUConvOpTest("ConvTranspose", TestInputDef({1, 1, 3, 3}, false, -3.0f, 3.0f), // Random dynamic input TestInputDef({1, 2, 2, 2}, true, -1.0f, 1.0f), // Random static weights @@ -309,7 +279,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvTransposef32_AutoPadUpper) { } // Tests Conv's auto_pad value "SAME_LOWER" (compares to CPU EP). -TEST_F(QnnCPUBackendTests, TestCPUConvf32_AutoPadLower) { +TEST_F(QnnCPUBackendTests, Convf32_AutoPadLower) { RunCPUConvOpTest("Conv", TestInputDef({1, 1, 3, 3}, false, -3.0f, 3.0f), // Random dynamic input TestInputDef({2, 1, 2, 2}, false, -1.0f, 1.0f), // Random dynamic weights @@ -322,7 +292,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_AutoPadLower) { } // Tests ConvTranspose's auto_pad value "SAME_LOWER" (compares to CPU EP). 
-TEST_F(QnnCPUBackendTests, TestCPUConvTransposef32_AutoPadLower) { +TEST_F(QnnCPUBackendTests, ConvTransposef32_AutoPadLower) { RunCPUConvOpTest("ConvTranspose", TestInputDef({1, 1, 3, 3}, false, -3.0f, 3.0f), // Random dynamic input TestInputDef({1, 2, 2, 2}, false, -1.0f, 1.0f), // Random dynamic weights @@ -335,7 +305,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvTransposef32_AutoPadLower) { } // large input,output, pads -TEST_F(QnnCPUBackendTests, TestCPUConvf32_large_input1_pad_bias_initializer) { +TEST_F(QnnCPUBackendTests, Convf32_large_input1_pad_bias_initializer) { RunCPUConvOpTest("Conv", TestInputDef({1, 3, 60, 452}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({16, 3, 3, 3}, true, 0.0f, 1.0f), // Random dynamic weights @@ -349,7 +319,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_large_input1_pad_bias_initializer) { 1e-4f); } -TEST_F(QnnCPUBackendTests, TestCPUConvf32_large_input2_nopad_bias_initializer) { +TEST_F(QnnCPUBackendTests, Convf32_large_input2_nopad_bias_initializer) { #if defined(_WIN32) // Tolerance needs to be > 1.52588e-05 on Windows x64 // TODO: Investigate why @@ -372,7 +342,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvf32_large_input2_nopad_bias_initializer) { } // Test 1D Conv with static weights (implemented in QNN EP as 2D convolution with height of 1). -TEST_F(QnnCPUBackendTests, TestCPUConv1Df32_StaticWeights_DefaultBias) { +TEST_F(QnnCPUBackendTests, Conv1Df32_StaticWeights_DefaultBias) { RunCPUConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input TestInputDef({1, 2, 2}, true, {1.0f, 2.0f, 3.0f, 4.0f}), // Static weights @@ -385,7 +355,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConv1Df32_StaticWeights_DefaultBias) { } // Test 1D Conv with dynamic weights (implemented in QNN EP as 2D convolution with height of 1). 
-TEST_F(QnnCPUBackendTests, TestCPUConv1Df32_DynamicWeights_DefaultBias) { +TEST_F(QnnCPUBackendTests, Conv1Df32_DynamicWeights_DefaultBias) { RunCPUConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input TestInputDef({1, 2, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f}), // Dynamic weights @@ -398,7 +368,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConv1Df32_DynamicWeights_DefaultBias) { } // Test 1D ConvTranspose with static weights (implemented in QNN EP as 2D convolution with height of 1). -TEST_F(QnnCPUBackendTests, TestCPUConvTranspose1Df32_StaticWeights_DefaultBias) { +TEST_F(QnnCPUBackendTests, ConvTranspose1Df32_StaticWeights_DefaultBias) { RunCPUConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input TestInputDef({2, 1, 2}, true, {1.0f, 2.0f, 3.0f, 4.0f}), // Static weights @@ -411,7 +381,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvTranspose1Df32_StaticWeights_DefaultBias) } // Test 1D ConvTranspose with dynamic weights (implemented in QNN EP as 2D convolution with height of 1). -TEST_F(QnnCPUBackendTests, TestCPUConvTranspose1Df32_DynamicWeights_DefaultBias) { +TEST_F(QnnCPUBackendTests, ConvTranspose1Df32_DynamicWeights_DefaultBias) { RunCPUConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input TestInputDef({2, 1, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f}), // Dynamic weights @@ -427,7 +397,7 @@ TEST_F(QnnCPUBackendTests, TestCPUConvTranspose1Df32_DynamicWeights_DefaultBias) // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as a dynamic input. 
-TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_bias_dynamic_input) { +TEST_F(QnnHTPBackendTests, ConvU8S32_bias_dynamic_input) { RunHTPConvOpTest("Conv", TestInputDef({1, 1, 5, 5}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({1, 1, 3, 3}, true, -10.0f, 10.0f), // Random static input @@ -441,35 +411,35 @@ TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_bias_dynamic_input) { // Test that dynamic weights with default bias works for Conv. This was previously not working // on older versions of QNN sdk. -TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_DynamicWeight_NoBias) { +TEST_F(QnnHTPBackendTests, ConvU8S32_DynamicWeight_NoBias) { RunHTPConvOpTest("Conv", - TestInputDef({1, 3, 32, 32}, false, 0.0f, 10.0f), // Random dynamic input - TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights - TestInputDef(), // Default bias - {1, 1}, // Strides - {0, 0, 0, 0}, // Pads - {1, 1}, // Dilations + TestInputDef({1, 3, 32, 32}, false, -10.0f, 10.0f), // Random dynamic input + TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights + TestInputDef(), // Default bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations "NOTSET", ExpectedEPNodeAssignment::All); } // Test that dynamic weights with default bias works for ConvTranspose. This was previously not working // on older versions of QNN sdk. 
-TEST_F(QnnHTPBackendTests, TestQDQConvTransposeU8S32_DynamicWeight_NoBias) { +TEST_F(QnnHTPBackendTests, ConvTransposeU8S32_DynamicWeight_NoBias) { RunHTPConvOpTest("ConvTranspose", - TestInputDef({1, 3, 32, 32}, false, 0.0f, 100.0f), // Random dynamic input - TestInputDef({3, 1, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights - TestInputDef(), // Default bias - {1, 1}, // Strides - {0, 0, 0, 0}, // Pads - {1, 1}, // Dilations + TestInputDef({1, 3, 32, 32}, false, -10.0f, 10.0f), // Random dynamic input + TestInputDef({3, 1, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights + TestInputDef(), // Default bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations "NOTSET", ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as an initializer. -TEST_F(QnnHTPBackendTests, TestQDQConvU8U8S32_bias_initializer) { +TEST_F(QnnHTPBackendTests, ConvU8U8S32_bias_initializer) { RunHTPConvOpTest("Conv", TestInputDef({1, 1, 5, 5}, false, 0.0f, 10.0f), // Random dynamic input TestInputDef({1, 1, 3, 3}, true, -10.0f, 10.0f), // Random static weight @@ -482,7 +452,7 @@ TEST_F(QnnHTPBackendTests, TestQDQConvU8U8S32_bias_initializer) { } // Tests 1D Conv with bias as an initializer. -TEST_F(QnnHTPBackendTests, TestQDQConv1DU8S32_bias_initializer) { +TEST_F(QnnHTPBackendTests, Conv1DU8S32_bias_initializer) { RunHTPConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -495,7 +465,7 @@ TEST_F(QnnHTPBackendTests, TestQDQConv1DU8S32_bias_initializer) { } // Tests 1D ConvTranspose with bias as an initializer. 
-TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8S32_bias_initializer) { +TEST_F(QnnHTPBackendTests, ConvTranspose1DU8S32_bias_initializer) { RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -508,7 +478,7 @@ TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8S32_bias_initializer) { } // Tests auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_AutoPadUpper) { +TEST_F(QnnHTPBackendTests, ConvU8S32_AutoPadUpper) { RunHTPConvOpTest("Conv", TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights @@ -518,12 +488,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvU8S32_AutoPadUpper) { {1, 1}, // dilations "SAME_UPPER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests Conv1d auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConv1DU8U8S32_AutoPadUpper) { +TEST_F(QnnHTPBackendTests, Conv1DU8U8S32_AutoPadUpper) { RunHTPConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -533,12 +502,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConv1DU8U8S32_AutoPadUpper) { {1}, // dilations "SAME_UPPER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests TransposeConv1d auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). 
-TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8U8S32_AutoPadUpper) { +TEST_F(QnnHTPBackendTests, ConvTranspose1DU8U8S32_AutoPadUpper) { RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -548,12 +516,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8U8S32_AutoPadUpper) { {1}, // dilations "SAME_UPPER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests Conv's auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConvU8U8S32_AutoPadLower) { +TEST_F(QnnHTPBackendTests, ConvU8U8S32_AutoPadLower) { RunHTPConvOpTest("Conv", TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights @@ -563,12 +530,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvU8U8S32_AutoPadLower) { {1, 1}, // dilations "SAME_LOWER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests ConvTranspose's auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConvTransposeU8U8S32_AutoPadLower) { +TEST_F(QnnHTPBackendTests, ConvTransposeU8U8S32_AutoPadLower) { RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights @@ -578,12 +544,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvTransposeU8U8S32_AutoPadLower) { {1, 1}, // dilations "SAME_LOWER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests Conv1d auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). 
-TEST_F(QnnHTPBackendTests, TestQDQConv1DU8U8S32_AutoPadLower) { +TEST_F(QnnHTPBackendTests, Conv1DU8U8S32_AutoPadLower) { RunHTPConvOpTest("Conv", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -593,12 +558,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConv1DU8U8S32_AutoPadLower) { {1}, // dilations "SAME_LOWER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // Tests ConvTranspose 1d auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). -TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8U8S32_AutoPadLower) { +TEST_F(QnnHTPBackendTests, ConvTranspose1DU8U8S32_AutoPadLower) { RunHTPConvOpTest("ConvTranspose", TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight @@ -608,12 +572,11 @@ TEST_F(QnnHTPBackendTests, TestQDQConvTranspose1DU8U8S32_AutoPadLower) { {1}, // dilations "SAME_LOWER", // auto_pad ExpectedEPNodeAssignment::All, - 13, - 1e-4f); + 13); } // TODO: re-enable tests once HTP issues are resolved -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8U8S32_large_input1_padding_bias_initializer) { +TEST_F(QnnHTPBackendTests, DISABLED_ConvU8U8S32_large_input1_padding_bias_initializer) { RunHTPConvOpTest("Conv", TestInputDef({1, 3, 60, 452}, false, 0.f, 10.f), // Dynamic input TestInputDef({16, 3, 3, 3}, true, -1.f, 1.f), // Static weights @@ -625,7 +588,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8U8S32_large_input1_padding_bias ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8S32_large_input2_bias_initializer) { +TEST_F(QnnHTPBackendTests, DISABLED_ConvU8S32_large_input2_bias_initializer) { RunHTPConvOpTest("Conv", TestInputDef({1, 128, 8, 56}, false, 0.f, 10.f), // Dynamic input TestInputDef({32, 128, 1, 1}, true, -1.f, 1.f), // Random static weights @@ -638,7 +601,7 @@ 
TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8S32_large_input2_bias_initializ } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQConvU8U8S32_LargeInput_Dilations_Pads) { +TEST_F(QnnHTPBackendTests, DISABLED_ConvU8U8S32_LargeInput_Dilations_Pads) { RunHTPConvOpTest("Conv", TestInputDef({1, 3, 768, 1152}, false, 0.f, 10.f), // Dynamic input TestInputDef({64, 3, 7, 7}, true, -1.f, 1.f), // Random static weights diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc index 3571cdff9b6cc..d2ca9d8ff71e0 100644 --- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc @@ -6,7 +6,6 @@ #include #include "core/graph/graph.h" -#include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" #include "gtest/gtest.h" @@ -15,17 +14,47 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -/** - * Runs a Gather op model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param opset The opset version. - * \param scalar_indices whether the incidices input is scalar or not. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) - */ +// Function that builds a float model with a Gather op. 
+template +static GetTestModelFn BuildGatherOpTestCase(const TestInputDef& input_def, + const TestInputDef& indices_def, + int64_t axis = 0) { + return [input_def, indices_def, axis](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* indices = MakeTestInput(builder, indices_def); + NodeArg* output = builder.MakeOutput(); + + Node& gather_node = builder.AddNode("Gather", {input, indices}, {output}); + gather_node.AddAttribute("axis", axis); + }; +} + +// Function that builds a QDQ model with a Gather op. +template +static GetTestQDQModelFn BuildQDQGatherOpTestCase(const TestInputDef& input_def, + const TestInputDef& indices_def, + int64_t axis = 0) { + return [input_def, indices_def, axis](ModelTestBuilder& builder, + std::vector>& output_qparams) { + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + NodeArg* indices = MakeTestInput(builder, indices_def); + + NodeArg* gather_output = builder.MakeIntermediate(); + Node& gather_node = builder.AddNode("Gather", {input_qdq, indices}, {gather_output}); + gather_node.AddAttribute("axis", axis); + + AddQDQNodePairWithOutputAsGraphOutput(builder, gather_output, output_qparams[0].scale, output_qparams[0].zero_point); + }; +} + +// Test the accuracy of a QDQ Gather model on QNN EP. Checks if the QDQ model on QNN EP is as accurate as the QDQ model on CPU EP +// (compared to float32 model).
template -static void RunGatherOpQDQTest(int opset, bool scalar_indices = false, - ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All) { +static void RunQDQGatherOpTest(const TestInputDef& input_def, const TestInputDef& indices_def, + int64_t axis, int opset, ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -33,54 +62,69 @@ static void RunGatherOpQDQTest(int opset, bool scalar_indices = false, provider_options["backend_path"] = "libQnnHtp.so"; #endif - if (scalar_indices) { - RunQnnModelTest(BuildQDQGatherOpScalarIndicesTestCase({2, 3, 4}, // input shape - 1, // indices - 1), // axis - provider_options, - opset, - expected_ep_assignment); - } else { - RunQnnModelTest(BuildQDQGatherOpTestCase({2, 3, 4}, // input shape - std::vector{1}, // indices - {1}, // indices_shape - 1), // axis - provider_options, - opset, - expected_ep_assignment); - } + TestQDQModelAccuracy(BuildGatherOpTestCase(input_def, indices_def, axis), + BuildQDQGatherOpTestCase(input_def, indices_def, axis), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // -// - Uses uint8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQGatherOpU8) { - RunGatherOpQDQTest(11); +// Static int64 indices with default axis. +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt64_Axis0) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, true, {0, 1, 1, 2}), + 0, + 13, + ExpectedEPNodeAssignment::All); +} + +// Tests that dynamic int64 indices are not supported on HTP backend. 
+TEST_F(QnnHTPBackendTests, GatherOp_IndicesDynamicInt64_Axis0) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, false, {0, 1, 1, 2}), + 0, + 13, + ExpectedEPNodeAssignment::None); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // -// - Uses int8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQGatherOpI8) { - RunGatherOpQDQTest(11); +// Static int32 indices with default axis. +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt32_Axis0) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, true, {0, 1, 1, 2}), + 0, + 13, + ExpectedEPNodeAssignment::All); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // -// - Uses uint8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQGatherOpScalarIndicesU8) { - RunGatherOpQDQTest(11, true); +// Dynamic int32 indices with default axis. +TEST_F(QnnHTPBackendTests, GatherOp_IndicesDynamicInt32_Axis0) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, false, {0, 1, 1, 2}), + 0, + 13, + ExpectedEPNodeAssignment::All); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // -// - Uses int8 as the quantization type. 
-TEST_F(QnnHTPBackendTests, TestQDQGatherOpScalarIndicesI8) { - RunGatherOpQDQTest(11, true); +// Static int32 indices with axis = 1 +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt32_Axis1) { + RunQDQGatherOpTest(TestInputDef({3, 3}, false, {1.0f, 1.2f, 1.9f, 2.3f, 3.4f, 3.9f, 4.5f, 5.7f, 5.9f}), + TestInputDef({1, 2}, true, {0, 2}), + 1, + 13, + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc index 3846a2868a895..683c4d49fa99d 100644 --- a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc +++ b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc @@ -16,47 +16,56 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -// Creates the graph: -// _______________________ -// input_u8 -> DQ -> | | -> Q -> output_u8 -// scale_u8 (initializer) -> DQ -> | InstanceNormalization | -// bias_u8 (initializer) -> DQ -> |_______________________| -// -// Currently used to test QNN EP. -template -GetQDQTestCaseFn BuildQDQInstanceNormTestCase(const TestInputDef& input_def, - const TestInputDef& scale_def, - const TestInputDef& bias_def, - const std::vector& attrs) { +// Function that builds a float32 model with an InstanceNormalization operator. 
+GetTestModelFn BuildInstanceNormTestCase(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def, + const std::vector& attrs) { return [input_def, scale_def, bias_def, attrs](ModelTestBuilder& builder) { - const QuantType quant_zero_point = 0; - const float quant_scale = 1.0f; - - auto* dq_scale_output = builder.MakeIntermediate(); - auto* scale = MakeTestInput(builder, scale_def); - builder.AddDequantizeLinearNode(scale, quant_scale, quant_zero_point, dq_scale_output); + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* scale = MakeTestInput(builder, scale_def); + NodeArg* bias = MakeTestInput(builder, bias_def); - // Add bias (initializer) -> DQ -> - auto* dq_bias_output = builder.MakeIntermediate(); - auto* bias = MakeTestInput(builder, bias_def); - builder.AddDequantizeLinearNode(bias, 1.0f, 0, dq_bias_output); + NodeArg* output = builder.MakeOutput(); + Node& op_node = builder.AddNode("InstanceNormalization", {input, scale, bias}, {output}); - // Add input_u8 -> DQ -> - auto* input_u8 = MakeTestInput(builder, input_def); - auto* dq_input_output = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(input_u8, quant_scale, quant_zero_point, dq_input_output); + for (const auto& attr : attrs) { + op_node.AddAttributeProto(attr); + } + }; +} - // Add dq_input_output -> InstanceNormalization -> +// Function that builds a QDQ model with an InstanceNormalization operator. 
+template +static GetTestQDQModelFn BuildQDQInstanceNormTestCase(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def, + const std::vector& attrs) { + return [input_def, scale_def, bias_def, attrs](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input => Q => DQ => + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // scale => Q => DQ => + NodeArg* scale = MakeTestInput(builder, scale_def); + QuantParams scale_qparams = GetTestInputQuantParams(scale_def); + NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point); + + // bias (as int32) => DQ => + NodeArg* bias_qdq = MakeTestQDQBiasInput(builder, bias_def, input_qparams.scale * scale_qparams.scale); + + // InstanceNormalization operator. auto* instance_norm_output = builder.MakeIntermediate(); - Node& inst_norm_node = builder.AddNode("InstanceNormalization", {dq_input_output, dq_scale_output, dq_bias_output}, + Node& inst_norm_node = builder.AddNode("InstanceNormalization", {input_qdq, scale_qdq, bias_qdq}, {instance_norm_output}); for (const auto& attr : attrs) { inst_norm_node.AddAttributeProto(attr); } // Add instance_norm_output -> Q -> output_u8 - auto* output_u8 = builder.MakeOutput(); - builder.AddQuantizeLinearNode(instance_norm_output, quant_scale, quant_zero_point, output_u8); + AddQDQNodePairWithOutputAsGraphOutput(builder, instance_norm_output, output_qparams[0].scale, output_qparams[0].zero_point); }; } @@ -71,9 +80,9 @@ GetQDQTestCaseFn BuildQDQInstanceNormTestCase(const TestInputDef& inp * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). 
*/ template -static void RunInstanceNormQDQTest(const TestInputDef& input_def, - const TestInputDef& scale_def, - const TestInputDef& bias_def, +static void RunInstanceNormQDQTest(const TestInputDef& input_def, + const TestInputDef& scale_def, + const TestInputDef& bias_def, const std::vector& attrs, ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; @@ -84,50 +93,39 @@ static void RunInstanceNormQDQTest(const TestInputDef& input_def, #endif // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs. - RunQnnModelTest(BuildQDQInstanceNormTestCase(input_def, scale_def, bias_def, attrs), - provider_options, - 18, - expected_ep_assignment); + TestQDQModelAccuracy(BuildInstanceNormTestCase(input_def, scale_def, bias_def, attrs), + BuildQDQInstanceNormTestCase(input_def, scale_def, bias_def, attrs), + provider_options, + 18, + expected_ep_assignment, + 1e-5f); } // Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit. // Use an input of rank 4. -TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8) { - RunInstanceNormQDQTest(TestInputDef({1, 2, 3, 3}, false, 0, 255), - TestInputDef({2}, true, 0, 127), - TestInputDef({2}, true, 0, 10), +TEST_F(QnnHTPBackendTests, InstanceNormU8) { + RunInstanceNormQDQTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), + TestInputDef({2}, true, -2.0f, 2.0f), + TestInputDef({2}, true, -3.0f, 3.0f), {}, ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8Rank3) { - RunInstanceNormQDQTest(TestInputDef({1, 2, 3}, false, {6, 4, 2, 6, 8, 2}), - TestInputDef({2}, true, {1, 2}), - TestInputDef({2}, true, {1, 3}), - {}, - ExpectedEPNodeAssignment::All); -} - -// TODO: This test now fails in QNN SDK version 2.12.0 (windows arm64 and linux x86_64). -// This worked in QNN SDK version 2.10.0. 
Need to determine the severity of this inaccuracy. -// -// Exepcted output: 2 6 2 42 42 0 -// Actual output: 2 6 2 43 43 0 -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQInstanceNormU8Rank3_QnnSdk_2_12_Regression) { - RunInstanceNormQDQTest(TestInputDef({1, 2, 3}, false, {3, 4, 3, 9, 9, 8}), - TestInputDef({2}, true, {2, 57}), - TestInputDef({2}, true, {3, 2}), +TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3) { + RunInstanceNormQDQTest(TestInputDef({1, 2, 3}, false, {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f}), + TestInputDef({2}, true, {1.0f, 2.0f}), + TestInputDef({2}, true, {1.0f, 3.0f}), {}, ExpectedEPNodeAssignment::All); } // Check that QNN InstanceNorm operator does not handle inputs with rank > 4. -TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8Rank5) { - RunInstanceNormQDQTest(TestInputDef({1, 2, 3, 3, 3}, false, 0, 255), - TestInputDef({2}, true, 0, 127), - TestInputDef({2}, true, 0, 10), +TEST_F(QnnHTPBackendTests, InstanceNormU8Rank5) { + RunInstanceNormQDQTest(TestInputDef({1, 2, 3, 3, 3}, false, -10.0f, 10.0f), + TestInputDef({2}, true, -2.0f, 2.0f), + TestInputDef({2}, true, -3.0f, 3.0f), {}, ExpectedEPNodeAssignment::None); } diff --git a/onnxruntime/test/providers/qnn/layer_norm_test.cc b/onnxruntime/test/providers/qnn/layer_norm_test.cc index d9512d16a1f28..3b73a6bf800a3 100644 --- a/onnxruntime/test/providers/qnn/layer_norm_test.cc +++ b/onnxruntime/test/providers/qnn/layer_norm_test.cc @@ -113,6 +113,7 @@ static void RunLayerNormQDQTest(const std::vector& input_shape, #endif // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs. + // TODO: Use new QDQ accuracy testing approach (see TestQDQModelAccuracy) RunQnnModelTest(BuildQDQLayerNormTestCase(input_shape, scale_shape, axis_value), provider_options, 11, @@ -122,11 +123,14 @@ static void RunLayerNormQDQTest(const std::vector& input_shape, // Check that QNN compiles DQ -> LayerNormalization -> Q as a single unit. // Use an input of rank 3. 
// Failed QNN op validation: QnnDsp Param[0] has incorrect Value 3 +// TODO: Use new QDQ accuracy testing approach (see TestQDQModelAccuracy) TEST_F(QnnHTPBackendTests, TestQDQLayerNorm1DAxis0) { RunLayerNormQDQTest({1, 2, 3}, {1, 2, 3}, ExpectedEPNodeAssignment::None); } // Failed QNN FinalizeGraphs: QnnDsp Failed to finalize graph (id: 1) with err 1002 +// +// TODO: Use new QDQ accuracy testing approach (see TestQDQModelAccuracy) TEST_F(QnnHTPBackendTests, DISABLED_TestQDQLayerNorm1DAxis2) { RunLayerNormQDQTest({1, 2, 3}, {3}, ExpectedEPNodeAssignment::All, -1); } @@ -136,4 +140,4 @@ TEST_F(QnnHTPBackendTests, DISABLED_TestQDQLayerNorm1DAxis2) { } // namespace test } // namespace onnxruntime -#endif \ No newline at end of file +#endif diff --git a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc index 489ac1924eb8e..772476cb0d245 100644 --- a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc @@ -15,17 +15,44 @@ namespace onnxruntime { namespace test { #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -/** - * Runs a LeakyRelu op model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param op_type The LeakyRelu op type (e.g., ReduceSum). - * \param opset The opset version. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) - */ +// Creates a function that builds a model with a LeakyRelu operator. 
+static GetTestModelFn BuildLeakyReluOpTestCase(const TestInputDef& input_def, float alpha) { + return [input_def, alpha](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* output = builder.MakeOutput(); + Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input}, {output}); + leakyrelu_node.AddAttribute("alpha", alpha); + }; +} + +// Creates a function that builds a QDQ model with a LeakyRelu operator. +template +static GetTestQDQModelFn BuildQDQLeakyReluOpTestCase(const TestInputDef& input_def, + float alpha) { + return [input_def, alpha](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input => Q => DQ => + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // LeakryRelu + auto* leakyrelu_output = builder.MakeIntermediate(); + Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input_qdq}, {leakyrelu_output}); + leakyrelu_node.AddAttribute("alpha", alpha); + + // => Q => DQ -> final output + AddQDQNodePairWithOutputAsGraphOutput(builder, leakyrelu_output, output_qparams[0].scale, + output_qparams[0].zero_point); + }; +} + +// Checks the accuracy of a QDQ LeakyRelu model by comparing to ORT CPU EP. 
template -static void RunLeakyReluOpQDQTest(int opset, - ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All) { +static void RunLeakyReluOpQDQTest(const TestInputDef& input_def, + float alpha, + int opset, + ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -33,26 +60,34 @@ static void RunLeakyReluOpQDQTest(int opset, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQLeakyReluOpTestCase({2, 3, 4}), - provider_options, - opset, - expected_ep_assignment); + TestQDQModelAccuracy(BuildLeakyReluOpTestCase(input_def, alpha), + BuildQDQLeakyReluOpTestCase(input_def, alpha), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all // nodes are supported by the QNN EP, and that the inference results match the CPU EP results. // // - Uses uint8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet15) { - RunLeakyReluOpQDQTest(15); +TEST_F(QnnHTPBackendTests, LeakyReluOpSet15) { + RunLeakyReluOpQDQTest(TestInputDef({1, 2, 3}, false, {-40.0f, -20.0f, 0.0f, 10.0f, 30.0f, 40.0f}), + 0.2f, + 15, + ExpectedEPNodeAssignment::All); } // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all // nodes are supported by the QNN EP, and that the inference results match the CPU EP results. // // - Uses uint8 as the quantization type. 
-TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet16) { - RunLeakyReluOpQDQTest(16); +TEST_F(QnnHTPBackendTests, LeakyReluOpSet16) { + RunLeakyReluOpQDQTest(TestInputDef({1, 2, 3}, false, {-40.0f, -20.0f, 0.0f, 10.0f, 30.0f, 40.0f}), + 0.2f, + 16, + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/lrn_op_test.cc b/onnxruntime/test/providers/qnn/lrn_op_test.cc index 3b28678bcb0a7..82f7b246aa5e4 100644 --- a/onnxruntime/test/providers/qnn/lrn_op_test.cc +++ b/onnxruntime/test/providers/qnn/lrn_op_test.cc @@ -17,10 +17,10 @@ namespace onnxruntime { namespace test { // Creates a graph with a single LRN operator. Used for testing CPU backend. -static GetTestModelFn BuildLRNTestCase(const std::vector& shape, int64_t size, +static GetTestModelFn BuildLRNTestCase(const TestInputDef& input_def, int64_t size, float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f) { - return [shape, size, alpha, beta, bias](ModelTestBuilder& builder) { - auto* input = builder.MakeInput(shape, 0.0f, 20.0f); + return [input_def, size, alpha, beta, bias](ModelTestBuilder& builder) { + auto* input = MakeTestInput(builder, input_def); auto* output = builder.MakeOutput(); Node& lrn_node = builder.AddNode("LRN", {input}, {output}); @@ -31,40 +31,34 @@ static GetTestModelFn BuildLRNTestCase(const std::vector& shape, int64_ }; } -// Q/DQ scaled used to build Q/DQ test model. This is a global constant -// because results from HTP backend are off by exactly this amount. -static constexpr float qdq_scale = 0.0038f; - // Creates a graph with a single Q/DQ LRN operator. Used for testing HTP backend. 
template -static GetTestModelFn BuildQDQLRNTestCase(const std::vector& shape, int64_t size, - float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f) { - return [shape, size, alpha, beta, bias](ModelTestBuilder& builder) { - const InputQType zero_point = std::numeric_limits::max() / 2; - - auto* input = builder.MakeInput(shape, -1.0f, 1.0f); - auto* output = builder.MakeOutput(); - - // input -> Q -> DQ -> LRN - auto* qdq_output = AddQDQNodePair(builder, input, qdq_scale, zero_point); - auto* lrn_output = builder.MakeIntermediate(); - - Node& lrn_node = builder.AddNode("LRN", {qdq_output}, {lrn_output}); +static GetTestQDQModelFn BuildQDQLRNTestCase(const TestInputDef& input_def, int64_t size, + float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f) { + return [input_def, size, alpha, beta, bias](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input -> Q -> DQ -> + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // LRN + NodeArg* lrn_output = builder.MakeIntermediate(); + Node& lrn_node = builder.AddNode("LRN", {input_qdq}, {lrn_output}); lrn_node.AddAttribute("size", size); lrn_node.AddAttribute("alpha", alpha); lrn_node.AddAttribute("beta", beta); lrn_node.AddAttribute("bias", bias); - // -> Q -> DQ -> output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(lrn_output, qdq_scale, zero_point, q_output); - builder.AddDequantizeLinearNode(q_output, qdq_scale, zero_point, output); + // LRN output -> Q -> DQ -> final output + AddQDQNodePairWithOutputAsGraphOutput(builder, lrn_output, output_qparams[0].scale, + output_qparams[0].zero_point); }; } // Runs an LRN model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN EP and CPU EP match. 
-static void RunCPULRNOpTest(const std::vector& shape, int64_t size, +static void RunCPULRNOpTest(const TestInputDef& input_def, int64_t size, ExpectedEPNodeAssignment expected_ep_assignment, float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f, int opset = 13) { ProviderOptions provider_options; @@ -77,7 +71,7 @@ static void RunCPULRNOpTest(const std::vector& shape, int64_t size, fp32_abs_err = 1.5e-5f; // On linux we need slightly larger tolerance. #endif - RunQnnModelTest(BuildLRNTestCase(shape, size, alpha, beta, bias), + RunQnnModelTest(BuildLRNTestCase(input_def, size, alpha, beta, bias), provider_options, opset, expected_ep_assignment, @@ -87,10 +81,10 @@ static void RunCPULRNOpTest(const std::vector& shape, int64_t size, // Runs an LRN model on the QNN HTP backend. Checks the graph node assignment, and that inference // outputs for QNN EP and CPU EP match. template -static void RunQDQLRNOpTest(const std::vector& shape, int64_t size, +static void RunQDQLRNOpTest(const TestInputDef& input_def, int64_t size, ExpectedEPNodeAssignment expected_ep_assignment, float alpha = 0.0001f, float beta = 0.75f, float bias = 1.0f, - int opset = 13, float fp32_abs_err = qdq_scale) { + int opset = 13) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -98,27 +92,34 @@ static void RunQDQLRNOpTest(const std::vector& shape, int64_t size, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQLRNTestCase(shape, size, alpha, beta, bias), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err + 0.0001f); + TestQDQModelAccuracy(BuildLRNTestCase(input_def, size, alpha, beta, bias), + BuildQDQLRNTestCase(input_def, size, alpha, beta, bias), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // // CPU tests: // -TEST_F(QnnCPUBackendTests, TestCPULRNSize3) { - RunCPULRNOpTest({1, 128, 4, 5}, 3, ExpectedEPNodeAssignment::All); +TEST_F(QnnCPUBackendTests, LRNSize3) 
{ + RunCPULRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 3, // Size + ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestCPULRNSize5) { - RunCPULRNOpTest({1, 128, 4, 5}, 5, ExpectedEPNodeAssignment::All); +TEST_F(QnnCPUBackendTests, LRNSize5) { + RunCPULRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 5, // Size + ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestCPULRN_size_larger_than_channel) { - RunCPULRNOpTest({1, 128, 4, 5}, 255, ExpectedEPNodeAssignment::All); +TEST_F(QnnCPUBackendTests, LRN_size_larger_than_channel) { + RunCPULRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 255, // Size + ExpectedEPNodeAssignment::All); } #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) @@ -126,16 +127,22 @@ TEST_F(QnnCPUBackendTests, TestCPULRN_size_larger_than_channel) { // HTP tests: // -TEST_F(QnnHTPBackendTests, TestHTPLRNSize3) { - RunQDQLRNOpTest({1, 128, 4, 5}, 3, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, LRNSize3) { + RunQDQLRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 3, // Size + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestHTPLRNSize5) { - RunQDQLRNOpTest({1, 128, 4, 5}, 5, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, LRNSize5) { + RunQDQLRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 5, // Size + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestHTPLRN_size_larger_than_channel) { - RunQDQLRNOpTest({1, 128, 4, 5}, 255, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, LRN_size_larger_than_channel) { + RunQDQLRNOpTest(TestInputDef({1, 128, 4, 5}, false, -10.0f, 10.0f), + 255, // Size + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp index 5c7a08ae06080..421bdfdaf1bb6 100644 --- 
a/onnxruntime/test/providers/qnn/matmul_test.cpp +++ b/onnxruntime/test/providers/qnn/matmul_test.cpp @@ -6,7 +6,6 @@ #include #include -#include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" #include "onnx/onnx_pb.h" @@ -17,74 +16,46 @@ namespace onnxruntime { namespace test { // Returns a function that creates a graph with MatMul operator. -static GetTestModelFn BuildMatMulOpTestCase(const std::vector& input1_shape, - const std::vector& input2_shape) { - return [input1_shape, input2_shape](ModelTestBuilder& builder) { - // Random input data - auto input1 = builder.MakeInput(input1_shape, 0.0f, 10.0f); - auto input2 = builder.MakeInput(input2_shape, 0.0f, 10.0f); - - auto* output = builder.MakeOutput(); +static GetTestModelFn BuildMatMulOpTestCase(const TestInputDef& input1_def, + const TestInputDef& input2_def) { + return [input1_def, input2_def](ModelTestBuilder& builder) { + NodeArg* input1 = MakeTestInput(builder, input1_def); + NodeArg* input2 = MakeTestInput(builder, input2_def); + NodeArg* output = builder.MakeOutput(); builder.AddNode("MatMul", {input1, input2}, {output}); }; } -// Returns a function that creates a graph with a QDQ AveragePool operator. +// Returns a function that creates a graph with a QDQ MatMul operator. 
template -GetQDQTestCaseFn BuildMatMulOpQDQTestCase(const std::vector& input1_shape, - const std::vector& input2_shape) { - return [input1_shape, input2_shape](ModelTestBuilder& builder) { - float pool_output_scale = 0.0038f; - float q_scale = 0.0038f; - QuantType pool_output_zp = std::numeric_limits::max() / 2; - QuantType q_zp = std::numeric_limits::max() / 2; - - auto* input_arg = builder.MakeInput(input1_shape, -1.f, 1.f); - auto* output_arg = builder.MakeOutput(); - - using InputLimits = std::numeric_limits; - - // add QDQ input - auto* q1_output = builder.MakeIntermediate(); - auto* dq1_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(input_arg, - pool_output_scale, - pool_output_zp, - q1_output); - builder.AddDequantizeLinearNode(q1_output, - q_scale, - q_zp, - dq1_output); - - // add input b initializer (NNAPI only supports case of MatMul A*B - B is an initializer) - auto* dq_2_output = builder.MakeIntermediate(); - auto* input_b = builder.MakeInitializer(input2_shape, InputLimits::min(), InputLimits::max()); - builder.AddDequantizeLinearNode(input_b, - q_scale, - q_zp, - dq_2_output); - - // add MatMul operator - auto* matmul_op_output = builder.MakeIntermediate(); - builder.AddNode("MatMul", {dq1_output, dq_2_output}, {matmul_op_output}); - - // add QDQ output - auto* q3_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(matmul_op_output, - pool_output_scale, - pool_output_zp, - q3_output); - builder.AddDequantizeLinearNode(q3_output, - q_scale, - q_zp, - output_arg); +static GetTestQDQModelFn BuildMatMulOpQDQTestCase(const TestInputDef& input1_def, + const TestInputDef& input2_def) { + return [input1_def, input2_def](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input1 -> Q -> DQ -> + NodeArg* input1 = MakeTestInput(builder, input1_def); + QuantParams input1_qparams = GetTestInputQuantParams(input1_def); + auto* input1_qdq = AddQDQNodePair(builder, input1, input1_qparams.scale, 
input1_qparams.zero_point); + + // input2 -> Q -> DQ -> + NodeArg* input2 = MakeTestInput(builder, input2_def); + QuantParams input2_qparams = GetTestInputQuantParams(input2_def); + auto* input2_qdq = AddQDQNodePair(builder, input2, input2_qparams.scale, input2_qparams.zero_point); + + // MatMul + auto* op_output = builder.MakeIntermediate(); + builder.AddNode("MatMul", {input1_qdq, input2_qdq}, {op_output}); + + // op_output -> Q -> DQ -> output + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, + output_qparams[0].zero_point); }; } -// Runs an AveragePool model on the QNN CPU backend. Checks the graph node assignment, and that inference +// Runs an MatMul model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. -static void RunMatMulOpOpTest(const std::vector& input1_shape, - const std::vector& input2_shape, +static void RunMatMulOpOpTest(const TestInputDef& input1_def, + const TestInputDef& input2_def, ExpectedEPNodeAssignment expected_ep_assignment, int opset = 13) { ProviderOptions provider_options; @@ -94,19 +65,20 @@ static void RunMatMulOpOpTest(const std::vector& input1_shape, provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildMatMulOpTestCase(input1_shape, input2_shape), + RunQnnModelTest(BuildMatMulOpTestCase(input1_def, input2_def), provider_options, opset, - expected_ep_assignment); + expected_ep_assignment, + 2e-4f); } -// Runs a QDQ AveragePool model on the QNN HTP backend. Checks the graph node assignment, and that inference -// outputs for QNN and CPU match. +// Runs a QDQ MatMul model on the QNN HTP backend. Checks the graph node assignment, and that the +// QDQ model is accurate on QNN EP (compared to CPU EP). 
template -static void RunQDQMatMulOpOpTest(const std::vector& input1_shape, - const std::vector& input2_shape, +static void RunQDQMatMulOpOpTest(const TestInputDef& input1_def, + const TestInputDef& input2_def, ExpectedEPNodeAssignment expected_ep_assignment, - int opset = 18, float fp32_abs_err = 1e-5f) { + int opset = 18) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -114,27 +86,28 @@ static void RunQDQMatMulOpOpTest(const std::vector& input1_shape, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildMatMulOpQDQTestCase(input1_shape, input2_shape), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(BuildMatMulOpTestCase(input1_def, input2_def), + BuildMatMulOpQDQTestCase(input1_def, input2_def), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // // CPU tests: // -TEST_F(QnnCPUBackendTests, TestMatMulOp) { - RunMatMulOpOpTest({2, 2} /* input_shape1 */, - {2, 2} /* input_shape2 */, +TEST_F(QnnCPUBackendTests, MatMulOp) { + RunMatMulOpOpTest(TestInputDef({2, 3}, false, {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}), + TestInputDef({3, 2}, false, {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}), ExpectedEPNodeAssignment::All, 18); } -// QNN broadcast issue -TEST_F(QnnCPUBackendTests, DISABLED_TestMatMulOp2) { - RunMatMulOpOpTest({28, 1, 64} /* input_shape1 */, - {64, 32} /* input_shape2 */, +// Test MatMul broadcasting +TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) { + RunMatMulOpOpTest(TestInputDef({28, 1, 64}, false, -10.0f, 10.0f), + TestInputDef({64, 32}, false, -10.0f, 10.0f), ExpectedEPNodeAssignment::All, 18); } @@ -143,27 +116,17 @@ TEST_F(QnnCPUBackendTests, DISABLED_TestMatMulOp2) { // HTP tests: // -TEST_F(QnnHTPBackendTests, TestMatMulOp_HTP_u8) { - RunQDQMatMulOpOpTest({2, 2} /* input_shape1 */, - {2, 2} /* input_shape2 */, - ExpectedEPNodeAssignment::All, - 18, 0.00381f); -} - -// QNN broadcast issue 
-TEST_F(QnnHTPBackendTests, DISABLED_TestMatMulOp2_HTP_u8) { - RunQDQMatMulOpOpTest({28, 1, 64} /* input_shape1 */, - {64, 32} /* input_shape2 */, - ExpectedEPNodeAssignment::All, - 18, 0.00381f); +TEST_F(QnnHTPBackendTests, MatMulOp_HTP_u8) { + RunQDQMatMulOpOpTest(TestInputDef({2, 3}, false, {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}), + TestInputDef({3, 2}, false, {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}), + ExpectedEPNodeAssignment::All, 18); } -// QNN broadcast issue -TEST_F(QnnHTPBackendTests, DISABLED_TestMatMulOp3_HTP_u8) { - RunQDQMatMulOpOpTest({28, 1, 32} /* input_shape1 */, - {32, 2} /* input_shape2 */, - ExpectedEPNodeAssignment::All, - 18, 0.00381f); +// Test MatMul broadcasting +TEST_F(QnnHTPBackendTests, MatMulOp_Broadcast) { + RunQDQMatMulOpOpTest(TestInputDef({28, 1, 64}, false, -10.0f, 10.0f), + TestInputDef({64, 32}, false, -10.0f, 10.0f), + ExpectedEPNodeAssignment::All, 18); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/max_pool_test.cpp b/onnxruntime/test/providers/qnn/max_pool_test.cpp index 1beac1d326ccd..f574948f02c17 100644 --- a/onnxruntime/test/providers/qnn/max_pool_test.cpp +++ b/onnxruntime/test/providers/qnn/max_pool_test.cpp @@ -6,6 +6,7 @@ #include #include +#include "core/graph/node_attr_utils.h" #include "test/optimizer/qdq_test_utils.h" #include "test/providers/qnn/qnn_test_utils.h" @@ -17,122 +18,50 @@ namespace onnxruntime { namespace test { // Returns a function that creates a graph with a single MaxPool operator. 
-static GetTestModelFn BuildMaxPoolTestCase(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - int64_t ceil_mode, - int64_t storage_order, - const std::string& auto_pad = "NOTSET") { - return [shape, kernel_shape, strides, pads, dilations, - ceil_mode, storage_order, auto_pad](ModelTestBuilder& builder) { - // Random input data - auto input = builder.MakeInput(shape, 0.0f, 10.0f); - - auto* output = builder.MakeOutput(); +static GetTestModelFn BuildMaxPoolTestCase(const TestInputDef& input_def, + const std::vector& attrs) { + return [input_def, attrs](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* output = builder.MakeOutput(); Node& pool_node = builder.AddNode("MaxPool", {input}, {output}); - pool_node.AddAttribute("kernel_shape", kernel_shape); - - if (!strides.empty()) { - pool_node.AddAttribute("strides", strides); - } - - if (!dilations.empty()) { - pool_node.AddAttribute("dilations", dilations); - } - - pool_node.AddAttribute("auto_pad", auto_pad); - - if (!pads.empty() && auto_pad == "NOTSET") { - pool_node.AddAttribute("pads", pads); - } - - if (ceil_mode > 0) { - pool_node.AddAttribute("ceil_mode", ceil_mode); - } - - if (storage_order > 0) { - pool_node.AddAttribute("storage_order", storage_order); + for (const auto& attr : attrs) { + pool_node.AddAttributeProto(attr); } }; } // Returns a function that creates a graph with a QDQ MaxPool operator. 
template -GetQDQTestCaseFn BuildMaxPoolQDQTestCase(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - int64_t ceil_mode, - int64_t storage_order, - const std::string& auto_pad = "NOTSET") { - return [shape, kernel_shape, strides, pads, dilations, - ceil_mode, storage_order, auto_pad](ModelTestBuilder& builder) { - float dq_scale = 0.0038f; - float pool_output_scale = 0.0038f; - float q_scale = 0.0038f; - QuantType dq_zp = std::numeric_limits::max() / 2; - QuantType pool_output_zp = std::numeric_limits::max() / 2; - QuantType q_zp = std::numeric_limits::max() / 2; - - auto* input_arg = builder.MakeInput(shape, -1.0f, 1.0f); - auto* output_arg = builder.MakeOutput(); - - // add QDQ + MaxPool - auto* dq_output = AddQDQNodePair(builder, input_arg, dq_scale, dq_zp); - auto* MaxPool_output = builder.MakeIntermediate(); - Node& pool_node = builder.AddNode("MaxPool", {dq_output}, {MaxPool_output}); - - pool_node.AddAttribute("kernel_shape", kernel_shape); - - if (!strides.empty()) { - pool_node.AddAttribute("strides", strides); - } - - if (!dilations.empty()) { - pool_node.AddAttribute("dilations", dilations); +GetTestQDQModelFn BuildMaxPoolQDQTestCase(const TestInputDef& input_def, + const std::vector& attrs) { + return [input_def, attrs](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input -> Q -> DQ -> + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // MaxPool + NodeArg* pool_output = builder.MakeIntermediate(); + Node& pool_node = builder.AddNode("MaxPool", {input_qdq}, {pool_output}); + + for (const auto& attr : attrs) { + pool_node.AddAttributeProto(attr); } - pool_node.AddAttribute("auto_pad", auto_pad); - - if (!pads.empty() && auto_pad == "NOTSET") { - 
pool_node.AddAttribute("pads", pads); - } - - if (ceil_mode > 0) { - pool_node.AddAttribute("ceil_mode", ceil_mode); - } - - if (storage_order > 0) { - pool_node.AddAttribute("storage_order", storage_order); - } - - // add QDQ output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(MaxPool_output, - pool_output_scale, - pool_output_zp, - q_output); - builder.AddDequantizeLinearNode(q_output, - q_scale, - q_zp, - output_arg); + // op_output -> Q -> DQ -> output + // NOTE: Input and output quantization parameters must be equal for MaxPool. + output_qparams[0] = input_qparams; // Overwrite! + AddQDQNodePairWithOutputAsGraphOutput(builder, pool_output, input_qparams.scale, + input_qparams.zero_point); }; } // Runs an MaxPool model on the QNN CPU backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. -static void RunMaxPoolOpTest(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - int64_t ceil_mode, - int64_t storage_order, - const std::string& auto_pad, +static void RunMaxPoolOpTest(const TestInputDef& input_def, + const std::vector& attrs, ExpectedEPNodeAssignment expected_ep_assignment, int opset = 18) { ProviderOptions provider_options; @@ -142,7 +71,7 @@ static void RunMaxPoolOpTest(const std::vector& shape, provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildMaxPoolTestCase(shape, kernel_shape, strides, pads, dilations, ceil_mode, storage_order, auto_pad), + RunQnnModelTest(BuildMaxPoolTestCase(input_def, attrs), provider_options, opset, expected_ep_assignment); @@ -151,16 +80,10 @@ static void RunMaxPoolOpTest(const std::vector& shape, // Runs a QDQ MaxPool model on the QNN HTP backend. Checks the graph node assignment, and that inference // outputs for QNN and CPU match. 
template -static void RunQDQMaxPoolOpTest(const std::vector& shape, - const std::vector& kernel_shape, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - int64_t ceil_mode, - int64_t storage_order, - const std::string& auto_pad, +static void RunQDQMaxPoolOpTest(const TestInputDef& input_def, + const std::vector& attrs, ExpectedEPNodeAssignment expected_ep_assignment, - int opset = 18, float fp32_abs_err = 1e-5f) { + int opset = 18) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -168,11 +91,12 @@ static void RunQDQMaxPoolOpTest(const std::vector& shape, provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildMaxPoolQDQTestCase(shape, kernel_shape, strides, pads, dilations, ceil_mode, storage_order, auto_pad), - provider_options, - opset, - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(BuildMaxPoolTestCase(input_def, attrs), + BuildMaxPoolQDQTestCase(input_def, attrs), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); } // @@ -180,65 +104,53 @@ static void RunQDQMaxPoolOpTest(const std::vector& shape, // // MaxPool with kernel size equal to the spatial dimension of input tensor. 
-TEST_F(QnnCPUBackendTests, TestMaxPool_Global) { - RunMaxPoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad - ExpectedEPNodeAssignment::All); -} - -TEST_F(QnnCPUBackendTests, TestMaxPool_Large_Input) { - RunMaxPoolOpTest({1, 125, 8, 56}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnCPUBackendTests, MaxPool_Global) { + RunMaxPoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestMaxPool_Large_Input2) { - RunMaxPoolOpTest({1, 128, 16, 113}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnCPUBackendTests, MaxPool_Large_Input) { + RunMaxPoolOpTest(TestInputDef({1, 125, 8, 56}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{2, 2}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes 
cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). -TEST_F(QnnCPUBackendTests, DISABLED_TestMaxPool_Ceil) { - RunMaxPoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 1, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnCPUBackendTests, DISABLED_MaxPool_Ceil) { + RunMaxPoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(1)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). 
-TEST_F(QnnCPUBackendTests, DISABLED_TestMaxPool_Large_Input2_Ceil) { - RunMaxPoolOpTest({1, 128, 16, 113}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 1, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnCPUBackendTests, DISABLED_MaxPool_Large_Input2_Ceil) { + RunMaxPoolOpTest(TestInputDef({1, 128, 16, 113}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{2, 2}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(1)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } @@ -247,79 +159,66 @@ TEST_F(QnnCPUBackendTests, DISABLED_TestMaxPool_Large_Input2_Ceil) { // HTP tests: // // QDQ MaxPool with kernel size equal to the spatial dimension of input tensor. -TEST_F(QnnHTPBackendTests, TestMaxPool_Global_HTP_u8) { - RunQDQMaxPoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad - ExpectedEPNodeAssignment::All); -} - -// TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). 
-TEST_F(QnnHTPBackendTests, DISABLED_TestMaxPool_Large_Input_HTP_u8) { - RunQDQMaxPoolOpTest({1, 125, 8, 56}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, MaxPool_Global_HTP_u8) { + RunQDQMaxPoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). -TEST_F(QnnHTPBackendTests, DISABLED_TestMaxPool_Large_Input2_HTP_u8) { - RunQDQMaxPoolOpTest({1, 128, 16, 113}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, DISABLED_MaxPool_Large_Input_HTP_u8) { + RunQDQMaxPoolOpTest(TestInputDef({1, 125, 8, 56}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{2, 2}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestMaxPool_Ceil_HTP_u8) { - RunQDQMaxPoolOpTest({1, 2, 3, 3}, // shape - {3, 3}, // kernel_shape - {3, 3}, // 
strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 1, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, MaxPool_Ceil_HTP_u8) { + RunQDQMaxPoolOpTest(TestInputDef({1, 2, 3, 3}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{3, 3}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(1)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). -TEST_F(QnnHTPBackendTests, DISABLED_TestMaxPool_Large_Input2_Ceil_HTP_u8) { - RunQDQMaxPoolOpTest({1, 128, 16, 113}, // shape - {2, 2}, // kernel_shape - {2, 2}, // strides - {0, 0, 0, 0}, // pads - {1, 1}, // dialations - 1, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, DISABLED_MaxPool_Large_Input2_Ceil_HTP_u8) { + RunQDQMaxPoolOpTest(TestInputDef({1, 128, 16, 113}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{2, 2}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{0, 0, 0, 0}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(1)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). 
-TEST_F(QnnHTPBackendTests, DISABLED_TestMaxPool_LargeInput_1Pads) { - RunQDQMaxPoolOpTest({1, 64, 384, 576}, // shape - {3, 3}, // kernel_shape - {2, 2}, // strides - {1, 1, 1, 1}, // pads - {1, 1}, // dialations - 0, // ceil_mode - 0, // storage_order - "NOTSET", // auto_pad +TEST_F(QnnHTPBackendTests, DISABLED_MaxPool_LargeInput_1Pads) { + RunQDQMaxPoolOpTest(TestInputDef({1, 64, 384, 576}, false, -10.0f, 10.0f), // Dynamic input with range [-10, 10] + {utils::MakeAttribute("kernel_shape", std::vector{3, 3}), + utils::MakeAttribute("strides", std::vector{2, 2}), + utils::MakeAttribute("pads", std::vector{1, 1, 1, 1}), + utils::MakeAttribute("dilations", std::vector{1, 1}), + utils::MakeAttribute("ceil_mode", static_cast(0)), + utils::MakeAttribute("storage_order", static_cast(0)), + utils::MakeAttribute("auto_pad", "NOTSET")}, ExpectedEPNodeAssignment::All); } diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc index 14f2a351d414c..6a6dc6d84af2f 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc @@ -43,6 +43,84 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, const ProviderOption helper.feeds_, verification_params); } +void InferenceModel(const std::string& model_data, const char* log_id, + std::unique_ptr execution_provider, + ExpectedEPNodeAssignment expected_ep_assignment, const NameMLValMap& feeds, + std::vector& output_names, std::vector& output_vals) { + SessionOptions so; + so.session_logid = log_id; + RunOptions run_options; + run_options.run_tag = so.session_logid; + + InferenceSessionWrapper session_object{so, GetEnvironment()}; + + std::string provider_type = kCpuExecutionProvider; + if (execution_provider) { + provider_type = execution_provider->Type(); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::move(execution_provider))); + } + ASSERT_STATUS_OK(session_object.Load(model_data.data(), 
static_cast(model_data.size()))); + ASSERT_STATUS_OK(session_object.Initialize()); + + const auto& graph = session_object.GetGraph(); + + auto ep_nodes = CountAssignedNodes(graph, provider_type); + if (expected_ep_assignment == ExpectedEPNodeAssignment::All) { + // Verify the entire graph is assigned to the EP + ASSERT_EQ(ep_nodes, graph.NumberOfNodes()) << "Not all nodes were assigned to " << provider_type; + } else if (expected_ep_assignment == ExpectedEPNodeAssignment::None) { + ASSERT_EQ(ep_nodes, 0) << "No nodes are supposed to be assigned to " << provider_type; + } else { + ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type; + } + + const auto& outputs = graph.GetOutputs(); + + // fetch all outputs if necessary. + if (output_names.empty()) { + output_names.reserve(outputs.size()); + for (const auto* node_arg : outputs) { + if (node_arg->Exists()) { + output_names.push_back(node_arg->Name()); + } + } + } + + ASSERT_STATUS_OK(session_object.Run(run_options, feeds, output_names, &output_vals)); +} + +NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float bias_scale) { + NodeArg* bias_int32 = nullptr; + + // Bias must be int32 to be detected as a QDQ node unit. + // We must quantize the data. 
+ if (bias_def.IsRandomData()) { + // Create random initializer def that is quantized to int32 + const auto& rand_info = bias_def.GetRandomDataInfo(); + TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), static_cast(rand_info.min / bias_scale), + static_cast(rand_info.max / bias_scale)); + bias_int32 = MakeTestInput(builder, bias_int32_def); + } else { + assert(bias_def.IsRawData()); + // Create raw data initializer def that is quantized to int32 + const auto& bias_f32_raw = bias_def.GetRawData(); + const size_t num_elems = bias_f32_raw.size(); + + std::vector bias_int32_raw(num_elems); + for (size_t i = 0; i < num_elems; i++) { + bias_int32_raw[i] = static_cast(bias_f32_raw[i] / bias_scale); + } + + TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), bias_int32_raw); + bias_int32 = MakeTestInput(builder, bias_int32_def); + } + + auto* bias = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(bias_int32, bias_scale, 0, bias); + + return bias; +} + // Mock IKernelLookup class passed to QNN EP's GetCapability() function in order to // determine if the HTP backend is supported on specific platforms (e.g., Windows ARM64). // TODO: Remove once HTP can be emulated on Windows ARM64. diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h index 21d34136c7c85..b091177b24ee2 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.h +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h @@ -5,19 +5,76 @@ #if !defined(ORT_MINIMAL_BUILD) #include +#include #include #include "core/framework/provider_options.h" #include "test/optimizer/qdq_test_utils.h" #include "test/util/include/test_utils.h" +#include "test/util/include/test/test_environment.h" +#include "test/util/include/default_providers.h" #include "gtest/gtest.h" namespace onnxruntime { namespace test { +// Signature for function that builds a float32 model. 
using GetTestModelFn = std::function; +// Class that stores quantization params (scale, zero point). +// Has a static function that computes quantization parameters from a floating-point range. +template +struct QuantParams { + float scale; + QType zero_point; + + static QuantParams Compute(float rmin, float rmax) { + if (rmin == 0.0f && rmax == 0.0f) { // Quantizing a single zero. + return QuantParams{1.0f, 0}; + } + + if (rmin == rmax) { // One data-point (x) to quantize. + if (rmin < 0) { // new range is [-x , 0.0f] + rmax = 0.0f; + } else { // new range is [0.0f, x] + rmin = 0.0f; + } + } + + constexpr float qmin = static_cast(std::numeric_limits::min()); + constexpr float qmax = static_cast(std::numeric_limits::max()); + + const float scale = (rmax - rmin) / (qmax - qmin); + const QType zero_point = static_cast(std::roundf((qmin - rmin) / scale)); + + return QuantParams{scale, zero_point}; + } +}; + +// Signature for function that builds a QDQ model. +// The parameter `output_qparams` contains quantization parameters that *can* be used for the QDQ model output. +// These output quantization parameters are computed by first running the float32 model and determining the +// range of output values. Note that the function is able to overwrite the output_qparams parameter if necessary +// (Example: MaxPool must have identical input and output quantization params). +template +using GetTestQDQModelFn = std::function>& output_qparams)>; + +// Computes quantization parameters for an array of floating-point values. +template +inline QuantParams GetDataQuantParams(gsl::span data) { + // Get min/max of raw data. + float min_val = std::numeric_limits::max(); + float max_val = std::numeric_limits::min(); + + for (auto val : data) { + min_val = std::min(min_val, val); + max_val = std::max(max_val, val); + } + + return QuantParams::Compute(min_val, max_val); +} + // Class that defines an input that can be created with ModelTestBuilder. 
// Defines whether the input is an initializer and if the data should be randomized or if // set to an explicit value. @@ -39,14 +96,18 @@ struct TestInputDef { TestInputDef(std::vector shape, bool is_initializer, T rand_min, T rand_max) : shape_(std::move(shape)), data_info_(RandomData{rand_min, rand_max}), - is_initializer_(is_initializer) {} + is_initializer_(is_initializer), + has_range_override_(false), + range_override_() {} // Create an input definition with explicit data. Specify its shape, whether it's an initializer, // and the raw data. TestInputDef(std::vector shape, bool is_initializer, std::vector data) : shape_(std::move(shape)), data_info_(RawData{std::move(data)}), - is_initializer_(is_initializer) {} + is_initializer_(is_initializer), + has_range_override_(false), + range_override_() {} TestInputDef(TestInputDef&& other) = default; TestInputDef(const TestInputDef& other) = default; @@ -54,6 +115,18 @@ struct TestInputDef { TestInputDef& operator=(const TestInputDef& other) = default; TestInputDef& operator=(TestInputDef&& other) = default; + // Overrides the range of input values reported by TestInputDef::GetRange(). + // This is useful when you want to quantize over a range that is larger or smaller + // than the actual range of the data. + // + // Returns a reference to this object to allow chaining. + TestInputDef& OverrideValueRange(T range_min, T range_max) { + range_override_.first = range_min; + range_override_.second = range_max; + has_range_override_ = true; + return *this; + } + const std::vector& GetShape() const { return shape_; } @@ -78,7 +151,15 @@ struct TestInputDef { return std::get(data_info_).data; } + // Get the range of values represented by this input, which is necessary for computing quantization parameters. + // For raw data, we return [min, max] of the elements. + // For random data, we return [rand_min, rand_max]. + // Optionally, the user can override this range by using OverrideValueRange(). 
std::pair GetRange() const { + if (has_range_override_) { + return range_override_; + } + auto which_type = data_info_.index(); std::pair range; @@ -105,28 +186,169 @@ struct TestInputDef { std::vector shape_; std::variant data_info_; bool is_initializer_; + bool has_range_override_; + std::pair range_override_; }; template -struct QuantParams { - float scale; - QType zero_point; +inline QuantParams GetTestInputQuantParams(const TestInputDef& input_def) { + const std::pair frange = input_def.GetRange(); + return QuantParams::Compute(frange.first, frange.second); +} - static QuantParams Compute(float rmin, float rmax) { - constexpr float qmin = static_cast(std::numeric_limits::min()); - constexpr float qmax = static_cast(std::numeric_limits::max()); +/** + * Inferences a given serialized model. Returns output values via an out-param. + * + * \param model_data The serialized ONNX model to inference. + * \param log_id The logger ID. + * \param execution_provider The EP on which to run the model. Set to nullptr for CPU EP. + * \param expected_ep_assignment Describes "which nodes" should be assigned to the EP. + * \param feeds The input feeds. + * \param output_names If empty, the function will write the output names. + * \param output_vals Initialized to the inference results. + */ +void InferenceModel(const std::string& model_data, const char* log_id, + std::unique_ptr execution_provider, + ExpectedEPNodeAssignment expected_ep_assignment, const NameMLValMap& feeds, + std::vector& output_names, std::vector& output_vals); - const float scale = (rmax - rmin) / (qmax - qmin); - const QType zero_point = static_cast((qmin - rmin) / scale); +/** + * Tests the accuracy of a QDQ model on QNN EP by runnning 3 inferences: + * + * 1. float model on CPU EP (baseline) + * 2. QDQ model on CPU EP + * 3. QDQ model on QNN EP + * + * This function checks that running the QDQ model on QNN EP (#3) is at least as accurate (+- small tolerance) + * as running the QDQ model on CPU EP (#2). 
We primarily measure accuracy by comparing to the baseline (#1). + * + * \param f32_model_fn Function that builds the float model (baseline for comparison). + * \param qdq_model_fn Function that builds the QDQ model (run by CPU EP and QNN EP). + * \param qnn_options QNN EP provider options. + * \param opset_version The opset version. + * \param expected_ep_assignment Describes "which nodes" should be assigned to the EP. + * \param fp32_abs_err Small tolerance used for floating-point comparisons. + * \param log_severity The logger's severity setting. + */ +template +inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTestQDQModelFn& qdq_model_fn, + const ProviderOptions& qnn_options, int opset_version, + ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err, + logging::Severity log_severity = logging::Severity::kERROR) { + // Add kMSDomain to cover contrib op like Gelu + const std::unordered_map domain_to_version = {{"", opset_version}, {kMSDomain, 1}}; + + auto& logging_manager = DefaultLoggingManager(); + logging_manager.SetDefaultLoggerSeverity(log_severity); + + // Create float model and serialize it to a string. + onnxruntime::Model f32_model("f32_model", false, ModelMetaData(), PathString(), + IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, {}, + logging_manager.DefaultLogger()); + ModelTestBuilder f32_helper(f32_model.MainGraph()); + std::string f32_model_data; + f32_model_fn(f32_helper); + f32_helper.SetGraphOutputs(); + ASSERT_STATUS_OK(f32_model.MainGraph().Resolve()); + f32_model.ToProto().SerializeToString(&f32_model_data); + + // Run f32 model on CPU EP and collect outputs. + std::vector cpu_f32_outputs; + std::vector output_names; + InferenceModel(f32_model_data, "f32_model_logger", nullptr, ExpectedEPNodeAssignment::All, + f32_helper.feeds_, output_names, cpu_f32_outputs); + const size_t num_outputs = cpu_f32_outputs.size(); + + // Compute output range(s) and quantization params. 
+ std::vector> output_qparams; + std::vector> output_vals; + std::vector output_types; + output_qparams.resize(num_outputs); + output_vals.resize(num_outputs); + output_types.resize(num_outputs); + + for (size_t i = 0; i < num_outputs; i++) { + auto& tensor = cpu_f32_outputs[i].Get(); + int32_t elem_type = tensor.GetElementType(); + + if (elem_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + output_vals[i] = tensor.DataAsSpan(); + output_qparams[i] = GetDataQuantParams(output_vals[i]); + } - return QuantParams{scale, zero_point}; + output_types[i] = elem_type; } -}; -template -inline QuantParams GetTestInputQuantParams(const TestInputDef& input_def) { - const std::pair frange = input_def.GetRange(); - return QuantParams::Compute(frange.first, frange.second); + // Create QDQ model and serialize it to a string. + onnxruntime::Model qdq_model("qdq_model", false, ModelMetaData(), PathString(), + IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, {}, + logging_manager.DefaultLogger()); + ModelTestBuilder qdq_helper(qdq_model.MainGraph()); + std::string qdq_model_data; + qdq_model_fn(qdq_helper, output_qparams); + qdq_helper.SetGraphOutputs(); + ASSERT_STATUS_OK(qdq_model.MainGraph().Resolve()); + qdq_model.ToProto().SerializeToString(&qdq_model_data); + + // Run QDQ model on QNN EP and collect outputs. + std::vector qnn_qdq_outputs; + InferenceModel(qdq_model_data, "qdq_model_logger", QnnExecutionProviderWithOptions(qnn_options), + expected_ep_assignment, qdq_helper.feeds_, output_names, qnn_qdq_outputs); + + if (expected_ep_assignment != ExpectedEPNodeAssignment::None) { + // Run QDQ model on CPU EP and collect outputs. + std::vector cpu_qdq_outputs; + InferenceModel(qdq_model_data, "qdq_model_logger", nullptr, ExpectedEPNodeAssignment::All, + qdq_helper.feeds_, output_names, cpu_qdq_outputs); + ASSERT_EQ(cpu_qdq_outputs.size(), num_outputs); + ASSERT_EQ(qnn_qdq_outputs.size(), num_outputs); + + // Compare accuracy of QDQ results with float model. 
+ // QNN EP must be at least as accurate as CPU EP when running the QDQ model. + for (size_t i = 0; i < num_outputs; i++) { + auto& cpu_qdq_tensor = cpu_qdq_outputs[i].Get(); + auto& qnn_qdq_tensor = qnn_qdq_outputs[i].Get(); + + ASSERT_EQ(cpu_qdq_tensor.GetElementType(), output_types[i]); + ASSERT_EQ(qnn_qdq_tensor.GetElementType(), output_types[i]); + + if (output_types[i] == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + const size_t num_vals = output_vals[i].size(); + gsl::span cpu_f32_vals = output_vals[i]; + gsl::span cpu_qdq_vals = cpu_qdq_tensor.DataAsSpan(); + gsl::span qnn_qdq_vals = qnn_qdq_tensor.DataAsSpan(); + + ASSERT_EQ(num_vals, cpu_qdq_vals.size()); + ASSERT_EQ(num_vals, qnn_qdq_vals.size()); + + for (size_t j = 0; j < num_vals; j++) { + const float expected_val = cpu_f32_vals[j]; // "ground-truth" + const float qnn_qdq_val = qnn_qdq_vals[j]; + const float cpu_qdq_val = cpu_qdq_vals[j]; + const float cpu_err = std::fabs(expected_val - cpu_qdq_val); + const float qnn_err = std::fabs(expected_val - qnn_qdq_val); + + // Case 1 (qnn_err <= cpu_err): QNN EP is *more* accurate, which makes (qnn_err - cpu_err) zero or + // a negative value. + // Case 2 (qnn_err > cpu_err): QNN EP is less accurate, but the error difference is within 1 + // quantization unit (i.e., scale). This can occur due to rounding differences. 
+ const bool is_as_accurate_as_cpu_qdq = (qnn_err - cpu_err) <= (output_qparams[i].scale + fp32_abs_err); + + EXPECT_TRUE(is_as_accurate_as_cpu_qdq) + << "Inaccuracy detected for output '" + << output_names[i] + << "', element " << j + << ".\nOutput quant params: scale=" << output_qparams[i].scale + << ", zero_point=" << static_cast(output_qparams[i].zero_point) + << ".\nExpected val: " << expected_val << "\n" + << "QNN QDQ val: " << qnn_qdq_val << " (err " << qnn_err << ")\n" + << "CPU QDQ val: " << cpu_qdq_val << " (err " << cpu_err << ")"; + } + } else { + VerifyOutput(output_names[i], cpu_f32_outputs[i].Get(), qnn_qdq_tensor, fp32_abs_err); + } + } + } } /** @@ -164,6 +386,38 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef& return input; } +template <> +inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef& input_def) { + NodeArg* input = nullptr; + const auto& shape = input_def.GetShape(); + const bool is_initializer = input_def.IsInitializer(); + + if (input_def.IsRawData()) { // Raw data. + const std::vector& raw_data = input_def.GetRawData(); + + if (is_initializer) { + input = builder.MakeInitializerBool(shape, raw_data); + } else { + input = builder.MakeInput(shape, raw_data); + } + } else { // Random data + if (is_initializer) { + input = builder.MakeRandInitializerBool(shape); + } else { + input = builder.MakeInputBool(shape); + } + } + + return input; +} + +// ONNX spec does not allow quantizing float to int32. However, this function will create an int32 input (divide by scale) +// and then return the output of DequantizeLinear. Note that bias_scale should be generally be equal +// to input_scale * weights_scale. 
See quantization tool: onnx_quantizer.py::quantize_bias_static() +// +// i.e., initial bias => manual quantization (int32) => DQ => final float bias +NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float bias_scale); + /** * Runs a test model on the QNN EP. Checks the graph node assignment, and that inference * outputs for QNN and CPU match. diff --git a/onnxruntime/test/providers/qnn/reduce_op_cpu_test.cc b/onnxruntime/test/providers/qnn/reduce_op_cpu_test.cc deleted file mode 100644 index c854d2e5dc5e7..0000000000000 --- a/onnxruntime/test/providers/qnn/reduce_op_cpu_test.cc +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#if !defined(ORT_MINIMAL_BUILD) - -#include -#include - -#include "test/optimizer/qdq_test_utils.h" -#include "test/providers/qnn/qnn_test_utils.h" - -#include "gtest/gtest.h" - -namespace onnxruntime { -namespace test { - -/** - * Creates a graph with a single reduce operator (e.g., ReduceSum, ReduceMin, etc.). Reduce operators take the - * axes of reduction as either a node attribute or an optional input (depending on opset). - * - * \param reduce_op_type The string denoting the reduce operator's type (e.g., "ReduceSum"). - * \param input_shape The shape of the input. Input data is randomly generated with this shape. - * \param axes_as_input True if the "axes" are specified as a node input. - * \param axes The axes of reduction. - * \param keepdims True if the output's rank should match the input. This is a node attribute that defaults to true. - * \param noop_with_empty_axes True if empty axes should force the node to act as a NoOp (no operation). - * This is a node attribute that defaults to false. - * \param domain The domain to assign to the graph node. - * - * \return A function that builds the graph with the provided builder. 
- */ -template -static GetTestModelFn BuildReduceOpTestCase(const std::string& reduce_op_type, - const std::vector& input_shape, - bool axes_as_input, std::vector axes, bool keepdims, - bool noop_with_empty_axes) { - return [reduce_op_type, input_shape, axes_as_input, axes, keepdims, - noop_with_empty_axes](ModelTestBuilder& builder) { - std::vector input_args; - - // Input data arg - input_args.push_back(builder.MakeInput(input_shape, static_cast(0), - static_cast(20))); - - // Axes input (initializer) for newer opsets. - if (axes_as_input) { - input_args.push_back(builder.MakeInitializer({static_cast(axes.size())}, axes)); - } - - auto* reduce_sum_output = builder.MakeOutput(); - Node& reduce_sum_node = builder.AddNode(reduce_op_type, input_args, {reduce_sum_output}); - reduce_sum_node.AddAttribute("keepdims", static_cast(keepdims)); - - // Older opsets have "axes" as a node attribute. - if (!axes_as_input) { - reduce_sum_node.AddAttribute("axes", axes); - } else { - reduce_sum_node.AddAttribute("noop_with_empty_axes", static_cast(noop_with_empty_axes)); - } - }; -} - -/** - * Runs a ReduceOp model on the QNN CPU backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param op_type The ReduceOp type (e.g., ReduceSum). - * \param opset The opset version. Some opset versions have "axes" as an attribute or input. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) - * \param keepdims Common attribute for all reduce operations. 
- */ -template -static void RunReduceOpCpuTest(const std::string& op_type, int opset, - ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All, - bool keepdims = true) { - ProviderOptions provider_options; -#if defined(_WIN32) - provider_options["backend_path"] = "QnnCpu.dll"; -#else - provider_options["backend_path"] = "libQnnCpu.so"; -#endif - - RunQnnModelTest(BuildReduceOpTestCase(op_type, - {2, 2}, // input shape - ReduceOpHasAxesInput(op_type, opset), - {0, 1}, // axes - keepdims, - false), // noop_with_empty_axes - provider_options, - opset, - expected_ep_assignment); -} - -// -// ReduceSum -// - -// Test creates a graph with a ReduceSum node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is int32. -// - Uses opset 13, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestInt32ReduceSumOpset13) { - RunReduceOpCpuTest("ReduceSum", 13); -} - -// Test creates a graph with a ReduceSum node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is int32. -// - Uses opset 11, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestInt32ReduceSumOpset11) { - RunReduceOpCpuTest("ReduceSum", 11); -} - -// Test creates a graph with a ReduceSum node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestFloatReduceSumOpset13) { - RunReduceOpCpuTest("ReduceSum", 13); -} - -// Test creates a graph with a ReduceSum node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. 
-// -// - The input and output data type is float. -// - Uses opset 11, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestFloatReduceSumOpset11) { - RunReduceOpCpuTest("ReduceSum", 11); -} - -// -// ReduceProd -// - -// Test creates a graph with a ReduceProd node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestReduceProdOpset18) { - RunReduceOpCpuTest("ReduceProd", 18); -} - -// Test creates a graph with a ReduceProd node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestReduceProdOpset13) { - RunReduceOpCpuTest("ReduceProd", 13); -} - -// -// ReduceMax -// - -// Test creates a graph with a ReduceMax node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestReduceMaxOpset18) { - RunReduceOpCpuTest("ReduceMax", 18); -} - -// Test creates a graph with a ReduceMax node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestReduceMaxOpset13) { - RunReduceOpCpuTest("ReduceMax", 13); -} - -// -// ReduceMin -// - -// Test creates a graph with a ReduceMin node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. 
-// -// - The input and output data type is float. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestReduceMinOpset18) { - RunReduceOpCpuTest("ReduceMin", 18); -} - -// Test creates a graph with a ReduceMin node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestReduceMinOpset13) { - RunReduceOpCpuTest("ReduceMin", 13); -} - -// -// ReduceMean -// - -// Test creates a graph with a ReduceMean node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnCPUBackendTests, TestReduceMeanOpset18) { - RunReduceOpCpuTest("ReduceMean", 18); -} - -// Test creates a graph with a ReduceMean node, and checks that all -// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. -// -// - The input and output data type is float. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnCPUBackendTests, TestReduceMeanOpset13) { - RunReduceOpCpuTest("ReduceMean", 13); -} - -} // namespace test -} // namespace onnxruntime - -#endif // !defined(ORT_MINIMAL_BUILD) \ No newline at end of file diff --git a/onnxruntime/test/providers/qnn/reduce_op_htp_test.cc b/onnxruntime/test/providers/qnn/reduce_op_htp_test.cc deleted file mode 100644 index 86b319eea0b14..0000000000000 --- a/onnxruntime/test/providers/qnn/reduce_op_htp_test.cc +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#if !defined(ORT_MINIMAL_BUILD) - -#include -#include "core/graph/graph.h" - -#include "test/optimizer/qdq_test_utils.h" -#include "test/providers/qnn/qnn_test_utils.h" - -#include "gtest/gtest.h" - -namespace onnxruntime { -namespace test { -#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) - -// Creates the following graph if axes is an input (newer opsets): -// _______________________ -// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) -// axes (int32, initializer) -> | Reduce___ | -// |_______________________| -// -// Creates the following graph if axes is an attribute (older opsets): -// _______________________ -// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) -// | Reduce___ | -// |_______________________| -// -template -GetTestModelFn BuildQDQReduceOpTestCase(const std::string& reduce_op_type, const std::vector& input_shape, - bool axes_as_input, const std::vector& axes, bool keepdims, - bool noop_with_empty_axes) { - return [reduce_op_type, input_shape, axes_as_input, axes, keepdims, - noop_with_empty_axes](ModelTestBuilder& builder) { - using QuantTypeLimits = std::numeric_limits; - QuantType input_quant_min_value = QuantTypeLimits::min(); - QuantType input_quant_max_value = QuantTypeLimits::max(); - - auto* input_data = builder.MakeInput(input_shape, -100.0f, 100.0f); - auto* final_output = builder.MakeOutput(); - - // input_data -> Q/DQ -> - auto* input_qdq_output = AddQDQNodePair(builder, input_data, .04f, - (input_quant_min_value + input_quant_max_value) / 2 + 1); - - // -> ReduceOp (e.g., ReduceSum) -> - std::vector reduce_op_inputs; - reduce_op_inputs.push_back(input_qdq_output); - - if (axes_as_input) { - reduce_op_inputs.push_back(builder.MakeInitializer({static_cast(axes.size())}, axes)); - } - - auto* reduce_sum_output = builder.MakeIntermediate(); - Node& reduce_sum_node = builder.AddNode(reduce_op_type, reduce_op_inputs, {reduce_sum_output}); - reduce_sum_node.AddAttribute("keepdims", 
static_cast(keepdims)); - - if (axes_as_input) { - reduce_sum_node.AddAttribute("noop_with_empty_axes", static_cast(noop_with_empty_axes)); - } else { - reduce_sum_node.AddAttribute("axes", axes); - } - - // -> Q/DQ -> final_output - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(reduce_sum_output, .039f, - (QuantTypeLimits::min() + QuantTypeLimits::max()) / 2 + 1, - q_output); - - builder.AddDequantizeLinearNode(q_output, .039f, - (QuantTypeLimits::min() + QuantTypeLimits::max()) / 2 + 1, - final_output); - }; -} - -/** - * Runs a ReduceOp model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param op_type The ReduceOp type (e.g., ReduceSum). - * \param opset The opset version. Some opset versions have "axes" as an attribute or input. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) - * \param keepdims Common attribute for all reduce operations. - */ -template -static void RunReduceOpQDQTest(const std::string& op_type, int opset, const std::vector& input_shape, - const std::vector& axes, - ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All, - bool keepdims = true) { - ProviderOptions provider_options; -#if defined(_WIN32) - provider_options["backend_path"] = "QnnHtp.dll"; -#else - provider_options["backend_path"] = "libQnnHtp.so"; -#endif - - // If QNN EP can support all ops, then we expect a single fused node in the graph. - // Otherwise, we'll get a graph with 5 individual nodes handled by CPU EP. - constexpr bool noop_with_empty_axes = false; - RunQnnModelTest(BuildQDQReduceOpTestCase(op_type, - input_shape, - ReduceOpHasAxesInput(op_type, opset), // New opset changed axes to input. 
- axes, - keepdims, - noop_with_empty_axes), - provider_options, - opset, - expected_ep_assignment); -} - -// -// ReduceSum -// - -// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 13, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumU8Opset13) { - RunReduceOpQDQTest("ReduceSum", 13, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 11, which has "axes" as an attribute. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumU8Opset11) { - RunReduceOpQDQTest("ReduceSum", 11, {1, 3, 4, 4}, {0, 1, 2, 3}); -} - -// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses int8 as the quantization type. -// - Uses opset 13, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumS8Opset13) { - RunReduceOpQDQTest("ReduceSum", 13, {2, 2}, {0, 1}); -} - -// Tests that keepdims = false generates expected results. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumS8Opset13_NoKeepDims) { - RunReduceOpQDQTest("ReduceSum", 13, {2, 2}, {1}, ExpectedEPNodeAssignment::All, false); -} - -// Test that we don't support rank 5 Reduce ops. -TEST_F(QnnHTPBackendTests, TestQDQReduceSumS8Opset13_Rank5Unsupported) { - RunReduceOpQDQTest("ReduceSum", 13, {1, 3, 4, 4, 2}, {0, 1, 2, 3, 4}, ExpectedEPNodeAssignment::None); -} - -// -// ReduceMax -// - -// ReduceMax on Linux's HTP emulator is always off by an amount equal to the final DQ.scale -// Works fine on windows arm64. 
-#if !defined(__linux__) -// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceMaxU8Opset18) { - RunReduceOpQDQTest("ReduceMax", 18, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnHTPBackendTests, TestQDQReduceMaxU8Opset13) { - RunReduceOpQDQTest("ReduceMax", 13, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses int8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceMaxS8Opset18) { - RunReduceOpQDQTest("ReduceMax", 18, {2, 2}, {0, 1}); -} -#endif // !defined(__linux__) - -// -// ReduceMin -// -// ReduceMin on Linux's HTP emulator is always off by an amount equal to the final DQ.scale -// Works fine on windows arm64. -#if !defined(__linux__) -// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceMinU8Opset18) { - RunReduceOpQDQTest("ReduceMin", 18, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. 
-// -// - Uses uint8 as the quantization type. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnHTPBackendTests, TestQDQReduceMinU8Opset13) { - RunReduceOpQDQTest("ReduceMin", 13, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// Uses int8 as the quantization type. -TEST_F(QnnHTPBackendTests, TestQDQReduceMinS8Opset18) { - RunReduceOpQDQTest("ReduceMin", 18, {2, 2}, {0, 1}); -} -#endif // !defined(__linux__) - -// -// ReduceMean -// - -// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. -TEST_F(QnnHTPBackendTests, TestQDQReduceMeanU8Opset18) { - RunReduceOpQDQTest("ReduceMean", 18, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses uint8 as the quantization type. -// - Uses opset 13, which has "axes" as an attribute. -TEST_F(QnnHTPBackendTests, TestQDQReduceMeanU8Opset13) { - RunReduceOpQDQTest("ReduceMean", 13, {2, 2}, {0, 1}); -} - -// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all -// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. -// -// - Uses int8 as the quantization type. -// - Uses opset 18, which has "axes" as an input. 
-TEST_F(QnnHTPBackendTests, TestQDQReduceMeanS8Opset18) { - RunReduceOpQDQTest("ReduceMean", 18, {2, 2}, {0, 1}); -} - -#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) -} // namespace test -} // namespace onnxruntime - -#endif \ No newline at end of file diff --git a/onnxruntime/test/providers/qnn/reduce_op_test.cc b/onnxruntime/test/providers/qnn/reduce_op_test.cc new file mode 100644 index 0000000000000..e0357de3e52f1 --- /dev/null +++ b/onnxruntime/test/providers/qnn/reduce_op_test.cc @@ -0,0 +1,618 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#if !defined(ORT_MINIMAL_BUILD) + +#include +#include "core/graph/graph.h" + +#include "test/optimizer/qdq_test_utils.h" +#include "test/providers/qnn/qnn_test_utils.h" + +#include "gtest/gtest.h" + +namespace onnxruntime { +namespace test { + +/** + * Creates a graph with a single reduce operator (e.g., ReduceSum, ReduceMin, etc.). Reduce operators take the + * axes of reduction as either a node attribute or an optional input (depending on opset). + * + * \param reduce_op_type The string denoting the reduce operator's type (e.g., "ReduceSum"). + * \param input_def The input definition (shape, data, etc.) + * \param axes_as_input True if the "axes" are specified as a node input. + * \param axes The axes of reduction. + * \param keepdims True if the output's rank should match the input. This is a node attribute that defaults to true. + * \param noop_with_empty_axes True if empty axes should force the node to act as a NoOp (no operation). + * This is a node attribute that defaults to false. + * \param domain The domain to assign to the graph node. + * + * \return A function that builds the graph with the provided builder. 
+ */ +template +static GetTestModelFn BuildReduceOpTestCase(const std::string& reduce_op_type, + const TestInputDef& input_def, + bool axes_as_input, std::vector axes, bool keepdims, + bool noop_with_empty_axes) { + return [reduce_op_type, input_def, axes_as_input, axes, keepdims, + noop_with_empty_axes](ModelTestBuilder& builder) { + std::vector input_args; + + // Input data arg + input_args.push_back(MakeTestInput(builder, input_def)); + + // Axes input (initializer) for newer opsets. + if (axes_as_input) { + input_args.push_back(builder.MakeInitializer({static_cast(axes.size())}, axes)); + } + + auto* reduce_sum_output = builder.MakeOutput(); + Node& reduce_sum_node = builder.AddNode(reduce_op_type, input_args, {reduce_sum_output}); + reduce_sum_node.AddAttribute("keepdims", static_cast(keepdims)); + + // Older opsets have "axes" as a node attribute. + if (!axes_as_input) { + reduce_sum_node.AddAttribute("axes", axes); + } else { + reduce_sum_node.AddAttribute("noop_with_empty_axes", static_cast(noop_with_empty_axes)); + } + }; +} + +/** + * Runs a ReduceOp model on the QNN CPU backend. Checks the graph node assignment, and that inference + * outputs for QNN and CPU match. + * + * \param op_type The ReduceOp type (e.g., ReduceSum). + * \param input_def The input definition (shape, data, etc.) + * \param axes The axes of reduction. + * \param opset The opset version. Some opset versions have "axes" as an attribute or input. + * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) + * \param keepdims Common attribute for all reduce operations. 
+ */ +template +static void RunReduceOpCpuTest(const std::string& op_type, + const TestInputDef& input_def, + const std::vector& axes, + bool keepdims, + int opset, + ExpectedEPNodeAssignment expected_ep_assignment) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnCpu.dll"; +#else + provider_options["backend_path"] = "libQnnCpu.so"; +#endif + + RunQnnModelTest(BuildReduceOpTestCase(op_type, + input_def, //{2, 2}, // input shape + ReduceOpHasAxesInput(op_type, opset), + axes, //{0, 1}, // axes + keepdims, + false), // noop_with_empty_axes + provider_options, + opset, + expected_ep_assignment); +} + +// +// ReduceSum +// + +// Test creates a graph with a ReduceSum node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is int32. +// - Uses opset 13, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceSumOpset13_Int32) { + RunReduceOpCpuTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceSum node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is int32. +// - Uses opset 11, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceSumOpset11_Int32) { + RunReduceOpCpuTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 11, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceSum node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an input. 
+TEST_F(QnnCPUBackendTests, ReduceSumOpset13_Float) { + RunReduceOpCpuTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceSum node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 11, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceSumOpset11_Float) { + RunReduceOpCpuTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 11, + ExpectedEPNodeAssignment::All); +} + +// +// ReduceProd +// + +// Test creates a graph with a ReduceProd node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceProdOpset18) { + RunReduceOpCpuTest("ReduceProd", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 18, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceProd node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceProdOpset13) { + RunReduceOpCpuTest("ReduceProd", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMax +// + +// Test creates a graph with a ReduceMax node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. 
+// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceMaxOpset18) { + RunReduceOpCpuTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 18, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceMax node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceMaxOpset13) { + RunReduceOpCpuTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMin +// + +// Test creates a graph with a ReduceMin node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceMinOpset18) { + RunReduceOpCpuTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 18, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceMin node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceMinOpset13) { + RunReduceOpCpuTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMean +// + +// Test creates a graph with a ReduceMean node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. 
+// +// - The input and output data type is float. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnCPUBackendTests, ReduceMeanOpset18) { + RunReduceOpCpuTest("ReduceMean", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 18, + ExpectedEPNodeAssignment::All); +} + +// Test creates a graph with a ReduceMean node, and checks that all +// nodes are supported by the QNN EP (cpu backend), and that the inference results match the CPU EP results. +// +// - The input and output data type is float. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnCPUBackendTests, ReduceMeanOpset13) { + RunReduceOpCpuTest("ReduceMean", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + std::vector{0, 1}, + true, // keepdims + 13, + ExpectedEPNodeAssignment::All); +} + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) + +// Creates the following graph if axes is an input (newer opsets): +// _______________________ +// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) +// axes (int32, initializer) -> | Reduce___ | +// |_______________________| +// +// Creates the following graph if axes is an attribute (older opsets): +// _______________________ +// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32) +// | Reduce___ | +// |_______________________| +// +template +GetTestQDQModelFn BuildQDQReduceOpTestCase(const std::string& reduce_op_type, + const TestInputDef& input_def, + bool axes_as_input, const std::vector& axes, bool keepdims, + bool noop_with_empty_axes) { + return [reduce_op_type, input_def, axes_as_input, axes, keepdims, + noop_with_empty_axes](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input -> Q -> DQ -> + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + // -> ReduceOp (e.g., ReduceSum) -> + 
std::vector reduce_op_inputs; + reduce_op_inputs.push_back(input_qdq); + + if (axes_as_input) { + reduce_op_inputs.push_back(builder.MakeInitializer({static_cast(axes.size())}, axes)); + } + + auto* op_output = builder.MakeIntermediate(); + Node& reduce_sum_node = builder.AddNode(reduce_op_type, reduce_op_inputs, {op_output}); + reduce_sum_node.AddAttribute("keepdims", static_cast(keepdims)); + + if (axes_as_input) { + reduce_sum_node.AddAttribute("noop_with_empty_axes", static_cast(noop_with_empty_axes)); + } else { + reduce_sum_node.AddAttribute("axes", axes); + } + + // -> Q -> DQ -> final output + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, output_qparams[0].zero_point); + }; +} + +/** + * Runs a ReduceOp model on the QNN HTP backend. Checks the graph node assignment, and that inference + * outputs for QNN and CPU match. + * + * \param op_type The ReduceOp type (e.g., ReduceSum). + * \param input_def The input definition (shape, data, etc.). + * \param axes The axes input (or attribute). + * \param keepdims Common attribute for all reduce operations. + * \param opset The opset version. Some opset versions have "axes" as an attribute or input. + * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) + */ +template +static void RunReduceOpQDQTest(const std::string& op_type, + const TestInputDef& input_def, + const std::vector& axes, + bool keepdims, + int opset, + ExpectedEPNodeAssignment expected_ep_assignment) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + constexpr bool noop_with_empty_axes = false; + const bool axes_as_input = ReduceOpHasAxesInput(op_type, opset); // Later opsets have "axes" as an input. 
+ + TestQDQModelAccuracy(BuildReduceOpTestCase(op_type, input_def, axes_as_input, axes, keepdims, + noop_with_empty_axes), + BuildQDQReduceOpTestCase(op_type, input_def, axes_as_input, axes, keepdims, + noop_with_empty_axes), + provider_options, + opset, + expected_ep_assignment, + 1e-5f); +} + +// +// ReduceSum +// + +// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 13, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceSumU8Opset13) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// TODO: Investigate inaccuracy +// Input values: 3.21289 -5.9981 -1.72799 6.27263 +// Input quantization params [-10, 10]: scale=0.0784313753, zero_point=127 +// +// Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0068997270427644253, zero_point=0. +// Expected val: 1.7594304084777832 +// QNN QDQ val: 1.731831431388855 (err 0.027598977088928223) +// CPU QDQ val: 1.7594304084777832 (err 0) +TEST_F(QnnHTPBackendTests, DISABLED_ReduceSumU8Opset13_Inaccurate) { + const std::vector input_data = {3.21289f, -5.9981f, -1.72799f, 6.27263f}; + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, input_data).OverrideValueRange(-10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} +// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 11, which has "axes" as an attribute. 
+TEST_F(QnnHTPBackendTests, ReduceSumU8Opset11) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}), + {0, 1}, // axes + true, // keepdims + 11, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses int8 as the quantization type. +// - Uses opset 13, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Tests that keepdims = false generates expected results. +TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13_NoKeepDims) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {1}, // axes + false, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Test that we don't support rank 5 Reduce ops. +TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13_Rank5Unsupported) { + RunReduceOpQDQTest("ReduceSum", + TestInputDef({1, 3, 4, 4, 2}, false, -10.0f, 10.0f), + {0, 1, 2, 3, 4}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::None); +} + +// +// ReduceMax +// + +// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. 
+TEST_F(QnnHTPBackendTests, ReduceMaxU8Opset18) { + RunReduceOpQDQTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnHTPBackendTests, ReduceMaxU8Opset13) { + RunReduceOpQDQTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMax -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses int8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceMaxS8Opset18) { + RunReduceOpQDQTest("ReduceMax", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMin +// + +// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceMinU8Opset18) { + RunReduceOpQDQTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. 
+// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnHTPBackendTests, ReduceMinU8Opset13) { + RunReduceOpQDQTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMin -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// Uses int8 as the quantization type. +TEST_F(QnnHTPBackendTests, ReduceMinS8Opset18) { + RunReduceOpQDQTest("ReduceMin", + TestInputDef({2, 2}, false, -10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// +// ReduceMean +// + +// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. +TEST_F(QnnHTPBackendTests, ReduceMeanU8Opset18) { + RunReduceOpQDQTest("ReduceMean", + TestInputDef({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// TODO: Investigate inaccuracy +// Input values: 3.21289 -5.9981 -1.72799 6.27263 +// Input quantization params [-10, 10]: scale=0.0784313753, zero_point=127 +// +// Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0017249317606911063, zero_point=0. 
+// Expected val: 0.4398576021194458 +// QNN QDQ val: 0.43295785784721375 (err 0.0068997442722320557) +// CPU QDQ val: 0.4398576021194458 (err 0) +TEST_F(QnnHTPBackendTests, DISABLED_ReduceMeanU8Opset18_Inaccurate) { + const std::vector input_data = {3.21289f, -5.9981f, -1.72799f, 6.27263f}; + RunReduceOpQDQTest("ReduceMean", + TestInputDef({2, 2}, false, input_data).OverrideValueRange(-10.0f, 10.0f), + {0, 1}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +// - Uses opset 13, which has "axes" as an attribute. +TEST_F(QnnHTPBackendTests, ReduceMeanU8Opset13) { + RunReduceOpQDQTest("ReduceMean", + TestInputDef({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}), + {0, 1}, // axes + true, // keepdims + 13, // opset + ExpectedEPNodeAssignment::All); +} + +// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses int8 as the quantization type. +// - Uses opset 18, which has "axes" as an input. 
+TEST_F(QnnHTPBackendTests, ReduceMeanS8Opset18) { + RunReduceOpQDQTest("ReduceMean", + TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {0, 1, 2, 3}, // axes + true, // keepdims + 18, // opset + ExpectedEPNodeAssignment::All); +} + +#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +} // namespace test +} // namespace onnxruntime + +#endif \ No newline at end of file diff --git a/onnxruntime/test/providers/qnn/resize_test.cc b/onnxruntime/test/providers/qnn/resize_test.cc index c5913ad3db5b8..1d900a41b1331 100644 --- a/onnxruntime/test/providers/qnn/resize_test.cc +++ b/onnxruntime/test/providers/qnn/resize_test.cc @@ -27,18 +27,18 @@ namespace test { * * \return A function that builds the graph with the provided builder. */ -static GetTestModelFn BuildResizeTestCase(const std::vector& shape, - const std::vector& sizes_data, - const std::string& mode = "nearest", - const std::string& coordinate_transformation_mode = "half_pixel", - const std::string& nearest_mode = "round_prefer_floor") { - return [shape, sizes_data, mode, coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder) { - auto* input = builder.MakeInput(shape, 0.0f, 20.0f); - auto* roi = builder.MakeInitializer({0}, {}); - auto* scales = builder.MakeInitializer({0}, {}); - auto* sizes = builder.Make1DInitializer(sizes_data); - - auto* output = builder.MakeOutput(); +static GetTestModelFn GetResizeModelBuilder(const TestInputDef& input_def, + const std::vector& sizes_data, + const std::string& mode = "nearest", + const std::string& coordinate_transformation_mode = "half_pixel", + const std::string& nearest_mode = "round_prefer_floor") { + return [input_def, sizes_data, mode, coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* roi = builder.MakeInitializer({0}, {}); + NodeArg* scales = builder.MakeInitializer({0}, {}); + NodeArg* sizes = builder.Make1DInitializer(sizes_data); 
+ + NodeArg* output = builder.MakeOutput(); Node& resize_node = builder.AddNode("Resize", {input, roi, scales, sizes}, {output}); resize_node.AddAttribute("mode", mode); resize_node.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode); @@ -49,17 +49,17 @@ static GetTestModelFn BuildResizeTestCase(const std::vector& shape, }; } -static GetTestModelFn BuildResizeTestCaseWithScales(const std::vector& shape, - const std::vector& scales_data, - const std::string& mode = "nearest", - const std::string& coordinate_transformation_mode = "half_pixel", - const std::string& nearest_mode = "round_prefer_floor") { - return [shape, scales_data, mode, coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder) { - auto* input = builder.MakeInput(shape, 0.0f, 20.0f); - auto* roi = builder.MakeInitializer({0}, {}); - auto* scales = builder.Make1DInitializer(scales_data); - - auto* output = builder.MakeOutput(); +static GetTestModelFn GetResizeModelBuilderWithScales(const TestInputDef& input_def, + const std::vector& scales_data, + const std::string& mode = "nearest", + const std::string& coordinate_transformation_mode = "half_pixel", + const std::string& nearest_mode = "round_prefer_floor") { + return [input_def, scales_data, mode, coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder) { + NodeArg* input = MakeTestInput(builder, input_def); + NodeArg* roi = builder.MakeInitializer({0}, {}); + NodeArg* scales = builder.Make1DInitializer(scales_data); + + NodeArg* output = builder.MakeOutput(); Node& resize_node = builder.AddNode("Resize", {input, roi, scales}, {output}); resize_node.AddAttribute("mode", mode); resize_node.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode); @@ -70,11 +70,45 @@ static GetTestModelFn BuildResizeTestCaseWithScales(const std::vector& }; } +template +static GetTestQDQModelFn GetQDQResizeModelBuilder(const TestInputDef& input_def, + const std::vector& sizes_data, + const 
std::string& mode = "nearest", + const std::string& coordinate_transformation_mode = "half_pixel", + const std::string& nearest_mode = "round_prefer_floor") { + return [input_def, sizes_data, mode, + coordinate_transformation_mode, nearest_mode](ModelTestBuilder& builder, + std::vector>& output_qparams) { + // input -> Q -> DQ -> + NodeArg* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + + NodeArg* roi = builder.MakeInitializer({0}, {}); + NodeArg* scales = builder.MakeInitializer({0}, {}); + NodeArg* sizes = builder.Make1DInitializer(sizes_data); + + NodeArg* resize_output = builder.MakeIntermediate(); + Node& resize_node = builder.AddNode("Resize", {input_qdq, roi, scales, sizes}, {resize_output}); + resize_node.AddAttribute("mode", mode); + resize_node.AddAttribute("coordinate_transformation_mode", coordinate_transformation_mode); + + if (mode == "nearest") { + resize_node.AddAttribute("nearest_mode", nearest_mode); + } + + // Resize requires the output quantization parameters to match the input. + output_qparams[0] = input_qparams; + AddQDQNodePairWithOutputAsGraphOutput(builder, resize_output, output_qparams[0].scale, + output_qparams[0].zero_point); + }; +} + /** * Runs a Resize model on the QNN CPU backend. Checks the graph node assignment, and that inference * outputs for QNN and CPU match. * - * \param shape The shape of the input and output. Input data is randomly generated with this shape. + * \param input_def The input definition (shape, data, etc). * \param sizes_data The sizes input which determines the output shape. * \param mode The resize mode (e.g., nearest, linear). * \param coordinate_transformation_mode The coordinate transformation mode (e.g., half_pixel, pytorch_half_pixel). 
@@ -82,7 +116,7 @@ static GetTestModelFn BuildResizeTestCaseWithScales(const std::vector& * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). * \param opset The opset version to use. */ -static void RunCPUResizeOpTest(const std::vector& shape, const std::vector& sizes_data, +static void RunCPUResizeOpTest(const TestInputDef& input_def, const std::vector& sizes_data, const std::string& mode, const std::string& coordinate_transformation_mode, const std::string& nearest_mode, ExpectedEPNodeAssignment expected_ep_assignment, @@ -94,13 +128,13 @@ static void RunCPUResizeOpTest(const std::vector& shape, const std::vec provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildResizeTestCase(shape, sizes_data, mode, coordinate_transformation_mode, nearest_mode), + RunQnnModelTest(GetResizeModelBuilder(input_def, sizes_data, mode, coordinate_transformation_mode, nearest_mode), provider_options, opset, expected_ep_assignment); } -static void RunCPUResizeOpTestWithScales(const std::vector& shape, const std::vector& scales_data, +static void RunCPUResizeOpTestWithScales(const TestInputDef& input_def, const std::vector& scales_data, const std::string& mode, const std::string& coordinate_transformation_mode, const std::string& nearest_mode, ExpectedEPNodeAssignment expected_ep_assignment, @@ -112,17 +146,18 @@ static void RunCPUResizeOpTestWithScales(const std::vector& shape, cons provider_options["backend_path"] = "libQnnCpu.so"; #endif - RunQnnModelTest(BuildResizeTestCaseWithScales(shape, scales_data, mode, coordinate_transformation_mode, nearest_mode), + RunQnnModelTest(GetResizeModelBuilderWithScales(input_def, scales_data, mode, coordinate_transformation_mode, nearest_mode), provider_options, opset, expected_ep_assignment); } template -static void RunQDQResizeOpTest(const std::vector& shape, const std::vector& sizes_data, +static void RunQDQResizeOpTest(const TestInputDef& input_def, + const 
std::vector& sizes_data, const std::string& mode, const std::string& coordinate_transformation_mode, const std::string& nearest_mode, - ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err) { + ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -130,12 +165,13 @@ static void RunQDQResizeOpTest(const std::vector& shape, const std::vec provider_options["backend_path"] = "libQnnHtp.so"; #endif - RunQnnModelTest(BuildQDQResizeTestCase(shape, sizes_data, mode, coordinate_transformation_mode, - nearest_mode, true), - provider_options, - 18, // opset - expected_ep_assignment, - fp32_abs_err); + TestQDQModelAccuracy(GetResizeModelBuilder(input_def, sizes_data, mode, coordinate_transformation_mode, nearest_mode), + GetQDQResizeModelBuilder(input_def, sizes_data, mode, coordinate_transformation_mode, + nearest_mode), + provider_options, + 18, // opset + expected_ep_assignment, + 1e-5f); } // @@ -152,57 +188,68 @@ static void RunQDQResizeOpTest(const std::vector& shape, const std::vec // Upsample that uses "round_prefer_floor" as the "nearest_mode". // coordinate_transformation_mode: "half_pixel" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeUpsampleNearestHalfPixel_rpf) { - RunCPUResizeOpTest({1, 2, 7, 5}, {1, 2, 21, 10}, "nearest", "half_pixel", "round_prefer_floor", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestHalfPixel_rpf) { + RunCPUResizeOpTest(TestInputDef({1, 2, 7, 5}, false, -10.0f, 10.0f), // Random input w/ range [-10, 10] + {1, 2, 21, 10}, // Sizes + "nearest", + "half_pixel", + "round_prefer_floor", ExpectedEPNodeAssignment::All); } // Upsample that uses "round_prefer_ceil" as the "nearest_mode". 
// coordinate_transformation_mode: "half_pixel" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeUpsampleNearestHalfPixel_rpc) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 7, 5}, "nearest", "half_pixel", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestHalfPixel_rpc) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 7, 5}, "nearest", "half_pixel", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Downsample that uses "round_prefer_ceil" as the "nearest_mode". // coordinate_transformation_mode: "half_pixel" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestHalfPixel_rpc) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 1, 3}, "nearest", "half_pixel", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestHalfPixel_rpc) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 1, 3}, "nearest", "half_pixel", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Downsample that uses "round_prefer_floor" as the "nearest_mode". // coordinate_transformation_mode: "half_pixel" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestHalfPixel_rpf) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 1, 2}, "nearest", "half_pixel", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestHalfPixel_rpf) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 1, 2}, "nearest", "half_pixel", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Upsample that uses "round_prefer_floor" as the "nearest_mode". 
// coordinate_transformation_mode: "align_corners" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeUpsampleNearestAlignCorners_rpf) { - RunCPUResizeOpTest({1, 2, 7, 5}, {1, 2, 21, 10}, "nearest", "align_corners", "round_prefer_floor", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestAlignCorners_rpf) { + RunCPUResizeOpTest(TestInputDef({1, 2, 7, 5}, false, -10.0f, 10.0f), + {1, 2, 21, 10}, "nearest", "align_corners", "round_prefer_floor", ExpectedEPNodeAssignment::All); } // Upsample that uses "round_prefer_ceil" as the "nearest_mode". // coordinate_transformation_mode: "align_corners" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeUpsampleNearestAlignCorners_rpc) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 7, 5}, "nearest", "align_corners", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestAlignCorners_rpc) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 7, 5}, "nearest", "align_corners", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Downsample that uses "round_prefer_ceil" as the "nearest_mode". // coordinate_transformation_mode: "align_corners" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestAlignCorners_rpc) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 1, 3}, "nearest", "align_corners", "round_prefer_ceil", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestAlignCorners_rpc) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 1, 3}, "nearest", "align_corners", "round_prefer_ceil", ExpectedEPNodeAssignment::All); } // Downsample that uses "round_prefer_floor" as the "nearest_mode". 
// coordinate_transformation_mode: "align_corners" -TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestAlignCorners_rpf) { - RunCPUResizeOpTest({1, 1, 2, 4}, {1, 1, 1, 2}, "nearest", "align_corners", "round_prefer_floor", +TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestAlignCorners_rpf) { + RunCPUResizeOpTest(TestInputDef({1, 1, 2, 4}, false, -10.0f, 10.0f), + {1, 1, 1, 2}, "nearest", "align_corners", "round_prefer_floor", ExpectedEPNodeAssignment::All); } @@ -210,23 +257,27 @@ TEST_F(QnnCPUBackendTests, DISABLED_TestResizeDownsampleNearestAlignCorners_rpf) // Cpu tests that use the "linear" mode. // -TEST_F(QnnCPUBackendTests, TestResize2xLinearHalfPixel) { - RunCPUResizeOpTest({1, 3, 4, 5}, {1, 3, 8, 10}, "linear", "half_pixel", "", +TEST_F(QnnCPUBackendTests, Resize2xLinearHalfPixel) { + RunCPUResizeOpTest(TestInputDef({1, 3, 4, 5}, false, -10.0f, 10.0f), + {1, 3, 8, 10}, "linear", "half_pixel", "", ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestResize2xLinearHalfPixel_scales) { - RunCPUResizeOpTestWithScales({1, 3, 4, 5}, {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "half_pixel", "", +TEST_F(QnnCPUBackendTests, Resize2xLinearHalfPixel_scales) { + RunCPUResizeOpTestWithScales(TestInputDef({1, 3, 4, 5}, false, -10.0f, 10.0f), + {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "half_pixel", "", ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestResize2xLinearAlignCorners) { - RunCPUResizeOpTest({1, 3, 4, 5}, {1, 3, 8, 10}, "linear", "align_corners", "", +TEST_F(QnnCPUBackendTests, Resize2xLinearAlignCorners) { + RunCPUResizeOpTest(TestInputDef({1, 3, 4, 5}, false, -10.0f, 10.0f), + {1, 3, 8, 10}, "linear", "align_corners", "", ExpectedEPNodeAssignment::All); } -TEST_F(QnnCPUBackendTests, TestResize2xLinearAlignCorners_scales) { - RunCPUResizeOpTestWithScales({1, 3, 4, 5}, {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "align_corners", "", +TEST_F(QnnCPUBackendTests, Resize2xLinearAlignCorners_scales) { + 
RunCPUResizeOpTestWithScales(TestInputDef({1, 3, 4, 5}, false, -10.0f, 10.0f), + {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "align_corners", "", ExpectedEPNodeAssignment::All); } @@ -235,19 +286,22 @@ TEST_F(QnnCPUBackendTests, TestResize2xLinearAlignCorners_scales) { // HTP tests: // -TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xLinearPytorchHalfPixel) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 8, 8}, "linear", "pytorch_half_pixel", "", - ExpectedEPNodeAssignment::All, 0.0031f); +TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearPytorchHalfPixel) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 8, 8}, "linear", "pytorch_half_pixel", "", + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xNearestHalfPixelRoundPreferFloor) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 8, 8}, "nearest", "half_pixel", "round_prefer_floor", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestHalfPixelRoundPreferFloor) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 8, 8}, "nearest", "half_pixel", "round_prefer_floor", + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xNearestAsymmetricFloor) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 8, 8}, "nearest", "asymmetric", "floor", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestAsymmetricFloor) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 8, 8}, "nearest", "asymmetric", "floor", + ExpectedEPNodeAssignment::All); } // TODO: Investigate with Qualcomm. 
The qnn-onnx-converter tool translates ONNX Resize [nearest, asymmetric, ceil] to @@ -259,19 +313,22 @@ TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xNearestAsymmetricFloor) { // are an almost-equal pair // Actual : 16 - byte object, // where the value pair(0.15, 0.501) at index #1 don't match, which is 0.351 from 0.15 -TEST_F(QnnHTPBackendTests, DISABLED_TestQDQU8Resize2xNearestAsymmetricCeil) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 8, 8}, "nearest", "asymmetric", "ceil", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, DISABLED_ResizeU8_2xNearestAsymmetricCeil) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 8, 8}, "nearest", "asymmetric", "ceil", + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestQDQU8Resize3xNearestAsymmetricFloor) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 12, 12}, "nearest", "asymmetric", "floor", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, ResizeU8_3xNearestAsymmetricFloor) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 12, 12}, "nearest", "asymmetric", "floor", + ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestQDQU8ResizeHalfNearestAsymmetricFloor) { - RunQDQResizeOpTest({1, 3, 4, 4}, {1, 3, 2, 2}, "nearest", "asymmetric", "floor", - ExpectedEPNodeAssignment::All, 1e-5f); +TEST_F(QnnHTPBackendTests, ResizeU8_HalfNearestAsymmetricFloor) { + RunQDQResizeOpTest(TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), + {1, 3, 2, 2}, "nearest", "asymmetric", "floor", + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc index 93bd96e9549e8..5b4049d52c16f 100644 --- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc @@ -20,6 +20,21 @@ namespace test { using 
UInt8Limits = std::numeric_limits; +template +static GetTestModelFn BuildUnaryOpTestCase(const std::string& op_type, const TestInputDef& input0_def, + const std::vector& attrs, + const std::string& domain = kOnnxDomain) { + return [op_type, input0_def, attrs, domain](ModelTestBuilder& builder) { + NodeArg* input0 = MakeTestInput(builder, input0_def); + + auto* output = builder.MakeOutput(); + auto& op_node = builder.AddNode(op_type, {input0}, {output}, domain); + for (const auto& attr : attrs) { + op_node.AddAttributeProto(attr); + } + }; +} + // Creates the graph: // _______________________ // | | @@ -28,60 +43,100 @@ using UInt8Limits = std::numeric_limits; // // Currently used to test QNN EP. template -GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const TestInputDef& input_def, - const std::string& op_type, - const std::vector& attrs = {}, - const std::string& domain = kOnnxDomain) { - return [input_def, op_type, attrs, domain](ModelTestBuilder& builder) { - const InputQType quant_zero_point = 0; - const float quant_scale = 1.0f; - - auto* input = MakeTestInput(builder, input_def); - auto* dq_input = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(input, quant_scale, quant_zero_point, dq_input); +GetTestQDQModelFn BuildQDQUnaryOpTestCase(const TestInputDef& input_def, + const std::string& op_type, + const std::vector& attrs, + const std::string& domain = kOnnxDomain) { + return [input_def, op_type, attrs, domain](ModelTestBuilder& builder, + std::vector>& output_qparams) { + auto* input = MakeTestInput(builder, input_def); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); auto* op_output = builder.MakeIntermediate(); - auto& op_node = builder.AddNode(op_type, {dq_input}, {op_output}, domain); + auto& op_node = builder.AddNode(op_type, {input_qdq}, {op_output}, domain); for (const auto& attr : attrs) { op_node.AddAttributeProto(attr); 
} - auto* q_output = builder.MakeOutput(); - builder.AddQuantizeLinearNode(op_output, quant_scale, quant_zero_point, q_output); + // op_output -> Q -> DQ -> output + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, output_qparams[0].zero_point); }; } -template -static GetTestModelFn BuildQDQBinaryOpTestCase(const std::string& op_type, const TestInputDef& input0_def, - const TestInputDef& input1_def) { +/** + * Runs a QDQ unary operator model on the QNN HTP backend. Checks the graph node assignment, and that inference + * outputs for QNN and CPU match. + * + * \param input_def The input definition (shape, data, etc). + * \param op_type The type of the operator under test; `attrs` and `domain` supply its attributes and domain. + * \param opset_version The opset version to use. + * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). + */ +template +static void RunQDQUnaryOpTest(const TestInputDef& input_def, const std::string& op_type, + const std::vector& attrs, + int opset_version, + ExpectedEPNodeAssignment expected_ep_assignment, + const std::string& domain = kOnnxDomain) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + // Runs model with DQ-> Op -> Q and compares the outputs of the CPU and QNN EPs.
+ TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, attrs, domain), + BuildQDQUnaryOpTestCase(input_def, op_type, attrs, domain), + provider_options, + opset_version, + expected_ep_assignment, + 1e-5f); +} + +template +static GetTestModelFn BuildBinaryOpTestCase(const std::string& op_type, const TestInputDef& input0_def, + const TestInputDef& input1_def) { return [op_type, input0_def, input1_def](ModelTestBuilder& builder) { - const InputQType zero_point = std::numeric_limits::max() / 2; - constexpr float qdq_scale = 0.0004f; + NodeArg* input0 = MakeTestInput(builder, input0_def); + NodeArg* input1 = MakeTestInput(builder, input1_def); + auto* output = builder.MakeOutput(); + builder.AddNode(op_type, {input0, input1}, {output}); + }; +} + +template +static GetTestQDQModelFn BuildQDQBinaryOpTestCase(const std::string& op_type, + const TestInputDef& input0_def, + const TestInputDef& input1_def) { + return [op_type, input0_def, input1_def](ModelTestBuilder& builder, + std::vector>& output_qparams) { NodeArg* input0 = MakeTestInput(builder, input0_def); NodeArg* input1 = MakeTestInput(builder, input1_def); - NodeArg* output = builder.MakeOutput(); // input -> Q -> DQ -> Op - auto* qdq0_output = AddQDQNodePair(builder, input0, qdq_scale, zero_point); - auto* qdq1_output = AddQDQNodePair(builder, input1, qdq_scale, zero_point); + QuantParams input0_qparams = GetTestInputQuantParams(input0_def); + auto* qdq0_output = AddQDQNodePair(builder, input0, input0_qparams.scale, input0_qparams.zero_point); + + QuantParams input1_qparams = GetTestInputQuantParams(input1_def); + auto* qdq1_output = AddQDQNodePair(builder, input1, input1_qparams.scale, input1_qparams.zero_point); // Op -> op_output auto* op_output = builder.MakeIntermediate(); builder.AddNode(op_type, {qdq0_output, qdq1_output}, {op_output}); // op_output -> Q -> DQ -> output - auto* op_q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(op_output, qdq_scale, zero_point, op_q_output); - 
builder.AddDequantizeLinearNode(op_q_output, qdq_scale, zero_point, output); + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, + output_qparams[0].zero_point); }; } -template -static void RunQDQBinaryOpTest(const std::string& op_type, const TestInputDef& input0_def, - const TestInputDef& input1_def, - int opset_version, +template +static void RunQDQBinaryOpTest(const std::string& op_type, const TestInputDef& input0_def, + const TestInputDef& input1_def, int opset_version, ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) @@ -90,28 +145,18 @@ static void RunQDQBinaryOpTest(const std::string& op_type, const TestInputDef(op_type, input0_def, input1_def), - provider_options, - opset_version, - expected_ep_assignment); + TestQDQModelAccuracy(BuildBinaryOpTestCase(op_type, input0_def, input1_def), + BuildQDQBinaryOpTestCase(op_type, input0_def, input1_def), + provider_options, + opset_version, + expected_ep_assignment, + 1e-5f); } -/** - * Runs an Simple Op model on the QNN HTP backend. Checks the graph node assignment, and that inference - * outputs for QNN and CPU match. - * - * \param input_shape The input's shape. - * \param test_description Description of the test for error reporting. - * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None). - * \param num_modes_in_graph The number of expected nodes in the graph. 
- */ -template -static void RunQDQSingleInputOpTest(const TestInputDef& input_def, const std::string& op_type, - const std::vector& attrs, - int opset_version, - ExpectedEPNodeAssignment expected_ep_assignment, - const std::string& domain = kOnnxDomain) { +template +static void RunBinaryOpTest(const std::string& op_type, const TestInputDef& input0_def, + const TestInputDef& input1_def, int opset_version, + ExpectedEPNodeAssignment expected_ep_assignment) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -119,8 +164,8 @@ static void RunQDQSingleInputOpTest(const TestInputDef& input_def, c provider_options["backend_path"] = "libQnnHtp.so"; #endif - // Runs model with DQ-> Op -> Q and compares the outputs of the CPU and QNN EPs. - RunQnnModelTest(BuildQDQSingleInputOpTestCase(input_def, op_type, attrs, domain), + // Runs model with a non-QDQ binary op and compares the outputs of the CPU and QNN EPs. + RunQnnModelTest(BuildBinaryOpTestCase(op_type, input0_def, input1_def), provider_options, opset_version, expected_ep_assignment); @@ -128,87 +173,143 @@ static void RunQDQSingleInputOpTest(const TestInputDef& input_def, c // Check that QNN compiles DQ -> Gelu -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQGeluTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Gelu", {}, 11, ExpectedEPNodeAssignment::All, kMSDomain); +TEST_F(QnnHTPBackendTests, UnaryOp_Gelu) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] + "Gelu", + {}, + 11, + ExpectedEPNodeAssignment::All, + kMSDomain); // GeLu is a contrib op. } // Check that QNN compiles DQ -> Elu -> Q as a single unit. // Use an input of rank 3.
-TEST_F(QnnHTPBackendTests, TestQDQEluTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Elu", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Elu) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] + "Elu", + {}, + 11, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> HardSwish -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQHardSwishTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "HardSwish", {}, 14, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_HardSwish) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] + "HardSwish", + {}, + 14, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Atan -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQAtanTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Atan", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Atan) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), // Input range [-10.0, 10.0f] + "Atan", + {}, + 14, + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Asin -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQAsinTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, 0, 1), // input range 0 ~ 1 - "Asin", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Asin) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -0.5f, 0.5f), // input range -0.5 to 0.5 + "Asin", {}, + 13, ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Sign -> Q as a single unit. // Use an input of rank 3. 
-TEST_F(QnnHTPBackendTests, TestQDQSignTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Sign", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Sign) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), + "Sign", {}, + 13, ExpectedEPNodeAssignment::All); } -// Check that QNN compiles DQ -> Sign -> Q as a single unit. +// Check that QNN compiles DQ -> Sin -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, TestQDQSinTest) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Sin", {}, 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Sin) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -3.14159f, 3.14159f), + "Sin", {}, + 11, ExpectedEPNodeAssignment::All); +} + +// Check that QNN compiles DQ -> Cos -> Q as a single unit. +// Use an input of rank 3. +TEST_F(QnnHTPBackendTests, UnaryOp_Cos) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, {-3.14159f, -1.5f, -0.5f, 0.0f, 1.5, 3.14159f}), + "Cos", {}, + 11, ExpectedEPNodeAssignment::All); +} + +// TODO: Inaccuracy when computing cos(-1.88436) +// +// cos(-1.88436f) fp32 cpu ep = -0.308450460 +// cos(-1.88436f) qdq cpu ep = -0.298039228 +// cos(-1.88436f) qdq QNN ep = -0.321568638 +// +// QNN error: 0.013118177652359009, CPU error: 0.010411232709884644 +// +// input quant params: scale=0.0246399231, zero_point=127 +// output quant params: scale=0.00784313772, zero_point=127 +TEST_F(QnnHTPBackendTests, DISABLED_UnaryOp_Cos_Inaccurate) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, {-3.14159f, -1.88436f, -0.542863f, 0.0f, 1.05622f, 3.14159f}), + "Cos", {}, + 11, ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that the default axis (-1) for SoftMax opset 13 works. 
-TEST_F(QnnHTPBackendTests, TestQDQSoftmax13_DefaultAxis) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Softmax", - {}, // Uses default axis of -1 for opset 13 - 13, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), + "Softmax", + {}, // Uses default axis of -1 for opset 13 + 13, ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that an axis != -1 is not supported. -TEST_F(QnnHTPBackendTests, TestQDQSoftmax13_UnsupportedAxis) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Softmax", - {utils::MakeAttribute("axis", static_cast(1))}, - 13, ExpectedEPNodeAssignment::None); +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), + "Softmax", + {utils::MakeAttribute("axis", static_cast(1))}, + 13, ExpectedEPNodeAssignment::None); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that the default axis (1) for SoftMax opset < 13 does not work. -TEST_F(QnnHTPBackendTests, TestQDQSoftmax11_DefaultAxisFails) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Softmax", - {}, // Uses default axis of 1 for opset < 13. - 11, ExpectedEPNodeAssignment::None); +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax11_DefaultAxisFails) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), + "Softmax", + {}, // Uses default axis of 1 for opset < 13. + 11, ExpectedEPNodeAssignment::None); } // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that setting an axis value of -1 works for Softmax opset < 13. 
-TEST_F(QnnHTPBackendTests, TestQDQSoftmax11_SetValidAxis) { - RunQDQSingleInputOpTest(TestInputDef({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()), - "Softmax", - {utils::MakeAttribute("axis", static_cast(-1))}, - 11, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax11_SetValidAxis) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -5.0f, 5.0f), + "Softmax", + {utils::MakeAttribute("axis", static_cast(-1))}, + 11, ExpectedEPNodeAssignment::All); +} + +// Test QDQ Abs op. +TEST_F(QnnHTPBackendTests, UnaryOp_Abs) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -10.0f, 10.0f), + "Abs", + {}, + 13, ExpectedEPNodeAssignment::All); +} + +// Test QDQ Ceil op. +TEST_F(QnnHTPBackendTests, UnaryOp_Ceil) { + RunQDQUnaryOpTest(TestInputDef({1, 2, 3}, false, -100.0f, 100.0f), + "Ceil", + {}, + 13, ExpectedEPNodeAssignment::All); } // Run QDQ model on HTP twice @@ -225,68 +326,138 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCacheTest) { const std::string context_binary_file = "./qnn_context_binary_test.bin"; provider_options["qnn_context_cache_path"] = context_binary_file; - const TestInputDef input_def({1, 2, 3}, false, UInt8Limits::min(), UInt8Limits::max()); + const TestInputDef input_def({1, 2, 3}, false, -10.0f, 10.0f); + const std::string op_type = "Atan"; // Runs model with DQ-> Atan-> Q and compares the outputs of the CPU and QNN EPs. 
// 1st run will generate the Qnn context cache binary file - RunQnnModelTest(BuildQDQSingleInputOpTestCase(input_def, "Atan"), - provider_options, - 11, - ExpectedEPNodeAssignment::All); + TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, {}), + BuildQDQUnaryOpTestCase(input_def, op_type, {}), + provider_options, + 14, + ExpectedEPNodeAssignment::All, + 1e-5f); // Make sure the Qnn context cache binary file is generated EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str())); // 2nd run will load and run from Qnn context cache binary file - RunQnnModelTest(BuildQDQSingleInputOpTestCase(input_def, "Atan"), + TestQDQModelAccuracy(BuildUnaryOpTestCase(op_type, input_def, {}), + BuildQDQUnaryOpTestCase(input_def, op_type, {}), + provider_options, + 14, + ExpectedEPNodeAssignment::All, + 1e-5f); +} + +TEST_F(QnnHTPBackendTests, QuantAccuracyTest) { + ProviderOptions provider_options; + +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + // Note: a graph input -> Q -> DQ -> is optimized by Qnn to have a perfectly accurate output. + // ORT's CPU EP, on the other hand, actually quantizes and dequantizes the input, which leads to different outputs.
+ auto builder_func = [](ModelTestBuilder& builder) { + const TestInputDef input0_def({1, 2, 3}, false, {1.0f, 2.0f, 10.0f, 20.0f, 100.0f, 200.0f}); + + // input -> Q -> Transpose -> DQ -> output + NodeArg* input0 = MakeTestInput(builder, input0_def); + QuantParams qparams = GetTestInputQuantParams(input0_def); + + auto* quant_input = builder.MakeIntermediate(); + builder.AddQuantizeLinearNode(input0, qparams.scale, qparams.zero_point, quant_input); + + auto* op_output = builder.MakeIntermediate(); + builder.AddNode("Transpose", {quant_input}, {op_output}); + + NodeArg* output = builder.MakeOutput(); + builder.AddDequantizeLinearNode(op_output, qparams.scale, qparams.zero_point, output); + }; + + // Runs model with input -> Q -> Transpose -> DQ -> output and compares the outputs of the CPU and QNN EPs. + // Only a single run is performed; no Qnn context cache path is set in provider_options for this test. + RunQnnModelTest(builder_func, provider_options, - 11, + 13, ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestSub4D_SmallInputs) { - RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 8, 8}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 8, 8}, false, -1.0f, 1.0f), - 17, ExpectedEPNodeAssignment::All); +// Test QDQ Add +TEST_F(QnnHTPBackendTests, BinaryOp_Add4D) { + RunQDQBinaryOpTest("Add", TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + 17, ExpectedEPNodeAssignment::All); +} + +// Test QDQ Sub +TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D) { + RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f), + TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f), + 17, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). // Enable when this is fixed.
-TEST_F(QnnHTPBackendTests, DISABLED_TestSub4D_LargeInputs) { - RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Sub4D_LargeInputs) { + RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + 17, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). // Enable when this is fixed. -TEST_F(QnnHTPBackendTests, DISABLED_TestSub4D_Broadcast) { - RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Sub4D_Broadcast) { + RunQDQBinaryOpTest("Sub", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), + 17, ExpectedEPNodeAssignment::All); } -TEST_F(QnnHTPBackendTests, TestDiv4D_SmallInputs) { - RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 8, 8}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 8, 8}, false, -1.0f, 1.0f), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_SmallInputs) { + RunQDQBinaryOpTest("Div", + TestInputDef({1, 2, 2, 2}, false, {-10.0f, -8.0f, -1.0f, 0.0f, 1.0f, 2.1f, 8.0f, 10.0f}), + TestInputDef({1, 2, 2, 2}, false, {5.0f, 4.0f, 1.0f, 1.0f, 1.0f, 4.0f, 4.0f, 5.0f}), + 17, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). // Enable when this is fixed. 
-TEST_F(QnnHTPBackendTests, DISABLED_TestDiv4D_LargeInputs) { - RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Div4D_LargeInputs) { + RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + 17, ExpectedEPNodeAssignment::All); } // TODO: Certain large input sizes cause the QNN graph to fail to finalize with error 1002 (QNN_COMMON_ERROR_MEM_ALLOC). // Enable when this is fixed. // Fails accuracy when input0 has dims [1,3,768,768] -TEST_F(QnnHTPBackendTests, DISABLED_TestDiv4D_Broadcast) { - RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), - TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), - 17, ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Div4D_Broadcast) { + RunQDQBinaryOpTest("Div", TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), + TestInputDef({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}), + 17, ExpectedEPNodeAssignment::All); +} + +// Test QDQ Mul +TEST_F(QnnHTPBackendTests, BinaryOp_Mul4D) { + RunQDQBinaryOpTest("Mul", TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + 17, ExpectedEPNodeAssignment::All); +} +// Test And on boolean inputs (uses RunBinaryOpTest, not the QDQ helper) +TEST_F(QnnHTPBackendTests, BinaryOp_And4D) { + RunBinaryOpTest("And", TestInputDef({1, 4}, false, {false, false, true, true}), + TestInputDef({1, 4}, false, {false, true, false, true}), + 17, ExpectedEPNodeAssignment::All); +} + +// Test that Or is not yet supported on HTP backend.
+TEST_F(QnnHTPBackendTests, BinaryOp_HTP_Or_Unsupported) { + RunBinaryOpTest("Or", TestInputDef({1, 4}, false, {false, false, true, true}), + TestInputDef({1, 4}, false, {false, true, false, true}), + 17, ExpectedEPNodeAssignment::None); } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/util/include/test_utils.h b/onnxruntime/test/util/include/test_utils.h index 83eb4f59bd42c..19e244afdded3 100644 --- a/onnxruntime/test/util/include/test_utils.h +++ b/onnxruntime/test/util/include/test_utils.h @@ -40,6 +40,12 @@ struct EPVerificationParams { const std::function* graph_verifier{nullptr}; }; +// Verify equality of two output tensors. +void VerifyOutput(const std::string& output_name, + const Tensor& expected_tensor, + const Tensor& tensor, + float fp32_abs_err); + // Return number of nodes in the Graph and any subgraphs that are assigned to the specified execution provider int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type); diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc index 1d38aea91066d..bc2d9aec8599b 100644 --- a/onnxruntime/test/util/test_utils.cc +++ b/onnxruntime/test/util/test_utils.cc @@ -18,6 +18,48 @@ namespace onnxruntime { namespace test { +void VerifyOutput(const std::string& output_name, + const Tensor& expected_tensor, + const Tensor& tensor, + float fp32_abs_err) { + ASSERT_TRUE(SpanEq(expected_tensor.Shape().GetDims(), tensor.Shape().GetDims())); + ASSERT_EQ(expected_tensor.GetElementType(), tensor.GetElementType()); + auto element_type = expected_tensor.GetElementType(); + switch (element_type) { + case ONNX_NAMESPACE::TensorProto_DataType_UINT32: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_INT32: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + 
break; + case ONNX_NAMESPACE::TensorProto_DataType_INT64: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_UINT8: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_INT8: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_BOOL: + EXPECT_TRUE(SpanEq(expected_tensor.DataAsSpan(), tensor.DataAsSpan())) + << " mismatch for " << output_name; + break; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { + EXPECT_THAT(expected_tensor.DataAsSpan(), + ::testing::Pointwise(::testing::FloatNear(fp32_abs_err), tensor.DataAsSpan())); + break; + } + default: + ORT_THROW("Unhandled data type. Please add 'case' statement for ", element_type); + } +} + static void VerifyOutputs(const std::vector& output_names, const std::vector& expected_fetches, const std::vector& fetches, @@ -27,41 +69,7 @@ static void VerifyOutputs(const std::vector& output_names, for (size_t i = 0, end = expected_fetches.size(); i < end; ++i) { auto& ltensor = expected_fetches[i].Get(); auto& rtensor = fetches[i].Get(); - ASSERT_TRUE(SpanEq(ltensor.Shape().GetDims(), rtensor.Shape().GetDims())); - auto element_type = ltensor.GetElementType(); - switch (element_type) { - case ONNX_NAMESPACE::TensorProto_DataType_UINT32: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_INT32: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_INT64: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case 
ONNX_NAMESPACE::TensorProto_DataType_UINT8: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_INT8: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_BOOL: - EXPECT_TRUE(SpanEq(ltensor.DataAsSpan(), rtensor.DataAsSpan())) - << " mismatch for " << output_names[i]; - break; - case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { - EXPECT_THAT(ltensor.DataAsSpan(), - ::testing::Pointwise(::testing::FloatNear(params.fp32_abs_err), rtensor.DataAsSpan())); - break; - } - default: - ORT_THROW("Unhandled data type. Please add 'case' statement for ", element_type); - } + VerifyOutput(output_names[i], ltensor, rtensor, params.fp32_abs_err); } }