From c60e943c6cb5588f243a2d74ab25e17a7a43c610 Mon Sep 17 00:00:00 2001
From: Hector Li
Date: Fri, 27 Sep 2024 09:51:47 -0700
Subject: [PATCH] Update code regarding some QNN bug fixes (#22222)

### Description
Update code following two QNN bug fixes:
1. QnnProfile_ExtendedEventData_t.version was not initialized by QNN. Now that it is, the previously commented-out version check in extended profiling-event extraction is restored.
2. The graph failed to finalize for HardSigmoid with FP16 precision. Now that it succeeds, the workaround that decomposed HardSigmoid into Mul, Add, and ReluMinMax is removed, and HardSigmoid maps directly to ElementWiseNeuron again.
---
 .../builder/opbuilder/simple_op_builder.cc    | 158 ------------------
 .../qnn/builder/qnn_backend_manager.cc        |  21 ++-
 .../test/providers/qnn/qnn_basic_test.cc      |  26 ++-
 .../test/providers/qnn/simple_op_htp_test.cc  |   8 -
 4 files changed, 32 insertions(+), 181 deletions(-)
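
Reviewer note (annotation, not part of the patch): ONNX HardSigmoid computes
y = max(0, min(1, alpha * x + beta)), with defaults alpha = 0.2 and beta = 0.5,
which are the same defaults the removed decomposition below hard-codes. A
minimal scalar reference, with a hypothetical helper name, for sanity-checking
either lowering:

  #include <algorithm>

  // Scalar reference for ONNX HardSigmoid; illustrative sketch only.
  inline float HardSigmoidRef(float x, float alpha = 0.2f, float beta = 0.5f) {
    return std::max(0.0f, std::min(1.0f, alpha * x + beta));
  }

For example, HardSigmoidRef(2.5f) = max(0, min(1, 0.2 * 2.5 + 0.5)) = 1.0.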
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
index 285781aaa3559..0358fae3c2115 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@@ -325,159 +325,6 @@ Status ProcessGridSampleAttributes(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
-static Status GetFloatBytes(float f32_val, Qnn_DataType_t qnn_data_type, std::vector<uint8_t>& bytes) {
-  switch (qnn_data_type) {
-    case QNN_DATATYPE_FLOAT_32: {
-      bytes.resize(sizeof(float));
-      std::memcpy(bytes.data(), &f32_val, bytes.size());
-      break;
-    }
-    case QNN_DATATYPE_FLOAT_16: {
-      bytes.resize(sizeof(MLFloat16));
-      const MLFloat16 f16_val(f32_val);
-      std::memcpy(bytes.data(), &f16_val, bytes.size());
-      break;
-    }
-    default:
-      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Qnn Data Type: ", qnn_data_type, " is not supported");
-  }
-
-  return Status::OK();
-}
-
-static Status DecomposeHardSigmoid(QnnModelWrapper& qnn_model_wrapper,
-                                   const NodeUnit& node_unit,
-                                   std::vector<std::string>&& input_names,
-                                   const logging::Logger& logger,
-                                   bool do_op_validation) {
-  ORT_UNUSED_PARAMETER(logger);
-  const auto& onnx_node_name = utils::GetNodeName(node_unit);
-  const auto& input = node_unit.Inputs()[0];
-  const auto& output = node_unit.Outputs()[0];
-
-  std::vector<uint32_t> input_shape;
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input.node_arg, input_shape), "Cannot get shape of input 0");
-
-  Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
-  ORT_RETURN_IF_ERROR(utils::GetQnnDataType(false /*is_quantized*/, input.node_arg.TypeAsProto(), qnn_data_type));
-
-  NodeAttrHelper node_helper(node_unit);
-
-  //
-  // Create Mul node.
-  //
-  const OnnxAttrInfo<float> onnx_alpha_attr{"alpha", 0.2f};
-  const OnnxAttrInfo<float> onnx_beta_attr{"beta", 0.5f};
-  std::string alpha_input_name = MakeString("ort_qnn_ep_", onnx_node_name, "_HardSigmoid_Mul_alpha");
-  std::vector<uint8_t> alpha_bytes;
-  ORT_RETURN_IF_ERROR(GetFloatBytes(GetOnnxAttr(node_helper, onnx_alpha_attr), qnn_data_type, alpha_bytes));
-
-  QnnTensorWrapper alpha_input(alpha_input_name,
-                               QNN_TENSOR_TYPE_STATIC,
-                               qnn_data_type,
-                               QnnQuantParamsWrapper(),
-                               {1},  // shape
-                               std::move(alpha_bytes));
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(alpha_input)), "Failed to add alpha input tensor.");
-
-  std::string mul_output_name = MakeString("ort_qnn_ep_", onnx_node_name, "_HardSigmoid_Mul_output");
-  std::string mul_node_name = MakeString("ort_qnn_ep_", onnx_node_name, "_HardSigmoid_Mul_node");
-  QnnTensorWrapper mul_output(mul_output_name,
-                              QNN_TENSOR_TYPE_NATIVE,
-                              qnn_data_type,
-                              QnnQuantParamsWrapper(),
-                              std::vector<uint32_t>(input_shape));
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(mul_output)), "Failed to add Mul output tensor.");
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(mul_node_name,
-                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
-                                                    QNN_OP_ELEMENT_WISE_MULTIPLY,
-                                                    {input_names[0], alpha_input_name},  // input names
-                                                    {mul_output_name},                   // output names
-                                                    {},
-                                                    do_op_validation),
-                    "Failed to add Mul node.");
-
-  //
-  // Create Add node.
-  //
-
-  std::string beta_input_name = MakeString("ort_qnn_ep_", onnx_node_name, "_HardSigmoid_Mul_beta");
-  std::vector<uint8_t> beta_bytes;
-  ORT_RETURN_IF_ERROR(GetFloatBytes(GetOnnxAttr(node_helper, onnx_beta_attr), qnn_data_type, beta_bytes));
-
-  QnnTensorWrapper beta_input(beta_input_name,
-                              QNN_TENSOR_TYPE_STATIC,
-                              qnn_data_type,
-                              QnnQuantParamsWrapper(),
-                              {1},  // shape
-                              std::move(beta_bytes));
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(beta_input)), "Failed to add beta input tensor.");
-
-  std::string add_output_name = MakeString("ort_qnn_ep_", onnx_node_name, "_HardSigmoid_Add_output");
-  std::string add_node_name = MakeString("ort_qnn_ep_", onnx_node_name, "_HardSigmoid_Add_node");
-  QnnTensorWrapper add_output(add_output_name,
-                              QNN_TENSOR_TYPE_NATIVE,
-                              qnn_data_type,
-                              QnnQuantParamsWrapper(),
-                              std::vector<uint32_t>(input_shape));
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(add_output)), "Failed to add Add output tensor.");
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(add_node_name,
-                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
-                                                    QNN_OP_ELEMENT_WISE_ADD,
-                                                    {mul_output_name, beta_input_name},  // input names
-                                                    {add_output_name},                   // output names
-                                                    {},
-                                                    do_op_validation),
-                    "Failed to add Add node.");
-
-  //
-  // Create ReluMinMax node.
-  //
-
-  std::vector<std::string> param_tensor_names;
-
-  // Parameter 'min_value'
-  {
-    Qnn_Scalar_t min_value = QNN_SCALAR_INIT;
-    min_value.dataType = QNN_DATATYPE_FLOAT_32;
-    min_value.floatValue = 0.0f;
-
-    QnnParamWrapper qnn_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MIN_VALUE, min_value);
-    param_tensor_names.push_back(qnn_param.GetParamTensorName());
-    qnn_model_wrapper.AddParamWrapper(std::move(qnn_param));
-  }
-
-  // Parameter 'max_value'
-  {
-    Qnn_Scalar_t max_value = QNN_SCALAR_INIT;
-    max_value.dataType = QNN_DATATYPE_FLOAT_32;
-    max_value.floatValue = 1.0f;
-
-    QnnParamWrapper qnn_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MAX_VALUE, max_value);
-    param_tensor_names.push_back(qnn_param.GetParamTensorName());
-    qnn_model_wrapper.AddParamWrapper(std::move(qnn_param));
-  }
-
-  const std::string& output_name = output.node_arg.Name();
-  std::string relu_min_max_node_name = MakeString("ort_qnn_ep_", onnx_node_name, "_HardSigmoid_ReluMinMax_node");
-  QnnTensorWrapper output_tensor(output_name,
-                                 qnn_model_wrapper.GetTensorType(output_name),
-                                 qnn_data_type,
-                                 QnnQuantParamsWrapper(),
-                                 std::vector<uint32_t>(input_shape));
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensor)), "Failed to add output tensor.");
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(relu_min_max_node_name,
-                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
-                                                    QNN_OP_RELU_MIN_MAX,
-                                                    {add_output_name},  // input names
-                                                    {output_name},      // output names
-                                                    std::move(param_tensor_names),
-                                                    do_op_validation),
-                    "Failed to add ReluMinMax node.");
-
-  return Status::OK();
-}
-
 Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                                     const NodeUnit& node_unit,
                                                     std::vector<std::string>&& input_names,
@@ -546,13 +393,8 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
   }
 
   if (op_type == "HardSigmoid") {
-    // direct conversion to ElementWiseNeuron has issue to finalize the graph for FP16 data type
-    // still decompose it to Mul, Add, ReluMinMax
     int32_t onnx_data_type = 0;
     ORT_RETURN_IF_ERROR(utils::GetOnnxTensorElemDataType(node_unit.Inputs()[0].node_arg, onnx_data_type));
-    if (onnx_data_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
-      return DecomposeHardSigmoid(qnn_model_wrapper, node_unit, std::move(input_names), logger, do_op_validation);
-    }
 
     ORT_RETURN_IF_ERROR(ProcessNodeAttribute(qnn_model_wrapper, node_unit, "alpha",
                                              QNN_OP_ELEMENT_WISE_NEURON_PARAM_ALPHA,
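
Reviewer note (annotation, not part of the patch): the deleted DecomposeHardSigmoid
built a three-node QNN graph. Reduced from tensors to one element (hypothetical
helper, using std::min/std::max from <algorithm> as in the earlier note), it computed:

  // Per-element view of the removed Mul -> Add -> ReluMinMax subgraph.
  float DecomposedHardSigmoid(float x, float alpha, float beta) {
    float y = x * alpha;                       // ElementWiseMultiply, static alpha tensor
    y = y + beta;                              // ElementWiseAdd, static beta tensor
    return std::min(std::max(y, 0.0f), 1.0f);  // ReluMinMax, min_value = 0, max_value = 1
  }

This matches HardSigmoidRef above, which is why the decomposition was an
accuracy-neutral workaround while the direct ElementWiseNeuron path could not
finalize in FP16.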
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
index db5c2c5cb32ba..eaffe1e2ac224 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -1179,17 +1179,16 @@ Status QnnBackendManager::ExtractProfilingEventExtended(
 #endif
 
   if (!tracelogging_provider_ep_enabled) {
-    // QNN issue, the version number not correct, ticket created
-    // if (event_data_extended.version == QNN_PROFILE_DATA_VERSION_1) {
-    outfile << event_data_extended.v1.timestamp << ","
-            << message << ","
-            << ExtractQnnScalarValue(event_data_extended.v1.value) << ","
-            << unit << ","
-            << "BACKEND"
-            << ","
-            << eventLevel << ","
-            << (event_data_extended.v1.identifier ? event_data_extended.v1.identifier : "NULL") << "\n";
-    //}
+    if (event_data_extended.version == QNN_PROFILE_DATA_VERSION_1) {
+      outfile << event_data_extended.v1.timestamp << ","
+              << message << ","
+              << ExtractQnnScalarValue(event_data_extended.v1.value) << ","
+              << unit << ","
+              << "BACKEND"
+              << ","
+              << eventLevel << ","
+              << (event_data_extended.v1.identifier ? event_data_extended.v1.identifier : "NULL") << "\n";
+    }
   } else {
 #ifdef _WIN32
     LogQnnProfileEventAsTraceLogging(
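
Reviewer note (annotation, not part of the patch): the restored guard means the
v1 union member of QnnProfile_ExtendedEventData_t is only read when the version
tag identifies it as valid; events reporting any other version are now skipped
rather than interpreted through v1. A sketch of the pattern, using only names
visible in the hunk above:

  // Check the version tag before touching the versioned union member.
  if (event_data_extended.version == QNN_PROFILE_DATA_VERSION_1) {
    const char* id = event_data_extended.v1.identifier
                         ? event_data_extended.v1.identifier
                         : "NULL";
    // ... emit the CSV row: timestamp, message, value, unit, BACKEND, level, id ...
  }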
diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
index c4367aeb52edc..236b66a2d8a78 100644
--- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc
+++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -912,10 +912,28 @@ static GetTestModelFn BuildCastAddTestCase() {
   };
 }
 
-// A repro of QC case 06838696, accuracy issue for Cast + Op (quantized)
-// the value pair(1, 0.00392156886) at index #1 don't match,
-// which is -0.996078 from 1
-TEST_F(QnnHTPBackendTests, DISABLED_CastAddHTPAccuracyTest) {
+TEST_F(QnnHTPBackendTests, ProfilingTest) {
+  onnxruntime::ProviderOptions provider_options;
+
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+  provider_options["enable_htp_fp16_precision"] = "1";
+  provider_options["profiling_level"] = "detailed";
+  provider_options["profiling_file_path"] = "detailed_profile.csv";
+
+  auto input_defs = {TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
+                     TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f)};
+  RunQnnModelTest(BuildOpTestCase<float>("Add", input_defs, {}, {}, kOnnxDomain),
+                  provider_options,
+                  13,
+                  ExpectedEPNodeAssignment::All,
+                  0.008f);
+}
+
+TEST_F(QnnHTPBackendTests, CastAddHTPAccuracyTest) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
index 8de414dbb4a62..018720fd8b71f 100644
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -1333,14 +1333,6 @@ TEST_F(QnnHTPBackendTests, UnaryOp_HardSigmoid_F32_as_FP16) {
 }
 
 // Check that QNN EP can support float16 HardSigmoid on HTP
-// It is using decompose way for FP16 since ElementWiseNeuron failed to finalize the graph with the error below:
-// \HTP\src\hexagon\prepare\tcm_migration.cc:1829:ERROR:no properties registered for q::QNN_HardSigmoid
-// \HTP\HTP\src\hexagon\prepare\graph_prepare.cc:203:ERROR:could not create op: q::QNN_HardSigmoid
-// \HTP\HTP\src\hexagon\prepare\graph_prepare.cc:1238:ERROR:Op 0x101000000010 preparation failed with err:-1
-// Completed stage: Graph Transformations and Optimizations (16361 us)
-// QnnDsp <E> "node" generated: could not create op
-// QnnDsp <E> RouterWindows graph prepare failed 12
-// QnnDsp <E> Failed to finalize graph (id: 1) with err 1002
 TEST_F(QnnHTPBackendTests, UnaryOp_HardSigmoid_FP16) {
   std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 16);
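
Reviewer note (annotation, not part of the patch): the new ProfilingTest drives
the detailed-profiling path end to end. A minimal application-side sketch that
sets the same provider options through the ONNX Runtime C++ API; the model path
is a placeholder:

  #include <onnxruntime_cxx_api.h>

  #include <string>
  #include <unordered_map>

  int main() {
    Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "qnn_profiling"};
    Ort::SessionOptions session_options;
    std::unordered_map<std::string, std::string> qnn_options{
        {"backend_path", "QnnHtp.dll"},  // "libQnnHtp.so" on Linux/Android
        {"enable_htp_fp16_precision", "1"},
        {"profiling_level", "detailed"},
        {"profiling_file_path", "detailed_profile.csv"},
    };
    session_options.AppendExecutionProvider("QNN", qnn_options);
    Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
    // Inference runs on this session append events to detailed_profile.csv.
    return 0;
  }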