Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QNN EP] Improve QDQ model accuracy tests #16916

Merged
merged 35 commits into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
caed9a1
Add tests
adrianlizarraga Jul 25, 2023
4f22f43
Fix merge conflicts from main
adrianlizarraga Jul 25, 2023
afbe467
Rework new Asin, Sin, and Sign tests to use float inputs/outputs
adrianlizarraga Jul 25, 2023
59948ed
Add 3-way comparison between Cpu(f32), Cpu(QDQ), QNN(QDQ)
adrianlizarraga Jul 27, 2023
35b91ee
Clean up
adrianlizarraga Jul 27, 2023
eee35db
Switch argmin/argmax tests to new QDQ accuracy testing
adrianlizarraga Jul 27, 2023
fa5dcb4
Convert averagepool tests (need to disable inaccurate tests)
adrianlizarraga Jul 28, 2023
aeba185
Add explicit inputs to AveragePool tests
adrianlizarraga Jul 28, 2023
9fcba0b
Use new QDQ acc testing for BatchNorm. Found issues.
adrianlizarraga Jul 29, 2023
b6123fd
Update conv tests to new accuracy testing
adrianlizarraga Jul 29, 2023
ec41d97
Update comments for QDQ accuracy testing func
adrianlizarraga Jul 29, 2023
d89e0b3
Fix linter errors. Add include for math
adrianlizarraga Jul 29, 2023
40ac48a
More linter fixes. Use different cmath funcs
adrianlizarraga Jul 29, 2023
a610a6e
Update shape for And/Or tests
adrianlizarraga Jul 29, 2023
1eed952
More lint error
adrianlizarraga Jul 29, 2023
9c6149f
Update Gather op tests. Need to support testing scalar indices
adrianlizarraga Jul 30, 2023
46f4793
Simplify accuracy computation
adrianlizarraga Jul 30, 2023
7de0d07
Convert InstanceNorm to new testing approach
adrianlizarraga Jul 30, 2023
974bc77
Update LeakyRelu tests to new accuracy testing approach
adrianlizarraga Jul 30, 2023
f91aea1
Update LRN tests
adrianlizarraga Jul 31, 2023
d48980f
Update MatMul tests
adrianlizarraga Jul 31, 2023
2a05554
Fix merge conflicts with main
adrianlizarraga Jul 31, 2023
f82fb22
Reuse GetDataQuantParams function
adrianlizarraga Jul 31, 2023
e513d63
Update MaxPool tests
adrianlizarraga Jul 31, 2023
fd01af3
Update ReduceOp cpu tests
adrianlizarraga Jul 31, 2023
56fa59b
Update QDQ Reduce op tests
adrianlizarraga Jul 31, 2023
7af9410
Explicitly handle quant params when data only has a single 0
adrianlizarraga Jul 31, 2023
962bca6
Update Resize tests to use new accuracy measuring approach
adrianlizarraga Jul 31, 2023
7917a07
Add disabled inaccurate tests
adrianlizarraga Jul 31, 2023
f2216ee
Add a way to override a test input's range
adrianlizarraga Jul 31, 2023
32ea966
Move all reduce op tests into 1 file and rename it
adrianlizarraga Jul 31, 2023
0742fd3
Merge latest commits from main
adrianlizarraga Aug 2, 2023
20edf00
Merge latest commits from main
adrianlizarraga Aug 3, 2023
3878e79
Revert change in input shape
adrianlizarraga Aug 3, 2023
a21251e
Merge latest commits from main
adrianlizarraga Aug 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
{"Atan", {}},
{"Asin", {}},
{"Sin", {}},
{"Cos", {}},
{"Sign", {}},
{"Tanh", {}},
{"Exp", {}},
{"LRN", {}}};
{"LRN", {}},
{"Ceil", {}},
{"Abs", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() {
return {{"Add", {}},
Expand Down
22 changes: 8 additions & 14 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,34 +145,28 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
if (it != node_unit_supported_result.cend()) {
return it->second;
} else {
// quantized required, filter out the non-quantized nodes, filter in the QDQ nodes
auto IsQdqNode = [](const NodeUnit& node_unit) {
if ("QuantizeLinear" == node_unit.OpType() || "DequantizeLinear" == node_unit.OpType()) {
return true;
} else {
return false;
}
};
const std::string& op_type = node_unit.OpType();
const bool is_qdq_node = op_type == "QuantizeLinear" || op_type == "DequantizeLinear";

// Is NPU backend, is single node, case by case
// Q/DQ nodes -- supported
// Transpose nodes -- supported
// Cast nodes -- need to call CastOpBuilder::IsOpSupported
if (is_npu_backend && NodeUnit::Type::SingleNode == node_unit.UnitType()) {
if (IsQdqNode(node_unit)) { // Qnn has Quantize & Dequantize Op
if (is_qdq_node) { // Qnn has Quantize & Dequantize Op
LOGS(logger, VERBOSE) << "Single Q/DQ node is supported for NPU backend. Node name: " << node_unit.Name();
return true;
}

// Tranpose only changes the data layout. NPU still supports it.
if ("Transpose" == node_unit.OpType()) {
if ("Transpose" == op_type) {
LOGS(logger, VERBOSE) << "Single Transpose node is supported for NPU backend. Node name: " << node_unit.Name();
return true;
}

// For Cast, need to call IsOpSupported (below) to validate input and output types.
// For Cast, And, and Or, we need to call IsOpSupported (below) to validate input and output types.
// For other single non-qdq nodes, immediately return not supported.
if (node_unit.OpType() != "Cast") {
if (op_type != "Cast" && op_type != "And" && op_type != "Or") {
adrianlizarraga marked this conversation as resolved.
Show resolved Hide resolved
LOGS(logger, WARNING) << "Non-QDQ " << node_unit.OpType()
<< " operators are not supported on HTP or DSP backends. " << node_unit.OpType()
<< " node `" << node_unit.Name() << " will not be assigned to QNN EP.";
Expand All @@ -181,14 +175,14 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
}

// Non-NPU backend, quantized model not supported, but a QDQ node encountered
if (!is_npu_backend && IsQdqNode(node_unit)) {
if (!is_npu_backend && is_qdq_node) {
LOGS(logger, ERROR) << "QDQ models are only supported on HTP or DSP backends. "
<< node_unit.OpType() << " node `" << node_unit.Name() << "` will not be assigned to QNN EP.";
return false;
}

bool supported = false;
const auto* op_builder = qnn::GetOpBuilder(node_unit.OpType());
const auto* op_builder = qnn::GetOpBuilder(op_type);
if (op_builder == nullptr) {
LOGS(logger, WARNING) << "Operators of type `" << node_unit.OpType() << "` are not supported by QNN EP."
<< node_unit.OpType() << " node `" << node_unit.Name()
Expand Down
9 changes: 9 additions & 0 deletions onnxruntime/test/optimizer/graph_transform_test_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,15 @@ class ModelTestBuilder {
return &graph_.GetOrCreateNodeArg(name, nullptr);
}

// Creates a boolean initializer of the given shape, filled with random
// true/false values (drawn as uniform 0/1 bytes and converted to bool).
NodeArg* MakeRandInitializerBool(const std::vector<int64_t>& shape) {
  const std::vector<uint8_t> raw_values = rand_gen_.Uniform<uint8_t>(shape, 0, 1);
  std::vector<bool> bool_values;
  bool_values.reserve(raw_values.size());
  for (const uint8_t raw : raw_values) {
    bool_values.push_back(raw != 0);
  }
  return MakeInitializerBool(shape, bool_values);
}

template <typename T>
NodeArg* MakeInitializer(const std::vector<int64_t>& shape, T min, T max) {
return MakeInitializer<T>(shape, rand_gen_.Uniform<T>(shape, min, max));
Expand Down
96 changes: 0 additions & 96 deletions onnxruntime/test/optimizer/qdq_test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,102 +91,6 @@ GetQDQTestCaseFn BuildQDQConvTransposeTestCase(const std::vector<int64_t>& input
};
}

// Creates the following graph:
//                                              _______________________
//                                             |                       |
//                input (f32) -> Q -> DQ ->    |        Gather         | -> Q -> DQ -> output (f32)
//  indices (IndicesType, initializer) ->      |_______________________|
//
// Builds a QDQ Gather test model: the float input is quantized/dequantized
// (scale .003f, zero-point 1), gathered along `axis` using a constant indices
// tensor of shape `indices_shape`, then re-quantized with the same parameters
// before the final dequantized output.
// NOTE(review): the Gather second input is `indices`, not `axes`; its element
// type is the IndicesType template parameter, not necessarily int32.
template <typename QuantType, typename IndicesType>
GetQDQTestCaseFn BuildQDQGatherOpTestCase(const std::vector<int64_t>& input_shape,
                                          const std::vector<IndicesType> indices,
                                          const std::vector<int64_t>& indices_shape,
                                          int64_t axis) {
  return [input_shape, indices, indices_shape, axis](ModelTestBuilder& builder) {
    auto* input_data = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
    auto* final_output = builder.MakeOutput();

    // input_data -> Q/DQ ->
    auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, .003f, 1);

    // Constant indices initializer (integral type; not quantized).
    auto* indices_input = builder.MakeInitializer<IndicesType>(indices_shape, indices);

    auto* gather_output = builder.MakeIntermediate();
    Node& gather_node = builder.AddNode("Gather", {input_qdq_output, indices_input}, {gather_output});
    gather_node.AddAttribute("axis", axis);

    // -> Q/DQ -> final_output
    auto* q_output = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<QuantType>(gather_output, .003f, 1,
                                             q_output);

    builder.AddDequantizeLinearNode<QuantType>(q_output, .003f, 1,
                                               final_output);
  };
}

// Creates the following graph (Gather whose indices input is a scalar initializer):
//                                           _______________________
//                                          |                       |
//             input (f32) -> Q -> DQ ->    |        Gather         | -> Q -> DQ -> output (f32)
//  index (IndicesType, scalar init) ->     |_______________________|
//
// The float input is quantized/dequantized (scale .003f, zero-point 1), gathered
// along `axis` with a single scalar index, then re-quantized at the output.
template <typename QuantType, typename IndicesType>
GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector<int64_t>& input_shape,
                                                       const IndicesType indices,
                                                       int64_t axis) {
  return [input_shape, indices, axis](ModelTestBuilder& builder) {
    auto* float_input = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
    auto* model_output = builder.MakeOutput();

    // Quantize/dequantize pair on the float input (scale .003f, zero-point 1).
    auto* dq_input = AddQDQNodePair<QuantType>(builder, float_input, .003f, 1);

    // Scalar constant used as the Gather indices.
    auto* scalar_index = builder.MakeScalarInitializer<IndicesType>(indices);

    // Gather along the requested axis.
    auto* gather_out = builder.MakeIntermediate();
    Node& gather = builder.AddNode("Gather", {dq_input, scalar_index}, {gather_out});
    gather.AddAttribute("axis", axis);

    // Re-quantize the Gather result, then dequantize into the model output.
    auto* requantized = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<QuantType>(gather_out, .003f, 1, requantized);
    builder.AddDequantizeLinearNode<QuantType>(requantized, .003f, 1, model_output);
  };
}

// Creates the following graph:
//                             _______________________
//                            |                       |
//  input (f32) -> Q -> DQ -> |       LeakyRelu       | -> Q -> DQ -> output (f32)
//                            |_______________________|
//
// The input Q/DQ pair uses scale 0.0473f / zero-point 137; the output pair uses
// scale 0.02696f / zero-point 48. LeakyRelu runs with alpha = 0.2.
template <typename QuantType>
GetQDQTestCaseFn BuildQDQLeakyReluOpTestCase(const std::vector<int64_t>& input_shape) {
  return [input_shape](ModelTestBuilder& builder) {
    auto* float_input = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
    auto* model_output = builder.MakeOutput();

    // Quantize/dequantize pair on the input (scale 0.0473f, zero-point 137).
    auto* dq_input = AddQDQNodePair<QuantType>(builder, float_input, 0.0473f, 137);

    // LeakyRelu with alpha = 0.2.
    auto* leakyrelu_out = builder.MakeIntermediate();
    Node& leakyrelu = builder.AddNode("LeakyRelu", {dq_input}, {leakyrelu_out});
    leakyrelu.AddAttribute("alpha", 0.2f);

    // Re-quantize (scale 0.02696f, zero-point 48) and dequantize into the output.
    auto* requantized = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<QuantType>(leakyrelu_out, 0.02696f, 48, requantized);
    builder.AddDequantizeLinearNode<QuantType>(requantized, 0.02696f, 48, model_output);
  };
}

template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
return [input_shape, weights_shape](ModelTestBuilder& builder) {
Expand Down
32 changes: 21 additions & 11 deletions onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,29 @@ static GetTestModelFn BuildArgMxxTestCase(const std::string& op_type, TestInputD
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder) {
auto* input = MakeTestInput(builder, input_def);
auto* output = builder.MakeOutput();

Node& argm_node = builder.AddNode(op_type, {input}, {output});
auto* argm_output = builder.MakeIntermediate();
Node& argm_node = builder.AddNode(op_type, {input}, {argm_output});
for (const auto& attr : attrs) {
argm_node.AddAttributeProto(attr);
}

// Add cast to uint32
auto* output = builder.MakeOutput();
Node& cast_node = builder.AddNode("Cast", {argm_output}, {output});
const auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT32;
cast_node.AddAttribute("to", static_cast<int64_t>(dst_type));
};
}

// Builds a QDQ model with ArgMin/ArgMax and a Cast to uint32. The quantization parameters are computed from the provided
// input definition.
template <typename QType = uint8_t>
static GetTestModelFn BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder) {
static GetTestQDQModelFn<QType> BuildQDQArgMxxTestCase(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
return [op_type, input_def, attrs](ModelTestBuilder& builder,
std::vector<QuantParams<QType>>& output_qparams) {
ORT_UNUSED_PARAMETER(output_qparams);
QuantParams<QType> input_qparams = GetTestInputQuantParams(input_def);

auto* input = MakeTestInput(builder, input_def);
Expand Down Expand Up @@ -75,8 +83,8 @@ static void RunCPUArgMxxOpTest(const std::string& op_type, TestInputDef<float> i
expected_ep_assignment);
}

// Runs an ArgMax/ArgMin model on the QNN CPU backend. Checks the graph node assignment, and that inference
// outputs for QNN EP and CPU EP match.
// Runs a QDQ ArgMax/ArgMin model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment, and that inference
// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (when compared to the baseline float32 model).
template <typename QType = uint8_t>
static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef<float> input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
Expand All @@ -90,10 +98,12 @@ static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef<float> i
provider_options["backend_path"] = "libQnnHtp.so";
#endif

RunQnnModelTest(BuildQDQArgMxxTestCase(op_type, input_def, attrs),
provider_options,
opset,
expected_ep_assignment);
TestQDQModelAccuracy(BuildArgMxxTestCase(op_type, input_def, attrs), // baseline float32 model
BuildQDQArgMxxTestCase<QType>(op_type, input_def, attrs), // QDQ model
provider_options,
opset,
expected_ep_assignment,
1e-5f);
adrianlizarraga marked this conversation as resolved.
Show resolved Hide resolved
}

//
Expand Down
Loading