Commit 7196cdf
init change
guoyu-wang committed Feb 2, 2022
1 parent 84c0077 commit 7196cdf
Showing 4 changed files with 314 additions and 105 deletions.
225 changes: 215 additions & 10 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@@ -158,6 +158,201 @@ bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {
return true;
}

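// Checks whether the quantization scale of the given input/output is supported.
// The scale must be an initializer. Per-channel scales are only accepted for the
// int8 weight of u8s8 QLinearConv on Android API level 29+ (NNAPI feature level 3),
// where dims[0] of the scale must match dims[0] of the weight.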
bool IsQuantizationScaleSupported(const InitializedTensorSet& initializers,
const NodeUnitIODef& io_def,
const OpSupportCheckParams& params,
const std::string& op_type,
bool is_quant_matmul,
bool is_conv_matmul_weight,
bool is_conv_matmul_u8s8_weight) {
const auto scale_name = io_def.quant_param->scale.Name();
if (!Contains(initializers, scale_name)) {
LOGS_DEFAULT(VERBOSE) << "The scale of " << op_type << " must be an initializer tensor";
return false;
}

const auto& scale_tensor = *initializers.at(scale_name);
int64_t scales_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0];
if (!is_conv_matmul_u8s8_weight) {
if (scales_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization; "
<< "for now, only u8s8 QLinearConv supports per-channel quantization on API 29+";
return false;
}
} else if (scales_dim != 1) {
// For u8s8 Qlinear[Conv/MatMul], we support:
// 1. Per-tensor: the weight will be transformed to uint8 later
// 2. Per-channel: only from Android API level 29
if (is_quant_matmul) {
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
return false;
}

if (params.android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) {
LOGS_DEFAULT(VERBOSE) << op_type << " only supports per-channel quantization on Android API 29+, "
<< "system NNAPI feature level: " << params.android_feature_level;
return false;
}

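// Per-channel: the number of scales must match the weight's dimension[0]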
Shape weight_shape;
if (!GetShape(io_def.node_arg, weight_shape))
return false;

if (weight_shape[0] != scales_dim) {
LOGS_DEFAULT(VERBOSE) << op_type << " has mismatched int8 per-channel quantization:"
<< " weight dimension[0] " << weight_shape[0]
<< ", scale dimension " << scales_dim;
return false;
}
}

return true;
}

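// Checks whether the quantization zero point of the given input/output is supported.
// The zero point is optional; when present it must be an initializer. For the u8s8
// Qlinear[Conv/MatMul] weight, the zero point must be int8 and every element must be 0,
// since NNAPI assumes a zero point of 0 for per-channel quantization.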
bool IsQuantizationZeroPointSupported(const InitializedTensorSet& initializers,
const NodeUnitIODef& io_def,
const OpSupportCheckParams& params,
const std::string& op_type,
const Path& model_path,
bool is_quant_matmul,
bool is_conv_matmul_weight,
bool is_conv_matmul_u8s8_weight) {
// zero point is optional here
if (!io_def.quant_param->zero_point)
return true;

const auto& zero_point_name = io_def.quant_param->zero_point->Name();
if (!Contains(initializers, zero_point_name)) {
LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be an initializer tensor";
return false;
}

const auto& zero_tensor = *initializers.at(zero_point_name);
int64_t zero_dim = zero_tensor.dims().empty() ? 1 : zero_tensor.dims()[0];

if (!is_conv_matmul_u8s8_weight) {
if (zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization; "
<< "for now, only u8s8 QLinearConv supports per-channel quantization on API 29+";
return false;
}
} else {
// For u8s8 Qlinear[Conv/MatMul], we support:
// 1. Per-tensor: the weight will be transformed to uint8 later
// 2. Per-channel: only from Android API level 29
if (zero_tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) {
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only supports int8 zero point for weight, "
<< "actual zero point type: [" << zero_tensor.data_type() << "]";
return false;
}

if (zero_dim != 1) {
if (is_quant_matmul) {
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
return false;
}
}

// For ONNX u8s8 QLinearConv, the weight zero point can be a scalar,
// or a tensor with the same channel count as the weight. For NNAPI we only
// support it being 0 (scalar) or all 0s (tensor), since NNAPI assumes the
// zero point for per-channel quantization is 0 and has no input for it
Shape weight_shape;
if (!GetShape(io_def.node_arg, weight_shape))
return false;

if (weight_shape[0] != zero_dim && zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " has mismatched int8 per-channel quantization:"
<< " weight dimension[0] " << weight_shape[0]
<< ", zero point dimension " << zero_dim;
return false;
}

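// Unpack the zero point initializer so the raw int8 values can be inspected below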
std::vector<uint8_t> unpacked_tensor;
auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, model_path, unpacked_tensor);
if (!status.IsOK()) {
LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] failed to unpack zero point tensor: " << zero_point_name
<< ", error msg: " << status.ErrorMessage();
return false;
}

// Verify that all ONNX weight zero points are 0
const int8_t* zero_points = reinterpret_cast<const int8_t*>(unpacked_tensor.data());
for (size_t i = 0; i < unpacked_tensor.size(); i++) {
if (zero_points[i] != 0) {
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only support 0 as zero point, "
<< "zero_points[" << i << "] has value: " << zero_points[i];
return false;
}
}
}

return true;
}

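// Checks the quantized inputs (or outputs) of the node unit at the given indices:
// each must carry a quant_param, have a supported tensor type, and pass the scale
// and zero point checks above.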
bool IsQuantizedIOSupported(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input) {
const auto& op_type = node_unit.OpType();
auto quant_op_type = GetQuantizedOpType(node_unit);

ORT_ENFORCE(quant_op_type != QuantizedOpType::Unknown, "[", op_type, "] is not a quantized op");

bool is_quant_conv = IsQuantizedConv(quant_op_type);
bool is_quant_matmul = (quant_op_type == QuantizedOpType::QLinearMatMul);
const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();

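// Validate each requested index: bounds, quant_param presence, tensor type,
// then quantization scale and zero point.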
for (const auto idx : indices) {
if (idx >= io_defs.size()) {
LOGS_DEFAULT(VERBOSE) << (is_input ? "Input" : "Output") << " index " << idx
<< " >= size " << io_defs.size()
<< " of NodeUnit: " << node_unit.Name();
return false;
}

const auto& io_def = io_defs[idx];
ORT_ENFORCE(io_def.quant_param.has_value(), (is_input ? "Input" : "Output"), " index ", idx, " has no quant_param");

// If this op is Qlinear[Conv/MatMul], we want to check u8s8 support for the weight tensor (or B tensor for QLinearMatMul)
bool is_conv_matmul_weight = is_input && (is_quant_conv || is_quant_matmul) && idx == 1;
bool is_conv_matmul_u8s8_weight = false;

if (is_conv_matmul_weight) {
int32_t weight_type;
if (!GetType(io_def.node_arg, weight_type))
return false;
is_conv_matmul_u8s8_weight = weight_type == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}

int32_t input_type;
if (!GetType(io_def.node_arg, input_type))
return false;

// We only support u8 for most of the inputs and all outputs, with the exception of Quantized MatMul and Conv,
// which allow s8 weight (u8s8)
// TODO, add support of s8s8
if (input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 && !is_conv_matmul_u8s8_weight) {
LOGS_DEFAULT(VERBOSE) << "NodeUnit [" << node_unit.Name()
<< "], type [" << op_type << "]'s "
<< (is_input ? "Input" : "Output") << " index [" << idx
<< "] has unsupported type [" << input_type << "]";
return false;
}

// Check scale and zero point
if (!IsQuantizationScaleSupported(initializers, io_def, params, op_type,
is_quant_matmul, is_conv_matmul_weight, is_conv_matmul_u8s8_weight)) {
return false;
}

if (!IsQuantizationZeroPointSupported(initializers, io_def, params, op_type, node_unit.ModelPath(),
is_quant_matmul, is_conv_matmul_weight, is_conv_matmul_u8s8_weight)) {
return false;
}
}

return true;
}

bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input) {
const auto& op_type = node_unit.OpType();
@@ -192,8 +387,10 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
bool is_conv_matmul_u8s8_weight = false;

if (is_conv_matmul_weight) {
- const auto& weight_tensor = *initializers.at(io_def.node_arg.Name());
- is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
+ int32_t weight_type;
+ if (!GetType(io_def.node_arg, weight_type))
+ return false;
+ is_conv_matmul_u8s8_weight = weight_type == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}

const auto& scale_tensor = *initializers.at(scale_name);
@@ -219,10 +416,13 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
return false;
}

- const auto& weight_tensor = *initializers.at(io_def.node_arg.Name());
- if (weight_tensor.dims()[0] != scales_dim) {
+ Shape weight_shape;
+ if (!GetShape(io_def.node_arg, weight_shape))
+ return false;
+
+ if (weight_shape[0] != scales_dim) {
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
- << " weight dimension[0] " << weight_tensor.dims()[0]
+ << " weight dimension[0] " << weight_shape[0]
<< " scale dimension " << scales_dim;
return false;
}
@@ -269,8 +469,10 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
bool is_conv_matmul_u8s8_weight = false;

if (is_conv_matmul_weight) {
- const auto& weight_tensor = *initializers.at(io_def.node_arg.Name());
- is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
+ int32_t weight_type;
+ if (!GetType(io_def.node_arg, weight_type))
+ return false;
+ is_conv_matmul_u8s8_weight = weight_type == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}

const auto& zero_tensor = *initializers.at(zero_point_name);
Expand Down Expand Up @@ -303,10 +505,13 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
// or a tensor with same channel as weight, for NNAPI we only support it be
// 0 (scalar) or all 0 (tensor), NNAPI will assume the zero point for per-channel
// quantization is 0 there is no input for it
const auto& weight_tensor = *initializers.at(io_def.node_arg.Name());
if (weight_tensor.dims()[0] != zero_dim && zero_dim != 1) {
Shape weight_shape;
if (!GetShape(io_def.node_arg, weight_shape))
return false;

if (weight_shape[0] != zero_dim && zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
<< " weight dimension[0] " << weight_tensor.dims()[0]
<< " weight dimension[0] " << weight_shape[0]
<< " zero point dimension " << zero_dim;
return false;
}
onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h

@@ -114,6 +114,10 @@ bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit);
// Check if a qlinear binary op has valid inputs, Qlinear[Conv/MatMul/Add]
bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit);

+ // Check if the given quantized input(s) or output(s) is supported
+ bool IsQuantizedIOSupported(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
+ const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input);
+
// Check if a qlinear op has valid scales for given indices
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input);
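
As a rough illustration (not part of this commit), an op support checker could combine the new helper like this. The wrapper name and the index lists are assumptions for a QLinearConv-style node unit with quantized inputs x and w and a quantized output y:

// Hypothetical caller, for illustration only; not in this commit.
inline bool IsExampleQuantizedOpSupported(const InitializedTensorSet& initializers,
                                          const NodeUnit& node_unit,
                                          const OpSupportCheckParams& params) {
  // Check quantized inputs x (index 0) and w (index 1), then the quantized output y (index 0).
  return IsQuantizedIOSupported(initializers, node_unit, {0, 1}, params, true /* is_input */) &&
         IsQuantizedIOSupported(initializers, node_unit, {0}, params, false /* is_input */);
}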