From 994ce0ff62deba40f485bcfdc9ad12e11bb59935 Mon Sep 17 00:00:00 2001
From: NaijingGuo
Date: Thu, 5 Sep 2024 16:51:28 -0700
Subject: [PATCH] DEPTHWISE_CONV: disable type check, add quant and dequant

---
 .../lite/micro/kernels/depthwise_conv.cc      | 109 ++++++++++++++++--
 .../micro/kernels/depthwise_conv_common.cc    |  22 ++--
 .../kernels/xtensa/depthwise_conv_hifi.cc     |   2 +-
 3 files changed, 111 insertions(+), 22 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc
index 398f8cd0800..a1a51a162bf 100644
--- a/tensorflow/lite/micro/kernels/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc
@@ -23,6 +23,9 @@ limitations under the License.
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/micro_log.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
 
 namespace tflite {
 namespace {
@@ -53,17 +56,101 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
-      tflite::reference_ops::DepthwiseConv(
-          DepthwiseConvParamsFloat(params, data),
-          tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<float>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<float>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetOptionalTensorData<float>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<float>(output));
-      break;
+      // setup to get the per-channel filter scales
+      MicroContext* micro_context = GetMicroContext(context);
+      TfLiteTensor* filter_quant =
+          micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
+      const auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
+          filter_quant->quantization.params);
+      const float* filter_scales = affine_quantization->scale->data;
+
+      // quantize input to int16; the hard-coded scale spreads the float range
+      // [-5.6268444, 5.6268444] over the 65536 int16 levels (zero_point = 0)
+      RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+      const int flat_size = input_shape.FlatSize();
+      int16_t* quantized_input_data = new int16_t[flat_size];
+      MicroPrintf("Quant input to int16");
+      tflite::QuantizationParams op_params;
+      op_params.zero_point = 0;
+      op_params.scale = (5.6268444 * 2) / 65536;
+      tflite::reference_ops::AffineQuantize(
+          op_params, input_shape, tflite::micro::GetTensorData<float>(input),
+          input_shape, quantized_input_data);
+
+      // set up an all-zero int64 bias
+      RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
+      const int bias_flat_size = bias_shape.FlatSize();
+      int64_t* new_bias = new int64_t[bias_flat_size];
+      std::fill_n(new_bias, bias_flat_size, 0);
+
+      // set up an int16 output buffer
+      RuntimeShape new_output_shape = tflite::micro::GetTensorShape(output);
+      const int new_output_flat_size = new_output_shape.FlatSize();
+      int16_t* new_output = new int16_t[new_output_flat_size];
+      MicroPrintf("output size, %d", new_output_flat_size);
+
+      const int num_channels =
+          filter->dims->data[kDepthwiseConvQuantizedDimension];
+      MicroPrintf("num_channels, %d", num_channels);
+
+      // set and calculate scales and shifts (512 is a fixed upper bound on
+      // the number of channels)
+      const float input_scale = (5.6268444 * 2) / 65536;
+      const float output_scale = (11.657923 * 2) / 65536;
+
+      int32_t* per_channel_output_multiplier = new int32_t[512];
+      std::fill_n(per_channel_output_multiplier, 512, 0);
+      int32_t* per_channel_output_shift = new int32_t[512];
+      std::fill_n(per_channel_output_shift, 512, 0);
+
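+      // For each channel, fold the input, filter, and output scales into a
+      // single effective rescale factor and decompose it into the Q31
+      // fixed-point significand and power-of-two shift expected by the
+      // per-channel integer kernel.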
+      for (int i = 0; i < num_channels; ++i) {
+        const double effective_output_scale =
+            static_cast<double>(input_scale) *
+            static_cast<double>(filter_scales[i]) /
+            static_cast<double>(output_scale);
+        int32_t significand;
+        int channel_shift;
+        tflite::QuantizeMultiplier(effective_output_scale, &significand,
+                                   &channel_shift);
+        per_channel_output_multiplier[i] = significand;
+        per_channel_output_shift[i] = channel_shift;
+      }
+
+      micro_context->DeallocateTempTfLiteTensor(filter_quant);
+
+      reference_integer_ops::DepthwiseConvPerChannel(
+          DepthwiseConvParamsQuantized(params, data),
+          per_channel_output_multiplier, per_channel_output_shift,
+          tflite::micro::GetTensorShape(input), quantized_input_data,
+          tflite::micro::GetTensorShape(filter),
+          tflite::micro::GetTensorData<int8_t>(filter),
+          tflite::micro::GetTensorShape(bias), new_bias,
+          tflite::micro::GetTensorShape(output), new_output);
+
+      RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+
+      // dequantize the int16 result back to float with the hard-coded
+      // output scale
+      tflite::DequantizationParams dequantization_params;
+      dequantization_params.scale = (11.657923 * 2) / 65536;
+      dequantization_params.zero_point = 0;
+      MicroPrintf("dequant params set");
+
+      tflite::reference_ops::Dequantize(
+          dequantization_params, output_shape, new_output, output_shape,
+          tflite::micro::GetTensorData<float>(output));
+
+      // tflite::reference_ops::DepthwiseConv(
+      //     DepthwiseConvParamsFloat(params, data),
+      //     tflite::micro::GetTensorShape(input),
+      //     tflite::micro::GetTensorData<float>(input),
+      //     tflite::micro::GetTensorShape(filter),
+      //     tflite::micro::GetTensorData<float>(filter),
+      //     tflite::micro::GetTensorShape(bias),
+      //     tflite::micro::GetOptionalTensorData<float>(bias),
+      //     tflite::micro::GetTensorShape(output),
+      //     tflite::micro::GetTensorData<float>(output));
+      break;
     }
     case kTfLiteInt8: {
       switch (filter->type) {
diff --git a/tensorflow/lite/micro/kernels/depthwise_conv_common.cc b/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
index 52804de3315..18b20b1bb5e 100644
--- a/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
+++ b/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
@@ -72,8 +72,10 @@ DepthwiseParams DepthwiseConvParamsQuantized(
   op_params.dilation_height_factor = params.dilation_height_factor;
   op_params.dilation_width_factor = params.dilation_width_factor;
   op_params.depth_multiplier = params.depth_multiplier;
-  op_params.quantized_activation_min = data.output_activation_min;
-  op_params.quantized_activation_max = data.output_activation_max;
+// op_params.quantized_activation_min = data.output_activation_min;
+// op_params.quantized_activation_max = data.output_activation_max;
+  op_params.quantized_activation_min = -32768;  // full int16 range
+  op_params.quantized_activation_max = 32767;
   return op_params;
 }
 
@@ -112,7 +114,7 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
   // parameters set. This is usually done during quantized training.
   if (data_type != kTfLiteFloat32) {
     int output_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
-
+
     TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
         context, input, filter, bias, output, params.activation,
         &data->output_multiplier, &data->output_shift,
@@ -188,13 +190,13 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
         affine_quantization->zero_point->size);
   }
 
-  TF_LITE_ENSURE_MSG(
-      context,
-      input->type == filter->type ||
-          (input->type == kTfLiteInt8 &&
-           (filter->type == kTfLiteInt4 || filter->type == kTfLiteInt8)) ||
-          (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
-      "Hybrid models are not supported on TFLite Micro.");
+// TF_LITE_ENSURE_MSG(
+//     context,
+//     input->type == filter->type ||
+//         (input->type == kTfLiteInt8 &&
+//          (filter->type == kTfLiteInt4 || filter->type == kTfLiteInt8)) ||
+//         (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
+//     "Hybrid models are not supported on TFLite Micro.");
 
   if (filter->type == kTfLiteInt4) {
     int filter_size =
diff --git a/tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc b/tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc
index 03a73caccf7..b66383b4f4f 100644
--- a/tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc
@@ -48,7 +48,7 @@ TfLiteStatus DepthwiseConvPrepareHifi(TfLiteContext* context,
   TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* filter =
       micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
-  TF_LITE_ENSURE(context, filter != nullptr);
+// TF_LITE_ENSURE(context, filter != nullptr);
 
   const RuntimeShape& input_shape = GetTensorShape(input);
   const RuntimeShape& filter_shape = GetTensorShape(filter);
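
For reference, the per-channel rescaling set up in the loop above can be exercised in isolation. The following standalone C++ sketch re-implements the significand/shift decomposition that tflite::QuantizeMultiplier performs (ignoring its extreme-shift saturation handling); the input and output scales are the hard-coded values from the patch, while filter_scale is a hypothetical stand-in for one entry of filter_scales.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose real_multiplier == significand * 2^shift, with the significand
// stored as a Q31 fixed-point value. Mirrors the core of
// tflite::QuantizeMultiplier for the non-zero, in-range case.
static void DecomposeMultiplier(double real_multiplier, int32_t* significand,
                                int* shift) {
  if (real_multiplier == 0.0) {
    *significand = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding carried up to exactly 1.0
    q_fixed /= 2;
    ++(*shift);
  }
  *significand = static_cast<int32_t>(q_fixed);
}

int main() {
  // Hard-coded scales from the patch: float ranges [-5.6268444, 5.6268444]
  // (input) and [-11.657923, 11.657923] (output) spread over the 65536
  // int16 levels with zero_point = 0.
  const double input_scale = (5.6268444 * 2) / 65536;
  const double output_scale = (11.657923 * 2) / 65536;
  const double filter_scale = 0.0123;  // hypothetical per-channel scale

  int32_t significand = 0;
  int shift = 0;
  DecomposeMultiplier(input_scale * filter_scale / output_scale,
                      &significand, &shift);
  std::printf("significand = %ld, shift = %d\n",
              static_cast<long>(significand), shift);
  return 0;
}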