From 994ce0ff62deba40f485bcfdc9ad12e11bb59935 Mon Sep 17 00:00:00 2001
From: NaijingGuo
Date: Thu, 5 Sep 2024 16:51:28 -0700
Subject: [PATCH] DEPTHWISE_CONV: disable type check, add quant and dequant

---
 .../lite/micro/kernels/depthwise_conv.cc      | 109 ++++++++++++++++--
 .../micro/kernels/depthwise_conv_common.cc    |  22 ++--
 .../kernels/xtensa/depthwise_conv_hifi.cc     |   2 +-
 3 files changed, 111 insertions(+), 22 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc
index 398f8cd0800..a1a51a162bf 100644
--- a/tensorflow/lite/micro/kernels/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc
@@ -23,6 +23,9 @@ limitations under the License.
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/micro_log.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
 
 namespace tflite {
 namespace {
@@ -53,17 +56,101 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
-      tflite::reference_ops::DepthwiseConv(
-          DepthwiseConvParamsFloat(params, data),
-          tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<float>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<float>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetOptionalTensorData<float>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<float>(output));
-      break;
+      // setup to get the per-channel filter scales
+      MicroContext* micro_context = GetMicroContext(context);
+      TfLiteTensor* filter_quant =
+          micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
+      const auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
+          filter_quant->quantization.params);
+      const float* filter_scales = affine_quantization->scale->data;
+
+      // quantize input to int16; the hard-coded scale spreads the float range
+      // [-5.6268444, 5.6268444] over the 65536 int16 levels (zero_point = 0)
+      RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+      const int flat_size = input_shape.FlatSize();
+      int16_t* quantized_input_data = new int16_t[flat_size];
+      MicroPrintf("Quant input to int16");
+      tflite::QuantizationParams op_params;
+      op_params.zero_point = 0;
+      op_params.scale = (5.6268444 * 2) / 65536;
+      tflite::reference_ops::AffineQuantize(
+          op_params, input_shape, tflite::micro::GetTensorData<float>(input),
+          input_shape, quantized_input_data);
+
+      // set up an all-zero int64 bias
+      RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
+      const int bias_flat_size = bias_shape.FlatSize();
+      int64_t* new_bias = new int64_t[bias_flat_size];
+      std::fill_n(new_bias, bias_flat_size, 0);
+
+      // set up an int16 output buffer
+      RuntimeShape new_output_shape = tflite::micro::GetTensorShape(output);
+      const int new_output_flat_size = new_output_shape.FlatSize();
+      int16_t* new_output = new int16_t[new_output_flat_size];
+      MicroPrintf("output size, %d", new_output_flat_size);
+
+      const int num_channels =
+          filter->dims->data[kDepthwiseConvQuantizedDimension];
+      MicroPrintf("num_channels, %d", num_channels);
+
+      // set and calculate scales and shifts (512 is a fixed upper bound on
+      // the number of channels)
+      const float input_scale = (5.6268444 * 2) / 65536;
+      const float output_scale = (11.657923 * 2) / 65536;
+
+      int32_t* per_channel_output_multiplier = new int32_t[512];
+      std::fill_n(per_channel_output_multiplier, 512, 0);
+      int32_t* per_channel_output_shift = new int32_t[512];
+      std::fill_n(per_channel_output_shift, 512, 0);
+
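+      // For each channel, fold the input, filter, and output scales into a
+      // single effective rescale factor and decompose it into the Q31
+      // fixed-point significand and power-of-two shift expected by the
+      // per-channel integer kernel.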
+      for (int i = 0; i < num_channels; ++i) {
+        const double effective_output_scale =
+            static_cast<double>(input_scale) *
+            static_cast<double>(filter_scales[i]) /
+            static_cast<double>(output_scale);
+        int32_t significand;
+        int channel_shift;
+        tflite::QuantizeMultiplier(effective_output_scale, &significand,
+                                   &channel_shift);
+        per_channel_output_multiplier[i] = significand;
+        per_channel_output_shift[i] = channel_shift;
+      }
+
+      micro_context->DeallocateTempTfLiteTensor(filter_quant);
+
+      reference_integer_ops::DepthwiseConvPerChannel(
+          DepthwiseConvParamsQuantized(params, data),
+          per_channel_output_multiplier, per_channel_output_shift,
+          tflite::micro::GetTensorShape(input), quantized_input_data,
+          tflite::micro::GetTensorShape(filter),
+          tflite::micro::GetTensorData<int8_t>(filter),
+          tflite::micro::GetTensorShape(bias), new_bias,
+          tflite::micro::GetTensorShape(output), new_output);
+
+      RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+
+      // dequantize the int16 result back to float with the hard-coded
+      // output scale
+      tflite::DequantizationParams dequantization_params;
+      dequantization_params.scale = (11.657923 * 2) / 65536;
+      dequantization_params.zero_point = 0;
+      MicroPrintf("dequant params set");
+
+      tflite::reference_ops::Dequantize(
+          dequantization_params, output_shape, new_output, output_shape,
+          tflite::micro::GetTensorData<float>(output));
+
+      // tflite::reference_ops::DepthwiseConv(
+      //     DepthwiseConvParamsFloat(params, data),
+      //     tflite::micro::GetTensorShape(input),
+      //     tflite::micro::GetTensorData<float>(input),
+      //     tflite::micro::GetTensorShape(filter),
+      //     tflite::micro::GetTensorData<float>(filter),
+      //     tflite::micro::GetTensorShape(bias),
+      //     tflite::micro::GetOptionalTensorData<float>(bias),
+      //     tflite::micro::GetTensorShape(output),
+      //     tflite::micro::GetTensorData<float>(output));
+      break;
     }
     case kTfLiteInt8: {
       switch (filter->type) {
diff --git a/tensorflow/lite/micro/kernels/depthwise_conv_common.cc b/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
index 52804de3315..18b20b1bb5e 100644
--- a/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
+++ b/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
@@ -72,8 +72,10 @@ DepthwiseParams DepthwiseConvParamsQuantized(
   op_params.dilation_height_factor = params.dilation_height_factor;
   op_params.dilation_width_factor = params.dilation_width_factor;
   op_params.depth_multiplier = params.depth_multiplier;
-  op_params.quantized_activation_min = data.output_activation_min;
-  op_params.quantized_activation_max = data.output_activation_max;
+// op_params.quantized_activation_min = data.output_activation_min;
+// op_params.quantized_activation_max = data.output_activation_max;
+  op_params.quantized_activation_min = -32768;  // full int16 range
+  op_params.quantized_activation_max = 32767;
   return op_params;
 }
 
@@ -112,7 +114,7 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
   // parameters set. This is usually done during quantized training.
   if (data_type != kTfLiteFloat32) {
     int output_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
-
+
     TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
         context, input, filter, bias, output, params.activation,
         &data->output_multiplier, &data->output_shift,
@@ -188,13 +190,13 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
         affine_quantization->zero_point->size);
   }
 
-  TF_LITE_ENSURE_MSG(
-      context,
-      input->type == filter->type ||
-          (input->type == kTfLiteInt8 &&
-           (filter->type == kTfLiteInt4 || filter->type == kTfLiteInt8)) ||
-          (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
-      "Hybrid models are not supported on TFLite Micro.");
+// TF_LITE_ENSURE_MSG(
+//     context,
+//     input->type == filter->type ||
+//         (input->type == kTfLiteInt8 &&
+//          (filter->type == kTfLiteInt4 || filter->type == kTfLiteInt8)) ||
+//         (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
+//     "Hybrid models are not supported on TFLite Micro.");
 
   if (filter->type == kTfLiteInt4) {
     int filter_size =
diff --git a/tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc b/tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc
index 03a73caccf7..b66383b4f4f 100644
--- a/tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc
@@ -48,7 +48,7 @@ TfLiteStatus DepthwiseConvPrepareHifi(TfLiteContext* context,
   TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* filter =
       micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
-  TF_LITE_ENSURE(context, filter != nullptr);
+// TF_LITE_ENSURE(context, filter != nullptr);
 
   const RuntimeShape& input_shape = GetTensorShape(input);
   const RuntimeShape& filter_shape = GetTensorShape(filter);
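
For reference, the per-channel rescaling set up in the loop above can be exercised in isolation. The following standalone C++ sketch re-implements the significand/shift decomposition that tflite::QuantizeMultiplier performs (ignoring its extreme-shift saturation handling); the input and output scales are the hard-coded values from the patch, while filter_scale is a hypothetical stand-in for one entry of filter_scales.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose real_multiplier == significand * 2^shift, with the significand
// stored as a Q31 fixed-point value. Mirrors the core of
// tflite::QuantizeMultiplier for the non-zero, in-range case.
static void DecomposeMultiplier(double real_multiplier, int32_t* significand,
                                int* shift) {
  if (real_multiplier == 0.0) {
    *significand = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding carried up to exactly 1.0
    q_fixed /= 2;
    ++(*shift);
  }
  *significand = static_cast<int32_t>(q_fixed);
}

int main() {
  // Hard-coded scales from the patch: float ranges [-5.6268444, 5.6268444]
  // (input) and [-11.657923, 11.657923] (output) spread over the 65536
  // int16 levels with zero_point = 0.
  const double input_scale = (5.6268444 * 2) / 65536;
  const double output_scale = (11.657923 * 2) / 65536;
  const double filter_scale = 0.0123;  // hypothetical per-channel scale

  int32_t significand = 0;
  int shift = 0;
  DecomposeMultiplier(input_scale * filter_scale / output_scale,
                      &significand, &shift);
  std::printf("significand = %ld, shift = %d\n",
              static_cast<long>(significand), shift);
  return 0;
}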