Commit

DEPTHWISE_CONV: disable type check, add quant and dequant
NaijingGuo committed Sep 5, 2024
1 parent 9f59e2a commit 994ce0f
Showing 3 changed files with 111 additions and 22 deletions.
109 changes: 98 additions & 11 deletions tensorflow/lite/micro/kernels/depthwise_conv.cc
@@ -23,6 +23,9 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_log.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"

namespace tflite {
namespace {
@@ -53,17 +56,101 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
tflite::reference_ops::DepthwiseConv(
DepthwiseConvParamsFloat(params, data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
// Set up access to the per-channel filter scales.
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* filter_quant =
micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(filter_quant->quantization.params);
const float* filter_scales = affine_quantization->scale->data;

// Quantize the float input to int16 using a hard-coded symmetric scale
// (zero_point = 0, scale = 2 * 5.6268444 / 65536).
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
const int flat_size = input_shape.FlatSize();
int16_t* quantized_input_data = new int16_t[flat_size];
MicroPrintf("Quant input to int16");
tflite::QuantizationParams op_params;
op_params.zero_point = 0;
op_params.scale = (5.6268444 * 2) / 65536;
tflite::reference_ops::AffineQuantize(
    op_params, input_shape, tflite::micro::GetTensorData<float>(input),
    input_shape, quantized_input_data);


// Use a zero int64 bias; the float bias is not re-quantized here.
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
const int bias_flat_size = bias_shape.FlatSize();
int64_t* new_bias = new int64_t[bias_flat_size];
std::fill_n(new_bias, bias_flat_size, 0);

// Allocate an int16 buffer for the kernel output.
RuntimeShape new_output_shape = tflite::micro::GetTensorShape(output);
const int new_output_flat_size = new_output_shape.FlatSize();
int16_t* new_output = new int16_t[new_output_flat_size];
MicroPrintf("output size, %d", new_output_flat_size);

const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
MicroPrintf("num_channels, %d", num_channels);

// Hard-coded symmetric int16 scales for the input and output, plus
// per-channel multiplier/shift buffers (512 entries, assumed >= num_channels).
const float input_scale = (5.6268444 * 2) / 65536;
const float output_scale = (11.657923 * 2) / 65536;

int32_t* per_channel_output_multiplier = new int32_t[512];
std::fill_n(per_channel_output_multiplier, 512, 0);
int32_t* per_channel_output_shift = new int32_t[512];
std::fill_n(per_channel_output_shift, 512, 0);

for (int i = 0; i < num_channels; ++i) {
const double effective_output_scale = static_cast<double>(input_scale) *
static_cast<double>(filter_scales[i]) /
static_cast<double>(output_scale);
int32_t significand;
int channel_shift;
tflite::QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
per_channel_output_multiplier[i] = significand;
per_channel_output_shift[i] = channel_shift;
}

micro_context->DeallocateTempTfLiteTensor(filter_quant);

reference_integer_ops::DepthwiseConvPerChannel(
DepthwiseConvParamsQuantized(params, data),
per_channel_output_multiplier, per_channel_output_shift,
tflite::micro::GetTensorShape(input),
quantized_input_data,
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
new_bias,
tflite::micro::GetTensorShape(output),
new_output);

// Dequantize the int16 result back into the float output tensor using the
// hard-coded output scale.
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
tflite::DequantizationParams dequantization_params;
dequantization_params.scale = (11.657923 * 2) / 65536;
dequantization_params.zero_point = 0;
MicroPrintf("dequant params set");

tflite::reference_ops::Dequantize(
    dequantization_params, output_shape, new_output, output_shape,
    tflite::micro::GetTensorData<float>(output));

// Release the temporary buffers allocated above.
delete[] quantized_input_data;
delete[] new_bias;
delete[] new_output;
delete[] per_channel_output_multiplier;
delete[] per_channel_output_shift;

// tflite::reference_ops::DepthwiseConv(
// DepthwiseConvParamsFloat(params, data),
// tflite::micro::GetTensorShape(input),
// tflite::micro::GetTensorData<float>(input),
// tflite::micro::GetTensorShape(filter),
// tflite::micro::GetTensorData<float>(filter),
// tflite::micro::GetTensorShape(bias),
// tflite::micro::GetOptionalTensorData<float>(bias),
// tflite::micro::GetTensorShape(output),
// tflite::micro::GetTensorData<float>(output));
break;
}
case kTfLiteInt8: {
switch (filter->type) {
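For reference, both hard-coded scales in the new float path follow the symmetric int16 pattern scale = 2 * max_abs / 65536, so 5.6268444 and 11.657923 read as assumed absolute ranges for the input and output, and tflite::QuantizeMultiplier splits each effective rescale (input_scale * filter_scale / output_scale) into a Q0.31 significand plus a power-of-two shift. The standalone sketch below reproduces that arithmetic in plain C++ under those assumptions; the helper names and the 0.02 filter scale are illustrative and are not part of this commit or the TFLM API.

// Standalone sketch, not TFLM code: reproduces the scale arithmetic used in the
// diff above, assuming symmetric int16 quantization (zero_point = 0).
#include <cmath>
#include <cstdint>
#include <cstdio>

// Symmetric int16 scale: the range [-max_abs, max_abs] is spread over 2^16 steps.
double SymmetricInt16Scale(double max_abs) { return (max_abs * 2.0) / 65536.0; }

// Split a positive real multiplier into a Q0.31 significand and a power-of-two
// shift, mirroring what tflite::QuantizeMultiplier produces:
// real ~= significand * 2^(shift - 31).
void DecomposeMultiplier(double real, int32_t* significand, int* shift) {
  if (real == 0.0) {
    *significand = 0;
    *shift = 0;
    return;
  }
  const double frac = std::frexp(real, shift);  // real = frac * 2^shift, frac in [0.5, 1)
  int64_t q = static_cast<int64_t>(std::round(frac * (1LL << 31)));
  if (q == (1LL << 31)) {  // rounding pushed frac up to 1.0
    q /= 2;
    ++*shift;
  }
  *significand = static_cast<int32_t>(q);
}

int main() {
  const double input_scale = SymmetricInt16Scale(5.6268444);   // matches (5.6268444*2)/65536
  const double output_scale = SymmetricInt16Scale(11.657923);  // matches (11.657923*2)/65536
  const double filter_scale = 0.02;  // hypothetical per-channel filter scale
  int32_t significand = 0;
  int shift = 0;
  DecomposeMultiplier(input_scale * filter_scale / output_scale, &significand, &shift);
  std::printf("multiplier=%ld shift=%d\n", static_cast<long>(significand), shift);
  return 0;
}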
22 changes: 12 additions & 10 deletions tensorflow/lite/micro/kernels/depthwise_conv_common.cc
@@ -72,8 +72,10 @@ DepthwiseParams DepthwiseConvParamsQuantized(
op_params.dilation_height_factor = params.dilation_height_factor;
op_params.dilation_width_factor = params.dilation_width_factor;
op_params.depth_multiplier = params.depth_multiplier;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
// op_params.quantized_activation_min = data.output_activation_min;
// op_params.quantized_activation_max = data.output_activation_max;
// Use the full int16 range, so only saturation clamps the result.
op_params.quantized_activation_min = -32768;
op_params.quantized_activation_max = 32767;
return op_params;
}

@@ -112,7 +114,7 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
int output_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];

TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
@@ -188,13 +190,13 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
affine_quantization->zero_point->size);
}

TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt8 &&
(filter->type == kTfLiteInt4 || filter->type == kTfLiteInt8)) ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
// TF_LITE_ENSURE_MSG(
// context,
// input->type == filter->type ||
// (input->type == kTfLiteInt8 &&
// (filter->type == kTfLiteInt4 || filter->type == kTfLiteInt8)) ||
// (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
// "Hybrid models are not supported on TFLite Micro.");

if (filter->type == kTfLiteInt4) {
int filter_size =
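With the type check above disabled, the float32 Eval path can hand an int16 activation buffer and the model's int8 filter to the per-channel kernel, and the widened activation bounds mean only int16 saturation clamps the result. Below is a minimal sketch of the float to int16 to float round trip that wraps that kernel, assuming symmetric quantization (zero_point = 0) and the hard-coded input scale from the diff; the helper functions are illustrative, not TFLM APIs.

// Standalone sketch, not TFLM code: the float -> int16 -> float round trip the
// new kTfLiteFloat32 path performs around the int16 per-channel kernel.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Quantize a float value to int16 with zero_point = 0, saturating at the type limits.
int16_t QuantizeToInt16(float x, float scale) {
  const long q = std::lround(x / scale);
  const long clamped = std::min(32767L, std::max(-32768L, q));
  return static_cast<int16_t>(clamped);
}

// Dequantize an int16 value back to float with zero_point = 0.
float DequantizeFromInt16(int16_t q, float scale) { return scale * static_cast<float>(q); }

int main() {
  const float input_scale = (5.6268444f * 2.0f) / 65536.0f;  // hard-coded scale from the diff
  const float x = 1.2345f;                                   // arbitrary example activation
  const int16_t q = QuantizeToInt16(x, input_scale);
  std::printf("x=%f  q=%d  x_roundtrip=%f\n", x, q, DequantizeFromInt16(q, input_scale));
  return 0;
}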
(third changed file; filename not shown in this view)
@@ -48,7 +48,7 @@ TfLiteStatus DepthwiseConvPrepareHifi(TfLiteContext* context,
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
// TF_LITE_ENSURE(context, filter != nullptr);

const RuntimeShape& input_shape = GetTensorShape(input);
const RuntimeShape& filter_shape = GetTensorShape(filter);
