From 9a32964be99131002a83e494299fc44a3d39fdff Mon Sep 17 00:00:00 2001
From: Ryan Kuester
Date: Mon, 16 Dec 2024 09:48:04 -0700
Subject: [PATCH 1/2] feat(compression): allocate resource variables in
 persistent buffer (#3013)

Allocate resource variables in a persistent buffer when the input
tensor is compressed. Extend tests to validate operation.

BUG=part of #2636
---
 .../lite/micro/kernels/assign_variable.cc     | 51 ++++++++++++++++++-
 .../lite/micro/micro_resource_variable.cc     | 11 ++--
 .../lite/micro/micro_resource_variable.h      |  6 +--
 .../micro/micro_resource_variable_test.cc     |  7 ++-
 4 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/assign_variable.cc b/tensorflow/lite/micro/kernels/assign_variable.cc
index bd99bd1aa0c..9374279e9af 100644
--- a/tensorflow/lite/micro/kernels/assign_variable.cc
+++ b/tensorflow/lite/micro/kernels/assign_variable.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_graph.h"
 #include "tensorflow/lite/micro/micro_log.h"
 #include "tensorflow/lite/micro/micro_resource_variable.h"
+#include "tensorflow/lite/micro/micro_utils.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
@@ -35,6 +36,20 @@ namespace {
 constexpr int kInputVariableId = 0;
 constexpr int kInputValue = 1;
 
+#ifdef USE_TFLM_COMPRESSION
+
+struct OpData {
+  // scratch buffer for compressed input tensor
+  int scratch_index;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0);
@@ -70,6 +85,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                                                    context, input_value));
   }
 
+#ifdef USE_TFLM_COMPRESSION
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpData* data = static_cast<OpData*>(node->user_data);
+  // Compression scratch buffers.
+  // These will only be allocated if the tensor is compressed.
+  data->scratch_index =
+      micro_context->AllocateDecompressionScratchBuffer(node, kInputValue);
+
+#endif  // USE_TFLM_COMPRESSION
+
   micro_context->DeallocateTempTfLiteTensor(input_value);
   return kTfLiteOk;
 }
@@ -93,15 +119,36 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                 "ResourceVariables and pass it to the interpreter.");
     return kTfLiteError;
   }
+
+#ifdef USE_TFLM_COMPRESSION
+  OpData* data = static_cast<OpData*>(node->user_data);
+  const CompressionTensorData* comp_td =
+      micro_context->GetTensorCompressionData(node, kInputValue);
+  const void* buffer = tflite::micro::GetTensorData<void>(
+      micro_context, input_value, comp_td, data->scratch_index);
+#else   // USE_TFLM_COMPRESSION
+  const void* buffer = tflite::micro::GetTensorData<void>(input_value);
+#endif  // USE_TFLM_COMPRESSION
+
   TF_LITE_ENSURE_OK(context,
-                    resources->Assign(input_id->data.i32[0], input_value));
+                    resources->Assign(input_id->data.i32[0],
+                                      EvalTensorBytes(input_value), buffer));
   return kTfLiteOk;
 }
 
 }  // namespace.
+#ifdef USE_TFLM_COMPRESSION + +TFLMRegistration Register_ASSIGN_VARIABLE() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); + +#else // USE_TFLM_COMPRESSION + TFLMRegistration Register_ASSIGN_VARIABLE() { return tflite::micro::RegisterOp(nullptr, Prepare, Eval); + +#endif // USE_TFLM_COMPRESSION } } // namespace tflite diff --git a/tensorflow/lite/micro/micro_resource_variable.cc b/tensorflow/lite/micro/micro_resource_variable.cc index 767e7d17d6f..843aac664bc 100644 --- a/tensorflow/lite/micro/micro_resource_variable.cc +++ b/tensorflow/lite/micro/micro_resource_variable.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -113,8 +113,8 @@ TfLiteStatus MicroResourceVariables::Allocate(int id, TfLiteContext* context, return kTfLiteOk; } -TfLiteStatus MicroResourceVariables::Assign(int id, - const TfLiteEvalTensor* tensor) { +TfLiteStatus MicroResourceVariables::Assign(int id, size_t count_bytes, + const void* input_buffer) { if (id < 0 || id >= num_resource_variables_) { MicroPrintf("Attempting to read non-existent resource variable %d", id); return kTfLiteError; @@ -128,8 +128,9 @@ TfLiteStatus MicroResourceVariables::Assign(int id, "with a TfLiteTensor first."); return kTfLiteError; } - TFLITE_DCHECK(EvalTensorBytes(tensor) == variable.bytes); - memcpy(variable.resource_buffer, tensor->data.raw, variable.bytes); + TFLITE_DCHECK(count_bytes == variable.bytes); + TFLITE_DCHECK(input_buffer != nullptr); + memcpy(variable.resource_buffer, input_buffer, variable.bytes); return kTfLiteOk; } diff --git a/tensorflow/lite/micro/micro_resource_variable.h b/tensorflow/lite/micro/micro_resource_variable.h index fb9917d4784..57da6497b3a 100644 --- a/tensorflow/lite/micro/micro_resource_variable.h +++ b/tensorflow/lite/micro/micro_resource_variable.h @@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -46,10 +46,10 @@ class MicroResourceVariables { TfLiteStatus Allocate(int id, TfLiteContext* context, const TfLiteTensor* tensor); - // Copies input tensor contents to the resource buffer. + // Copies input_buffer contents to the resource buffer. // AllocateResourceVariable with a TFLite tensor must have been called first // in order to allocate the resource buffer. - TfLiteStatus Assign(int id, const TfLiteEvalTensor* tensor); + TfLiteStatus Assign(int id, size_t count_bytes, const void* input_buffer); // Zeros out all resource buffers. TfLiteStatus ResetAll(); diff --git a/tensorflow/lite/micro/micro_resource_variable_test.cc b/tensorflow/lite/micro/micro_resource_variable_test.cc index 13868bb440d..a30718cb994 100644 --- a/tensorflow/lite/micro/micro_resource_variable_test.cc +++ b/tensorflow/lite/micro/micro_resource_variable_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_resource_variable.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/micro/test_helpers.h" #include "tensorflow/lite/micro/testing/micro_test.h" @@ -120,7 +121,9 @@ TF_LITE_MICRO_TEST(VerifyAssignAndReadResourceBuffer) { .type = kTfLiteFloat32, }; - resource_variables->Assign(id, &assign_tensor); + resource_variables->Assign( + id, tflite::EvalTensorBytes(&assign_tensor), + tflite::micro::GetTensorData(&assign_tensor)); int32_t buffer[32]; TfLiteEvalTensor read_tensor = { From 50e7e5dbd8485a432e2e91462f07e23068124bed Mon Sep 17 00:00:00 2001 From: Ryan Kuester Date: Mon, 16 Dec 2024 10:46:34 -0700 Subject: [PATCH 2/2] feat(compression): implement tensor decompression in op concatenation (#3014) Implement tensor decompression in op concatenation. Extend tests to validate operation on compressed tensors. BUG=part of #2636 --- .../lite/micro/kernels/concatenation.cc | 112 +++++---- .../lite/micro/kernels/concatenation_test.cc | 227 +++++++++++++++++- 2 files changed, 282 insertions(+), 57 deletions(-) diff --git a/tensorflow/lite/micro/kernels/concatenation.cc b/tensorflow/lite/micro/kernels/concatenation.cc index 57d63a916a1..151d3b47ed5 100644 --- a/tensorflow/lite/micro/kernels/concatenation.cc +++ b/tensorflow/lite/micro/kernels/concatenation.cc @@ -1,4 +1,4 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,6 +33,13 @@ constexpr int kOutputTensor = 0; struct OpData { ConcatenationParams params; + +#ifdef USE_TFLM_COMPRESSION + + // scratch buffers for compressed tensors + int scratch_indices[kMaxInputNum]; + +#endif // USE_TFLM_COMPRESSION }; // Handles negative axis index, coerces to positive index value. 
@@ -52,8 +59,6 @@ inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
 inline void GetAllInputTensorShapes(const TfLiteContext* context,
                                     const TfLiteNode* node,
                                     RuntimeShape all_shapes[kMaxInputNum]) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(node != nullptr);
   for (int i = 0; i < node->inputs->size; ++i) {
     const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
     RuntimeShape shape = tflite::micro::GetTensorShape(t);
@@ -73,12 +78,22 @@ inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
 template <typename T>
 inline void GetAllInputTensorData(const TfLiteContext* context,
                                   const TfLiteNode* node,
-                                  T* all_data[kMaxInputNum]) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(node != nullptr);
+                                  const T* all_data[kMaxInputNum]) {
+#ifdef USE_TFLM_COMPRESSION
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+  MicroContext* micro_context = GetMicroContext(context);
+#endif  // USE_TFLM_COMPRESSION
+
   for (int i = 0; i < node->inputs->size; ++i) {
     const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
+#ifdef USE_TFLM_COMPRESSION
+    const CompressionTensorData* comp_td =
+        micro_context->GetTensorCompressionData(node, i);
+    all_data[i] = tflite::micro::GetTensorData<T>(micro_context, t, comp_td,
+                                                  data->scratch_indices[i]);
+#else   // USE_TFLM_COMPRESSION
     all_data[i] = tflite::micro::GetTensorData<T>(t);
+#endif  // USE_TFLM_COMPRESSION
   }
 }
 
@@ -88,6 +103,10 @@ void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
   RuntimeShape inputs_shape[kMaxInputNum];
   const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
   const data_type* inputs_data[kMaxInputNum];
+  TFLITE_DCHECK(context != nullptr);
+  TFLITE_DCHECK(node != nullptr);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
   GetAllInputTensorShapes(context, node, inputs_shape);
   GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
   GetAllInputTensorData(context, node, inputs_data);
@@ -95,9 +114,6 @@ void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
   TfLiteEvalTensor* output =
       tflite::micro::GetEvalOutput(context, node, kOutputTensor);
 
-  TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
-
   reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
                                tflite::micro::GetTensorShape(output),
                                tflite::micro::GetTensorData<data_type>(output));
@@ -126,7 +142,6 @@ TfLiteStatus ConcatenationPrepare(TfLiteContext* context, TfLiteNode* node) {
   TfLiteType output_type = output_tensor->type;
 
   micro_context->DeallocateTempTfLiteTensor(input_tensor);
-  micro_context->DeallocateTempTfLiteTensor(output_tensor);
 
   // Check activation and input type
   TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
@@ -136,16 +151,22 @@ TfLiteStatus ConcatenationPrepare(TfLiteContext* context, TfLiteNode* node) {
                      input_type == kTfLiteInt64 || input_type == kTfLiteBool);
 
   // Output type must match input type
-  TF_LITE_ENSURE_EQ(context, output_type, input_type);
+  TF_LITE_ENSURE_TYPES_EQ(context, output_type, input_type);
 
   // This implementation does not support large number of input tensors
   const int num_inputs = NumInputs(node);
   TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum);
 
-  // Shapes with dimensions >4 are not yet supported with static allocation.
+  // Calculate OpData.
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpData* data = static_cast<OpData*>(node->user_data);
+
+  // Shapes with dimensions > kMaxSmallSize are not yet supported with static
+  // allocation.
   for (int i = 0; i < num_inputs; ++i) {
     TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i);
     TF_LITE_ENSURE(context, input != nullptr);
+    TF_LITE_ENSURE_TYPES_EQ(context, input->type, input_type);
     int num_dimensions = NumDimensions(input);
 
     if (num_dimensions > RuntimeShape::kMaxSmallSize) {
@@ -155,62 +176,53 @@ TfLiteStatus ConcatenationPrepare(TfLiteContext* context, TfLiteNode* node) {
           RuntimeShape::kMaxSmallSize, num_dimensions);
       return kTfLiteError;
     }
+
+    if (input_type == kTfLiteInt8) {
+      // Make sure there is no re-scaling needed for Int8 quantized kernel. This
+      // is a restriction we introduced to Int8 kernels.
+      TF_LITE_ENSURE_EQ(context, static_cast<double>(input->params.scale),
+                        static_cast<double>(output_tensor->params.scale));
+      TF_LITE_ENSURE_EQ(context, input->params.zero_point,
+                        output_tensor->params.zero_point);
+    } else if (input_type == kTfLiteInt16) {
+      // Make sure that all Int16 inputs have a null zero-point.
+      TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    }
+
+#ifdef USE_TFLM_COMPRESSION
+
+    // Compression scratch buffers.
+    // These will only be allocated if the tensor is compressed.
+    data->scratch_indices[i] =
+        micro_context->AllocateDecompressionScratchBuffer(node, i);
+
+#endif  // USE_TFLM_COMPRESSION
+
     micro_context->DeallocateTempTfLiteTensor(input);
   }
 
-  // Calculate OpData.
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  TfLiteTensor* output =
-      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
+  if (input_type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, output_tensor->params.zero_point, 0);
+  }
 
   switch (output_type) {  // Already know in/outtypes are same.
     case kTfLiteBool:
     case kTfLiteFloat32:
+    case kTfLiteInt8:
     case kTfLiteInt16:
     case kTfLiteInt32:
    case kTfLiteInt64: {
-      data->params.axis = CalculatePositiveAxis(params->axis, output);
-      data->params.inputs_count = node->inputs->size;
-      break;
-    }
-    case kTfLiteInt8: {
-      data->params.axis = CalculatePositiveAxis(params->axis, output);
+      data->params.axis = CalculatePositiveAxis(params->axis, output_tensor);
       data->params.inputs_count = node->inputs->size;
-
-      float* input_scales =
-          reinterpret_cast<float*>(context->AllocatePersistentBuffer(
-              context, node->inputs->size * sizeof(float)));
-
-      int32_t* input_zero_points =
-          reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
-              context, node->inputs->size * sizeof(int32_t)));
-
-      // Allocate persistent scale and zeropoint buffers.
-      // Store input scale and zero point values in OpParams:
-      for (int i = 0; i < node->inputs->size; ++i) {
-        TfLiteTensor* t = micro_context->AllocateTempInputTensor(node, i);
-        TF_LITE_ENSURE(context, t != nullptr);
-        input_scales[i] = t->params.scale;
-        input_zero_points[i] = t->params.zero_point;
-        micro_context->DeallocateTempTfLiteTensor(t);
-      }
-
-      data->params.input_scale = input_scales;
-      data->params.input_zeropoint = input_zero_points;
-      data->params.output_zeropoint = output->params.zero_point;
-      data->params.output_scale = output->params.scale;
       break;
     }
     default:
-      MicroPrintf("Op Concatenation does not currently support Type '%s'.",
+      MicroPrintf("Op Concatenation does not currently support type '%s'.",
                   TfLiteTypeGetName(output_type));
       return kTfLiteError;
   }
 
-  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(output_tensor);
 
   return kTfLiteOk;
 }

diff --git a/tensorflow/lite/micro/kernels/concatenation_test.cc b/tensorflow/lite/micro/kernels/concatenation_test.cc
index ddbc74d4aa4..c7e698007ea 100644
--- a/tensorflow/lite/micro/kernels/concatenation_test.cc
+++ b/tensorflow/lite/micro/kernels/concatenation_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <cstdint>
+#include <type_traits>
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
@@ -56,9 +57,14 @@ void TestConcatenateOneInput(int* input1_dims_data, const T* input1_data,
 }
 
 template <typename T>
-void TestConcatenateTwoInputs(int* input1_dims_data, const T* input1_data,
-                              int* input2_dims_data, const T* input2_data,
-                              int axis, int* output_dims_data, T* output_data) {
+void TestConcatenateTwoInputs(
+    int* input1_dims_data, const T* input1_data, int* input2_dims_data,
+    const T* input2_data, int axis, int* output_dims_data, T* output_data
+#ifdef USE_TFLM_COMPRESSION
+    ,
+    const TestCompressionInfo<const T> (*comp_info)[2] = nullptr
+#endif  // USE_TFLM_COMPRESSION
+) {
   TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data);
   TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data);
   TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
@@ -70,6 +76,21 @@ void TestConcatenateTwoInputs(int* input1_dims_data, const T* input1_data,
       CreateTensor(input2_data, input2_dims),
       CreateTensor(output_data, output_dims)};
 
+#ifdef USE_TFLM_COMPRESSION
+
+  TestCompressedList<2> tcl;
+  const CompressedTensorList* comp_list_p = nullptr;
+
+  if (comp_info != nullptr) {
+    TF_LITE_MICRO_EXPECT_EQ(tcl.AddInput((*comp_info)[0], tensors[0], 0),
+                            kTfLiteOk);
+    TF_LITE_MICRO_EXPECT_EQ(tcl.AddInput((*comp_info)[1], tensors[1], 1),
+                            kTfLiteOk);
+    comp_list_p = tcl.GetCompressedTensorList();
+  }
+
+#endif  // USE_TFLM_COMPRESSION
+
   int inputs_array_data[] = {2, 0, 1};
   TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
   int outputs_array_data[] = {1, 2};
   TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
@@ -83,7 +104,12 @@ void TestConcatenateTwoInputs(int* input1_dims_data, const T* input1_data,
   const TFLMRegistration registration = Register_CONCATENATION();
   micro::KernelRunner runner(registration, tensors, tensors_size,
                              inputs_array, outputs_array,
-                             reinterpret_cast<void*>(&builtin_data));
+                             reinterpret_cast<void*>(&builtin_data)
+#ifdef USE_TFLM_COMPRESSION
+                             ,
+                             nullptr, comp_list_p
+#endif  // USE_TFLM_COMPRESSION
+  );
 
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
 }
@@ -92,9 +118,19 @@ void TestConcatenateTwoFloatInputs(
     int* input1_dims_data, const float* input1_data, int* input2_dims_data,
     const float* input2_data, int axis, int* output_dims_data,
-    const float* expected_output_data, float* output_data) {
+    const float* expected_output_data, float* output_data
+#ifdef USE_TFLM_COMPRESSION
+    ,
+    const TestCompressionInfo<const float> (*comp_info)[2] = nullptr
+#endif  // USE_TFLM_COMPRESSION
+) {
   TestConcatenateTwoInputs(input1_dims_data, input1_data, input2_dims_data,
-                           input2_data, axis, output_dims_data, output_data);
+                           input2_data, axis, output_dims_data, output_data
+#ifdef USE_TFLM_COMPRESSION
+                           ,
+                           comp_info
+#endif  // USE_TFLM_COMPRESSION
+  );
 
   TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(output_dims_data);
   const int output_dims_count = ElementCount(*dims);
@@ -148,6 +184,68 @@ void TestConcatenateQuantizedTwoInputs(
   }
 }
 
+#ifdef USE_TFLM_COMPRESSION
+
+template <typename T>
+void TestConcatenateQuantizedTwoInputsCompressed(
+    int* input1_dims_data, const uint8_t* input1_data, int* input2_dims_data,
+    const uint8_t* input2_data, const float input_scale,
+    const int input_zero_point, int axis, int* output_dims_data,
+    const T* expected_output_data, const float output_scale,
+    const int output_zero_point, T* output_data,
+    const TestCompressionInfo<const T> (&comp_info)[2]) {
+  TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data);
+  TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
+
+  constexpr int input_size = 2;
+  constexpr int output_size = 1;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input1_data, input1_dims, input_scale,
+                            input_zero_point, false, typeToTfLiteType<T>()),
+      CreateQuantizedTensor(input2_data, input2_dims, input_scale,
+                            input_zero_point, false, typeToTfLiteType<T>()),
+      CreateQuantizedTensor(output_data, output_dims, output_scale,
+                            output_zero_point)};
+
+#ifdef USE_TFLM_COMPRESSION
+
+  TestCompressedList<2> tcl;
+  const CompressedTensorList* comp_list_p = nullptr;
+
+  TF_LITE_MICRO_EXPECT_EQ(tcl.AddInput(comp_info[0], tensors[0], 0), kTfLiteOk);
+  TF_LITE_MICRO_EXPECT_EQ(tcl.AddInput(comp_info[1], tensors[1], 1), kTfLiteOk);
+  comp_list_p = tcl.GetCompressedTensorList();
+
+#endif  // USE_TFLM_COMPRESSION
+
+  int inputs_array_data[] = {2, 0, 1};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 2};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+
+  TfLiteConcatenationParams builtin_data = {
+      .axis = axis,
+      .activation = kTfLiteActNone  // Only activation supported in this impl
+  };
+
+  const TFLMRegistration registration = Register_CONCATENATION();
+  micro::KernelRunner runner(
+      registration, tensors, tensors_size, inputs_array, outputs_array,
+      reinterpret_cast<void*>(&builtin_data), nullptr, comp_list_p);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
+
+  const int output_dims_count = ElementCount(*output_dims);
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]);
+  }
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 }  // namespace
 }  // namespace testing
 }  // namespace tflite
@@ -237,6 +335,43 @@ TF_LITE_MICRO_TEST(TwoInputsAllAxesCombinations) {
       output_shape_axis1, output_value_axis1, output_data);
 }
 
+#ifdef USE_TFLM_COMPRESSION
+
+TF_LITE_MICRO_TEST(TwoInputsFloatCompressed) {
+  int input_shape[] = {2, 2, 3};
+  const float input1_value[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  const float input2_value[] = {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f};
+  // Align the tensor data the same as a Buffer in the schema
+  alignas(16) const uint8_t inputs_compressed[] = {0x05, 0x39, 0x40};
+  constexpr int kBitWidth = 3;
+
+  // expected output when concatenating on axis 0
+  int output_shape_axis0[] = {2, 4, 3};
+  const float output_value_axis0[] = {1.0f, 2.0f, 3.0f, 4.0f,  5.0f,  6.0f,
+                                      7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f};
+
+  float output_data[std::extent<decltype(output_value_axis0)>::value];
+
+  tflite::testing::TestCompressionInfo<const float> comp_info[2] = {};
+  comp_info[0].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[0].value_table = input1_value;
+  comp_info[0].value_table_stride = std::extent<decltype(input1_value)>::value;
+  comp_info[0].bit_width = kBitWidth;
+  comp_info[1].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[1].value_table = input2_value;
+  comp_info[1].value_table_stride = std::extent<decltype(input2_value)>::value;
+  comp_info[1].bit_width = kBitWidth;
+
+  // Axis = 0
+  tflite::testing::TestConcatenateTwoFloatInputs(
+      input_shape, reinterpret_cast<const float*>(inputs_compressed),
+      input_shape, reinterpret_cast<const float*>(inputs_compressed),
+      /* axis */ 0, output_shape_axis0, output_value_axis0, output_data,
+      &comp_info);
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TF_LITE_MICRO_TEST(TwoInputsQuantizedInt8) {
   const int axis = 2;
   int input_shape[] = {3, 2, 1, 2};
@@ -260,6 +395,45 @@ TF_LITE_MICRO_TEST(TwoInputsQuantizedInt8) {
       output_zero_point, output_data);
 }
 
+#ifdef USE_TFLM_COMPRESSION
+
+TF_LITE_MICRO_TEST(TwoInputsQuantizedInt8Compressed) {
+  const int axis = 2;
+  int input_shape[] = {3, 2, 1, 2};
+  int output_shape[] = {3, 2, 1, 4};
+
+  const float input_scale = 0.1f;
+  const int input_zero_point = 0;
+  const float output_scale = 0.1f;
+  const int output_zero_point = 0;
+
+  const int8_t input1_values[] = {1, 2, 3, 4};
+  const int8_t input2_values[] = {5, 6, 7, 8};
+  const int8_t output_value[] = {1, 2, 5, 6, 3, 4, 7, 8};
+  // Align the tensor data the same as a Buffer in the schema
+  alignas(16) const uint8_t input_compressed[] = {0x1B};
+  constexpr int kBitWidth = 2;
+
+  int8_t output_data[std::extent<decltype(output_value)>::value];
+
+  tflite::testing::TestCompressionInfo<const int8_t> comp_info[2] = {};
+  comp_info[0].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[0].value_table = input1_values;
+  comp_info[0].value_table_stride = std::extent<decltype(input1_values)>::value;
+  comp_info[0].bit_width = kBitWidth;
+  comp_info[1].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[1].value_table = input2_values;
+  comp_info[1].value_table_stride = std::extent<decltype(input2_values)>::value;
+  comp_info[1].bit_width = kBitWidth;
+
+  tflite::testing::TestConcatenateQuantizedTwoInputsCompressed(
+      input_shape, input_compressed, input_shape, input_compressed,
+      input_scale, input_zero_point, axis, output_shape, output_value,
+      output_scale, output_zero_point, output_data, comp_info);
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TF_LITE_MICRO_TEST(TwoInputsQuantizedInt16) {
   const int axis = 2;
   int input_shape[] = {3, 2, 1, 2};
@@ -283,6 +457,45 @@ TF_LITE_MICRO_TEST(TwoInputsQuantizedInt16) {
       output_zero_point, output_data);
 }
 
+#ifdef USE_TFLM_COMPRESSION
+
+TF_LITE_MICRO_TEST(TwoInputsQuantizedInt16Compressed) {
+  const int axis = 2;
+  int input_shape[] = {3, 2, 1, 2};
+  int output_shape[] = {3, 2, 1, 4};
+
+  const float input_scale = 0.1f;
+  const int input_zero_point = 0;
+  const float output_scale = 0.1f;
+  const int output_zero_point = 0;
+
+  const int16_t input1_values[] = {1, 2, 3, 4};
+  const int16_t input2_values[] = {5, 6, 7, 8};
+  const int16_t output_value[] = {1, 2, 5, 6, 3, 4, 7, 8};
+  // Align the tensor data the same as a Buffer in the schema
+  alignas(16) const uint8_t input_compressed[] = {0x1B};
+  constexpr int kBitWidth = 2;
+
+  int16_t output_data[std::extent<decltype(output_value)>::value];
+
+  tflite::testing::TestCompressionInfo<const int16_t> comp_info[2] = {};
+  comp_info[0].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[0].value_table = input1_values;
+  comp_info[0].value_table_stride = std::extent<decltype(input1_values)>::value;
+  comp_info[0].bit_width = kBitWidth;
+  comp_info[1].scheme = tflite::CompressionScheme::kBinQuant;
+  comp_info[1].value_table = input2_values;
+  comp_info[1].value_table_stride = std::extent<decltype(input2_values)>::value;
+  comp_info[1].bit_width = kBitWidth;
+
+  tflite::testing::TestConcatenateQuantizedTwoInputsCompressed(
+      input_shape, input_compressed, input_shape, input_compressed,
+      input_scale, input_zero_point, axis, output_shape, output_value,
+      output_scale, output_zero_point, output_data, comp_info);
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TF_LITE_MICRO_TEST(ThreeDimensionalTwoInputsDifferentShapes) {
   const int axis = 1;
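
Both patches follow the same kernel-side pattern for compression support:
Prepare() reserves a decompression scratch buffer for each input that might be
compressed, and Eval() resolves each input either to its raw buffer or to the
decompressed copy in that scratch area. The sketch below distills the pattern
from the two diffs above. It is illustrative only: OpData, kInputTensor, the
float element type, and the assumption that user_data was allocated by an Init
like the one in patch 1 are stand-ins, and the fallback of the four-argument
GetTensorData() overload to the raw buffer when the compression metadata is
null is my reading of the helpers, not something these diffs define.

// Sketch of the shared decompression pattern (not part of the patches).
#ifdef USE_TFLM_COMPRESSION

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"

namespace {

constexpr int kInputTensor = 0;  // illustrative input index

struct OpData {  // illustrative per-op state, allocated in Init()
  int scratch_index;
};

TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  OpData* data = static_cast<OpData*>(node->user_data);
  // Reserves scratch space only if this input is actually compressed.
  data->scratch_index =
      micro_context->AllocateDecompressionScratchBuffer(node, kInputTensor);
  return kTfLiteOk;
}

TfLiteStatus ExampleEval(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  const OpData* data = static_cast<const OpData*>(node->user_data);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  // comp_td is null for an uncompressed tensor, in which case the helper
  // is expected to hand back the tensor's raw buffer unchanged.
  const tflite::CompressionTensorData* comp_td =
      micro_context->GetTensorCompressionData(node, kInputTensor);
  const float* values = tflite::micro::GetTensorData<float>(
      micro_context, input, comp_td, data->scratch_index);
  (void)values;  // kernel math would consume the decompressed values here
  return kTfLiteOk;
}

}  // namespace

#endif  // USE_TFLM_COMPRESSION

The concatenation patch is the same pattern with an array of scratch indices,
one per input.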
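
On the new test vectors: under the bin-quant scheme the tests assume, each
compressed blob appears to be the MSB-first concatenation of fixed-width
indices into the tensor's value table, zero-padded to a byte boundary. The
3-bit index sequence 0..5 packs to {0x05, 0x39, 0x40} (float test, six-entry
value tables) and the 2-bit sequence 0..3 packs to {0x1B} (quantized tests,
four-entry tables), matching the arrays above. A stand-alone check of that
arithmetic, independent of the TFLM helpers (PackIndices is a hypothetical
name, not a library function):

#include <cstdint>
#include <cstdio>
#include <vector>

// Pack fixed-width indices MSB-first into bytes, zero-padding the final
// byte, mirroring how the tests' compressed blobs appear to be laid out.
std::vector<uint8_t> PackIndices(const std::vector<uint32_t>& indices,
                                 int bit_width) {
  std::vector<uint8_t> out;
  uint32_t acc = 0;  // bit accumulator; holds fewer than 8 + bit_width bits
  int bits = 0;
  for (uint32_t index : indices) {
    acc = (acc << bit_width) | index;
    bits += bit_width;
    while (bits >= 8) {
      out.push_back(static_cast<uint8_t>(acc >> (bits - 8)));
      bits -= 8;
    }
  }
  if (bits > 0) {
    out.push_back(static_cast<uint8_t>(acc << (8 - bits)));
  }
  return out;
}

int main() {
  // 3-bit indices 0..5 -> prints 0x05 0x39 0x40 (float test blob).
  for (uint8_t b : PackIndices({0, 1, 2, 3, 4, 5}, 3)) printf("0x%02X ", b);
  printf("\n");
  // 2-bit indices 0..3 -> prints 0x1B (quantized test blob).
  for (uint8_t b : PackIndices({0, 1, 2, 3}, 2)) printf("0x%02X ", b);
  printf("\n");
  return 0;
}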