
Commit 3bf00b7
wip: remainder
rkuester committed Dec 11, 2024
1 parent a7e3d37 commit 3bf00b7
Showing 50 changed files with 3,324 additions and 311 deletions.
@@ -74,8 +74,6 @@ class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
   // takes in account any temporary allocations.
   size_t GetAvailableMemory(size_t alignment) const override;
 
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-
  private:
   // The memory arena that this allocator manages.
   uint8_t* const buffer_head_;
@@ -97,6 +95,8 @@ class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
   // Count of outstanding temp buffers.
   int temp_buffer_count_ = 0;
   bool resizable_buffer_allocated_ = false;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
 };
 
 }  // namespace tflite
@@ -39,7 +39,6 @@ class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
   // Returns the size of all persistent allocations in bytes.
   size_t GetPersistentUsedBytes() const override;
 
-  TF_LITE_REMOVE_VIRTUAL_DELETE
  private:
   // The memory arena that this allocator manages.
   uint8_t* const buffer_head_;
@@ -51,6 +50,8 @@ class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
   // So in essence, the allocated region grows from the bottom and emulates
   // SingleArenaBufferAllocator's persistent part.
   uint8_t* tail_temp_;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
 };
 
 }  // namespace tflite
@@ -110,8 +110,6 @@ class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
   // account any temporary allocations.
   size_t GetUsedBytes() const;
 
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-
  protected:
   // Returns a pointer to the current end of the head buffer.
   uint8_t* head() const;
@@ -137,6 +135,8 @@ class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
   intptr_t temp_buffer_ptr_check_sum_ = 0;
   // Count of outstanding temp buffers.
   int temp_buffer_count_ = 0;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
 };
 
 }  // namespace tflite
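For context on the change repeated across these three allocator headers: TF_LITE_REMOVE_VIRTUAL_DELETE comes from tensorflow/lite/micro/compatibility.h and declares a no-op operator delete, since TFLM objects live in a static arena and are never heap-freed. Moving the macro to the end of the class body puts it in the private section, which also makes deleting these classes from outside a compile-time error. A minimal sketch of the pattern, assuming the macro's usual definition:

// Assumed definition, mirroring tensorflow/lite/micro/compatibility.h.
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
  void operator delete(void* p) {}

class ArenaResident {
 public:
  virtual ~ArenaResident() = default;

 private:
  int state_ = 0;

  // Private and a no-op: "delete ptr;" fails to compile outside the class,
  // and nothing ever frees arena-resident memory.
  TF_LITE_REMOVE_VIRTUAL_DELETE
};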
27 changes: 27 additions & 0 deletions tensorflow/lite/micro/compression/model_facade.py
@@ -100,10 +100,37 @@ def __init__(self, operator, index, subgraph):
   @property
   def opcode(self) -> tflite.OperatorCodeT:
     return self.subgraph.model.operatorCodes[self.operator.opcodeIndex]
 
+  @property
+  def builtin_opcode(self) -> int:
+    result: int = self.opcode.deprecatedBuiltinCode
+    if result == tflite.BuiltinOperator.PLACEHOLDER_FOR_GREATER_OP_CODES:
+      result = self.opcode.builtinCode
+    return result
+
   @property
   def inputs(self):
     return _IndirectIterator(self.operator.inputs, self.subgraph.tensors)
 
+  @property
+  def outputs(self):
+    return _IndirectIterator(self.operator.outputs, self.subgraph.tensors)
+
+  @property
+  def inputs_indices(self):
+    return self.operator.inputs
+
+  @property
+  def outputs_indices(self):
+    return self.operator.outputs
+
+  @property
+  def builtin_options_type(self) -> int:
+    return self.operator.builtinOptionsType
+
+  @property
+  def builtin_options(self):
+    return self.operator.builtinOptions
+
 
 _NP_DTYPES = {
     tflite.TensorType.FLOAT16: np.dtype("<f2"),
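The new builtin_opcode property above implements TFLite's standard resolution rule for the extended opcode space: deprecatedBuiltinCode is a legacy 8-bit field, so opcodes beyond its range are stored in the 32-bit builtinCode field while the legacy field holds the sentinel PLACEHOLDER_FOR_GREATER_OP_CODES (127). A sketch of the same rule in C++, using hypothetical standalone types rather than the schema-generated API:

#include <cstdint>

// Sentinel value matching tflite's PLACEHOLDER_FOR_GREATER_OP_CODES.
constexpr int32_t kPlaceholderForGreaterOpCodes = 127;

// Hypothetical mirror of the two schema fields.
struct OperatorCode {
  int8_t deprecated_builtin_code;  // legacy 8-bit field, maxes out at 127
  int32_t builtin_code;            // newer 32-bit field for larger opcodes
};

// Prefer the legacy field unless it holds the placeholder sentinel.
int32_t ResolveBuiltinCode(const OperatorCode& op) {
  if (op.deprecated_builtin_code == kPlaceholderForGreaterOpCodes) {
    return op.builtin_code;
  }
  return op.deprecated_builtin_code;
}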
2 changes: 1 addition & 1 deletion tensorflow/lite/micro/docs/compression.md
@@ -276,7 +276,7 @@ bazel run --cache_test_results=no --test_output=all -s tensorflow/lite/micro/to
 The Generic Benchmark Application can be used to see the size of the model, the
 amount of arena memory used, and the size of the interpreter data structures
-including those involved with tensor conpression.
+including those involved with tensor compression.
 
 The benchmark also reports total inference time, as well as time taken for
 tensor decompression. Timing data may be either wall-clock time or processor
@@ -37,7 +37,7 @@ namespace {
 // Arena size is a guesstimate, followed by use of
 // MicroInterpreter::arena_used_bytes() on both the AudioPreprocessor and
 // MicroSpeech models and using the larger of the two results.
-constexpr size_t kArenaSize = 28584;  // xtensa p6
+constexpr size_t kArenaSize = 30 * 1024;
 alignas(16) uint8_t g_arena[kArenaSize];
 
 using Features = int8_t[kFeatureCount][kFeatureSize];
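The comment in this hunk describes the usual way these arena constants are derived: start from a guess, run once, and read MicroInterpreter::arena_used_bytes() after allocation. A hedged sketch of that measurement step, with a deliberately generous trial arena (the helper name and sizes are illustrative):

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

// Generous trial arena; shrink toward the measured value afterwards.
constexpr size_t kTrialArenaSize = 64 * 1024;
alignas(16) uint8_t g_trial_arena[kTrialArenaSize];

size_t MeasureArenaUsage(const void* model_data,
                         const tflite::MicroOpResolver& op_resolver) {
  const tflite::Model* model = tflite::GetModel(model_data);
  tflite::MicroInterpreter interpreter(model, op_resolver, g_trial_arena,
                                       kTrialArenaSize);
  if (interpreter.AllocateTensors() != kTfLiteOk) {
    return 0;  // the trial arena itself was too small
  }
  // High-water mark; pad it (e.g. by ~25%) before hardcoding it, since
  // usage varies across targets and toolchains.
  return interpreter.arena_used_bytes();
}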
51 changes: 49 additions & 2 deletions tensorflow/lite/micro/kernels/assign_variable.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_graph.h"
 #include "tensorflow/lite/micro/micro_log.h"
 #include "tensorflow/lite/micro/micro_resource_variable.h"
+#include "tensorflow/lite/micro/micro_utils.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
namespace tflite {
@@ -35,6 +36,20 @@ namespace {
 constexpr int kInputVariableId = 0;
 constexpr int kInputValue = 1;
 
+#ifdef USE_TFLM_COMPRESSION
+
+struct OpData {
+  // scratch buffer for compressed input tensor
+  int scratch_index;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0);
@@ -70,6 +85,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                                              context, input_value));
   }
 
+#ifdef USE_TFLM_COMPRESSION
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpData* data = static_cast<OpData*>(node->user_data);
+  // Compression scratch buffers.
+  // These will only be allocated if the tensor is compressed.
+  data->scratch_index =
+      micro_context->AllocateDecompressionScratchBuffer(node, kInputValue);
+
+#endif  // USE_TFLM_COMPRESSION
+
   micro_context->DeallocateTempTfLiteTensor(input_value);
   return kTfLiteOk;
 }
@@ -93,15 +119,36 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         "ResourceVariables and pass it to the interpreter.");
     return kTfLiteError;
   }
+
+#ifdef USE_TFLM_COMPRESSION
+  OpData* data = static_cast<OpData*>(node->user_data);
+  const CompressionTensorData* comp_td =
+      micro_context->GetTensorCompressionData(node, kInputValue);
+  const void* buffer = tflite::micro::GetTensorData<void>(
+      micro_context, input_value, comp_td, data->scratch_index);
+#else  // USE_TFLM_COMPRESSION
+  const void* buffer = tflite::micro::GetTensorData<void>(input_value);
+#endif  // USE_TFLM_COMPRESSION
+
   TF_LITE_ENSURE_OK(context,
-                    resources->Assign(input_id->data.i32[0], input_value));
+                    resources->Assign(input_id->data.i32[0],
+                                      EvalTensorBytes(input_value), buffer));
   return kTfLiteOk;
 }
 
 }  // namespace.

+#ifdef USE_TFLM_COMPRESSION
+
+TFLMRegistration Register_ASSIGN_VARIABLE() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+
+#else  // USE_TFLM_COMPRESSION
+
 TFLMRegistration Register_ASSIGN_VARIABLE() {
   return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+
+#endif  // USE_TFLM_COMPRESSION
 }
 
 }  // namespace tflite
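Taken together, these assign_variable changes show the opt-in pattern a kernel follows to support compressed tensors: Init reserves per-node OpData holding a scratch-buffer index, Prepare asks the MicroContext to allocate a decompression scratch buffer, and Eval reads tensor data through the GetTensorData overload that decompresses into that buffer. A condensed, hypothetical kernel skeleton built from the same calls (the tensor index is illustrative; error checks trimmed):

#ifdef USE_TFLM_COMPRESSION

constexpr int kMyInputTensor = 0;  // illustrative input index

struct OpData {
  int scratch_index;  // decompression scratch buffer for the input
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);
  OpData* data = static_cast<OpData*>(node->user_data);
  // Only yields a real buffer if the tensor is actually compressed.
  data->scratch_index =
      micro_context->AllocateDecompressionScratchBuffer(node, kMyInputTensor);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);
  OpData* data = static_cast<OpData*>(node->user_data);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kMyInputTensor);
  const CompressionTensorData* comp_td =
      micro_context->GetTensorCompressionData(node, kMyInputTensor);
  // Decompresses into the scratch area when comp_td is non-null.
  const void* buffer = tflite::micro::GetTensorData<void>(
      micro_context, input, comp_td, data->scratch_index);
  (void)buffer;  // kernel-specific compute would consume this
  return kTfLiteOk;
}

#endif  // USE_TFLM_COMPRESSION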
49 changes: 48 additions & 1 deletion tensorflow/lite/micro/kernels/conv.cc
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -45,15 +45,35 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
 
+#ifdef USE_TFLM_COMPRESSION
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  const CompressionTensorData* weights_comp_td =
+      micro_context->GetTensorCompressionData(node, kConvWeightsTensor);
+  const CompressionTensorData* bias_comp_td =
+      micro_context->GetTensorCompressionData(node, kConvBiasTensor);
+
+#endif  // USE_TFLM_COMPRESSION
+
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
       tflite::reference_ops::Conv(
           ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<float>(input),
           tflite::micro::GetTensorShape(filter),
+#ifdef USE_TFLM_COMPRESSION
+          tflite::micro::GetTensorData<float>(micro_context, filter,
+                                              weights_comp_td,
+                                              data.weights_scratch_index),
+          tflite::micro::GetTensorShape(bias),
+          tflite::micro::GetTensorData<float>(micro_context, bias, bias_comp_td,
+                                              data.bias_scratch_index),
+#else  // USE_TFLM_COMPRESSION
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
           tflite::micro::GetOptionalTensorData<float>(bias),
+#endif  // USE_TFLM_COMPRESSION
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output),
           tflite::micro::GetTensorShape(nullptr), nullptr);
@@ -67,9 +87,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
             tflite::micro::GetTensorShape(input),
             tflite::micro::GetTensorData<int16_t>(input),
             tflite::micro::GetTensorShape(filter),
+#ifdef USE_TFLM_COMPRESSION
+            tflite::micro::GetTensorData<int8_t>(micro_context, filter,
+                                                 weights_comp_td,
+                                                 data.weights_scratch_index),
+            tflite::micro::GetTensorShape(bias),
+            tflite::micro::GetTensorData<int32_t>(
+                micro_context, bias, bias_comp_td, data.bias_scratch_index),
+#else  // USE_TFLM_COMPRESSION
             tflite::micro::GetTensorData<int8_t>(filter),
             tflite::micro::GetTensorShape(bias),
             tflite::micro::GetOptionalTensorData<std::int32_t>(bias),
+#endif  // USE_TFLM_COMPRESSION
             tflite::micro::GetTensorShape(output),
             tflite::micro::GetTensorData<int16_t>(output));
       } else if (bias->type == kTfLiteInt64) {
@@ -79,9 +108,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
             tflite::micro::GetTensorShape(input),
             tflite::micro::GetTensorData<int16_t>(input),
             tflite::micro::GetTensorShape(filter),
+#ifdef USE_TFLM_COMPRESSION
+            tflite::micro::GetTensorData<int8_t>(micro_context, filter,
+                                                 weights_comp_td,
+                                                 data.weights_scratch_index),
+            tflite::micro::GetTensorShape(bias),
+            tflite::micro::GetTensorData<int64_t>(
+                micro_context, bias, bias_comp_td, data.bias_scratch_index),
+#else  // USE_TFLM_COMPRESSION
             tflite::micro::GetTensorData<int8_t>(filter),
             tflite::micro::GetTensorShape(bias),
             tflite::micro::GetOptionalTensorData<std::int64_t>(bias),
+#endif  // USE_TFLM_COMPRESSION
             tflite::micro::GetTensorShape(output),
             tflite::micro::GetTensorData<int16_t>(output));
       } else {
@@ -119,9 +157,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<int8_t>(input),
           tflite::micro::GetTensorShape(filter),
+#ifdef USE_TFLM_COMPRESSION
+          tflite::micro::GetTensorData<int8_t>(micro_context, filter,
+                                               weights_comp_td,
+                                               data.weights_scratch_index),
+          tflite::micro::GetTensorShape(bias),
+          tflite::micro::GetTensorData<int32_t>(
+              micro_context, bias, bias_comp_td, data.bias_scratch_index),
+#else  // USE_TFLM_COMPRESSION
           tflite::micro::GetTensorData<int8_t>(filter),
           tflite::micro::GetTensorShape(bias),
           tflite::micro::GetOptionalTensorData<int32_t>(bias),
+#endif  // USE_TFLM_COMPRESSION
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<int8_t>(output));
       break;
10 changes: 9 additions & 1 deletion tensorflow/lite/micro/kernels/conv.h
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -49,6 +49,14 @@ struct OpDataConv {
   // A buffer used to store unpacked filter values. This is used if the source
   // tensor is of n-bit precision that cannot be easily processed by kernels.
   int filter_buffer_index;
+
+#ifdef USE_TFLM_COMPRESSION
+
+  // scratch buffers for compressed tensors
+  int weights_scratch_index;
+  int bias_scratch_index;
+
+#endif  // USE_TFLM_COMPRESSION
 };
 
 extern const int kConvInputTensor;
19 changes: 18 additions & 1 deletion tensorflow/lite/micro/kernels/conv_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -209,6 +209,23 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
                                 &data->filter_buffer_index);
   }
 
+#ifdef USE_TFLM_COMPRESSION
+
+  // Compression scratch buffers.
+  // These will only be allocated if the tensor is compressed.
+  if (micro_context->IsTensorCompressed(node, kConvWeightsTensor) &&
+      filter->type == kTfLiteInt4) {
+    MicroPrintf("Compression not supported with INT4 tensors");
+    return kTfLiteError;
+  }
+  data->weights_scratch_index =
+      micro_context->AllocateDecompressionScratchBuffer(node,
+                                                        kConvWeightsTensor);
+  data->bias_scratch_index =
+      micro_context->AllocateDecompressionScratchBuffer(node, kConvBiasTensor);
+
+#endif  // USE_TFLM_COMPRESSION
+
   micro_context->DeallocateTempTfLiteTensor(filter);
   micro_context->DeallocateTempTfLiteTensor(input);
   micro_context->DeallocateTempTfLiteTensor(output);
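The conv changes apply the same pattern to two tensors at once: conv.h extends OpDataConv with weights_scratch_index and bias_scratch_index, ConvPrepare above reserves both scratch buffers and rejects compressed INT4 filters (which, presumably, already go through the separate filter_buffer_index unpacking path), and ConvEval selects the decompressing overload case by case. Condensed from the float case above, the Eval-side selection reduces to:

#ifdef USE_TFLM_COMPRESSION
  // Decompress weights and bias into their Prepare-time scratch buffers.
  const float* filter_data = tflite::micro::GetTensorData<float>(
      micro_context, filter, weights_comp_td, data.weights_scratch_index);
  const float* bias_data = tflite::micro::GetTensorData<float>(
      micro_context, bias, bias_comp_td, data.bias_scratch_index);
#else  // USE_TFLM_COMPRESSION
  const float* filter_data = tflite::micro::GetTensorData<float>(filter);
  const float* bias_data = tflite::micro::GetOptionalTensorData<float>(bias);
#endif  // USE_TFLM_COMPRESSION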