
Commit 3bf00b7
wip: remainder
rkuester committed Dec 11, 2024
1 parent a7e3d37 commit 3bf00b7
Showing 50 changed files with 3,324 additions and 311 deletions.
@@ -74,8 +74,6 @@ class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
   // takes in account any temporary allocations.
   size_t GetAvailableMemory(size_t alignment) const override;
 
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-
  private:
   // The memory arena that this allocator manages.
   uint8_t* const buffer_head_;
@@ -97,6 +95,8 @@ class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
   // Count of outstanding temp buffers.
   int temp_buffer_count_ = 0;
   bool resizable_buffer_allocated_ = false;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
 };
 
 }  // namespace tflite
@@ -39,7 +39,6 @@ class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
   // Returns the size of all persistent allocations in bytes.
   size_t GetPersistentUsedBytes() const override;
 
-  TF_LITE_REMOVE_VIRTUAL_DELETE
  private:
   // The memory arena that this allocator manages.
   uint8_t* const buffer_head_;
@@ -51,6 +50,8 @@ class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
   // So in essence, the allocated region grows from the bottom and emulates
   // SingleArenaBufferAllocator's persistent part.
   uint8_t* tail_temp_;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
 };
 
 }  // namespace tflite
@@ -110,8 +110,6 @@ class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
   // account any temporary allocations.
   size_t GetUsedBytes() const;
 
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-
  protected:
   // Returns a pointer to the current end of the head buffer.
   uint8_t* head() const;
@@ -137,6 +135,8 @@ class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
   intptr_t temp_buffer_ptr_check_sum_ = 0;
   // Count of outstanding temp buffers.
   int temp_buffer_count_ = 0;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
 };
 
 }  // namespace tflite
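For context on the change repeated across these three allocator headers: TF_LITE_REMOVE_VIRTUAL_DELETE comes from tensorflow/lite/micro/compatibility.h and declares a no-op operator delete, since TFLM objects live in a static arena and are never heap-freed. Moving the macro to the end of the class body puts it in the private section, which also makes deleting these classes from outside a compile-time error. A minimal sketch of the pattern, assuming the macro's usual definition:

// Assumed definition, mirroring tensorflow/lite/micro/compatibility.h.
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
  void operator delete(void* p) {}

class ArenaResident {
 public:
  virtual ~ArenaResident() = default;

 private:
  int state_ = 0;

  // Private and a no-op: "delete ptr;" fails to compile outside the class,
  // and nothing ever frees arena-resident memory.
  TF_LITE_REMOVE_VIRTUAL_DELETE
};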
27 changes: 27 additions & 0 deletions tensorflow/lite/micro/compression/model_facade.py
@@ -100,10 +100,37 @@ def __init__(self, operator, index, subgraph):
   @property
   def opcode(self) -> tflite.OperatorCodeT:
     return self.subgraph.model.operatorCodes[self.operator.opcodeIndex]
 
+  @property
+  def builtin_opcode(self) -> int:
+    result: int = self.opcode.deprecatedBuiltinCode
+    if result == tflite.BuiltinOperator.PLACEHOLDER_FOR_GREATER_OP_CODES:
+      result = self.opcode.builtinCode
+    return result
+
   @property
   def inputs(self):
     return _IndirectIterator(self.operator.inputs, self.subgraph.tensors)
 
+  @property
+  def outputs(self):
+    return _IndirectIterator(self.operator.outputs, self.subgraph.tensors)
+
+  @property
+  def inputs_indices(self):
+    return self.operator.inputs
+
+  @property
+  def outputs_indices(self):
+    return self.operator.outputs
+
+  @property
+  def builtin_options_type(self) -> int:
+    return self.operator.builtinOptionsType
+
+  @property
+  def builtin_options(self):
+    return self.operator.builtinOptions
+
 
 _NP_DTYPES = {
     tflite.TensorType.FLOAT16: np.dtype("<f2"),
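The new builtin_opcode property above implements TFLite's standard resolution rule for the extended opcode space: deprecatedBuiltinCode is a legacy 8-bit field, so opcodes beyond its range are stored in the 32-bit builtinCode field while the legacy field holds the sentinel PLACEHOLDER_FOR_GREATER_OP_CODES (127). A sketch of the same rule in C++, using hypothetical standalone types rather than the schema-generated API:

#include <cstdint>

// Sentinel value matching tflite's PLACEHOLDER_FOR_GREATER_OP_CODES.
constexpr int32_t kPlaceholderForGreaterOpCodes = 127;

// Hypothetical mirror of the two schema fields.
struct OperatorCode {
  int8_t deprecated_builtin_code;  // legacy 8-bit field, maxes out at 127
  int32_t builtin_code;            // newer 32-bit field for larger opcodes
};

// Prefer the legacy field unless it holds the placeholder sentinel.
int32_t ResolveBuiltinCode(const OperatorCode& op) {
  if (op.deprecated_builtin_code == kPlaceholderForGreaterOpCodes) {
    return op.builtin_code;
  }
  return op.deprecated_builtin_code;
}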
2 changes: 1 addition & 1 deletion tensorflow/lite/micro/docs/compression.md
@@ -276,7 +276,7 @@ bazel run --cache_test_results=no --test_output=all -s tensorflow/lite/micro/to
 The Generic Benchmark Application can be used to see the size of the model, the
 amount of arena memory used, and the size of the interpreter data structures
-including those involved with tensor conpression.
+including those involved with tensor compression.
 
 The benchmark also reports total inference time, as well as time taken for
 tensor decompression. Timing data may be either wall-clock time or processor
@@ -37,7 +37,7 @@ namespace {
 // Arena size is a guesstimate, followed by use of
 // MicroInterpreter::arena_used_bytes() on both the AudioPreprocessor and
 // MicroSpeech models and using the larger of the two results.
-constexpr size_t kArenaSize = 28584;  // xtensa p6
+constexpr size_t kArenaSize = 30 * 1024;
 alignas(16) uint8_t g_arena[kArenaSize];
 
 using Features = int8_t[kFeatureCount][kFeatureSize];
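The comment in this hunk describes the usual way these arena constants are derived: start from a guess, run once, and read MicroInterpreter::arena_used_bytes() after allocation. A hedged sketch of that measurement step, with a deliberately generous trial arena (the helper name and sizes are illustrative):

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

// Generous trial arena; shrink toward the measured value afterwards.
constexpr size_t kTrialArenaSize = 64 * 1024;
alignas(16) uint8_t g_trial_arena[kTrialArenaSize];

size_t MeasureArenaUsage(const void* model_data,
                         const tflite::MicroOpResolver& op_resolver) {
  const tflite::Model* model = tflite::GetModel(model_data);
  tflite::MicroInterpreter interpreter(model, op_resolver, g_trial_arena,
                                       kTrialArenaSize);
  if (interpreter.AllocateTensors() != kTfLiteOk) {
    return 0;  // the trial arena itself was too small
  }
  // High-water mark; pad it (e.g. by ~25%) before hardcoding it, since
  // usage varies across targets and toolchains.
  return interpreter.arena_used_bytes();
}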
51 changes: 49 additions & 2 deletions tensorflow/lite/micro/kernels/assign_variable.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_graph.h"
 #include "tensorflow/lite/micro/micro_log.h"
 #include "tensorflow/lite/micro/micro_resource_variable.h"
+#include "tensorflow/lite/micro/micro_utils.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
namespace tflite {
@@ -35,6 +36,20 @@ namespace {
 constexpr int kInputVariableId = 0;
 constexpr int kInputValue = 1;
 
+#ifdef USE_TFLM_COMPRESSION
+
+struct OpData {
+  // scratch buffer for compressed input tensor
+  int scratch_index;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+#endif  // USE_TFLM_COMPRESSION
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0);
@@ -70,6 +85,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                                              context, input_value));
   }
 
+#ifdef USE_TFLM_COMPRESSION
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpData* data = static_cast<OpData*>(node->user_data);
+  // Compression scratch buffers.
+  // These will only be allocated if the tensor is compressed.
+  data->scratch_index =
+      micro_context->AllocateDecompressionScratchBuffer(node, kInputValue);
+
+#endif  // USE_TFLM_COMPRESSION
+
   micro_context->DeallocateTempTfLiteTensor(input_value);
   return kTfLiteOk;
 }
@@ -93,15 +119,36 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         "ResourceVariables and pass it to the interpreter.");
     return kTfLiteError;
   }
+
+#ifdef USE_TFLM_COMPRESSION
+  OpData* data = static_cast<OpData*>(node->user_data);
+  const CompressionTensorData* comp_td =
+      micro_context->GetTensorCompressionData(node, kInputValue);
+  const void* buffer = tflite::micro::GetTensorData<void>(
+      micro_context, input_value, comp_td, data->scratch_index);
+#else  // USE_TFLM_COMPRESSION
+  const void* buffer = tflite::micro::GetTensorData<void>(input_value);
+#endif  // USE_TFLM_COMPRESSION
+
   TF_LITE_ENSURE_OK(context,
-                    resources->Assign(input_id->data.i32[0], input_value));
+                    resources->Assign(input_id->data.i32[0],
+                                      EvalTensorBytes(input_value), buffer));
   return kTfLiteOk;
 }
 
 }  // namespace.

+#ifdef USE_TFLM_COMPRESSION
+
+TFLMRegistration Register_ASSIGN_VARIABLE() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+
+#else  // USE_TFLM_COMPRESSION
+
 TFLMRegistration Register_ASSIGN_VARIABLE() {
   return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+
+#endif  // USE_TFLM_COMPRESSION
 }
 
 }  // namespace tflite
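Taken together, these assign_variable changes show the opt-in pattern a kernel follows to support compressed tensors: Init reserves per-node OpData holding a scratch-buffer index, Prepare asks the MicroContext to allocate a decompression scratch buffer, and Eval reads tensor data through the GetTensorData overload that decompresses into that buffer. A condensed, hypothetical kernel skeleton built from the same calls (the tensor index is illustrative; error checks trimmed):

#ifdef USE_TFLM_COMPRESSION

constexpr int kMyInputTensor = 0;  // illustrative input index

struct OpData {
  int scratch_index;  // decompression scratch buffer for the input
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);
  OpData* data = static_cast<OpData*>(node->user_data);
  // Only yields a real buffer if the tensor is actually compressed.
  data->scratch_index =
      micro_context->AllocateDecompressionScratchBuffer(node, kMyInputTensor);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);
  OpData* data = static_cast<OpData*>(node->user_data);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kMyInputTensor);
  const CompressionTensorData* comp_td =
      micro_context->GetTensorCompressionData(node, kMyInputTensor);
  // Decompresses into the scratch area when comp_td is non-null.
  const void* buffer = tflite::micro::GetTensorData<void>(
      micro_context, input, comp_td, data->scratch_index);
  (void)buffer;  // kernel-specific compute would consume this
  return kTfLiteOk;
}

#endif  // USE_TFLM_COMPRESSION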
49 changes: 48 additions & 1 deletion tensorflow/lite/micro/kernels/conv.cc
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -45,15 +45,35 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
 
+#ifdef USE_TFLM_COMPRESSION
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  const CompressionTensorData* weights_comp_td =
+      micro_context->GetTensorCompressionData(node, kConvWeightsTensor);
+  const CompressionTensorData* bias_comp_td =
+      micro_context->GetTensorCompressionData(node, kConvBiasTensor);
+
+#endif  // USE_TFLM_COMPRESSION
+
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
       tflite::reference_ops::Conv(
           ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<float>(input),
           tflite::micro::GetTensorShape(filter),
+#ifdef USE_TFLM_COMPRESSION
+          tflite::micro::GetTensorData<float>(micro_context, filter,
+                                              weights_comp_td,
+                                              data.weights_scratch_index),
+          tflite::micro::GetTensorShape(bias),
+          tflite::micro::GetTensorData<float>(micro_context, bias, bias_comp_td,
+                                              data.bias_scratch_index),
+#else  // USE_TFLM_COMPRESSION
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
           tflite::micro::GetOptionalTensorData<float>(bias),
+#endif  // USE_TFLM_COMPRESSION
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output),
           tflite::micro::GetTensorShape(nullptr), nullptr);
@@ -67,9 +87,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
             tflite::micro::GetTensorShape(input),
             tflite::micro::GetTensorData<int16_t>(input),
             tflite::micro::GetTensorShape(filter),
+#ifdef USE_TFLM_COMPRESSION
+            tflite::micro::GetTensorData<int8_t>(micro_context, filter,
+                                                 weights_comp_td,
+                                                 data.weights_scratch_index),
+            tflite::micro::GetTensorShape(bias),
+            tflite::micro::GetTensorData<int32_t>(
+                micro_context, bias, bias_comp_td, data.bias_scratch_index),
+#else  // USE_TFLM_COMPRESSION
             tflite::micro::GetTensorData<int8_t>(filter),
             tflite::micro::GetTensorShape(bias),
             tflite::micro::GetOptionalTensorData<std::int32_t>(bias),
+#endif  // USE_TFLM_COMPRESSION
             tflite::micro::GetTensorShape(output),
             tflite::micro::GetTensorData<int16_t>(output));
       } else if (bias->type == kTfLiteInt64) {
@@ -79,9 +108,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
             tflite::micro::GetTensorShape(input),
             tflite::micro::GetTensorData<int16_t>(input),
             tflite::micro::GetTensorShape(filter),
+#ifdef USE_TFLM_COMPRESSION
+            tflite::micro::GetTensorData<int8_t>(micro_context, filter,
+                                                 weights_comp_td,
+                                                 data.weights_scratch_index),
+            tflite::micro::GetTensorShape(bias),
+            tflite::micro::GetTensorData<int64_t>(
+                micro_context, bias, bias_comp_td, data.bias_scratch_index),
+#else  // USE_TFLM_COMPRESSION
             tflite::micro::GetTensorData<int8_t>(filter),
             tflite::micro::GetTensorShape(bias),
             tflite::micro::GetOptionalTensorData<std::int64_t>(bias),
+#endif  // USE_TFLM_COMPRESSION
             tflite::micro::GetTensorShape(output),
             tflite::micro::GetTensorData<int16_t>(output));
       } else {
@@ -119,9 +157,18 @@ TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<int8_t>(input),
           tflite::micro::GetTensorShape(filter),
+#ifdef USE_TFLM_COMPRESSION
+          tflite::micro::GetTensorData<int8_t>(micro_context, filter,
+                                               weights_comp_td,
+                                               data.weights_scratch_index),
+          tflite::micro::GetTensorShape(bias),
+          tflite::micro::GetTensorData<int32_t>(
+              micro_context, bias, bias_comp_td, data.bias_scratch_index),
+#else  // USE_TFLM_COMPRESSION
           tflite::micro::GetTensorData<int8_t>(filter),
           tflite::micro::GetTensorShape(bias),
           tflite::micro::GetOptionalTensorData<int32_t>(bias),
+#endif  // USE_TFLM_COMPRESSION
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<int8_t>(output));
       break;
10 changes: 9 additions & 1 deletion tensorflow/lite/micro/kernels/conv.h
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -49,6 +49,14 @@ struct OpDataConv {
   // A buffer used to store unpacked filter values. This is used if the source
   // tensor is of n-bit precision that cannot be easily processed by kernels.
   int filter_buffer_index;
+
+#ifdef USE_TFLM_COMPRESSION
+
+  // scratch buffers for compressed tensors
+  int weights_scratch_index;
+  int bias_scratch_index;
+
+#endif  // USE_TFLM_COMPRESSION
 };
 
 extern const int kConvInputTensor;
19 changes: 18 additions & 1 deletion tensorflow/lite/micro/kernels/conv_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -209,6 +209,23 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
                                 &data->filter_buffer_index);
   }
 
+#ifdef USE_TFLM_COMPRESSION
+
+  // Compression scratch buffers.
+  // These will only be allocated if the tensor is compressed.
+  if (micro_context->IsTensorCompressed(node, kConvWeightsTensor) &&
+      filter->type == kTfLiteInt4) {
+    MicroPrintf("Compression not supported with INT4 tensors");
+    return kTfLiteError;
+  }
+  data->weights_scratch_index =
+      micro_context->AllocateDecompressionScratchBuffer(node,
+                                                        kConvWeightsTensor);
+  data->bias_scratch_index =
+      micro_context->AllocateDecompressionScratchBuffer(node, kConvBiasTensor);
+
+#endif  // USE_TFLM_COMPRESSION
+
   micro_context->DeallocateTempTfLiteTensor(filter);
   micro_context->DeallocateTempTfLiteTensor(input);
   micro_context->DeallocateTempTfLiteTensor(output);
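The conv changes apply the same pattern to two tensors at once: conv.h extends OpDataConv with weights_scratch_index and bias_scratch_index, ConvPrepare above reserves both scratch buffers and rejects compressed INT4 filters (which, presumably, already go through the separate filter_buffer_index unpacking path), and ConvEval selects the decompressing overload case by case. Condensed from the float case above, the Eval-side selection reduces to:

#ifdef USE_TFLM_COMPRESSION
  // Decompress weights and bias into their Prepare-time scratch buffers.
  const float* filter_data = tflite::micro::GetTensorData<float>(
      micro_context, filter, weights_comp_td, data.weights_scratch_index);
  const float* bias_data = tflite::micro::GetTensorData<float>(
      micro_context, bias, bias_comp_td, data.bias_scratch_index);
#else  // USE_TFLM_COMPRESSION
  const float* filter_data = tflite::micro::GetTensorData<float>(filter);
  const float* bias_data = tflite::micro::GetOptionalTensorData<float>(bias);
#endif  // USE_TFLM_COMPRESSION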