Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enhance profiling and benchmarking #3012

Merged
merged 2 commits into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ char GetOrdinalCharacter(int i) {
} else if (i < 62) {
return 'A' + (i - 36);
}
return '*';
return GetOrdinalCharacter(i % 62);
}

} // namespace
Expand Down Expand Up @@ -335,9 +335,14 @@ void GreedyMemoryPlanner::PrintMemoryPlan() {
CalculateOffsetsIfNeeded();

for (int i = 0; i < buffer_count_; ++i) {
MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
GetOrdinalCharacter(i), i, requirements_[i].size,
buffer_offsets_[i], requirements_[i].first_time_used,
char c = '*';
if (requirements_[i].first_time_used != requirements_[i].last_time_used) {
// not a scratch buffer nor subgraph output tensor
c = GetOrdinalCharacter(i);
}
MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d", c,
i, requirements_[i].size, buffer_offsets_[i],
requirements_[i].first_time_used,
requirements_[i].last_time_used);
}

Expand Down Expand Up @@ -379,15 +384,20 @@ void GreedyMemoryPlanner::PrintMemoryPlan() {
const int line_end = ((offset + size) * kLineWidth) / max_size;
for (int n = line_start; n < line_end; ++n) {
if (line[n] == '.') {
line[n] = GetOrdinalCharacter(i);
if (requirements->first_time_used == requirements->last_time_used) {
// scratch buffer or subgraph output tensor
line[n] = '*';
} else {
line[n] = GetOrdinalCharacter(i);
}
} else {
line[n] = '!';
}
}
}
line[kLineWidth] = 0;

MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line,
MicroPrintf("%4d: %s (%dk)", t, (const char*)line,
(memory_use + 1023) / 1024);
}
}
Expand Down
19 changes: 14 additions & 5 deletions tensorflow/lite/micro/micro_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,14 @@ void MicroProfiler::LogTicksPerTagCsv() {
TFLITE_DCHECK(tags_[i] != nullptr);
int position = FindExistingOrNextPosition(tags_[i]);
TFLITE_DCHECK(position >= 0);
total_ticks_per_tag[position].tag = tags_[i];
total_ticks_per_tag[position].ticks =
total_ticks_per_tag[position].ticks + ticks;
total_ticks_per_tag_[position].tag = tags_[i];
total_ticks_per_tag_[position].ticks =
total_ticks_per_tag_[position].ticks + ticks;
total_ticks += ticks;
}

for (int i = 0; i < num_events_; ++i) {
TicksPerTag each_tag_entry = total_ticks_per_tag[i];
TicksPerTag each_tag_entry = total_ticks_per_tag_[i];
if (each_tag_entry.tag == nullptr) {
break;
}
Expand All @@ -112,12 +112,21 @@ void MicroProfiler::LogTicksPerTagCsv() {
int MicroProfiler::FindExistingOrNextPosition(const char* tag_name) {
int pos = 0;
for (; pos < num_events_; pos++) {
TicksPerTag each_tag_entry = total_ticks_per_tag[pos];
TicksPerTag each_tag_entry = total_ticks_per_tag_[pos];
if (each_tag_entry.tag == nullptr ||
strcmp(each_tag_entry.tag, tag_name) == 0) {
return pos;
}
}
return pos < num_events_ ? pos : -1;
}

// Resets the profiler so a new profiling run starts from a clean slate:
// clears the per-tag accumulation table and drops all recorded events.
void MicroProfiler::ClearEvents() {
  for (int i = 0; i < num_events_; i++) {
    total_ticks_per_tag_[i].tag = nullptr;
    // Also reset the accumulated tick count. FindExistingOrNextPosition
    // treats a null tag as a free slot, so leaving a stale tick value here
    // would silently inflate the totals of whichever tag reuses the slot
    // in a later LogTicksPerTagCsv run.
    total_ticks_per_tag_[i].ticks = 0;
  }

  num_events_ = 0;
}

} // namespace tflite
6 changes: 3 additions & 3 deletions tensorflow/lite/micro/micro_profiler.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,7 +45,7 @@ class MicroProfiler : public MicroProfilerInterface {
virtual void EndEvent(uint32_t event_handle) override;

// Clears all the events that have been currently profiled.
void ClearEvents() { num_events_ = 0; }
void ClearEvents();

// Returns the sum of the ticks taken across all the events. This number
// is only meaningful if all of the events are disjoint (the end time of
Expand Down Expand Up @@ -83,7 +83,7 @@ class MicroProfiler : public MicroProfilerInterface {
// In practice, the number of tags will be much lower than the number of
// events. But it is theoretically possible that each event to be unique and
// hence we allow total_ticks_per_tag to have kMaxEvents entries.
TicksPerTag total_ticks_per_tag[kMaxEvents] = {};
TicksPerTag total_ticks_per_tag_[kMaxEvents] = {};

int FindExistingOrNextPosition(const char* tag_name);

Expand Down
9 changes: 9 additions & 0 deletions tensorflow/lite/micro/tools/benchmarking/Makefile.inc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ endif
$(GENERATED_SRCS_DIR)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h
endif

ifeq ($(ENABLE_COMPRESSION), yes)
ifneq ($(GENERIC_BENCHMARK_ALT_MEM_ATTR),)
CXXFLAGS += -DGENERIC_BENCHMARK_ALT_MEM_ATTR=$(GENERIC_BENCHMARK_ALT_MEM_ATTR)
endif
ifneq ($(GENERIC_BENCHMARK_ALT_MEM_SIZE),)
CXXFLAGS += -DGENERIC_BENCHMARK_ALT_MEM_SIZE=$(GENERIC_BENCHMARK_ALT_MEM_SIZE)
endif
endif

GENERIC_BENCHMARK_SRCS := \
$(MICROLITE_BENCHMARK_ROOT_DIR)/generic_model_benchmark.cc \
$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.cc \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ function substitute_strings() {
IFS=${SAVED_IFS}
replacement=()
for line in "${lines_array[@]}"; do
line=$(sed -e 's/"/\\"/g' <<< "${line}")
line=$(sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' <<< "${line}")
line=$(printf '"%s",\n ' "${line}")
replacement+=( "${line}" )
done
Expand Down
174 changes: 158 additions & 16 deletions tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ limitations under the License.
#include <sys/types.h>

#include <cstring>
#include <initializer_list>
#include <memory>
#include <random>
#include <type_traits>
Expand Down Expand Up @@ -56,19 +57,37 @@ limitations under the License.

#endif // defind(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)

#if defined(GENERIC_BENCHMARK_ALT_MEM_ATTR) && \
!defined(GENERIC_BENCHMARK_ALT_MEM_SIZE)
#error "GENERIC_BENCHMARK_ALT_MEM_SIZE missing from CXXFLAGS"
#endif // defined(GENERIC_BENCHMARK_ALT_MEM_ATTR) &&
// !defined(GENERIC_BENCHMARK_ALT_MEM_SIZE)

#if defined(GENERIC_BENCHMARK_ALT_MEM_SIZE) && \
!defined(GENERIC_BENCHMARK_ALT_MEM_ATTR)
#error "GENERIC_BENCHMARK_ALT_MEM_ATTR missing from CXXFLAGS"
#endif // defined(GENERIC_BENCHMARK_ALT_MEM_SIZE) &&
// !defined(GENERIC_BENCHMARK_ALT_MEM_ATTR)

#if defined(GENERIC_BENCHMARK_ALT_MEM_SIZE) && \
defined(GENERIC_BENCHMARK_ALT_MEM_ATTR) && defined(USE_TFLM_COMPRESSION)
#define USE_ALT_DECOMPRESSION_MEM
#endif // defined(GENERIC_BENCHMARK_ALT_MEM_SIZE) &&
// defined(GENERIC_BENCHMARK_ALT_MEM_ATTR) &&
// defined(USE_TFLM_COMPRESSION)

/*
* Generic model benchmark. Evaluates runtime performance of a provided model
* with random inputs.
* Generic model benchmark. Evaluates runtime performance of a provided
* model with random inputs.
*/

namespace tflite {

namespace {

using Profiler = ::tflite::MicroProfiler;

// Seed used for the random input. Input data shouldn't affect invocation timing
// so randomness isn't really needed.
// Seed used for the random input. Input data shouldn't affect invocation
// timing so randomness isn't really needed.
constexpr uint32_t kRandomSeed = 0xFB;

#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
Expand All @@ -80,6 +99,11 @@ constexpr size_t kTensorArenaSize = GENERIC_BENCHMARK_TENSOR_ARENA_SIZE;
constexpr size_t kTensorArenaSize = 5e6 - MODEL_SIZE;
#endif // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)

#if defined(USE_ALT_DECOMPRESSION_MEM)
constexpr size_t kAltMemorySize = GENERIC_BENCHMARK_ALT_MEM_SIZE;
alignas(16) GENERIC_BENCHMARK_ALT_MEM_ATTR uint8_t g_alt_memory[kAltMemorySize];
#endif // defined(USE_ALT_DECOMPRESSION_MEM)

constexpr int kNumResourceVariable = 100;

void SetRandomInput(const uint32_t random_seed,
Expand Down Expand Up @@ -130,39 +154,146 @@ bool ReadFile(const char* file_name, void* buffer, size_t buffer_size) {
}
#endif // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)

// Lookup table for the reflected CRC-32 polynomial (0xEDB88320), populated
// lazily by GenCRC32Table().
constexpr uint32_t kCrctabLen = 256;
uint32_t crctab[kCrctabLen];

// Fills crctab with the standard CRC-32 (ISO-HDLC / zlib) table. The
// computation is idempotent, so it is safe to call more than once.
void GenCRC32Table() {
  constexpr uint32_t kPolyN = 0xEDB88320;
  for (size_t index = 0; index < kCrctabLen; index++) {
    crctab[index] = index;
    for (int i = 0; i < 8; i++) {
      if (crctab[index] & 1) {
        crctab[index] = (crctab[index] >> 1) ^ kPolyN;
      } else {
        crctab[index] >>= 1;
      }
    }
  }
}

// Computes the standard CRC-32 (initial value 0xFFFFFFFF, final XOR
// 0xFFFFFFFF) of data[0, data_length). GenCRC32Table() must have been
// called before the first use.
//
// NOTE(review): the original block contained two stray lines of GitHub
// review-page text inside this function body (a copy/paste scrape
// artifact); they have been removed so the function compiles.
uint32_t ComputeCRC32(const uint8_t* data, const size_t data_length) {
  uint32_t crc32 = ~0U;

  for (size_t i = 0; i < data_length; i++) {
    // crctab is an array of 256 32-bit constants
    const uint32_t index = (crc32 ^ data[i]) & (kCrctabLen - 1);
    crc32 = (crc32 >> 8) ^ crctab[index];
  }

  // invert all bits of result
  crc32 ^= ~0U;
  return crc32;
}

// Prints a CRC-32 checksum of every output tensor's raw bytes so separate
// benchmark runs can be checked for bit-exact results.
void ShowOutputCRC32(tflite::MicroInterpreter* interpreter) {
  GenCRC32Table();
  const size_t output_count = interpreter->outputs_size();
  for (size_t idx = 0; idx < output_count; ++idx) {
    TfLiteTensor* tensor = interpreter->output_tensor(idx);
    const uint32_t checksum =
        ComputeCRC32(tflite::GetTensorData<uint8_t>(tensor), tensor->bytes);
    MicroPrintf("Output CRC32: 0x%X", checksum);
  }
}

// Prints a CRC-32 checksum of every input tensor's raw bytes, letting a
// run's (randomly generated) inputs be identified and reproduced.
void ShowInputCRC32(tflite::MicroInterpreter* interpreter) {
  GenCRC32Table();
  const size_t input_count = interpreter->inputs_size();
  for (size_t idx = 0; idx < input_count; ++idx) {
    TfLiteTensor* tensor = interpreter->input_tensor(idx);
    const uint32_t checksum =
        ComputeCRC32(tflite::GetTensorData<uint8_t>(tensor), tensor->bytes);
    MicroPrintf("Input CRC32: 0x%X", checksum);
  }
}

int Benchmark(const uint8_t* model_data, tflite::PrettyPrintType print_type) {
Profiler profiler;
static Profiler profiler;
static Profiler profiler2;
TfLiteStatus status;

// use this to keep the application size stable regardless of whether
// compression is being used
#ifdef USE_TFLM_COMPRESSION
constexpr bool using_compression = true;
#else // USE_TFLM_COMPRESSION
constexpr bool using_compression = false;
#endif // USE_TFLM_COMPRESSION

alignas(16) static uint8_t tensor_arena[kTensorArenaSize];

uint32_t event_handle = profiler.BeginEvent("TfliteGetModel");
#ifdef USE_ALT_DECOMPRESSION_MEM
std::initializer_list<tflite::MicroContext::AlternateMemoryRegion>
alt_memory_region = {{g_alt_memory, kAltMemorySize}};
#endif // USE_ALT_DECOMPRESSION_MEM

uint32_t event_handle = profiler.BeginEvent("tflite::GetModel");
const tflite::Model* model = tflite::GetModel(model_data);
profiler.EndEvent(event_handle);

event_handle = profiler.BeginEvent("tflite::CreateOpResolver");
TflmOpResolver op_resolver;
TF_LITE_ENSURE_STATUS(CreateOpResolver(op_resolver));
status = CreateOpResolver(op_resolver);
if (status != kTfLiteOk) {
MicroPrintf("tflite::CreateOpResolver failed");
return -1;
}
profiler.EndEvent(event_handle);

event_handle = profiler.BeginEvent("tflite::RecordingMicroAllocator::Create");
tflite::RecordingMicroAllocator* allocator(
tflite::RecordingMicroAllocator::Create(tensor_arena, kTensorArenaSize));
profiler.EndEvent(event_handle);
event_handle = profiler.BeginEvent("tflite::MicroInterpreter instantiation");
tflite::RecordingMicroInterpreter interpreter(
model, op_resolver, allocator,
tflite::MicroResourceVariables::Create(allocator, kNumResourceVariable),
&profiler);
TF_LITE_ENSURE_STATUS(interpreter.AllocateTensors());
profiler.EndEvent(event_handle);

#ifdef USE_ALT_DECOMPRESSION_MEM
event_handle =
profiler.BeginEvent("tflite::MicroInterpreter::SetDecompressionMemory");
status = interpreter.SetDecompressionMemory(alt_memory_region);
if (status != kTfLiteOk) {
MicroPrintf("tflite::MicroInterpreter::SetDecompressionMemory failed");
return -1;
}
profiler.EndEvent(event_handle);
#endif // USE_ALT_DECOMPRESSION_MEM

event_handle =
profiler.BeginEvent("tflite::MicroInterpreter::AllocateTensors");
status = interpreter.AllocateTensors();
if (status != kTfLiteOk) {
MicroPrintf("tflite::MicroInterpreter::AllocateTensors failed");
return -1;
}
profiler.EndEvent(event_handle);

profiler.Log();
profiler.LogTicksPerTagCsv();
profiler.ClearEvents();

if (using_compression) {
status = interpreter.SetAlternateProfiler(&profiler2);
if (status != kTfLiteOk) {
MicroPrintf("tflite::MicroInterpreter::SetAlternateProfiler failed");
return -1;
}
}

MicroPrintf(""); // null MicroPrintf serves as a newline.

// For streaming models, the interpreter will return kTfLiteAbort if the model
// does not yet have enough data to make an inference. As such, we need to
// invoke the interpreter multiple times until we either receive an error or
// kTfLiteOk. This loop also works for non-streaming models, as they'll just
// return kTfLiteOk after the first invocation.
// For streaming models, the interpreter will return kTfLiteAbort if the
// model does not yet have enough data to make an inference. As such, we
// need to invoke the interpreter multiple times until we either receive an
// error or kTfLiteOk. This loop also works for non-streaming models, as
// they'll just return kTfLiteOk after the first invocation.
uint32_t seed = kRandomSeed;
while (true) {
SetRandomInput(seed++, interpreter);
TfLiteStatus status = interpreter.Invoke();
ShowInputCRC32(&interpreter);
suleshahid marked this conversation as resolved.
Show resolved Hide resolved
MicroPrintf(""); // null MicroPrintf serves as a newline.

status = interpreter.Invoke();
if ((status != kTfLiteOk) && (static_cast<int>(status) != kTfLiteAbort)) {
MicroPrintf("Model interpreter invocation failed: %d\n", status);
return -1;
Expand All @@ -174,6 +305,17 @@ int Benchmark(const uint8_t* model_data, tflite::PrettyPrintType print_type) {
MicroPrintf(""); // null MicroPrintf serves as a newline.
profiler.ClearEvents();

if (using_compression) {
profiler2.Log();
MicroPrintf(""); // null MicroPrintf serves as a newline.
profiler2.LogTicksPerTagCsv();
MicroPrintf(""); // null MicroPrintf serves as a newline.
profiler2.ClearEvents();
}

ShowOutputCRC32(&interpreter);
MicroPrintf(""); // null MicroPrintf serves as a newline.

if (status == kTfLiteOk) {
break;
}
Expand Down
Loading
Loading