Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: add symbol table for future stat name encoding #3927

Merged
merged 41 commits into from
Aug 28, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
8de2463
WIP Working symbol table with manual reference counting
ambuc Jul 23, 2018
1c45948
Added tests, better error handling for null symbol lookups
ambuc Jul 23, 2018
a3df93b
Fix formatting
ambuc Jul 23, 2018
a78ea8d
Add common/pure header
ambuc Jul 23, 2018
59e8129
Add new StatNameImpl type for encapsulating the SymbolVec
ambuc Jul 24, 2018
475dcd3
Move the bulk of the logic to a .cc
ambuc Jul 24, 2018
9b3594b
Update tests
ambuc Jul 24, 2018
e7b619a
Add the SymbolTableImpl .cc
ambuc Jul 24, 2018
b3f1b7f
Mark internal string as const ref
ambuc Jul 25, 2018
bdc714c
More documentation around size()
ambuc Jul 25, 2018
0ded3e5
Decls before fields
ambuc Jul 25, 2018
b8b3f60
Hide toSymbols() fn from public API
ambuc Jul 25, 2018
ae650c3
Add proper test for death on the decode of an invalid symbol
ambuc Jul 25, 2018
14e8bb3
Added documentation for toSymbol() / fromSymbol()
ambuc Jul 25, 2018
86dc207
Add free pool for symbol re-use
ambuc Jul 25, 2018
cb6c754
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Jul 25, 2018
ff30317
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Jul 25, 2018
6e42c86
Various nits and docs fixes
ambuc Jul 26, 2018
f848464
Use std::stack<> for free pool
ambuc Jul 26, 2018
3687311
Optimized bimap using string_views
ambuc Jul 26, 2018
2772592
Add string_view return type for fromSymbol()
ambuc Jul 26, 2018
edb35e3
Added symbolic struct for SharedSymbol
ambuc Jul 26, 2018
c872073
SymbolTable::encode() now accepts a string_view
ambuc Jul 26, 2018
2f83e68
Simpler string construction in toSymbol()
ambuc Jul 26, 2018
85c5d8b
Use ASSERT() instead of RELEASE_ASSERT()
ambuc Jul 27, 2018
7f93df9
Assorted nits, todos, renaming
ambuc Jul 27, 2018
190da3d
Various const nits, stronger assertion against overflow
ambuc Jul 27, 2018
f2da353
Remove accidentally committed file
ambuc Jul 27, 2018
9e68dfd
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Jul 27, 2018
8e794d5
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Jul 31, 2018
bd47676
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 1, 2018
19c6e65
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 1, 2018
da2b7d7
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 1, 2018
9466aa3
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 7, 2018
86dab2c
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 14, 2018
208512d
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 14, 2018
27aa402
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 14, 2018
aaba51b
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 21, 2018
278f92f
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 24, 2018
ce87e7e
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 27, 2018
efd3c6f
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc Aug 27, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/envoy/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ envoy_cc_library(
deps = ["//include/envoy/common:interval_set_interface"],
)

envoy_cc_library(
name = "symbol_table_interface",
hdrs = ["symbol_table.h"],
)

envoy_cc_library(
name = "timespan",
hdrs = ["timespan.h"],
Expand Down
50 changes: 50 additions & 0 deletions include/envoy/stats/symbol_table.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#pragma once

#include <memory>
#include <string>
#include <vector>

#include "envoy/common/pure.h"

#include "absl/strings/string_view.h"

namespace Envoy {
namespace Stats {

/**
* Interface for storing a stat name.
*/
class StatName {
public:
virtual ~StatName(){};
virtual std::string toString() const PURE;
};

using StatNamePtr = std::unique_ptr<StatName>;

/**
* Interface for shortening and retrieving stat names.
*
* Guarantees that x = encode(x).toString() for any x.
*/
class SymbolTable {
public:
virtual ~SymbolTable() {}

/**
* Encodes a stat name into a StatNamePtr. Expects the name to be period-delimited.
*
* @param name the stat name to encode.
* @return StatNamePtr a unique_ptr to the StatName class encapsulating the symbol vector.
*/
virtual StatNamePtr encode(absl::string_view name) PURE;

/**
* Returns the size of a SymbolTable, as measured in number of symbols stored.
* @return size_t the size of the table.
*/
virtual size_t size() const PURE;
};

} // namespace Stats
} // namespace Envoy
13 changes: 13 additions & 0 deletions source/common/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ envoy_cc_library(
":raw_stat_data_lib",
":source_impl_lib",
":stats_options_lib",
":symbol_table_lib",
":tag_extractor_lib",
":utility_lib",
"//include/envoy/common:time_interface",
Expand All @@ -117,6 +118,18 @@ envoy_cc_library(
],
)

envoy_cc_library(
name = "symbol_table_lib",
srcs = ["symbol_table_impl.cc"],
hdrs = ["symbol_table_impl.h"],
external_deps = ["abseil_base"],
deps = [
"//include/envoy/stats:symbol_table_interface",
"//source/common/common:assert_lib",
"//source/common/common:utility_lib",
],
)

envoy_cc_library(
name = "stats_options_lib",
hdrs = ["stats_options_impl.h"],
Expand Down
86 changes: 86 additions & 0 deletions source/common/stats/symbol_table_impl.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include "common/stats/symbol_table_impl.h"

#include <memory>
#include <unordered_map>
#include <vector>

#include "common/common/assert.h"

namespace Envoy {
namespace Stats {

// TODO(ambuc): There is a possible performance optimization here for avoiding the encoding of IPs,
// if they appear in stat names. We don't want to waste time symbolizing an integer as an integer,
// if we can help it.
StatNamePtr SymbolTableImpl::encode(const absl::string_view name) {
SymbolVec symbol_vec;
std::vector<absl::string_view> name_vec = absl::StrSplit(name, '.');
symbol_vec.reserve(name_vec.size());
std::transform(name_vec.begin(), name_vec.end(), std::back_inserter(symbol_vec),
[this](absl::string_view x) { return toSymbol(x); });

return std::make_unique<StatNameImpl>(symbol_vec, *this);
}

std::string SymbolTableImpl::decode(const SymbolVec& symbol_vec) const {
std::vector<absl::string_view> name;
name.reserve(symbol_vec.size());
std::transform(symbol_vec.begin(), symbol_vec.end(), std::back_inserter(name),
[this](Symbol x) { return fromSymbol(x); });
return absl::StrJoin(name, ".");
}

void SymbolTableImpl::free(const SymbolVec& symbol_vec) {
for (const Symbol symbol : symbol_vec) {
auto decode_search = decode_map_.find(symbol);
ASSERT(decode_search != decode_map_.end());

auto encode_search = encode_map_.find(decode_search->second);
ASSERT(encode_search != encode_map_.end());

encode_search->second.ref_count_--;
// If that was the last remaining client usage of the symbol, erase the the current
// mappings and add the now-unused symbol to the reuse pool.
if (encode_search->second.ref_count_ == 0) {
decode_map_.erase(decode_search);
encode_map_.erase(encode_search);
pool_.push(symbol);
}
}
}

Symbol SymbolTableImpl::toSymbol(absl::string_view sv) {
Symbol result;
auto encode_find = encode_map_.find(sv);
// If the string segment doesn't already exist,
if (encode_find == encode_map_.end()) {
// We create the actual string, place it in the decode_map_, and then insert a string_view
// pointing to it in the encode_map_. This allows us to only store the string once.
std::string str = std::string(sv);

auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)});
ASSERT(decode_insert.second);

auto encode_insert = encode_map_.insert(
{decode_insert.first->second, {.symbol_ = next_symbol_, .ref_count_ = 1}});
ASSERT(encode_insert.second);

result = next_symbol_;
newSymbol();
} else {
// If the insertion didn't take place, return the actual value at that location and up the
// refcount at that location
result = encode_find->second.symbol_;
++(encode_find->second.ref_count_);
}
return result;
}

absl::string_view SymbolTableImpl::fromSymbol(const Symbol symbol) const {
auto search = decode_map_.find(symbol);
ASSERT(search != decode_map_.end());
return search->second;
}

} // namespace Stats
} // namespace Envoy
139 changes: 139 additions & 0 deletions source/common/stats/symbol_table_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#pragma once

#include <algorithm>
#include <memory>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>

#include "envoy/common/exception.h"
#include "envoy/stats/symbol_table.h"

#include "common/common/assert.h"
#include "common/common/utility.h"

#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"

namespace Envoy {
namespace Stats {

using Symbol = uint32_t;
using SymbolVec = std::vector<Symbol>;

/**
* Underlying SymbolTableImpl implementation which manages per-symbol reference counting.
*
* The underlying Symbol / SymbolVec data structures are private to the impl. One side
* effect of the non-monotonically-increasing symbol counter is that if a string is encoded, the
* resulting stat is destroyed, and then that same string is re-encoded, it may or may not encode to
* the same underlying symbol.
*/
class SymbolTableImpl : public SymbolTable {
public:
StatNamePtr encode(absl::string_view name) override;

// For testing purposes only.
size_t size() const override {
ASSERT(encode_map_.size() == decode_map_.size());
return encode_map_.size();
}

private:
friend class StatNameImpl;
friend class StatNameTest;

struct SharedSymbol {
Symbol symbol_;
uint32_t ref_count_;
};

/**
* Decodes a vector of symbols back into its period-delimited stat name.
* If decoding fails on any part of the symbol_vec, we release_assert and crash hard, since this
* should never happen, and we don't want to continue running with a corrupt stats set.
*
* @param symbol_vec the vector of symbols to decode.
* @return std::string the retrieved stat name.
*/
std::string decode(const SymbolVec& symbol_vec) const;

/**
* Since SymbolTableImpl does manual reference counting, a client of SymbolTable (such as
* StatName) must manually call free(symbol_vec) when it is freeing the stat it represents. This
* way, the symbol table will grow and shrink dynamically, instead of being write-only.
*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: @param

* @param symbol_vec the vector of symbols to be freed.
*/
void free(const SymbolVec& symbol_vec);

/**
* Convenience function for encode(), symbolizing one string segment at a time.
*
* @param sv the individual string to be encoded as a symbol.
* @return Symbol the encoded string.
*/
Symbol toSymbol(absl::string_view sv);

/**
* Convenience function for decode(), decoding one symbol at a time.
*
* @param symbol the individual symbol to be decoded.
* @return absl::string_view the decoded string.
*/
absl::string_view fromSymbol(Symbol symbol) const;

// Stages a new symbol for use. To be called after a successful insertion.
void newSymbol() {
if (pool_.empty()) {
next_symbol_ = ++monotonic_counter_;
} else {
next_symbol_ = pool_.top();
pool_.pop();
}
// This should catch integer overflow for the new symbol.
ASSERT(monotonic_counter_ != 0);
}

Symbol monotonicCounter() { return monotonic_counter_; }

// Stores the symbol to be used at next insertion. This should exist ahead of insertion time so
// that if insertion succeeds, the value written is the correct one.
Symbol next_symbol_ = 0;

// If the free pool is exhausted, we monotonically increase this counter.
Symbol monotonic_counter_ = 0;

// Bimap implementation.
// The encode map stores both the symbol and the ref count of that symbol.
// Using absl::string_view lets us only store the complete string once, in the decode map.
std::unordered_map<absl::string_view, SharedSymbol, StringViewHash> encode_map_;
std::unordered_map<Symbol, std::string> decode_map_;

// Free pool of symbols for re-use.
// TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols
// using an Envoy::IntervalSet.
std::stack<Symbol> pool_;
};

/**
* Implements RAII for Symbols, since the StatName destructor does the work of freeing its component
* symbols.
*/
class StatNameImpl : public StatName {
public:
StatNameImpl(SymbolVec symbol_vec, SymbolTableImpl& symbol_table)
: symbol_vec_(symbol_vec), symbol_table_(symbol_table) {}
~StatNameImpl() override { symbol_table_.free(symbol_vec_); }
std::string toString() const override { return symbol_table_.decode(symbol_vec_); }

private:
friend class StatNameTest;
SymbolVec symbolVec() { return symbol_vec_; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this method for?

SymbolVec symbol_vec_;
SymbolTableImpl& symbol_table_;
};

} // namespace Stats
} // namespace Envoy
12 changes: 12 additions & 0 deletions test/common/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,22 @@ envoy_cc_test(
],
)

envoy_cc_test(
name = "symbol_table_test",
srcs = ["symbol_table_test.cc"],
deps = [
"//source/common/stats:symbol_table_lib",
"//test/mocks/stats:stats_mocks",
"//test/test_common:logging_lib",
"//test/test_common:utility_lib",
],
)

envoy_cc_test(
name = "thread_local_store_test",
srcs = ["thread_local_store_test.cc"],
deps = [
"//source/common/stats:symbol_table_lib",
"//source/common/stats:thread_local_store_lib",
"//test/mocks/event:event_mocks",
"//test/mocks/server:server_mocks",
Expand Down
Loading