-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
stats: add symbol table for future stat name encoding #3927
Merged
Merged
Changes from all commits
Commits
Show all changes
41 commits
Select commit
Hold shift + click to select a range
8de2463
WIP Working symbol table with manual reference counting
ambuc 1c45948
Added tests, better error handling for null symbol lookups
ambuc a3df93b
Fix formatting
ambuc a78ea8d
Add common/pure header
ambuc 59e8129
Add new StatNameImpl type for encapsulating the SymbolVec
ambuc 475dcd3
Move the bulk of the logic to a .cc
ambuc 9b3594b
Update tests
ambuc e7b619a
Add the SymbolTableImpl .cc
ambuc b3f1b7f
Mark internal string as const ref
ambuc bdc714c
More documentation around size()
ambuc 0ded3e5
Decls before fields
ambuc b8b3f60
Hide toSymbols() fn from public API
ambuc ae650c3
Add proper test for death on the decode of an invalid symbol
ambuc 14e8bb3
Added documentation for toSymbol() / fromSymbol()
ambuc 86dc207
Add free pool for symbol re-use
ambuc cb6c754
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc ff30317
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc 6e42c86
Various nits and docs fixes
ambuc f848464
Use std::stack<> for free pool
ambuc 3687311
Optimized bimap using string_views
ambuc 2772592
Add string_view return type for fromSymbol()
ambuc edb35e3
Added symbolic struct for SharedSymbol
ambuc c872073
SymbolTable::encode() now accepts a string_view
ambuc 2f83e68
Simpler string construction in toSymbol()
ambuc 85c5d8b
Use ASSERT() instead of RELEASE_ASSERT()
ambuc 7f93df9
Assorted nits, todos, renaming
ambuc 190da3d
Various const nits, stronger assertion against overflow
ambuc f2da353
Remove accidentally committed file
ambuc 9e68dfd
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc 8e794d5
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc bd47676
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc 19c6e65
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc da2b7d7
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc 9466aa3
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc 86dab2c
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc 208512d
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc 27aa402
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc aaba51b
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc 278f92f
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc ce87e7e
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc efd3c6f
Merge remote-tracking branch 'upstream/master' into symbol-table
ambuc File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#pragma once | ||
|
||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include "envoy/common/pure.h" | ||
|
||
#include "absl/strings/string_view.h" | ||
|
||
namespace Envoy { | ||
namespace Stats { | ||
|
||
/** | ||
* Interface for storing a stat name. | ||
*/ | ||
class StatName { | ||
public: | ||
virtual ~StatName(){}; | ||
virtual std::string toString() const PURE; | ||
}; | ||
|
||
using StatNamePtr = std::unique_ptr<StatName>; | ||
|
||
/** | ||
* Interface for shortening and retrieving stat names. | ||
* | ||
* Guarantees that x = encode(x).toString() for any x. | ||
*/ | ||
class SymbolTable { | ||
public: | ||
virtual ~SymbolTable() {} | ||
|
||
/** | ||
* Encodes a stat name into a StatNamePtr. Expects the name to be period-delimited. | ||
* | ||
* @param name the stat name to encode. | ||
* @return StatNamePtr a unique_ptr to the StatName class encapsulating the symbol vector. | ||
*/ | ||
virtual StatNamePtr encode(absl::string_view name) PURE; | ||
|
||
/** | ||
* Returns the size of a SymbolTable, as measured in number of symbols stored. | ||
* @return size_t the size of the table. | ||
*/ | ||
virtual size_t size() const PURE; | ||
}; | ||
|
||
} // namespace Stats | ||
} // namespace Envoy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#include "common/stats/symbol_table_impl.h" | ||
|
||
#include <memory> | ||
#include <unordered_map> | ||
#include <vector> | ||
|
||
#include "common/common/assert.h" | ||
|
||
namespace Envoy { | ||
namespace Stats { | ||
|
||
// TODO(ambuc): There is a possible performance optimization here for avoiding the encoding of IPs, | ||
// if they appear in stat names. We don't want to waste time symbolizing an integer as an integer, | ||
// if we can help it. | ||
StatNamePtr SymbolTableImpl::encode(const absl::string_view name) { | ||
SymbolVec symbol_vec; | ||
std::vector<absl::string_view> name_vec = absl::StrSplit(name, '.'); | ||
symbol_vec.reserve(name_vec.size()); | ||
std::transform(name_vec.begin(), name_vec.end(), std::back_inserter(symbol_vec), | ||
[this](absl::string_view x) { return toSymbol(x); }); | ||
|
||
return std::make_unique<StatNameImpl>(symbol_vec, *this); | ||
} | ||
|
||
std::string SymbolTableImpl::decode(const SymbolVec& symbol_vec) const { | ||
std::vector<absl::string_view> name; | ||
name.reserve(symbol_vec.size()); | ||
std::transform(symbol_vec.begin(), symbol_vec.end(), std::back_inserter(name), | ||
[this](Symbol x) { return fromSymbol(x); }); | ||
return absl::StrJoin(name, "."); | ||
} | ||
|
||
void SymbolTableImpl::free(const SymbolVec& symbol_vec) { | ||
for (const Symbol symbol : symbol_vec) { | ||
auto decode_search = decode_map_.find(symbol); | ||
ASSERT(decode_search != decode_map_.end()); | ||
|
||
auto encode_search = encode_map_.find(decode_search->second); | ||
ASSERT(encode_search != encode_map_.end()); | ||
|
||
encode_search->second.ref_count_--; | ||
// If that was the last remaining client usage of the symbol, erase the the current | ||
// mappings and add the now-unused symbol to the reuse pool. | ||
if (encode_search->second.ref_count_ == 0) { | ||
decode_map_.erase(decode_search); | ||
encode_map_.erase(encode_search); | ||
pool_.push(symbol); | ||
} | ||
} | ||
} | ||
|
||
Symbol SymbolTableImpl::toSymbol(absl::string_view sv) { | ||
Symbol result; | ||
auto encode_find = encode_map_.find(sv); | ||
// If the string segment doesn't already exist, | ||
if (encode_find == encode_map_.end()) { | ||
// We create the actual string, place it in the decode_map_, and then insert a string_view | ||
// pointing to it in the encode_map_. This allows us to only store the string once. | ||
std::string str = std::string(sv); | ||
|
||
auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)}); | ||
ASSERT(decode_insert.second); | ||
|
||
auto encode_insert = encode_map_.insert( | ||
{decode_insert.first->second, {.symbol_ = next_symbol_, .ref_count_ = 1}}); | ||
ASSERT(encode_insert.second); | ||
|
||
result = next_symbol_; | ||
newSymbol(); | ||
} else { | ||
// If the insertion didn't take place, return the actual value at that location and up the | ||
// refcount at that location | ||
result = encode_find->second.symbol_; | ||
++(encode_find->second.ref_count_); | ||
} | ||
return result; | ||
} | ||
|
||
absl::string_view SymbolTableImpl::fromSymbol(const Symbol symbol) const { | ||
auto search = decode_map_.find(symbol); | ||
ASSERT(search != decode_map_.end()); | ||
return search->second; | ||
} | ||
|
||
} // namespace Stats | ||
} // namespace Envoy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
#pragma once | ||
|
||
#include <algorithm> | ||
#include <memory> | ||
#include <stack> | ||
#include <string> | ||
#include <unordered_map> | ||
#include <vector> | ||
|
||
#include "envoy/common/exception.h" | ||
#include "envoy/stats/symbol_table.h" | ||
|
||
#include "common/common/assert.h" | ||
#include "common/common/utility.h" | ||
|
||
#include "absl/strings/str_join.h" | ||
#include "absl/strings/str_split.h" | ||
|
||
namespace Envoy { | ||
namespace Stats { | ||
|
||
using Symbol = uint32_t; | ||
using SymbolVec = std::vector<Symbol>; | ||
|
||
/** | ||
* Underlying SymbolTableImpl implementation which manages per-symbol reference counting. | ||
* | ||
* The underlying Symbol / SymbolVec data structures are private to the impl. One side | ||
* effect of the non-monotonically-increasing symbol counter is that if a string is encoded, the | ||
* resulting stat is destroyed, and then that same string is re-encoded, it may or may not encode to | ||
* the same underlying symbol. | ||
*/ | ||
class SymbolTableImpl : public SymbolTable { | ||
public: | ||
StatNamePtr encode(absl::string_view name) override; | ||
|
||
// For testing purposes only. | ||
size_t size() const override { | ||
ASSERT(encode_map_.size() == decode_map_.size()); | ||
return encode_map_.size(); | ||
} | ||
|
||
private: | ||
friend class StatNameImpl; | ||
friend class StatNameTest; | ||
|
||
struct SharedSymbol { | ||
Symbol symbol_; | ||
uint32_t ref_count_; | ||
}; | ||
|
||
/** | ||
* Decodes a vector of symbols back into its period-delimited stat name. | ||
* If decoding fails on any part of the symbol_vec, we release_assert and crash hard, since this | ||
* should never happen, and we don't want to continue running with a corrupt stats set. | ||
* | ||
* @param symbol_vec the vector of symbols to decode. | ||
* @return std::string the retrieved stat name. | ||
*/ | ||
std::string decode(const SymbolVec& symbol_vec) const; | ||
|
||
/** | ||
* Since SymbolTableImpl does manual reference counting, a client of SymbolTable (such as | ||
* StatName) must manually call free(symbol_vec) when it is freeing the stat it represents. This | ||
* way, the symbol table will grow and shrink dynamically, instead of being write-only. | ||
* | ||
* @param symbol_vec the vector of symbols to be freed. | ||
*/ | ||
void free(const SymbolVec& symbol_vec); | ||
|
||
/** | ||
* Convenience function for encode(), symbolizing one string segment at a time. | ||
* | ||
* @param sv the individual string to be encoded as a symbol. | ||
* @return Symbol the encoded string. | ||
*/ | ||
Symbol toSymbol(absl::string_view sv); | ||
|
||
/** | ||
* Convenience function for decode(), decoding one symbol at a time. | ||
* | ||
* @param symbol the individual symbol to be decoded. | ||
* @return absl::string_view the decoded string. | ||
*/ | ||
absl::string_view fromSymbol(Symbol symbol) const; | ||
|
||
// Stages a new symbol for use. To be called after a successful insertion. | ||
void newSymbol() { | ||
if (pool_.empty()) { | ||
next_symbol_ = ++monotonic_counter_; | ||
} else { | ||
next_symbol_ = pool_.top(); | ||
pool_.pop(); | ||
} | ||
// This should catch integer overflow for the new symbol. | ||
ASSERT(monotonic_counter_ != 0); | ||
} | ||
|
||
Symbol monotonicCounter() { return monotonic_counter_; } | ||
|
||
// Stores the symbol to be used at next insertion. This should exist ahead of insertion time so | ||
// that if insertion succeeds, the value written is the correct one. | ||
Symbol next_symbol_ = 0; | ||
|
||
// If the free pool is exhausted, we monotonically increase this counter. | ||
Symbol monotonic_counter_ = 0; | ||
|
||
// Bimap implementation. | ||
// The encode map stores both the symbol and the ref count of that symbol. | ||
// Using absl::string_view lets us only store the complete string once, in the decode map. | ||
std::unordered_map<absl::string_view, SharedSymbol, StringViewHash> encode_map_; | ||
std::unordered_map<Symbol, std::string> decode_map_; | ||
|
||
// Free pool of symbols for re-use. | ||
// TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols | ||
// using an Envoy::IntervalSet. | ||
std::stack<Symbol> pool_; | ||
}; | ||
|
||
/** | ||
* Implements RAII for Symbols, since the StatName destructor does the work of freeing its component | ||
* symbols. | ||
*/ | ||
class StatNameImpl : public StatName { | ||
public: | ||
StatNameImpl(SymbolVec symbol_vec, SymbolTableImpl& symbol_table) | ||
: symbol_vec_(symbol_vec), symbol_table_(symbol_table) {} | ||
~StatNameImpl() override { symbol_table_.free(symbol_vec_); } | ||
std::string toString() const override { return symbol_table_.decode(symbol_vec_); } | ||
|
||
private: | ||
friend class StatNameTest; | ||
SymbolVec symbolVec() { return symbol_vec_; } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this method for? |
||
SymbolVec symbol_vec_; | ||
SymbolTableImpl& symbol_table_; | ||
}; | ||
|
||
} // namespace Stats | ||
} // namespace Envoy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit:
@param