stats: keep a set of ref-counted parent histograms in ThreadLocalStore so that two with the same name map to the same histogram object. (#12275)

Commit Message: Creates a storage model for thread-local histograms, enabling continuity of data across scope reloads. Previously, whenever a Scope was re-created, counters, gauges, and text-readouts of the same names would retain their previous values; histograms, however, were created fresh on every scope reload, so stats dumps would include duplicate histograms with the same name.

This change adds an analogous name-keyed set of histograms, held in ThreadLocalStore, so that a single histogram object represents all generations of its data. This is somewhat more complex than for the other stat types because histograms have thread-local buffers, which were previously owned by TlsScope and needed to be made independent of any scope. To support this, the per-thread cache gains a new histogram map keyed by histogram id; a rough sketch of the resulting structure follows.
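
For illustration only, here is a rough, self-contained sketch of the storage shape described above. The names (HistogramStore, PerThreadCache, findOrCreate) are hypothetical; the real change uses Envoy's RefcountPtr, StatName, TLS cache, and per-thread histogram buffers rather than the standard-library types shown here.

// Hypothetical sketch, not Envoy code: a process-wide, name-keyed set of
// parent histograms plus a per-worker cache keyed by histogram id.
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct ParentHistogram {
  std::string name;
  uint64_t id; // key used by each worker thread's cache, independent of any scope
};

// Two scopes asking for the same histogram name receive the same object, so the
// accumulated data survives scope destruction and re-creation.
class HistogramStore {
public:
  std::shared_ptr<ParentHistogram> findOrCreate(const std::string& name) {
    std::lock_guard<std::mutex> lock(mutex_);
    std::weak_ptr<ParentHistogram>& slot = histograms_[name];
    if (std::shared_ptr<ParentHistogram> existing = slot.lock()) {
      return existing; // same name -> same histogram object
    }
    auto created = std::make_shared<ParentHistogram>(ParentHistogram{name, next_id_++});
    slot = created;
    return created;
  }

private:
  std::mutex mutex_;
  uint64_t next_id_{0};
  std::map<std::string, std::weak_ptr<ParentHistogram>> histograms_;
};

// Per-thread cache keyed by histogram id rather than owned by a scope. In the
// real change it holds per-thread histogram buffers; simplified here to a
// shared pointer to the parent.
struct PerThreadCache {
  std::map<uint64_t, std::shared_ptr<ParentHistogram>> tls_histograms;
};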

This should help unblock #12241.

Additional Description:
Risk Level: high (not clear whether this is enough testing of histogram usage)
Testing: //test/...
Docs Changes: n/a
Release Notes: n/a
Fixes: #3098

Signed-off-by: Joshua Marantz <[email protected]>
jmarantz authored Aug 11, 2020
1 parent cce07c0 commit 04de1cf
Showing 8 changed files with 504 additions and 147 deletions.
19 changes: 0 additions & 19 deletions source/common/stats/allocator_impl.h
@@ -58,29 +58,10 @@ class AllocatorImpl : public Allocator {
friend class TextReadoutImpl;
friend class NotifyingAllocatorImpl;

struct HeapStatHash {
using is_transparent = void; // NOLINT(readability-identifier-naming)
size_t operator()(const Metric* a) const { return a->statName().hash(); }
size_t operator()(StatName a) const { return a.hash(); }
};

struct HeapStatCompare {
using is_transparent = void; // NOLINT(readability-identifier-naming)
bool operator()(const Metric* a, const Metric* b) const {
return a->statName() == b->statName();
}
bool operator()(const Metric* a, StatName b) const { return a->statName() == b; }
};

void removeCounterFromSetLockHeld(Counter* counter) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void removeGaugeFromSetLockHeld(Gauge* gauge) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void removeTextReadoutFromSetLockHeld(Counter* counter) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

// An unordered set of HeapStatData pointers which keys off the key()
// field in each object. This necessitates a custom comparator and hasher, which key off of the
// StatNamePtr's own StatNamePtrHash and StatNamePtrCompare operators.
template <class StatType>
using StatSet = absl::flat_hash_set<StatType*, HeapStatHash, HeapStatCompare>;
StatSet<Counter> counters_ ABSL_GUARDED_BY(mutex_);
StatSet<Gauge> gauges_ ABSL_GUARDED_BY(mutex_);
StatSet<TextReadout> text_readouts_ ABSL_GUARDED_BY(mutex_);
25 changes: 25 additions & 0 deletions source/common/stats/metric_impl.h
@@ -32,10 +32,35 @@ class MetricHelper {
void iterateTagStatNames(const Metric::TagStatNameIterFn& fn) const;
void clear(SymbolTable& symbol_table) { stat_names_.clear(symbol_table); }

// Hasher for metrics.
struct Hash {
using is_transparent = void; // NOLINT(readability-identifier-naming)
size_t operator()(const Metric* a) const { return a->statName().hash(); }
size_t operator()(StatName a) const { return a.hash(); }
};

// Comparator for metrics.
struct Compare {
using is_transparent = void; // NOLINT(readability-identifier-naming)
bool operator()(const Metric* a, const Metric* b) const {
return a->statName() == b->statName();
}
bool operator()(const Metric* a, StatName b) const { return a->statName() == b; }
};

private:
StatNameList stat_names_;
};

// An unordered set of stat pointers, keyed off Metric::statName().
// This necessitates a custom comparator and hasher, using the StatNamePtr's
// own StatNamePtrHash and StatNamePtrCompare operators.
//
// This is used by AllocatorImpl for counters, gauges, and text-readouts, and
// is also used by thread_local_store.h for histograms.
template <class StatType>
using StatSet = absl::flat_hash_set<StatType*, MetricHelper::Hash, MetricHelper::Compare>;

/**
* Partial implementation of the Metric interface on behalf of Counters, Gauges,
* and Histograms. It leaves symbolTable() unimplemented so that implementations
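
The StatSet alias added above depends on Abseil's heterogeneous ("transparent") lookup, so a stat can be found by its StatName without first constructing a Metric. Below is a simplified, self-contained analogue, with plain std::string names standing in for Envoy's StatName; the Stat/StatHash/StatCompare names are illustrative only.

// Simplified analogue of the StatSet pattern, not the Envoy types: a
// flat_hash_set of pointers with transparent hash/compare so find() works by name.
#include <functional>
#include <string>
#include <string_view>

#include "absl/container/flat_hash_set.h"

struct Stat {
  std::string name;
};

struct StatHash {
  using is_transparent = void; // enables heterogeneous lookup
  size_t operator()(const Stat* s) const { return std::hash<std::string_view>{}(s->name); }
  size_t operator()(std::string_view name) const { return std::hash<std::string_view>{}(name); }
};

struct StatCompare {
  using is_transparent = void; // enables heterogeneous lookup
  bool operator()(const Stat* a, const Stat* b) const { return a->name == b->name; }
  bool operator()(const Stat* a, std::string_view b) const { return a->name == b; }
};

using StatSet = absl::flat_hash_set<Stat*, StatHash, StatCompare>;

int main() {
  Stat rq_time{"http.downstream_rq_time"};
  StatSet set;
  set.insert(&rq_time);
  // Look up by name without allocating a temporary Stat for the key.
  const bool found = set.find(std::string_view("http.downstream_rq_time")) != set.end();
  return found ? 0 : 1;
}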
188 changes: 147 additions & 41 deletions source/common/stats/thread_local_store.cc
@@ -63,12 +63,22 @@ void ThreadLocalStoreImpl::setStatsMatcher(StatsMatcherPtr&& stats_matcher) {
// in the default_scope. There should be no requests, so there will
// be no copies in TLS caches.
Thread::LockGuard lock(lock_);
const uint32_t first_histogram_index = deleted_histograms_.size();
for (ScopeImpl* scope : scopes_) {
removeRejectedStats(scope->central_cache_->counters_, deleted_counters_);
removeRejectedStats(scope->central_cache_->gauges_, deleted_gauges_);
removeRejectedStats(scope->central_cache_->histograms_, deleted_histograms_);
removeRejectedStats(scope->central_cache_->text_readouts_, deleted_text_readouts_);
}

// Remove any newly rejected histograms from histogram_set_.
{
Thread::LockGuard hist_lock(hist_mutex_);
for (uint32_t i = first_histogram_index; i < deleted_histograms_.size(); ++i) {
uint32_t erased = histogram_set_.erase(deleted_histograms_[i].get());
ASSERT(erased == 1);
}
}
}

template <class StatMapClass, class StatListClass>
@@ -160,16 +170,11 @@ std::vector<TextReadoutSharedPtr> ThreadLocalStoreImpl::textReadouts() const {

std::vector<ParentHistogramSharedPtr> ThreadLocalStoreImpl::histograms() const {
std::vector<ParentHistogramSharedPtr> ret;
Thread::LockGuard lock(lock_);
// TODO(ramaraochavali): As histograms don't share storage, there is a chance of duplicate names
// here. We need to create global storage for histograms similar to how we have a central storage
// in shared memory for counters/gauges. In the interim, no de-dup is done here. This may result
// in histograms with duplicate names, but until shared storage is implemented it's ultimately
// less confusing for users who have such configs.
for (ScopeImpl* scope : scopes_) {
for (const auto& name_histogram_pair : scope->central_cache_->histograms_) {
const ParentHistogramSharedPtr& parent_hist = name_histogram_pair.second;
ret.push_back(parent_hist);
Thread::LockGuard lock(hist_mutex_);
{
ret.reserve(histogram_set_.size());
for (const auto& histogram_ptr : histogram_set_) {
ret.emplace_back(histogram_ptr);
}
}

@@ -189,6 +194,11 @@ void ThreadLocalStoreImpl::initializeThreading(Event::Dispatcher& main_thread_di
void ThreadLocalStoreImpl::shutdownThreading() {
// This will block both future cache fills as well as cache flushes.
shutting_down_ = true;
Thread::LockGuard lock(hist_mutex_);
for (ParentHistogramImpl* histogram : histogram_set_) {
histogram->setShuttingDown(true);
}
histogram_set_.clear();
}

void ThreadLocalStoreImpl::mergeHistograms(PostMergeCb merge_complete_cb) {
@@ -197,12 +207,9 @@ void ThreadLocalStoreImpl::mergeHistograms(PostMergeCb merge_complete_cb) {
merge_in_progress_ = true;
tls_->runOnAllThreads(
[this]() -> void {
for (const auto& scope : tls_->getTyped<TlsCache>().scope_cache_) {
const TlsCacheEntry& tls_cache_entry = scope.second;
for (const auto& name_histogram_pair : tls_cache_entry.histograms_) {
const TlsHistogramSharedPtr& tls_hist = name_histogram_pair.second;
tls_hist->beginMerge();
}
for (const auto& id_hist : tls_->getTyped<TlsCache>().tls_histogram_cache_) {
const TlsHistogramSharedPtr& tls_hist = id_hist.second;
tls_hist->beginMerge();
}
},
[this, merge_complete_cb]() -> void { mergeInternal(merge_complete_cb); });
@@ -257,19 +264,38 @@ void ThreadLocalStoreImpl::releaseScopeCrossThread(ScopeImpl* scope) {
if (!shutting_down_ && main_thread_dispatcher_) {
const uint64_t scope_id = scope->scope_id_;
lock.release();

// TODO(jmarantz): consider batching all the scope IDs that should be
// cleared from TLS caches to reduce bursts of runOnAllThreads on a large
// config update. See the pattern below used for histograms.
main_thread_dispatcher_->post([this, central_cache, scope_id]() {
sync_.syncPoint(MainDispatcherCleanupSync);
clearScopeFromCaches(scope_id, central_cache);
});
}
}

void ThreadLocalStoreImpl::releaseHistogramCrossThread(uint64_t histogram_id) {
// This can happen from any thread. We post() back to the main thread which will initiate the
// cache flush operation.
if (!shutting_down_ && main_thread_dispatcher_) {
main_thread_dispatcher_->post(
[this, histogram_id]() { clearHistogramFromCaches(histogram_id); });
}
}

ThreadLocalStoreImpl::TlsCacheEntry&
ThreadLocalStoreImpl::TlsCache::insertScope(uint64_t scope_id) {
return scope_cache_[scope_id];
}

void ThreadLocalStoreImpl::TlsCache::eraseScope(uint64_t scope_id) { scope_cache_.erase(scope_id); }
void ThreadLocalStoreImpl::TlsCache::eraseHistogram(uint64_t histogram_id) {
// This is called for every histogram in every thread, even though the
// histogram may not have been cached in each thread yet. So we don't
// want to check whether the erase() call erased anything.
tls_histogram_cache_.erase(histogram_id);
}

void ThreadLocalStoreImpl::clearScopeFromCaches(uint64_t scope_id,
CentralCacheEntrySharedPtr central_cache) {
@@ -283,6 +309,22 @@ void ThreadLocalStoreImpl::clearScopeFromCaches(uint64_t scope_id,
}
}

void ThreadLocalStoreImpl::clearHistogramFromCaches(uint64_t histogram_id) {
// If we are shutting down we no longer perform cache flushes as workers may be shutting down
// at the same time.
if (!shutting_down_) {
// Perform a cache flush on all threads.
//
// TODO(jmarantz): If this cross-thread posting proves to be a performance
// bottleneck,
// https://gist.github.com/jmarantz/838cb6de7e74c0970ea6b63eded0139a
// contains a patch that will implement batching together to clear multiple
// histograms.
tls_->runOnAllThreads(
[this, histogram_id]() { tls_->getTyped<TlsCache>().eraseHistogram(histogram_id); });
}
}

ThreadLocalStoreImpl::ScopeImpl::ScopeImpl(ThreadLocalStoreImpl& parent, const std::string& prefix)
: scope_id_(parent.next_scope_id_++), parent_(parent),
prefix_(Utility::sanitizeStatsName(prefix), parent.alloc_.symbolTable()),
@@ -566,9 +608,23 @@ Histogram& ThreadLocalStoreImpl::ScopeImpl::histogramFromStatNameWithTags(
[&buckets, this](absl::string_view stat_name) {
buckets = &parent_.histogram_settings_->buckets(stat_name);
});
RefcountPtr<ParentHistogramImpl> stat(new ParentHistogramImpl(
final_stat_name, unit, parent_, *this, tag_helper.tagExtractedName(),
tag_helper.statNameTags(), *buckets));

RefcountPtr<ParentHistogramImpl> stat;
{
Thread::LockGuard lock(parent_.hist_mutex_);
auto iter = parent_.histogram_set_.find(final_stat_name);
if (iter != parent_.histogram_set_.end()) {
stat = RefcountPtr<ParentHistogramImpl>(*iter);
} else {
stat = new ParentHistogramImpl(final_stat_name, unit, parent_,
tag_helper.tagExtractedName(), tag_helper.statNameTags(),
*buckets, parent_.next_histogram_id_++);
if (!parent_.shutting_down_) {
parent_.histogram_set_.insert(stat.get());
}
}
}

central_ref = &central_cache_->histograms_[stat->statName()];
*central_ref = stat;
}
@@ -639,34 +695,34 @@ TextReadoutOptConstRef ThreadLocalStoreImpl::ScopeImpl::findTextReadout(StatName
return findStatLockHeld<TextReadout>(name, central_cache_->text_readouts_);
}

Histogram& ThreadLocalStoreImpl::ScopeImpl::tlsHistogram(StatName name,
ParentHistogramImpl& parent) {
Histogram& ThreadLocalStoreImpl::tlsHistogram(ParentHistogramImpl& parent, uint64_t id) {
// tlsHistogram() is generally not called for a histogram that is rejected by
// the matcher, so no further rejection-checking is needed at this level.
// TlsHistogram inherits its reject/accept status from ParentHistogram.

// See comments in counterFromStatName() which explains the logic here.

StatNameHashMap<TlsHistogramSharedPtr>* tls_cache = nullptr;
if (!parent_.shutting_down_ && parent_.tls_) {
tls_cache = &parent_.tls_->getTyped<TlsCache>().scope_cache_[this->scope_id_].histograms_;
auto iter = tls_cache->find(name);
if (iter != tls_cache->end()) {
return *iter->second;
TlsHistogramSharedPtr* tls_histogram = nullptr;
if (!shutting_down_ && tls_ != nullptr) {
TlsCache& tls_cache = tls_->getTyped<TlsCache>();
tls_histogram = &tls_cache.tls_histogram_cache_[id];
if (*tls_histogram != nullptr) {
return **tls_histogram;
}
}

StatNameTagHelper tag_helper(parent_, name, absl::nullopt);
StatNameTagHelper tag_helper(*this, parent.statName(), absl::nullopt);

TlsHistogramSharedPtr hist_tls_ptr(
new ThreadLocalHistogramImpl(name, parent.unit(), tag_helper.tagExtractedName(),
new ThreadLocalHistogramImpl(parent.statName(), parent.unit(), tag_helper.tagExtractedName(),
tag_helper.statNameTags(), symbolTable()));

parent.addTlsHistogram(hist_tls_ptr);

if (tls_cache) {
tls_cache->insert(std::make_pair(hist_tls_ptr->statName(), hist_tls_ptr));
if (tls_histogram != nullptr) {
*tls_histogram = hist_tls_ptr;
}

return *hist_tls_ptr;
}

@@ -682,7 +738,7 @@ ThreadLocalHistogramImpl::ThreadLocalHistogramImpl(StatName name, Histogram::Uni
}

ThreadLocalHistogramImpl::~ThreadLocalHistogramImpl() {
MetricImpl::clear(symbolTable());
MetricImpl::clear(symbol_table_);
hist_free(histograms_[0]);
hist_free(histograms_[1]);
}
@@ -699,28 +755,78 @@ void ThreadLocalHistogramImpl::merge(histogram_t* target) {
hist_clear(*other_histogram);
}

ParentHistogramImpl::ParentHistogramImpl(StatName name, Histogram::Unit unit, Store& parent,
TlsScope& tls_scope, StatName tag_extracted_name,
ParentHistogramImpl::ParentHistogramImpl(StatName name, Histogram::Unit unit,
ThreadLocalStoreImpl& thread_local_store,
StatName tag_extracted_name,
const StatNameTagVector& stat_name_tags,
ConstSupportedBuckets& supported_buckets)
: MetricImpl(name, tag_extracted_name, stat_name_tags, parent.symbolTable()), unit_(unit),
parent_(parent), tls_scope_(tls_scope), interval_histogram_(hist_alloc()),
ConstSupportedBuckets& supported_buckets, uint64_t id)
: MetricImpl(name, tag_extracted_name, stat_name_tags, thread_local_store.symbolTable()),
unit_(unit), thread_local_store_(thread_local_store), interval_histogram_(hist_alloc()),
cumulative_histogram_(hist_alloc()),
interval_statistics_(interval_histogram_, supported_buckets),
cumulative_statistics_(cumulative_histogram_, supported_buckets), merged_(false) {}
cumulative_statistics_(cumulative_histogram_, supported_buckets), merged_(false), id_(id) {}

ParentHistogramImpl::~ParentHistogramImpl() {
MetricImpl::clear(parent_.symbolTable());
thread_local_store_.releaseHistogramCrossThread(id_);
ASSERT(ref_count_ == 0);
MetricImpl::clear(thread_local_store_.symbolTable());
hist_free(interval_histogram_);
hist_free(cumulative_histogram_);
}

void ParentHistogramImpl::incRefCount() { ++ref_count_; }

bool ParentHistogramImpl::decRefCount() {
bool ret;
if (shutting_down_) {
// When shutting down, we cannot reference thread_local_store_, as
// histograms can outlive the store. So we decrement the ref-count without
// the stores' lock. We will not be removing the object from the store's
// histogram map in this scenario, as the set was cleared during shutdown,
// and will not be repopulated in histogramFromStatNameWithTags after
// initiating shutdown.
ret = --ref_count_ == 0;
} else {
// We delegate to the Store object to decrement the ref-count so it can hold
// the lock to the map. If we don't hold a lock, another thread may
// simultaneously try to allocate the same name'd histogram after we
// decrement it, and we'll wind up with a dtor/update race. To avoid this we
// must hold the lock until the histogram is removed from the map.
//
// See also StatsSharedImpl::decRefCount() in allocator_impl.cc, which has
// the same issue.
ret = thread_local_store_.decHistogramRefCount(*this, ref_count_);
}
return ret;
}

bool ThreadLocalStoreImpl::decHistogramRefCount(ParentHistogramImpl& hist,
std::atomic<uint32_t>& ref_count) {
// We must hold the store's histogram lock when decrementing the
// refcount. Otherwise another thread may simultaneously try to allocate the
// same name'd stat after we decrement it, and we'll wind up with a
// dtor/update race. To avoid this we must hold the lock until the stat is
// removed from the map.
Thread::LockGuard lock(hist_mutex_);
ASSERT(ref_count >= 1);
if (--ref_count == 0) {
if (!shutting_down_) {
const size_t count = histogram_set_.erase(hist.statName());
ASSERT(shutting_down_ || count == 1);
}
return true;
}
return false;
}

SymbolTable& ParentHistogramImpl::symbolTable() { return thread_local_store_.symbolTable(); }

Histogram::Unit ParentHistogramImpl::unit() const { return unit_; }

void ParentHistogramImpl::recordValue(uint64_t value) {
Histogram& tls_histogram = tls_scope_.tlsHistogram(statName(), *this);
Histogram& tls_histogram = thread_local_store_.tlsHistogram(*this, id_);
tls_histogram.recordValue(value);
parent_.deliverHistogramToSinks(*this, value);
thread_local_store_.deliverHistogramToSinks(*this, value);
}

bool ParentHistogramImpl::used() const {
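
The ParentHistogramImpl::decRefCount() / ThreadLocalStoreImpl::decHistogramRefCount() pair above follows a general pattern: the last reference to an object stored in a shared, name-keyed map must be dropped while holding the map's lock, otherwise another thread could look up the name and receive a pointer to an object that is mid-destruction. A minimal, self-contained sketch of that pattern, with hypothetical SharedStore/Entry names and plain standard-library types in place of Envoy's, is below.

// Hypothetical sketch, not Envoy code: drop the last reference under the same
// lock that guards the name-keyed map, so lookup and destruction cannot race.
#include <atomic>
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Entry {
  explicit Entry(std::string n) : name(std::move(n)) {}
  std::string name;
  std::atomic<uint32_t> ref_count{0};
};

class SharedStore {
public:
  // Find-or-create and the ref-count increment happen under the map lock, so a
  // concurrent release() of the same entry cannot slip in between.
  Entry* get(const std::string& name) {
    std::lock_guard<std::mutex> lock(mutex_);
    std::unique_ptr<Entry>& slot = entries_[name];
    if (slot == nullptr) {
      slot = std::make_unique<Entry>(name);
    }
    ++slot->ref_count;
    return slot.get();
  }

  // The decrement is also done under the map lock: if the count reaches zero,
  // the entry is erased before the lock is released, so no other thread can be
  // handed a pointer to an entry that is about to be destroyed.
  void release(Entry* entry) {
    std::lock_guard<std::mutex> lock(mutex_);
    if (--entry->ref_count == 0) {
      entries_.erase(entry->name);
    }
  }

private:
  std::mutex mutex_;
  std::map<std::string, std::unique_ptr<Entry>> entries_;
};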
