Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AsyncFileCached: switch from a random to an LRU cache eviction policy #1506

Merged
merged 7 commits into from
May 17, 2019
17 changes: 13 additions & 4 deletions fdbrpc/AsyncFileCached.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,15 @@ EvictablePage::~EvictablePage() {
else
aligned_free(data);
}
if (index > -1) {
pageCache->pages[index] = pageCache->pages.back();
pageCache->pages[index]->index = index;
pageCache->pages.pop_back();
if (EvictablePageCache::RANDOM == pageCache->cacheEvictionType) {
if (index > -1) {
pageCache->pages[index] = pageCache->pages.back();
pageCache->pages[index]->index = index;
pageCache->pages.pop_back();
}
} else {
// remove it from the LRU
pageCache->lruPages.erase(EvictablePageCache::List::s_iterator_to(*this));
}
}

Expand Down Expand Up @@ -97,6 +102,8 @@ Future<Void> AsyncFileCached::read_write_impl( AsyncFileCached* self, void* data
if ( p == self->pages.end() ) {
AFCPage* page = new AFCPage( self, pageOffset );
p = self->pages.insert( std::make_pair(pageOffset, page) ).first;
} else {
self->pageCache->updateHit(p->second);
}

int bytesInPage = std::min(self->pageCache->pageSize - offsetInPage, remaining);
Expand Down Expand Up @@ -133,6 +140,8 @@ Future<Void> AsyncFileCached::readZeroCopy( void** data, int* length, int64_t of
if ( p == pages.end() ) {
AFCPage* page = new AFCPage( this, offset );
p = pages.insert( std::make_pair(offset, page) ).first;
} else {
p->second->pageCache->updateHit(p->second);
}

*data = p->second->data;
Expand Down
78 changes: 69 additions & 9 deletions fdbrpc/AsyncFileCached.actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,21 @@
#elif !defined(FLOW_ASYNCFILECACHED_ACTOR_H)
#define FLOW_ASYNCFILECACHED_ACTOR_H

#include <boost/intrusive/list.hpp>

#include "flow/flow.h"
#include "fdbrpc/IAsyncFile.h"
#include "flow/Knobs.h"
#include "flow/TDMetric.actor.h"
#include "flow/network.h"
#include "flow/actorcompiler.h" // This must be the last #include.

namespace bi = boost::intrusive;
struct EvictablePage {
void* data;
int index;
class Reference<struct EvictablePageCache> pageCache;
bi::list_member_hook<> member_hook;

virtual bool evict() = 0; // true if page was evicted, false if it isn't immediately evictable (but will be evicted regardless if possible)

Expand All @@ -46,30 +50,86 @@ struct EvictablePage {
};

struct EvictablePageCache : ReferenceCounted<EvictablePageCache> {
EvictablePageCache() : pageSize(0), maxPages(0) {}
explicit EvictablePageCache(int pageSize, int64_t maxSize) : pageSize(pageSize), maxPages(maxSize / pageSize) {}
using List = bi::list< EvictablePage, bi::member_hook< EvictablePage, bi::list_member_hook<>, &EvictablePage::member_hook>>;
enum CacheEvictionType { RANDOM = 0, LRU = 1 };

// Maps a cache eviction policy name to its CacheEvictionType value.
// Matching is case-insensitive: "random" -> RANDOM, "lru" -> LRU.
// Throws invalid_cache_eviction_policy for any other string.
static CacheEvictionType evictionPolicyStringToEnum(const std::string &policy) {
	std::string cep = policy;
	// Route every character through unsigned char: passing ::tolower a negative value
	// (any byte >= 0x80 on platforms where char is signed) is undefined behavior.
	std::transform(cep.begin(), cep.end(), cep.begin(),
	               [](unsigned char c) { return static_cast<char>(::tolower(c)); });

	if (cep == "random")
		return RANDOM;
	if (cep == "lru")
		return LRU;
	throw invalid_cache_eviction_policy();
}

// Default construction: zero page size, zero page capacity, RANDOM eviction.
EvictablePageCache()
  : pageSize(0),
    maxPages(0),
    cacheEvictionType(RANDOM) {}

// Builds a cache of pageSize-byte pages whose capacity is maxSize / pageSize pages.
// The eviction policy is taken from the CACHE_EVICTION_POLICY flow knob; the knob string is
// converted with evictionPolicyStringToEnum, which throws on an unrecognized policy name.
// The hit / miss / eviction metric handles are then initialized under their
// "EvictablePageCache.*" names.
explicit EvictablePageCache(int pageSize, int64_t maxSize)
  : pageSize(pageSize),
    maxPages(maxSize / pageSize),
    cacheEvictionType(evictionPolicyStringToEnum(FLOW_KNOBS->CACHE_EVICTION_POLICY))
{
	cacheHits.init(LiteralStringRef("EvictablePageCache.CacheHits"));
	cacheMisses.init(LiteralStringRef("EvictablePageCache.CacheMisses"));
	cacheEvictions.init(LiteralStringRef("EvictablePageCache.CacheEvictions"));
}

// Prepares a newly created page for use: calls try_evict() twice to make room, allocates the
// page's data buffer, records the page in the bookkeeping structure of the active eviction
// policy, and counts a cache miss.
void allocate(EvictablePage* page) {
try_evict();
try_evict();
// Pages of exactly 4096 bytes come from FastAllocator<4096>; any other size goes through aligned_alloc(4096, pageSize).
page->data = pageSize == 4096 ? FastAllocator<4096>::allocate() : aligned_alloc(4096,pageSize);
// NOTE(review): the next two lines appear to be the pre-change (removed) side of this diff hunk,
// superseded by the RANDOM branch below — confirm against the merged file.
page->index = pages.size();
pages.push_back(page);
if (RANDOM == cacheEvictionType) {
// RANDOM policy: the page remembers its slot in `pages` (page->index) and is appended to the vector.
page->index = pages.size();
pages.push_back(page);
} else {
lruPages.push_back(*page); // new page is considered the most recently used (placed at LRU tail)
}
// allocate() is only reached for a page that is being newly set up, so it is counted as a miss.
++cacheMisses;
}

// Records a cache hit on `page`. Under any policy other than RANDOM the page is unlinked from
// its current position in lruPages and re-linked at the tail, which is the most-recently-used
// end of the list. The hit counter is bumped under every policy.
void updateHit(EvictablePage* page) {
	const bool tracksRecency = (cacheEvictionType != RANDOM);
	if (tracksRecency) {
		EvictablePage& hit = *page;
		lruPages.erase(List::s_iterator_to(hit));
		lruPages.push_back(hit);
	}
	++cacheHits;
}

// Tries to evict one page when the cache holds at least maxPages pages, using the configured
// eviction policy. At most FLOW_KNOBS->MAX_EVICT_ATTEMPTS candidates are tried; if none of them
// reports a successful eviction the function returns anyway and the cache is allowed to grow
// past its limit. Each successful eviction increments cacheEvictions.
void try_evict() {
// NOTE(review): the next five lines appear to be the pre-change (removed) side of this diff hunk,
// superseded by the RANDOM branch below — confirm against the merged file.
if (pages.size() >= (uint64_t)maxPages && !pages.empty()) {
for (int i = 0; i < FLOW_KNOBS->MAX_EVICT_ATTEMPTS; i++) { // If we don't manage to evict anything, just go ahead and exceed the cache limit
int toEvict = g_random->randomInt(0, pages.size());
if (pages[toEvict]->evict())
break;
if (RANDOM == cacheEvictionType) {
// RANDOM policy: each attempt picks a slot of `pages` via g_random and asks that page to evict itself.
if (pages.size() >= (uint64_t)maxPages && !pages.empty()) {
for (int i = 0; i < FLOW_KNOBS->MAX_EVICT_ATTEMPTS; i++) { // If we don't manage to evict anything, just go ahead and exceed the cache limit
int toEvict = g_random->randomInt(0, pages.size());
// evict() returns true only when the page was actually evicted; stop at the first success.
if (pages[toEvict]->evict()) {
++cacheEvictions;
break;
}
}
}
} else {
// For now, LRU is the only other CACHE_EVICTION option
if (lruPages.size() >= (uint64_t)maxPages) {
// `i` counts attempts so the walk stops after MAX_EVICT_ATTEMPTS candidates even if the list is longer.
int i = 0;
// try the least recently used pages first (starting at head of the LRU list)
for (List::iterator it = lruPages.begin();
it != lruPages.end() && i < FLOW_KNOBS->MAX_EVICT_ATTEMPTS;
++it, ++i) { // If we don't manage to evict anything, just go ahead and exceed the cache limit
// NOTE(review): `it` is advanced after an evict() that returned false; this presumes such a call
// leaves the page linked in lruPages — confirm against the evict() implementations.
if (it->evict()) {
++cacheEvictions;
break;
}
}
}
}
}

std::vector<EvictablePage*> pages;
List lruPages;
int pageSize;
int64_t maxPages;
Int64MetricHandle cacheHits;
Int64MetricHandle cacheMisses;
Int64MetricHandle cacheEvictions;
const CacheEvictionType cacheEvictionType;
};

struct OpenFileInfo : NonCopyable {
Expand Down
4 changes: 4 additions & 0 deletions fdbserver/fdbserver.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include "fdbrpc/TLSConnection.h"
#include "fdbrpc/Net2FileSystem.h"
#include "fdbrpc/Platform.h"
#include "fdbrpc/AsyncFileCached.actor.h"
#include "fdbserver/CoroFlow.h"
#include "flow/SignalSafeUnwind.h"
#if defined(CMAKE_BUILD) || !defined(WIN32)
Expand Down Expand Up @@ -1426,6 +1427,9 @@ int main(int argc, char* argv[]) {
}
if (!serverKnobs->setKnob("server_mem_limit", std::to_string(memLimit))) ASSERT(false);

// evictionPolicyStringToEnum will throw an exception if the string is not recognized as a valid
// cache eviction policy; the return value is discarded because this call only validates the knob.
EvictablePageCache::evictionPolicyStringToEnum(flowKnobs->CACHE_EVICTION_POLICY);

if (role == SkipListTest) {
skipListTest();
flushAndExit(FDB_EXIT_SUCCESS);
Expand Down
1 change: 1 addition & 0 deletions flow/Knobs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( BUGGIFY_SIM_PAGE_CACHE_4K, 1e6 );
init( BUGGIFY_SIM_PAGE_CACHE_64K, 1e6 );
init( MAX_EVICT_ATTEMPTS, 100 ); if( randomize && BUGGIFY ) MAX_EVICT_ATTEMPTS = 2;
init( CACHE_EVICTION_POLICY, "random" );
init( PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION, 0.1 ); if( randomize && BUGGIFY ) PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION = 0.0; else if( randomize && BUGGIFY ) PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION = 1.0;

//AsyncFileKAIO
Expand Down
1 change: 1 addition & 0 deletions flow/Knobs.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class FlowKnobs : public Knobs {
int64_t SIM_PAGE_CACHE_64K;
int64_t BUGGIFY_SIM_PAGE_CACHE_4K;
int64_t BUGGIFY_SIM_PAGE_CACHE_64K;
std::string CACHE_EVICTION_POLICY; // for now, "random", "lru", are supported
int MAX_EVICT_ATTEMPTS;
double PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION;
double TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY;
Expand Down
3 changes: 3 additions & 0 deletions flow/SystemMonitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *sta
.detail("CachePageReadsMerged", netData.countFileCachePageReadsMerged - statState->networkState.countFileCachePageReadsMerged)
.detail("CacheWrites", netData.countFileCacheWrites - statState->networkState.countFileCacheWrites)
.detail("CacheReads", netData.countFileCacheReads - statState->networkState.countFileCacheReads)
.detail("CacheHits", netData.countFilePageCacheHits - statState->networkState.countFilePageCacheHits)
.detail("CacheMisses", netData.countFilePageCacheMisses - statState->networkState.countFilePageCacheMisses)
.detail("CacheEvictions", netData.countFilePageCacheEvictions - statState->networkState.countFilePageCacheEvictions)
.detail("ZoneID", machineState.zoneId)
.detail("MachineID", machineState.machineId)
.detail("AIOSubmitCount", netData.countAIOSubmit - statState->networkState.countAIOSubmit)
Expand Down
6 changes: 6 additions & 0 deletions flow/SystemMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ struct NetworkData {
int64_t countFileCachePageReadsMerged;
int64_t countFileCacheFinds;
int64_t countFileCacheReadBytes;
int64_t countFilePageCacheHits;
int64_t countFilePageCacheMisses;
int64_t countFilePageCacheEvictions;
int64_t countConnEstablished;
int64_t countConnClosedWithError;
int64_t countConnClosedWithoutError;
Expand Down Expand Up @@ -121,6 +124,9 @@ struct NetworkData {
countFileCachePageReadsMerged = getValue(LiteralStringRef("AsyncFile.CountCachePageReadsMerged"));
countFileCacheFinds = getValue(LiteralStringRef("AsyncFile.CountCacheFinds"));
countFileCacheReadBytes = getValue(LiteralStringRef("AsyncFile.CountCacheReadBytes"));
countFilePageCacheHits = getValue(LiteralStringRef("EvictablePageCache.CacheHits"));
countFilePageCacheMisses = getValue(LiteralStringRef("EvictablePageCache.CacheMisses"));
countFilePageCacheEvictions = getValue(LiteralStringRef("EvictablePageCache.CacheEvictions"));
}
};

Expand Down
1 change: 1 addition & 0 deletions flow/error_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ ERROR( transaction_invalid_version, 2020, "Transaction does not have a valid com
ERROR( no_commit_version, 2021, "Transaction is read-only and therefore does not have a commit version" )
ERROR( environment_variable_network_option_failed, 2022, "Environment variable network option could not be set" )
ERROR( transaction_read_only, 2023, "Attempted to commit a transaction specified as read-only" )
ERROR( invalid_cache_eviction_policy, 2024, "Invalid cache eviction policy, only random and lru are supported" )

ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" )
ERROR( transaction_too_large, 2101, "Transaction exceeds byte limit" )
Expand Down