Skip to content

Commit

Permalink
table_cache: Generate SST file cache key based on db instance, file n…
Browse files Browse the repository at this point in the history
…umber

Currently on POSIX, the cache key prefix for SST files is generated from the
inode number and its generation number, which is less unique on some
OSes/FSes than others. This change sets that key prefix based on a
passed in unique ID, which is composed of a table-cache-specific ID
plus the SST file number. This should resolve issues around cache collisions
between two different SST files that happened to have the same
generation number.

Backported from 19.2 release branch (crl-release-6.2.1):
#61
  • Loading branch information
itsbilal committed Oct 7, 2019
1 parent d2feb8c commit 1990cf4
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 318 deletions.
14 changes: 13 additions & 1 deletion db/table_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ void AppendVarint64(IterKey* key, uint64_t v) {

TableCache::TableCache(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, Cache* const cache)
: ioptions_(ioptions), env_options_(env_options), cache_(cache) {
: ioptions_(ioptions),
env_options_(env_options),
cache_(cache),
cache_id_(cache_id_alloc++) {
if (ioptions_.row_cache) {
// If the same cache is shared by multiple instances, we need to
// disambiguate its entries.
Expand All @@ -76,6 +79,8 @@ TableCache::TableCache(const ImmutableCFOptions& ioptions,
// Out-of-line destructor; it does nothing beyond destroying the members.
TableCache::~TableCache() = default;

// Static counter, starting at zero, that the TableCache constructor
// post-increments to obtain the value stored in cache_id_.
std::atomic<uint64_t> TableCache::cache_id_alloc(0);

// Looks up the value associated with `handle` in cache_ and returns it
// reinterpreted as a TableReader pointer.
TableReader* TableCache::GetTableReaderFromHandle(Cache::Handle* handle) {
  auto* const cached_value = cache_->Value(handle);
  return reinterpret_cast<TableReader*>(cached_value);
}
Expand Down Expand Up @@ -104,6 +109,13 @@ Status TableCache::GetTableReader(
if (!sequential_mode && ioptions_.advise_random_on_open) {
file->Hint(RandomAccessFile::RANDOM);
}

// Generate a unique ID for this file, consisting of <cache_id,file_number>.
std::string file_id;
PutVarint64(&file_id, cache_id_);
PutVarint64(&file_id, fd.GetNumber());
file->SetUniqueId(std::move(file_id));

StopWatch sw(ioptions_.env, ioptions_.statistics, TABLE_OPEN_IO_MICROS);
std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(
Expand Down
3 changes: 3 additions & 0 deletions db/table_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ class TableCache {
const EnvOptions& env_options_;
Cache* const cache_;
std::string row_cache_id_;
const uint64_t cache_id_;

static std::atomic<uint64_t> cache_id_alloc;
};

} // namespace rocksdb
293 changes: 146 additions & 147 deletions env/env_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -597,11 +597,6 @@ TEST_P(EnvPosixTestWithParam, DecreaseNumBgThreads) {
WaitThreadPoolsEmpty();
}

#if (defined OS_LINUX || defined OS_WIN)
// Travis doesn't support fallocate or getting unique ID from files for whatever
// reason.
#ifndef TRAVIS

namespace {
bool IsSingleVarint(const std::string& s) {
Slice slice(s);
Expand All @@ -624,6 +619,152 @@ char temp_id[MAX_ID_SIZE];

} // namespace

// Reports whether some member of `ss` is a prefix of another member.
// Only non-empty proper prefixes of each member are looked up; a set that
// contains the empty string is always reported as having a prefix.
bool HasPrefix(const std::unordered_set<std::string>& ss) {
  for (auto it = ss.begin(); it != ss.end(); ++it) {
    const std::string& candidate = *it;
    if (candidate.size() == 0) {
      return true;
    }
    // Probe every proper prefix of length 1 .. size-1.
    size_t len = 1;
    while (len < candidate.size()) {
      if (ss.find(candidate.substr(0, len)) != ss.end()) {
        return true;
      }
      ++len;
    }
  }
  return false;
}

// Checks that a RandomAccessFile reports an empty unique ID until one is
// assigned via SetUniqueId(), and that afterwards GetUniqueId() yields a
// valid ID that is identical across repeated calls.
TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
  // The body only runs when the parameterized env is the default Env.
  if (env_ == Env::Default()) {
    EnvOptions soptions;
    soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;
    // Join the temp directory and the file name with a single separator.
    std::string fname = test::TmpDir(env_) + "/testfile";
    unique_ptr<WritableFile> wfile;
    ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));

    unique_ptr<RandomAccessFile> file;

    // Before any ID is assigned, GetUniqueId() must report size zero.
    ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
    size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
    ASSERT_TRUE(id_size == 0);

    // Assign an ID made of two varints.
    std::string unique_id;
    PutVarint64(&unique_id, 1000);
    PutVarint64(&unique_id, 1001);
    file->SetUniqueId(unique_id);

    // Get Unique ID
    id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
    ASSERT_TRUE(id_size > 0);
    std::string unique_id1(temp_id, id_size);
    ASSERT_TRUE(IsUniqueIDValid(unique_id1));

    // Get Unique ID again
    id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
    ASSERT_TRUE(id_size > 0);
    std::string unique_id2(temp_id, id_size);
    ASSERT_TRUE(IsUniqueIDValid(unique_id2));

    // Check IDs are the same.
    ASSERT_EQ(unique_id1, unique_id2);

    // Delete the file. The returned status is not checked here.
    env_->DeleteFile(fname);
  }
}

// Creates 1000 files that exist at the same time, assigns each the ID
// <1000, index>, and checks that every ID read back is valid, has not been
// seen before, and that no collected ID is a prefix of another.
TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDConcurrent) {
  // The body only runs when the parameterized env is the default Env.
  if (env_ == Env::Default()) {
    // Check whether a bunch of concurrently existing files have unique IDs.
    EnvOptions soptions;
    soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;

    // Create the files
    std::vector<std::string> fnames;
    fnames.reserve(1000);
    for (int i = 0; i < 1000; ++i) {
      fnames.push_back(test::TmpDir(env_) + "/" + "testfile" + ToString(i));

      // Create file.
      unique_ptr<WritableFile> wfile;
      ASSERT_OK(env_->NewWritableFile(fnames[i], &wfile, soptions));
    }

    // Collect and check whether the IDs are unique.
    std::unordered_set<std::string> ids;
    int counter = 0;
    // Iterate by reference: the names are only read, so no copy is needed.
    for (const std::string& fname : fnames) {
      unique_ptr<RandomAccessFile> file;
      std::string unique_id;
      ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
      PutVarint64(&unique_id, 1000);
      PutVarint64(&unique_id, counter++);
      file->SetUniqueId(unique_id);
      size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
      ASSERT_TRUE(id_size > 0);
      unique_id = std::string(temp_id, id_size);
      ASSERT_TRUE(IsUniqueIDValid(unique_id));

      ASSERT_TRUE(ids.count(unique_id) == 0);
      ids.insert(unique_id);
    }

    // Delete the files
    for (const std::string& fname : fnames) {
      ASSERT_OK(env_->DeleteFile(fname));
    }

    ASSERT_TRUE(!HasPrefix(ids));
  }
}

// Recreates the same path 1000 times, tagging each incarnation with the ID
// <1000, i>. Each ID read back must be valid and must not repeat an earlier
// one; at the end no collected ID may be a prefix of another.
TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDDeletes) {
  // Nothing to do unless the parameterized env is the default Env.
  if (env_ != Env::Default()) {
    return;
  }

  EnvOptions soptions;
  soptions.use_direct_writes = direct_io_;
  soptions.use_direct_reads = soptions.use_direct_writes;

  const std::string fname = test::TmpDir(env_) + "/" + "testfile";

  std::unordered_set<std::string> seen_ids;
  for (int round = 0; round < 1000; ++round) {
    // Create the file; the writable handle goes away at the end of the scope.
    {
      unique_ptr<WritableFile> writer;
      ASSERT_OK(env_->NewWritableFile(fname, &writer, soptions));
    }

    // Assign an ID and read it back; the reader is released before deletion.
    std::string id;
    {
      unique_ptr<RandomAccessFile> reader;
      ASSERT_OK(env_->NewRandomAccessFile(fname, &reader, soptions));
      PutVarint64(&id, 1000);
      PutVarint64(&id, round);
      reader->SetUniqueId(id);
      const size_t id_len = reader->GetUniqueId(temp_id, MAX_ID_SIZE);
      ASSERT_TRUE(id_len > 0);
      id.assign(temp_id, id_len);
    }

    ASSERT_TRUE(IsUniqueIDValid(id));
    ASSERT_TRUE(seen_ids.count(id) == 0);
    seen_ids.insert(id);

    // Remove the file so the next round starts from a fresh one.
    ASSERT_OK(env_->DeleteFile(fname));
  }

  ASSERT_FALSE(HasPrefix(seen_ids));
}

#if (defined OS_LINUX || defined OS_WIN)
// Travis doesn't support fallocate or getting unique ID from files for whatever
// reason.
#ifndef TRAVIS

// Determine whether we can use the FS_IOC_GETVERSION ioctl
// on a file in directory DIR. Create a temporary file therein,
// try to apply the ioctl (save that result), cleanup and
Expand Down Expand Up @@ -762,50 +903,6 @@ TEST_F(EnvPosixTest, PositionedAppend) {
}
#endif // !ROCKSDB_LITE

// Only works in linux platforms.
// Opens the same file three times (the last time after a one-second sleep)
// and checks that GetUniqueId() returns a valid, identical ID each time.
TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
  // Nothing to do unless the parameterized env is the default Env.
  if (env_ != Env::Default()) {
    return;
  }

  EnvOptions soptions;
  soptions.use_direct_writes = direct_io_;
  soptions.use_direct_reads = soptions.use_direct_writes;
  IoctlFriendlyTmpdir ift;
  const std::string fname = ift.name() + "/testfile";

  // Create the file.
  unique_ptr<WritableFile> wfile;
  ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));

  unique_ptr<RandomAccessFile> file;
  size_t id_size = 0;

  // First read of the ID.
  ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
  id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
  ASSERT_TRUE(id_size > 0);
  const std::string first_id(temp_id, id_size);
  ASSERT_TRUE(IsUniqueIDValid(first_id));

  // Second read, through a freshly opened handle.
  ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
  id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
  ASSERT_TRUE(id_size > 0);
  const std::string second_id(temp_id, id_size);
  ASSERT_TRUE(IsUniqueIDValid(second_id));

  // Third read, after waiting some time.
  env_->SleepForMicroseconds(1000000);
  ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
  id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
  ASSERT_TRUE(id_size > 0);
  const std::string third_id(temp_id, id_size);
  ASSERT_TRUE(IsUniqueIDValid(third_id));

  // All three reads must agree.
  ASSERT_EQ(first_id, second_id);
  ASSERT_EQ(second_id, third_id);

  // Delete the file. The returned status is not checked here.
  env_->DeleteFile(fname);
}

// only works in linux platforms
#ifdef ROCKSDB_FALLOCATE_PRESENT
TEST_P(EnvPosixTestWithParam, AllocateTest) {
Expand Down Expand Up @@ -880,104 +977,6 @@ TEST_P(EnvPosixTestWithParam, AllocateTest) {
}
#endif // ROCKSDB_FALLOCATE_PRESENT

// Returns true when at least one string in `ss` is a prefix of another
// string in `ss`. The empty string counts as a prefix unconditionally, so a
// set containing it always yields true.
bool HasPrefix(const std::unordered_set<std::string>& ss) {
  for (const std::string& entry : ss) {
    if (entry.empty()) {
      return true;
    }
    // Walk the proper prefixes from longest to shortest (lengths size-1 .. 1).
    for (size_t cut = entry.size() - 1; cut >= 1; --cut) {
      const std::string head = entry.substr(0, cut);
      if (ss.count(head) > 0) {
        return true;
      }
    }
  }
  return false;
}

// Only works in linux and WIN platforms.
// Creates 1000 files that exist at the same time and checks that the ID
// GetUniqueId() returns for each is valid, has not been seen before, and
// that no collected ID is a prefix of another.
TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDConcurrent) {
  // The body only runs when the parameterized env is the default Env.
  if (env_ == Env::Default()) {
    // Check whether a bunch of concurrently existing files have unique IDs.
    EnvOptions soptions;
    soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;

    // Create the files
    IoctlFriendlyTmpdir ift;
    std::vector<std::string> fnames;
    fnames.reserve(1000);
    for (int i = 0; i < 1000; ++i) {
      fnames.push_back(ift.name() + "/" + "testfile" + ToString(i));

      // Create file.
      unique_ptr<WritableFile> wfile;
      ASSERT_OK(env_->NewWritableFile(fnames[i], &wfile, soptions));
    }

    // Collect and check whether the IDs are unique.
    std::unordered_set<std::string> ids;
    // Iterate by reference: the names are only read, so no copy is needed.
    for (const std::string& fname : fnames) {
      unique_ptr<RandomAccessFile> file;
      std::string unique_id;
      ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
      size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
      ASSERT_TRUE(id_size > 0);
      unique_id = std::string(temp_id, id_size);
      ASSERT_TRUE(IsUniqueIDValid(unique_id));

      ASSERT_TRUE(ids.count(unique_id) == 0);
      ids.insert(unique_id);
    }

    // Delete the files
    for (const std::string& fname : fnames) {
      ASSERT_OK(env_->DeleteFile(fname));
    }

    ASSERT_TRUE(!HasPrefix(ids));
  }
}

// Only works in linux and WIN platforms.
// Recreates the same path 1000 times and checks that the ID reported for
// each incarnation is valid and does not repeat an earlier one; at the end
// no collected ID may be a prefix of another.
TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDDeletes) {
  // Nothing to do unless the parameterized env is the default Env.
  if (env_ != Env::Default()) {
    return;
  }

  EnvOptions soptions;
  soptions.use_direct_writes = direct_io_;
  soptions.use_direct_reads = soptions.use_direct_writes;

  IoctlFriendlyTmpdir ift;
  const std::string fname = ift.name() + "/" + "testfile";

  std::unordered_set<std::string> seen_ids;
  for (int round = 0; round < 1000; ++round) {
    // Create the file; the writable handle goes away at the end of the scope.
    {
      unique_ptr<WritableFile> writer;
      ASSERT_OK(env_->NewWritableFile(fname, &writer, soptions));
    }

    // Read the ID; the reader is released before the file is deleted.
    std::string id;
    {
      unique_ptr<RandomAccessFile> reader;
      ASSERT_OK(env_->NewRandomAccessFile(fname, &reader, soptions));
      const size_t id_len = reader->GetUniqueId(temp_id, MAX_ID_SIZE);
      ASSERT_TRUE(id_len > 0);
      id = std::string(temp_id, id_len);
    }

    ASSERT_TRUE(IsUniqueIDValid(id));
    ASSERT_TRUE(seen_ids.count(id) == 0);
    seen_ids.insert(id);

    // Remove the file so the next round starts from a fresh one.
    ASSERT_OK(env_->DeleteFile(fname));
  }

  ASSERT_FALSE(HasPrefix(seen_ids));
}

// Only works in linux platforms
#ifdef OS_WIN
TEST_P(EnvPosixTestWithParam, DISABLED_InvalidateCache) {
Expand Down
Loading

0 comments on commit 1990cf4

Please sign in to comment.