Skip to content

Commit

Permalink
Maintain separate file handle for log manifest backup (#175)
Browse files Browse the repository at this point in the history
* Reduce system call
1. for log manifest, remove truncate if the previous size is smaller.
2. for backup log manifest, not open and close file in each call.

* indent fix

---------

Co-authored-by: lihzeng <[email protected]>
  • Loading branch information
ZZhongge and lihzeng authored Sep 24, 2024
1 parent fb1f0a7 commit fe88825
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 3 deletions.
49 changes: 49 additions & 0 deletions src/internal_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,55 @@ Status BackupRestore::backup(FileOps* f_ops,
}
}

Status BackupRestore::backup(FileOps* f_ops,
FileHandle** file,
const std::string& filename,
const SizedBuf& ctx,
size_t length,
size_t bytes_to_skip,
bool call_fsync)
{
Status s;
std::string dst_file = filename + ".bak";

try {
if (!f_ops->exist(dst_file)) {
if (*file) {
f_ops->close(*file);
delete *file;
*file = nullptr;
}
}

if (!(*file)) {
TC( f_ops->open(file, dst_file) );
}

size_t file_size = f_ops->eof(*file);
if (length > bytes_to_skip) {
TC( f_ops->pwrite( *file,
ctx.data + bytes_to_skip,
length - bytes_to_skip,
bytes_to_skip ) );
}
if (file_size > length) {
f_ops->ftruncate(*file, length);
}

if (call_fsync) {
f_ops->fsync(*file);
}
return s;
} catch (Status s) {
if (*file) {
f_ops->close(*file);
delete *file;
*file = nullptr;
}
return s;
}
}

Status BackupRestore::restore(FileOps* f_ops,
const std::string& filename)
{
Expand Down
7 changes: 7 additions & 0 deletions src/internal_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ class BackupRestore {
size_t length,
size_t bytes_to_skip,
bool call_fsync);
static Status backup(FileOps* f_ops,
FileHandle** file,
const std::string& filename,
const SizedBuf& ctx,
size_t length,
size_t bytes_to_skip,
bool call_fsync);
static Status restore(FileOps* f_ops,
const std::string& filename);
};
Expand Down
14 changes: 11 additions & 3 deletions src/log_manifest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ LogManifest::LogManifest(LogMgr* log_mgr, FileOps* _f_ops, FileOps* _f_l_ops)
: fOps(_f_ops)
, fLogOps(_f_l_ops)
, mFile(nullptr)
, mBackupFile(nullptr)
, lastFlushedLog(NOT_INITIALIZED)
, lastSyncedLog(NOT_INITIALIZED)
, maxLogFileNum(NOT_INITIALIZED)
Expand All @@ -174,6 +175,10 @@ LogManifest::~LogManifest() {
if (mFile) {
delete mFile;
}

if (mBackupFile) {
delete mBackupFile;
}
// NOTE: Skip `logFilesBySeq` as they share the actual memory.
skiplist_node* cursor = skiplist_begin(&logFiles);
while (cursor) {
Expand Down Expand Up @@ -558,6 +563,7 @@ Status LogManifest::storeInternal(bool call_fsync) {
// We will skip the common data and write different part only,
// to reduce disk IOs (assuming that memory comparison is faster than disk write).
uint32_t first_diff_pos = 0;
bool need_truncate = !lenCachedManifest || lenCachedManifest > ss.pos();

if (lenCachedManifest) {
uint32_t min_len = std::min(cachedManifest.size, (uint32_t)ss.pos());
Expand Down Expand Up @@ -591,8 +597,10 @@ Status LogManifest::storeInternal(bool call_fsync) {
lenCachedManifest = ss.pos();

// Should truncate tail.
fOps->ftruncate(mFile, ss.pos());

if (need_truncate) {
fOps->ftruncate(mFile, ss.pos());
}

bool backup_done = false;
if (call_fsync) {
s = fOps->fsync(mFile);
Expand All @@ -607,7 +615,7 @@ Status LogManifest::storeInternal(bool call_fsync) {
// using the latest data.
// Same as above, tolerate backup failure.
Status s_backup = BackupRestore::backup
( fOps, mFileName, mani_buf,
( fOps, &mBackupFile, mFileName, mani_buf,
ss.pos(),
fullBackupRequired ? 0 : first_diff_pos,
call_fsync );
Expand Down
1 change: 1 addition & 0 deletions src/log_manifest.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ class LogManifest {
FileOps* fOps;
FileOps* fLogOps;
FileHandle* mFile;
FileHandle* mBackupFile;
std::string dirPath;
std::string mFileName;
uint64_t prefixNum;
Expand Down
94 changes: 94 additions & 0 deletions tests/jungle/corruption_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,97 @@ int log_manifest_corruption_across_file_test() {
return 0;
}

int restore_from_backup_log_manifest() {
std::string filename;
TEST_SUITE_PREPARE_PATH(filename);

jungle::Status s;
jungle::DB* db;

// Open DB.
jungle::DBConfig config;
TEST_CUSTOM_DB_CONFIG(config)
config.logSectionOnly = true;
config.maxEntriesInLogFile = 10;
CHK_Z(jungle::DB::open(&db, filename, config));

// Write something.
size_t num = 12;
size_t kv_num = 100;
std::vector<jungle::KV> kv(kv_num);
CHK_Z(_init_kv_pairs(kv_num, kv, "key", "value"));

for (size_t ii=0; ii<num/2; ++ii) {
CHK_Z(db->setSN(ii+1, kv[ii]));
}
CHK_Z(db->sync(false));

// Remove backup file
TestSuite::remove(filename + "/log0000_manifest.bak");

for (size_t ii=num/2; ii<num; ++ii) {
CHK_Z(db->setSN(ii+1, kv[ii]));
}

// Directly sync to generate new backup file
db->sync();
CHK_Z(jungle::DB::close(db));

// Corrupt manifest file.
CHK_Z(inject_crc_error(filename + "/log0000_manifest"));

// Restore from a backup file.
CHK_Z(jungle::DB::open(&db, filename, config));

// Get last seq num.
uint64_t last_seqnum;
CHK_Z(db->getMaxSeqNum(last_seqnum));

// Get check.
for (size_t ii=1; ii<=last_seqnum; ++ii) {
TestSuite::setInfo("ii=%zu", ii);
jungle::KV kv_out;
CHK_Z(db->getSN(ii, kv_out));
kv_out.free();
TestSuite::clearInfo();
}

// Set more, it will overwrite previous log files.
std::vector<jungle::KV> kv_after(kv_num);
CHK_Z(_init_kv_pairs(kv_num, kv_after, "key", "value_after_crash"));
for (size_t ii=last_seqnum+1; ii<=last_seqnum+5; ++ii) {
CHK_Z(db->setSN(ii, kv_after[ii-1]));
}

// Close & reopen.
CHK_Z(jungle::DB::close(db));
CHK_Z(jungle::DB::open(&db, filename, config));

// Get check.
for (size_t ii=1; ii<=last_seqnum+5; ++ii) {
TestSuite::setInfo("ii=%zu", ii);
jungle::KV kv_out;
CHK_Z(db->getSN(ii, kv_out));
if (ii <= last_seqnum) {
CHK_EQ(kv[ii-1].key, kv_out.key);
CHK_EQ(kv[ii-1].value, kv_out.value);
} else {
CHK_EQ(kv_after[ii-1].key, kv_out.key);
CHK_EQ(kv_after[ii-1].value, kv_out.value);
}
kv_out.free();
TestSuite::clearInfo();
}
CHK_Z(jungle::DB::close(db));

CHK_Z(jungle::shutdown());
CHK_Z(_free_kv_pairs(kv_num, kv));
CHK_Z(_free_kv_pairs(kv_num, kv_after));

TEST_SUITE_CLEANUP_PATH();
return 0;
}

int table_file_corruption_test(size_t failing_idx) {
std::string filename;
TEST_SUITE_PREPARE_PATH(filename);
Expand Down Expand Up @@ -1423,6 +1514,9 @@ int main(int argc, char** argv) {
ts.doTest("log manifest corruption across multi log files test",
log_manifest_corruption_across_file_test);

ts.doTest("restore from backup log manifest",
restore_from_backup_log_manifest);

ts.doTest("table file corruption test",
table_file_corruption_test,
TestRange<size_t>({2, 96}));
Expand Down

0 comments on commit fe88825

Please sign in to comment.