Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to configure the compaction checker policy #1325

Merged
merged 6 commits into from
Mar 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions kvrocks.conf
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,18 @@ profiling-sample-record-threshold-ms 100
# 0-7am every day.
compaction-checker-range 0-7

# When the compaction checker is triggered, the db will periodically pick the SST file
# with the highest "deleted percentage" (i.e. the percentage of deleted keys in the SST
# file) to compact, in order to free disk space.
# However, if a specific SST file was created more than "force-compact-file-age" seconds
# ago, and its percentage of deleted keys is higher than
# "force-compact-file-min-deleted-percentage", it will be forcely compacted as well.

# Default: 172800 seconds; Range: [60, INT64_MAX];
# force-compact-file-age 172800
# Default: 10 %; Range: [1, 100];
# force-compact-file-min-deleted-percentage 10

# Bgsave scheduler, auto bgsave at scheduled time
# time expression format is the same as crontab(currently only support * and int)
# e.g. bgsave-cron 0 3 * * * 0 4 * * *
Expand Down
3 changes: 3 additions & 0 deletions src/config/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ Config::Config() {
{"replica-announce-ip", false, new StringField(&replica_announce_ip, "")},
{"replica-announce-port", false, new UInt32Field(&replica_announce_port, 0, 0, PORT_LIMIT)},
{"compaction-checker-range", false, new StringField(&compaction_checker_range_, "")},
{"force-compact-file-age", false, new Int64Field(&force_compact_file_age, 2 * 24 * 3600, 60, INT64_MAX)},
{"force-compact-file-min-deleted-percentage", false,
new IntField(&force_compact_file_min_deleted_percentage, 10, 1, 100)},
{"db-name", true, new StringField(&db_name, "change.me.db")},
{"dir", true, new StringField(&dir, "/tmp/kvrocks")},
{"backup-dir", false, new StringField(&backup_dir, "")},
Expand Down
2 changes: 2 additions & 0 deletions src/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ struct Config {
Cron compact_cron;
Cron bgsave_cron;
CompactionCheckerRange compaction_checker_range{-1, -1};
int64_t force_compact_file_age;
int force_compact_file_min_deleted_percentage;
std::map<std::string, std::string> tokens;
std::string replica_announce_ip;
uint32_t replica_announce_port = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/server/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ Status Server::Start() {
compaction_checker_thread_ = GET_OR_RET(Util::CreateThread("compact-check", [this] {
uint64_t counter = 0;
time_t last_compact_date = 0;
CompactionChecker compaction_checker(this->storage_);
CompactionChecker compaction_checker{this->storage_};

while (!stop_) {
// Sleep first
Expand Down
27 changes: 18 additions & 9 deletions src/storage/compaction_checker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,12 @@ void CompactionChecker::PickCompactionFiles(const std::string &cf_name) {
if (props.size() / 360 > maxFilesToCompact) {
maxFilesToCompact = props.size() / 360;
}
int64_t forceCompactSeconds = 2 * 24 * 3600;
int64_t now = Util::GetTimeStamp();

auto force_compact_file_age = storage_->GetConfig()->force_compact_file_age;
auto force_compact_min_ratio =
static_cast<double>(storage_->GetConfig()->force_compact_file_min_deleted_percentage / 100);

std::string best_filename;
double best_delete_ratio = 0;
int64_t total_keys = 0, deleted_keys = 0;
Expand All @@ -76,8 +80,6 @@ void CompactionChecker::PickCompactionFiles(const std::string &cf_name) {
}
}

// don't compact the SST created in 1 hour
if (file_creation_time > static_cast<uint64_t>(now - 3600)) continue;
for (const auto &property_iter : iter.second->user_collected_properties) {
if (property_iter.first == "total_keys") {
auto parse_result = ParseInt<int>(property_iter.second, 10);
Expand All @@ -104,16 +106,23 @@ void CompactionChecker::PickCompactionFiles(const std::string &cf_name) {
}

if (start_key.empty() || stop_key.empty()) continue;
// pick the file which was created more than 2 days
if (file_creation_time < static_cast<uint64_t>(now - forceCompactSeconds)) {
LOG(INFO) << "[compaction checker] Going to compact the key in file(created more than 2 days): " << iter.first;
s = storage_->Compact(&start_key, &stop_key);
LOG(INFO) << "[compaction checker] Compact the key in file(created more than 2 days): " << iter.first
double delete_ratio = static_cast<double>(deleted_keys) / static_cast<double>(total_keys);

// pick the file according to force compact policy
if (file_creation_time < static_cast<uint64_t>(now - force_compact_file_age) &&
delete_ratio >= force_compact_min_ratio) {
LOG(INFO) << "[compaction checker] Going to compact the key in file (force compact policy): " << iter.first;
auto s = storage_->Compact(&start_key, &stop_key);
LOG(INFO) << "[compaction checker] Compact the key in file (force compact policy): " << iter.first
<< " finished, result: " << s.ToString();
maxFilesToCompact--;
continue;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@git-hulk @PragmaTwice @xiaobiaozhao I kindly ask you to pay attention to this new continue sentence because this will actually change how the compaction_checker works.

I add this because if a SST file has been compacted due to the "force compact policy", it should not be considered as a possible candidate with the best_delete_ratio. If it is not so skipped, the file may be compacted twice in a single call of PickCompactionFiles, from my point of view.

If any of you think this change is not proper please let me know.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ellutionist Thanks for your point out. You're right that is unnecessary to check again.

}

// don't compact the SST created in 1 hour
if (file_creation_time > static_cast<uint64_t>(now - 3600)) continue;

// pick the file which has highest delete ratio
double delete_ratio = static_cast<double>(deleted_keys) / static_cast<double>(total_keys);
if (total_keys != 0 && delete_ratio > best_delete_ratio) {
best_delete_ratio = delete_ratio;
best_filename = iter.first;
Expand Down
1 change: 1 addition & 0 deletions src/storage/storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class Storage {
uint64_t GetCompactionCount() { return compaction_count_; }
void IncrCompactionCount(uint64_t n) { compaction_count_.fetch_add(n); }
bool IsSlotIdEncoded() { return config_->slot_id_encoded; }
const Config *GetConfig() { return config_; }

Status BeginTxn();
Status CommitTxn();
Expand Down