Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(duplication): prevent plog files from being removed by GC while they are being checked by duplication #1597

Merged
merged 17 commits into from
Mar 28, 2024
Merged
4 changes: 4 additions & 0 deletions src/replica/duplication/load_from_private_log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "replica/replica.h"
#include "replica_duplicator.h"
#include "utils/autoref_ptr.h"
#include "utils/defer.h"
#include "utils/error_code.h"
#include "utils/errors.h"
#include "utils/fail_point.h"
Expand Down Expand Up @@ -146,6 +147,9 @@ void load_from_private_log::run()

void load_from_private_log::find_log_file_to_start()
{
_duplicator->set_duplication_plog_checking(true);
empiredan marked this conversation as resolved.
Show resolved Hide resolved
empiredan marked this conversation as resolved.
Show resolved Hide resolved
auto cleanup = dsn::defer([this]() { _duplicator->set_duplication_plog_checking(false); });

// `file_map` has already excluded the useless log files during replica init.
const auto &file_map = _private_log->get_log_file_map();

Expand Down
4 changes: 3 additions & 1 deletion src/replica/duplication/load_from_private_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ class load_from_private_log final : public replica_base,

/// Find the log file that contains `_start_decree`.
void find_log_file_to_start();
void find_log_file_to_start(const mutation_log::log_file_map_by_index &log_files);

void replay_log_block();

Expand All @@ -82,6 +81,9 @@ class load_from_private_log final : public replica_base,
static constexpr int MAX_ALLOWED_BLOCK_REPEATS{3};
static constexpr int MAX_ALLOWED_FILE_REPEATS{10};

private:
void find_log_file_to_start(const mutation_log::log_file_map_by_index &log_files);

private:
friend class load_from_private_log_test;
friend class load_fail_mode_test;
Expand Down
5 changes: 5 additions & 0 deletions src/replica/duplication/replica_duplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,5 +262,10 @@ uint64_t replica_duplicator::get_pending_mutations_count() const
return cnt > 0 ? static_cast<uint64_t>(cnt) : 0;
}

// Forwards `checking` to `_replica->set_duplication_plog_checking()`, so that the
// replica-level "duplication is checking plog files" flag reflects the given value.
void replica_duplicator::set_duplication_plog_checking(bool checking)
{
_replica->set_duplication_plog_checking(checking);
}

} // namespace replication
} // namespace dsn
4 changes: 3 additions & 1 deletion src/replica/duplication/replica_duplicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ class replica_duplicator : public replica_base, public pipeline::base
// For metric "dup.pending_mutations_count"
uint64_t get_pending_mutations_count() const;

duplication_status::type status() const { return _status; };
duplication_status::type status() const { return _status; }

// Passes `checking` on to the replica's flag of the same name; set to true while
// plog files are being checked for duplication and back to false afterwards.
void set_duplication_plog_checking(bool checking);

private:
friend class duplication_test_base;
Expand Down
8 changes: 8 additions & 0 deletions src/replica/replica.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba
replica_duplicator_manager *get_duplication_manager() const { return _duplication_mgr.get(); }
bool is_duplication_master() const { return _is_duplication_master; }
bool is_duplication_follower() const { return _is_duplication_follower; }
// Returns the current value of the atomic `_is_duplication_plog_checking` flag.
bool is_duplication_plog_checking() const { return _is_duplication_plog_checking.load(); }
// Stores `checking` into the atomic `_is_duplication_plog_checking` flag.
// NOTE(review): the flag is a single boolean rather than a counter, so if more
// than one caller can be checking plog files at the same time, the first one to
// pass `false` clears the flag for all of them -- confirm that callers never overlap.
void set_duplication_plog_checking(bool checking)
{
_is_duplication_plog_checking.store(checking);
}

//
// Backup
Expand Down Expand Up @@ -625,6 +630,9 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba
bool _is_manual_emergency_checkpointing{false};
bool _is_duplication_master{false};
bool _is_duplication_follower{false};
// Indicates whether the replica is currently searching for the private logs to
// load for duplication. It is used to prevent plog files from being GCed unexpectedly.
std::atomic<bool> _is_duplication_plog_checking{false};
acelyc111 marked this conversation as resolved.
Show resolved Hide resolved
acelyc111 marked this conversation as resolved.
Show resolved Hide resolved

// backup
std::unique_ptr<replica_backup_manager> _backup_mgr;
Expand Down
6 changes: 6 additions & 0 deletions src/replica/replica_chkpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ void replica::on_checkpoint_timer()
return;
}

if (is_duplication_plog_checking()) {
LOG_DEBUG_PREFIX("gc_private {}: skip gc because duplication is checking plog files",
enum_to_string(status()));
return;
}

tasking::enqueue(LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS,
&_tracker,
[this, plog, cleanable_decree, valid_start_offset] {
Expand Down
Loading