Skip to content

Commit

Permalink
Fixes to secondary commit order deadlock handling
Browse files Browse the repository at this point in the history
Summary:
Three fixes were made:
 - Resetting the deadlock flag when rolling back trxs so that the
   secondary stops correctly on error.
 - Adding sleeps in before retrying trxs to avoid the same deadlock to
   happen repeatedly.
 - Signaling wait loop on report_deadlock() generically using
   THD::awake() to cover cases where worker is waiting for deps to be
   satisfied when deadlock is detected.

Squash with D28619160 (facebook@ce8c474)

Reviewed By: hermanlee

Differential Revision: D29669532

fbshipit-source-id: a4a1f23482c
  • Loading branch information
abhinav04sharma authored and pull[bot] committed Feb 17, 2022
1 parent ba68465 commit 9360242
Show file tree
Hide file tree
Showing 7 changed files with 266 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,18 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
call mtr.add_suppression("Commit order deadlock between");
call mtr.add_suppression("Commit order deadlock found");
call mtr.add_suppression("The slave coordinator and worker threads are stopped");
create table t1 (a int primary key, b int) engine = innodb;
insert into t1 values(3, 3);
include/sync_slave_sql_with_master.inc
stop slave;
set @@global.debug = "+d,dbug.dep_fake_gap_lock_on_insert";
SELECT VARIABLE_VALUE INTO @orig_deadlocks FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks';
start slave;
"Case 1: Commit order deadlock when waiting for commit order"
insert into t1 values(3, 3);
include/sync_slave_sql_with_master.inc
stop slave;
begin;
insert into t1 values(1, 1);
insert into t1 values(1, 1);
Expand All @@ -25,9 +32,59 @@ a b
1 1
2 200
3 300
delete from t1;
include/sync_slave_sql_with_master.inc
"Case 2: Commit order deadlock when waiting for dependencies"
insert into t1 values(3, 3);
include/sync_slave_sql_with_master.inc
stop slave;
set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert";
begin;
insert into t1 values(1, 1);
insert into t1 values(1, 1);
begin;
insert into t1 values(2, 2);
update t1 set b = 10 where a = 1;
commit;
update t1 set b = 20 where a = 2;
update t1 set b = 200 where a = 2;
update t1 set b = 30 where a = 3;
update t1 set b = 300 where a = 3;
start slave;
rollback;
include/sync_slave_sql_with_master.inc
select * from t1;
a b
1 10
2 200
3 300
delete from t1;
include/sync_slave_sql_with_master.inc
"Case 3: Secondary stops when retries are exhausted"
set @save.slave_transaction_retries= @@global.slave_transaction_retries;
set @@global.slave_transaction_retries= 0;
insert into t1 values(3, 3);
include/sync_slave_sql_with_master.inc
stop slave;
begin;
insert into t1 values(1, 1);
insert into t1 values(1, 1);
insert into t1 values(2, 2);
update t1 set b = 20 where a = 2;
update t1 set b = 200 where a = 2;
update t1 set b = 30 where a = 3;
update t1 set b = 300 where a = 3;
start slave;
rollback;
include/wait_for_slave_sql_to_stop.inc
set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert";
include/start_slave.inc
include/sync_slave_sql_with_master.inc
select * from t1;
a b
1 1
2 200
3 300
set @@global.slave_transaction_retries= @save.slave_transaction_retries;
drop table t1;
include/sync_slave_sql_with_master.inc
include/rpl_end.inc
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,30 @@ source include/have_mts_dependency_replication_stmt.inc;
source include/have_debug.inc;

call mtr.add_suppression("Commit order deadlock between");
call mtr.add_suppression("Commit order deadlock found");
call mtr.add_suppression("The slave coordinator and worker threads are stopped");

connection master;
create table t1 (a int primary key, b int) engine = innodb;
insert into t1 values(3, 3);
source include/sync_slave_sql_with_master.inc;

connection slave;
stop slave;
# We'll take a fake gap lock after execution of every insert event
set @@global.debug = "+d,dbug.dep_fake_gap_lock_on_insert";
SELECT VARIABLE_VALUE INTO @orig_deadlocks FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks';
start slave;


echo "Case 1: Commit order deadlock when waiting for commit order";
connection master;
insert into t1 values(3, 3);
source include/sync_slave_sql_with_master.inc;

# Start a trx to block the 1st insert that primary will send to create a gap in
# commit order
connection slave;
stop slave;

# Start a trx to block the 1st insert that primary will send to create a gap in commit order
begin;
insert into t1 values(1, 1);

Expand Down Expand Up @@ -47,7 +58,7 @@ rollback;

# Wait for 2nd transcation to be retried after receiving the commit order
# deadlock signal
let $wait_condition= SELECT VARIABLE_VALUE = 1
let $wait_condition= SELECT VARIABLE_VALUE - @orig_deadlocks = 1
FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks';
let $wait_timeout= 120;
Expand All @@ -59,9 +70,136 @@ source include/sync_slave_sql_with_master.inc;
connection slave;
select * from t1;

connection master;
delete from t1;
source include/sync_slave_sql_with_master.inc;


echo "Case 2: Commit order deadlock when waiting for dependencies";
connection master;
insert into t1 values(3, 3);
source include/sync_slave_sql_with_master.inc;

connection slave;
stop slave;
set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert";

# Start a trx to block the 1st insert that primary will send to create a gap in commit order
begin;
insert into t1 values(1, 1);

connection master;
insert into t1 values(1, 1); # this will be blocked by trx above

begin;
insert into t1 values(2, 2); # this will take the fake gap
update t1 set b = 10 where a = 1; # this will wait for above trx due to deps
commit;

update t1 set b = 20 where a = 2; # this will wait for above trx due to deps
update t1 set b = 200 where a = 2; # this will wait for above trx due to deps
update t1 set b = 30 where a = 3; # this will wait for commit order
update t1 set b = 300 where a = 3; # this will wait for above trx due to deps

connection slave1;
start slave;
# Wait for the 5th trx to start waiting for commit ordering
let $wait_condition= SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE STATE LIKE "%Waiting for preceding transaction to commit%";
let $wait_timeout= 120;
source include/wait_condition.inc;

# Wait for the 2nd, 3rd, 4th and 6th trx to wait for dependencies
let $wait_condition= SELECT COUNT(*) = 4 FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE STATE LIKE "%Waiting for dependencies to be satisfied%";
let $wait_timeout= 120;
source include/wait_condition.inc;

connection slave;
# unblock 1st insert trx, it'll try to lock fake gap and fire the callback
rollback;

# Wait for 2nd transcation to be retried after receiving the commit order
# deadlock signal
let $wait_condition= SELECT VARIABLE_VALUE - @orig_deadlocks = 2
FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks';
let $wait_timeout= 120;
source include/wait_condition.inc;

connection master;
source include/sync_slave_sql_with_master.inc;

connection slave;
select * from t1;

connection master;
delete from t1;
source include/sync_slave_sql_with_master.inc;


echo "Case 3: Secondary stops when retries are exhausted";
connection slave;
set @save.slave_transaction_retries= @@global.slave_transaction_retries;
set @@global.slave_transaction_retries= 0; # trxs will not be retried

connection master;
insert into t1 values(3, 3);
source include/sync_slave_sql_with_master.inc;

connection slave;
stop slave;

# Start a trx to block the 1st insert that primary will send to create a gap in commit order
begin;
insert into t1 values(1, 1);

connection master;
insert into t1 values(1, 1); # this will be blocked by trx above
insert into t1 values(2, 2); # this will take the fake gap lock and wait for commit order
update t1 set b = 20 where a = 2; # this will wait for above trx due to deps
update t1 set b = 200 where a = 2; # this will wait for above trx due to deps
update t1 set b = 30 where a = 3; # this will wait for commit order
update t1 set b = 300 where a = 3; # this will wait for above trx due to deps

connection slave1;
start slave;
# Wait for the 2nd and 5th trx to start waiting for commit ordering
let $wait_condition= SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE STATE LIKE "%Waiting for preceding transaction to commit%";
let $wait_timeout= 120;
source include/wait_condition.inc;

# Wait for the 3rd, 4th and 6th trx to wait for dependencies
let $wait_condition= SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE STATE LIKE "%Waiting for dependencies to be satisfied%";
let $wait_timeout= 120;
source include/wait_condition.inc;

connection slave;
# unblock 1st insert trx, it'll try to lock fake gap and fire the callback
rollback;

# Wait for 2nd transcation to be retried after receiving the commit order
# deadlock signal
let $wait_condition= SELECT VARIABLE_VALUE - @orig_deadlocks = 3
FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks';
let $wait_timeout= 120;
source include/wait_condition.inc;

# secondary should stop
source include/wait_for_slave_sql_to_stop.inc;

# remove fake gap lock and start again
set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert";
source include/start_slave.inc;

connection master;
source include/sync_slave_sql_with_master.inc;

connection slave;
select * from t1;
set @@global.slave_transaction_retries= @save.slave_transaction_retries;

connection master;
drop table t1;
Expand Down
15 changes: 12 additions & 3 deletions sql/log_event_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@ bool Log_event_wrapper::wait(Slave_worker *worker)
&old_stage);
while (!info_thd->killed &&
worker->running_status == Slave_worker::RUNNING &&
dependencies)
dependencies &&
!worker->found_order_commit_deadlock())
{
mysql_cond_wait(&cond, &mutex);
const auto timeout_nsec=
worker->c_rli->mts_dependency_cond_wait_timeout * 1000000;
struct timespec abstime;
set_timespec_nsec(abstime, timeout_nsec);
mysql_cond_timedwait(&cond, &mutex, &abstime);
}
info_thd->EXIT_COND(&old_stage);
return !info_thd->killed && worker->running_status == Slave_worker::RUNNING;
Expand All @@ -50,7 +55,11 @@ std::shared_ptr<Log_event_wrapper> Log_event_wrapper::next()
!next_ev)
{
++worker->c_rli->next_event_waits;
mysql_cond_wait(&next_event_cond, &mutex);
const auto timeout_nsec=
worker->c_rli->mts_dependency_cond_wait_timeout * 1000000;
struct timespec abstime;
set_timespec_nsec(abstime, timeout_nsec);
mysql_cond_timedwait(&next_event_cond, &mutex, &abstime);
}
info_thd->EXIT_COND(&old_stage);
return next_ev;
Expand Down
38 changes: 37 additions & 1 deletion sql/rpl_rli_pdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1368,6 +1368,29 @@ Slave_worker *get_least_occupied_worker(DYNAMIC_ARRAY *ws)
DBUG_RETURN(worker);
}

bool Slave_worker::worker_sleep(ulong secs)
{
bool ret= false;
struct timespec abstime;
mysql_mutex_t *lock= &jobs_lock;
mysql_cond_t *cond= &jobs_cond;

/* Absolute system time at which the sleep time expires. */
set_timespec(abstime, secs);

mysql_mutex_lock(lock);
info_thd->ENTER_COND(cond, lock, nullptr, nullptr);

while (!(ret = info_thd->killed || running_status != RUNNING))
{
int error= mysql_cond_timedwait(cond, lock, &abstime);
if (error == ETIMEDOUT || error == ETIME) break;
}

info_thd->EXIT_COND(nullptr);
return ret;
}

/**
Deallocation routine to cancel out few effects of
@c map_db_to_worker().
Expand Down Expand Up @@ -1444,8 +1467,14 @@ void Slave_worker::slave_worker_ends_group(Log_event* ev, int &error,
trans_retries++;
error = 0; // Reset the error to avoid worker thread reporting an error.
temporary_error = true;
reset_order_commit_deadlock();
cleanup_context(info_thd, 1);
// slightly more sleep when commit order deadlock is found so that the
// earlier trx can race forward
ulong sleep_sec=
found_order_commit_deadlock() ? trans_retries + 1 : trans_retries;
sleep_sec= min<ulong>(sleep_sec, MAX_SLAVE_RETRY_PAUSE);
reset_order_commit_deadlock();
worker_sleep(sleep_sec);
DBUG_VOID_RETURN;
}
else if (running_status != STOP_ACCEPTED)
Expand Down Expand Up @@ -2491,6 +2520,13 @@ int slave_worker_exec_job(Slave_worker *worker, Relay_log_info *rli)
error= -1;
goto err;
}

if (worker->found_order_commit_deadlock())
{
error= ER_LOCK_DEADLOCK;
goto err;
}

worker->current_event_index++;
ev= static_cast<Log_event*>(job_item->data);

Expand Down
1 change: 1 addition & 0 deletions sql/rpl_rli_pdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ class Slave_worker : public Relay_log_info
int flush_info(bool force= FALSE);
static size_t get_number_worker_fields();
void slave_worker_ends_group(Log_event*, int&, bool&);
bool worker_sleep(ulong);
const char *get_master_log_name();
ulonglong get_master_log_pos() { return master_log_pos; };
ulonglong set_master_log_pos(ulong val) { return master_log_pos= val; };
Expand Down
1 change: 1 addition & 0 deletions sql/rpl_slave.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL, SLAVE_THD_WORKER } SLAVE_THD_TYPE;

#define MTS_WORKER_UNDEF ((ulong) -1)
#define MTS_MAX_WORKERS 1024
#define MAX_SLAVE_RETRY_PAUSE 5

/*
When using tables to store the slave workers bitmaps,
Expand Down
16 changes: 13 additions & 3 deletions sql/rpl_slave_commit_order_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ void Commit_order_manager::report_rollback(Slave_worker *worker)
{
DBUG_ENTER("Commit_order_manager::report_rollback");

// Without resetting order commit deadlock flag wait_for_its_turn will return
// immediately without changing state and due to that unregister_trx() will be
// a no-op.
worker->reset_order_commit_deadlock();

(void) wait_for_its_turn(worker, true);
/* No worker can set m_rollback_trx unless it is its turn to commit */
m_rollback_trx.store(true);
Expand All @@ -152,11 +157,16 @@ void Commit_order_manager::report_rollback(Slave_worker *worker)
void Commit_order_manager::report_deadlock(Slave_worker *worker)
{
DBUG_ENTER("Commit_order_manager::report_deadlock");
mysql_mutex_lock(&m_queue_mutex);
THD *thd= worker->info_thd;
mysql_mutex_lock(&thd->LOCK_thd_data);
++slave_commit_order_deadlocks;
worker->report_order_commit_deadlock();
mysql_cond_signal(&m_workers[worker->id].cond);
mysql_mutex_unlock(&m_queue_mutex);
/* Let's signal to any wait loop this worker is executing, this will also
* cover the wait loop in wait_for_its_turn() above.
* NOTE: we just want to send a signal without changing the killed flag
*/
thd->awake(thd->killed);
mysql_mutex_unlock(&thd->LOCK_thd_data);
DBUG_VOID_RETURN;
}

0 comments on commit 9360242

Please sign in to comment.