diff --git a/mysql-test/suite/rpl_mts/r/rpl_mts_dependency_order_commits_deadlock.result b/mysql-test/suite/rpl_mts/r/rpl_mts_dependency_order_commits_deadlock.result index ab9566667189..dddecddf6e16 100644 --- a/mysql-test/suite/rpl_mts/r/rpl_mts_dependency_order_commits_deadlock.result +++ b/mysql-test/suite/rpl_mts/r/rpl_mts_dependency_order_commits_deadlock.result @@ -4,11 +4,18 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. [connection master] call mtr.add_suppression("Commit order deadlock between"); +call mtr.add_suppression("Commit order deadlock found"); +call mtr.add_suppression("The slave coordinator and worker threads are stopped"); create table t1 (a int primary key, b int) engine = innodb; -insert into t1 values(3, 3); include/sync_slave_sql_with_master.inc stop slave; set @@global.debug = "+d,dbug.dep_fake_gap_lock_on_insert"; +SELECT VARIABLE_VALUE INTO @orig_deadlocks FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks'; +start slave; +"Case 1: Commit order deadlock when waiting for commit order" +insert into t1 values(3, 3); +include/sync_slave_sql_with_master.inc +stop slave; begin; insert into t1 values(1, 1); insert into t1 values(1, 1); @@ -25,9 +32,59 @@ a b 1 1 2 200 3 300 +delete from t1; +include/sync_slave_sql_with_master.inc +"Case 2: Commit order deadlock when waiting for dependencies" +insert into t1 values(3, 3); +include/sync_slave_sql_with_master.inc stop slave; -set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert"; +begin; +insert into t1 values(1, 1); +insert into t1 values(1, 1); +begin; +insert into t1 values(2, 2); +update t1 set b = 10 where a = 1; +commit; +update t1 set b = 20 where a = 2; +update t1 set b = 200 where a = 2; +update t1 set b = 30 where a = 3; +update t1 set b = 300 where a = 3; start slave; +rollback; +include/sync_slave_sql_with_master.inc +select * from t1; +a b +1 10 +2 200 +3 300 +delete from t1; +include/sync_slave_sql_with_master.inc +"Case 3: Secondary stops when retries are exhausted" +set @save.slave_transaction_retries= @@global.slave_transaction_retries; +set @@global.slave_transaction_retries= 0; +insert into t1 values(3, 3); +include/sync_slave_sql_with_master.inc +stop slave; +begin; +insert into t1 values(1, 1); +insert into t1 values(1, 1); +insert into t1 values(2, 2); +update t1 set b = 20 where a = 2; +update t1 set b = 200 where a = 2; +update t1 set b = 30 where a = 3; +update t1 set b = 300 where a = 3; +start slave; +rollback; +include/wait_for_slave_sql_to_stop.inc +set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert"; +include/start_slave.inc +include/sync_slave_sql_with_master.inc +select * from t1; +a b +1 1 +2 200 +3 300 +set @@global.slave_transaction_retries= @save.slave_transaction_retries; drop table t1; include/sync_slave_sql_with_master.inc include/rpl_end.inc diff --git a/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock.test b/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock.test index f8c11308b7e7..37f0e4494870 100644 --- a/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock.test +++ b/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock.test @@ -3,19 +3,30 @@ source include/have_mts_dependency_replication_stmt.inc; source include/have_debug.inc; call mtr.add_suppression("Commit order deadlock between"); +call mtr.add_suppression("Commit order deadlock found"); +call mtr.add_suppression("The slave coordinator and worker threads are stopped"); connection master; create table t1 (a int primary key, b int) engine = innodb; -insert into t1 values(3, 3); source include/sync_slave_sql_with_master.inc; connection slave; stop slave; # We'll take a fake gap lock after execution of every insert event set @@global.debug = "+d,dbug.dep_fake_gap_lock_on_insert"; +SELECT VARIABLE_VALUE INTO @orig_deadlocks FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks'; +start slave; + + +echo "Case 1: Commit order deadlock when waiting for commit order"; +connection master; +insert into t1 values(3, 3); +source include/sync_slave_sql_with_master.inc; -# Start a trx to block the 1st insert that primary will send to create a gap in -# commit order +connection slave; +stop slave; + +# Start a trx to block the 1st insert that primary will send to create a gap in commit order begin; insert into t1 values(1, 1); @@ -47,7 +58,7 @@ rollback; # Wait for 2nd transcation to be retried after receiving the commit order # deadlock signal -let $wait_condition= SELECT VARIABLE_VALUE = 1 +let $wait_condition= SELECT VARIABLE_VALUE - @orig_deadlocks = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks'; let $wait_timeout= 120; @@ -59,9 +70,136 @@ source include/sync_slave_sql_with_master.inc; connection slave; select * from t1; +connection master; +delete from t1; +source include/sync_slave_sql_with_master.inc; + + +echo "Case 2: Commit order deadlock when waiting for dependencies"; +connection master; +insert into t1 values(3, 3); +source include/sync_slave_sql_with_master.inc; + +connection slave; stop slave; -set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert"; + +# Start a trx to block the 1st insert that primary will send to create a gap in commit order +begin; +insert into t1 values(1, 1); + +connection master; +insert into t1 values(1, 1); # this will be blocked by trx above + +begin; +insert into t1 values(2, 2); # this will take the fake gap +update t1 set b = 10 where a = 1; # this will wait for above trx due to deps +commit; + +update t1 set b = 20 where a = 2; # this will wait for above trx due to deps +update t1 set b = 200 where a = 2; # this will wait for above trx due to deps +update t1 set b = 30 where a = 3; # this will wait for commit order +update t1 set b = 300 where a = 3; # this will wait for above trx due to deps + +connection slave1; start slave; +# Wait for the 5th trx to start waiting for commit ordering +let $wait_condition= SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST +WHERE STATE LIKE "%Waiting for preceding transaction to commit%"; +let $wait_timeout= 120; +source include/wait_condition.inc; + +# Wait for the 2nd, 3rd, 4th and 6th trx to wait for dependencies +let $wait_condition= SELECT COUNT(*) = 4 FROM INFORMATION_SCHEMA.PROCESSLIST +WHERE STATE LIKE "%Waiting for dependencies to be satisfied%"; +let $wait_timeout= 120; +source include/wait_condition.inc; + +connection slave; +# unblock 1st insert trx, it'll try to lock fake gap and fire the callback +rollback; + +# Wait for 2nd transcation to be retried after receiving the commit order +# deadlock signal +let $wait_condition= SELECT VARIABLE_VALUE - @orig_deadlocks = 2 +FROM INFORMATION_SCHEMA.GLOBAL_STATUS +WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks'; +let $wait_timeout= 120; +source include/wait_condition.inc; + +connection master; +source include/sync_slave_sql_with_master.inc; + +connection slave; +select * from t1; + +connection master; +delete from t1; +source include/sync_slave_sql_with_master.inc; + + +echo "Case 3: Secondary stops when retries are exhausted"; +connection slave; +set @save.slave_transaction_retries= @@global.slave_transaction_retries; +set @@global.slave_transaction_retries= 0; # trxs will not be retried + +connection master; +insert into t1 values(3, 3); +source include/sync_slave_sql_with_master.inc; + +connection slave; +stop slave; + +# Start a trx to block the 1st insert that primary will send to create a gap in commit order +begin; +insert into t1 values(1, 1); + +connection master; +insert into t1 values(1, 1); # this will be blocked by trx above +insert into t1 values(2, 2); # this will take the fake gap lock and wait for commit order +update t1 set b = 20 where a = 2; # this will wait for above trx due to deps +update t1 set b = 200 where a = 2; # this will wait for above trx due to deps +update t1 set b = 30 where a = 3; # this will wait for commit order +update t1 set b = 300 where a = 3; # this will wait for above trx due to deps + +connection slave1; +start slave; +# Wait for the 2nd and 5th trx to start waiting for commit ordering +let $wait_condition= SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST +WHERE STATE LIKE "%Waiting for preceding transaction to commit%"; +let $wait_timeout= 120; +source include/wait_condition.inc; + +# Wait for the 3rd, 4th and 6th trx to wait for dependencies +let $wait_condition= SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST +WHERE STATE LIKE "%Waiting for dependencies to be satisfied%"; +let $wait_timeout= 120; +source include/wait_condition.inc; + +connection slave; +# unblock 1st insert trx, it'll try to lock fake gap and fire the callback +rollback; + +# Wait for 2nd transcation to be retried after receiving the commit order +# deadlock signal +let $wait_condition= SELECT VARIABLE_VALUE - @orig_deadlocks = 3 +FROM INFORMATION_SCHEMA.GLOBAL_STATUS +WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks'; +let $wait_timeout= 120; +source include/wait_condition.inc; + +# secondary should stop +source include/wait_for_slave_sql_to_stop.inc; + +# remove fake gap lock and start again +set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert"; +source include/start_slave.inc; + +connection master; +source include/sync_slave_sql_with_master.inc; + +connection slave; +select * from t1; +set @@global.slave_transaction_retries= @save.slave_transaction_retries; connection master; drop table t1; diff --git a/sql/log_event_wrapper.cc b/sql/log_event_wrapper.cc index 2a619557b116..27c005f02f63 100644 --- a/sql/log_event_wrapper.cc +++ b/sql/log_event_wrapper.cc @@ -25,9 +25,14 @@ bool Log_event_wrapper::wait(Slave_worker *worker) &old_stage); while (!info_thd->killed && worker->running_status == Slave_worker::RUNNING && - dependencies) + dependencies && + !worker->found_order_commit_deadlock()) { - mysql_cond_wait(&cond, &mutex); + const auto timeout_nsec= + worker->c_rli->mts_dependency_cond_wait_timeout * 1000000; + struct timespec abstime; + set_timespec_nsec(abstime, timeout_nsec); + mysql_cond_timedwait(&cond, &mutex, &abstime); } info_thd->EXIT_COND(&old_stage); return !info_thd->killed && worker->running_status == Slave_worker::RUNNING; @@ -50,7 +55,11 @@ std::shared_ptr Log_event_wrapper::next() !next_ev) { ++worker->c_rli->next_event_waits; - mysql_cond_wait(&next_event_cond, &mutex); + const auto timeout_nsec= + worker->c_rli->mts_dependency_cond_wait_timeout * 1000000; + struct timespec abstime; + set_timespec_nsec(abstime, timeout_nsec); + mysql_cond_timedwait(&next_event_cond, &mutex, &abstime); } info_thd->EXIT_COND(&old_stage); return next_ev; diff --git a/sql/rpl_rli_pdb.cc b/sql/rpl_rli_pdb.cc index dfe68216aad6..dcaa04b87e1a 100644 --- a/sql/rpl_rli_pdb.cc +++ b/sql/rpl_rli_pdb.cc @@ -1368,6 +1368,29 @@ Slave_worker *get_least_occupied_worker(DYNAMIC_ARRAY *ws) DBUG_RETURN(worker); } +bool Slave_worker::worker_sleep(ulong secs) +{ + bool ret= false; + struct timespec abstime; + mysql_mutex_t *lock= &jobs_lock; + mysql_cond_t *cond= &jobs_cond; + + /* Absolute system time at which the sleep time expires. */ + set_timespec(abstime, secs); + + mysql_mutex_lock(lock); + info_thd->ENTER_COND(cond, lock, nullptr, nullptr); + + while (!(ret = info_thd->killed || running_status != RUNNING)) + { + int error= mysql_cond_timedwait(cond, lock, &abstime); + if (error == ETIMEDOUT || error == ETIME) break; + } + + info_thd->EXIT_COND(nullptr); + return ret; +} + /** Deallocation routine to cancel out few effects of @c map_db_to_worker(). @@ -1444,8 +1467,14 @@ void Slave_worker::slave_worker_ends_group(Log_event* ev, int &error, trans_retries++; error = 0; // Reset the error to avoid worker thread reporting an error. temporary_error = true; - reset_order_commit_deadlock(); cleanup_context(info_thd, 1); + // slightly more sleep when commit order deadlock is found so that the + // earlier trx can race forward + ulong sleep_sec= + found_order_commit_deadlock() ? trans_retries + 1 : trans_retries; + sleep_sec= min(sleep_sec, MAX_SLAVE_RETRY_PAUSE); + reset_order_commit_deadlock(); + worker_sleep(sleep_sec); DBUG_VOID_RETURN; } else if (running_status != STOP_ACCEPTED) @@ -2491,6 +2520,13 @@ int slave_worker_exec_job(Slave_worker *worker, Relay_log_info *rli) error= -1; goto err; } + + if (worker->found_order_commit_deadlock()) + { + error= ER_LOCK_DEADLOCK; + goto err; + } + worker->current_event_index++; ev= static_cast(job_item->data); diff --git a/sql/rpl_rli_pdb.h b/sql/rpl_rli_pdb.h index d23717632ae1..51dcf49548d8 100644 --- a/sql/rpl_rli_pdb.h +++ b/sql/rpl_rli_pdb.h @@ -458,6 +458,7 @@ class Slave_worker : public Relay_log_info int flush_info(bool force= FALSE); static size_t get_number_worker_fields(); void slave_worker_ends_group(Log_event*, int&, bool&); + bool worker_sleep(ulong); const char *get_master_log_name(); ulonglong get_master_log_pos() { return master_log_pos; }; ulonglong set_master_log_pos(ulong val) { return master_log_pos= val; }; diff --git a/sql/rpl_slave.h b/sql/rpl_slave.h index b2af88294b8f..bf31953f7fc1 100644 --- a/sql/rpl_slave.h +++ b/sql/rpl_slave.h @@ -57,6 +57,7 @@ typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL, SLAVE_THD_WORKER } SLAVE_THD_TYPE; #define MTS_WORKER_UNDEF ((ulong) -1) #define MTS_MAX_WORKERS 1024 +#define MAX_SLAVE_RETRY_PAUSE 5 /* When using tables to store the slave workers bitmaps, diff --git a/sql/rpl_slave_commit_order_manager.cc b/sql/rpl_slave_commit_order_manager.cc index 67be9461b713..7afc0de6872d 100644 --- a/sql/rpl_slave_commit_order_manager.cc +++ b/sql/rpl_slave_commit_order_manager.cc @@ -141,6 +141,11 @@ void Commit_order_manager::report_rollback(Slave_worker *worker) { DBUG_ENTER("Commit_order_manager::report_rollback"); + // Without resetting order commit deadlock flag wait_for_its_turn will return + // immediately without changing state and due to that unregister_trx() will be + // a no-op. + worker->reset_order_commit_deadlock(); + (void) wait_for_its_turn(worker, true); /* No worker can set m_rollback_trx unless it is its turn to commit */ m_rollback_trx.store(true); @@ -152,11 +157,16 @@ void Commit_order_manager::report_rollback(Slave_worker *worker) void Commit_order_manager::report_deadlock(Slave_worker *worker) { DBUG_ENTER("Commit_order_manager::report_deadlock"); - mysql_mutex_lock(&m_queue_mutex); + THD *thd= worker->info_thd; + mysql_mutex_lock(&thd->LOCK_thd_data); ++slave_commit_order_deadlocks; worker->report_order_commit_deadlock(); - mysql_cond_signal(&m_workers[worker->id].cond); - mysql_mutex_unlock(&m_queue_mutex); + /* Let's signal to any wait loop this worker is executing, this will also + * cover the wait loop in wait_for_its_turn() above. + * NOTE: we just want to send a signal without changing the killed flag + */ + thd->awake(thd->killed); + mysql_mutex_unlock(&thd->LOCK_thd_data); DBUG_VOID_RETURN; }