diff --git a/include/mysql/service_thd_engine_lock.h b/include/mysql/service_thd_engine_lock.h new file mode 100644 index 000000000000..af5a1950ef2c --- /dev/null +++ b/include/mysql/service_thd_engine_lock.h @@ -0,0 +1,44 @@ + +/* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef MYSQL_SERVICE_THD_ENGINE_LOCK_INCLUDED +#define MYSQL_SERVICE_THD_ENGINE_LOCK_INCLUDED + +/** + @file include/mysql/service_thd_engine_lock.h + This service provides functions for storage engines to report + lock related activities. + SYNOPSIS + thd_report_row_lock_wait() - call it when the engine finds that a transaction must + wait for another transaction to release a row lock + self The session which is waiting for the row lock to be released + wait_for The session which is holding the row lock. 
+*/ + +#ifdef __cplusplus +class THD; +#else +#define THD void +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + void thd_report_row_lock_wait(THD* self, THD *wait_for); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/mysql-test/include/have_mts_dependency_replication_stmt.inc b/mysql-test/include/have_mts_dependency_replication_stmt.inc new file mode 100644 index 000000000000..67cf0b613065 --- /dev/null +++ b/mysql-test/include/have_mts_dependency_replication_stmt.inc @@ -0,0 +1,4 @@ +if (`SELECT @@GLOBAL.mts_dependency_replication != 'STMT'`) +{ + skip Test needs to run with STMT dependency replication; +} diff --git a/mysql-test/suite/innodb/r/innodb_row_lock_wait_callback.result b/mysql-test/suite/innodb/r/innodb_row_lock_wait_callback.result new file mode 100644 index 000000000000..1c7827c76f7d --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_row_lock_wait_callback.result @@ -0,0 +1,21 @@ +create table t1 (a int primary key, b int unique, c int) engine = innodb; +insert into t1 values(1, 1, 1); +insert into t1 values(10, 10, 10); +set @@global.debug = "+d,report_row_lock_wait"; +select @@tx_isolation; +@@tx_isolation +REPEATABLE-READ +begin; +delete from t1 where a > 5; +begin; +insert into t1 values(6, 6, 6); +set debug_sync="now wait_for signal.reached"; +set debug_sync="now signal signal.done"; +set @@global.debug = "-d,report_row_lock_wait"; +rollback; +select * from t1; +a b c +1 1 1 +6 6 6 +10 10 10 +drop table t1; diff --git a/mysql-test/suite/innodb/t/innodb_row_lock_wait_callback.test b/mysql-test/suite/innodb/t/innodb_row_lock_wait_callback.test new file mode 100644 index 000000000000..16b803aa8d4b --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_row_lock_wait_callback.test @@ -0,0 +1,30 @@ +source include/have_debug.inc; +source include/have_innodb.inc; +source include/have_debug_sync.inc; + +create table t1 (a int primary key, b int unique, c int) engine = innodb; +insert into t1 values(1, 1, 1); +insert into t1 values(10, 10, 
10); +set @@global.debug = "+d,report_row_lock_wait"; + +select @@tx_isolation; + +connect (con1, localhost, root); +begin; +delete from t1 where a > 5; # this will take a gap lock + +connection default; +begin; +send insert into t1 values(6, 6, 6); # this will block on gap lock + +connection con1; +set debug_sync="now wait_for signal.reached"; # callback was fired +set debug_sync="now signal signal.done"; +set @@global.debug = "-d,report_row_lock_wait"; +rollback; +disconnect con1; + +connection default; +reap; +select * from t1; +drop table t1; diff --git a/mysql-test/suite/rpl_mts/r/rpl_mts_dependency_order_commits_deadlock.result b/mysql-test/suite/rpl_mts/r/rpl_mts_dependency_order_commits_deadlock.result new file mode 100644 index 000000000000..ab9566667189 --- /dev/null +++ b/mysql-test/suite/rpl_mts/r/rpl_mts_dependency_order_commits_deadlock.result @@ -0,0 +1,33 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. 
+[connection master] +call mtr.add_suppression("Commit order deadlock between"); +create table t1 (a int primary key, b int) engine = innodb; +insert into t1 values(3, 3); +include/sync_slave_sql_with_master.inc +stop slave; +set @@global.debug = "+d,dbug.dep_fake_gap_lock_on_insert"; +begin; +insert into t1 values(1, 1); +insert into t1 values(1, 1); +insert into t1 values(2, 2); +update t1 set b = 20 where a = 2; +update t1 set b = 200 where a = 2; +update t1 set b = 30 where a = 3; +update t1 set b = 300 where a = 3; +start slave; +rollback; +include/sync_slave_sql_with_master.inc +select * from t1; +a b +1 1 +2 200 +3 300 +stop slave; +set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert"; +start slave; +drop table t1; +include/sync_slave_sql_with_master.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock-slave.opt b/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock-slave.opt new file mode 100644 index 000000000000..b8a1806d9ca3 --- /dev/null +++ b/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock-slave.opt @@ -0,0 +1,2 @@ +--slave_check_before_image_consistency=ON +--slave_parallel_workers=8 diff --git a/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock.test b/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock.test new file mode 100644 index 000000000000..f8c11308b7e7 --- /dev/null +++ b/mysql-test/suite/rpl_mts/t/rpl_mts_dependency_order_commits_deadlock.test @@ -0,0 +1,70 @@ +source include/master-slave.inc; +source include/have_mts_dependency_replication_stmt.inc; +source include/have_debug.inc; + +call mtr.add_suppression("Commit order deadlock between"); + +connection master; +create table t1 (a int primary key, b int) engine = innodb; +insert into t1 values(3, 3); +source include/sync_slave_sql_with_master.inc; + +connection slave; +stop slave; +# We'll take a fake gap lock after execution of every insert event +set 
@@global.debug = "+d,dbug.dep_fake_gap_lock_on_insert"; + +# Start a trx to block the 1st insert that primary will send to create a gap in +# commit order +begin; +insert into t1 values(1, 1); + +connection master; +insert into t1 values(1, 1); # this will be blocked by trx above +insert into t1 values(2, 2); # this will take the fake gap lock and wait for commit order +update t1 set b = 20 where a = 2; # this will wait for above trx due to deps +update t1 set b = 200 where a = 2; # this will wait for above trx due to deps +update t1 set b = 30 where a = 3; # this will wait for commit order +update t1 set b = 300 where a = 3; # this will wait for above trx due to deps + +connection slave1; +start slave; +# Wait for the 2nd and 5th trx to start waiting for commit ordering +let $wait_condition= SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST +WHERE STATE LIKE "%Waiting for preceding transaction to commit%"; +let $wait_timeout= 120; +source include/wait_condition.inc; + +# Wait for the 3rd, 4th and 6th trx to wait for dependencies +let $wait_condition= SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST +WHERE STATE LIKE "%Waiting for dependencies to be satisfied%"; +let $wait_timeout= 120; +source include/wait_condition.inc; + +connection slave; +# unblock 1st insert trx, it'll try to lock fake gap and fire the callback +rollback; + +# Wait for 2nd transcation to be retried after receiving the commit order +# deadlock signal +let $wait_condition= SELECT VARIABLE_VALUE = 1 +FROM INFORMATION_SCHEMA.GLOBAL_STATUS +WHERE VARIABLE_NAME = 'Slave_commit_order_deadlocks'; +let $wait_timeout= 120; +source include/wait_condition.inc; + +connection master; +source include/sync_slave_sql_with_master.inc; + +connection slave; +select * from t1; + +stop slave; +set @@global.debug = "-d,dbug.dep_fake_gap_lock_on_insert"; +start slave; + +connection master; +drop table t1; +source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; diff --git 
a/mysql-test/suite/sys_vars/r/innodb_enable_row_lock_wait_callback_basic.result b/mysql-test/suite/sys_vars/r/innodb_enable_row_lock_wait_callback_basic.result new file mode 100644 index 000000000000..f580a56df0cb --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_enable_row_lock_wait_callback_basic.result @@ -0,0 +1,20 @@ +Valid values are 'ON' and 'OFF' +select @@global.innodb_enable_row_lock_wait_callback; +@@global.innodb_enable_row_lock_wait_callback +1 +select @@session.innodb_enable_row_lock_wait_callback; +ERROR HY000: Variable 'innodb_enable_row_lock_wait_callback' is a GLOBAL variable +show global variables like 'innodb_enable_row_lock_wait_callback'; +Variable_name Value +innodb_enable_row_lock_wait_callback ON +show session variables like 'innodb_enable_row_lock_wait_callback'; +Variable_name Value +innodb_enable_row_lock_wait_callback ON +select * from information_schema.global_variables where variable_name='innodb_enable_row_lock_wait_callback'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENABLE_ROW_LOCK_WAIT_CALLBACK ON +select * from information_schema.session_variables where variable_name='innodb_enable_row_lock_wait_callback'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENABLE_ROW_LOCK_WAIT_CALLBACK ON +set session innodb_enable_row_lock_wait_callback=1; +ERROR HY000: Variable 'innodb_enable_row_lock_wait_callback' is a GLOBAL variable and should be set with SET GLOBAL diff --git a/mysql-test/suite/sys_vars/t/innodb_enable_row_lock_wait_callback_basic.test b/mysql-test/suite/sys_vars/t/innodb_enable_row_lock_wait_callback_basic.test new file mode 100644 index 000000000000..5d5affc192e2 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_enable_row_lock_wait_callback_basic.test @@ -0,0 +1,17 @@ +--source include/have_innodb.inc + +# Can only be set from the command line. 
+# show the global and session values; + +--echo Valid values are 'ON' and 'OFF' +select @@global.innodb_enable_row_lock_wait_callback; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_enable_row_lock_wait_callback; +show global variables like 'innodb_enable_row_lock_wait_callback'; +show session variables like 'innodb_enable_row_lock_wait_callback'; +select * from information_schema.global_variables where variable_name='innodb_enable_row_lock_wait_callback'; +select * from information_schema.session_variables where variable_name='innodb_enable_row_lock_wait_callback'; + +--error 1229 +set session innodb_enable_row_lock_wait_callback=1; + diff --git a/sql/dependency_slave_worker.cc b/sql/dependency_slave_worker.cc index 8a8476bdf169..e8604f9d36a1 100644 --- a/sql/dependency_slave_worker.cc +++ b/sql/dependency_slave_worker.cc @@ -3,6 +3,7 @@ #include "dependency_slave_worker.h" #include "log_event_wrapper.h" #include "rpl_slave_commit_order_manager.h" +#include <../include/mysql/service_thd_engine_lock.h> bool append_item_to_jobs(slave_job_item *job_item, @@ -89,9 +90,35 @@ bool Dependency_slave_worker::execute_group() c_rli->dependency_worker_error= true; break; } + + DBUG_EXECUTE_IF("dbug.dep_fake_gap_lock_on_insert", { + if (!ev->is_end_event && c_rli->dep_fake_gap_lock_worker != this && + ev->raw_event() && ev->raw_event()->get_type_code() == WRITE_ROWS_EVENT) + { + if (!c_rli->dep_fake_gap_lock.try_lock()) + { + Slave_worker *holder= c_rli->dep_fake_gap_lock_worker; + thd_report_row_lock_wait(info_thd, holder ? holder->info_thd : nullptr); + c_rli->dep_fake_gap_lock.lock(); + c_rli->dep_fake_gap_lock_worker= this; + } + else + { + c_rli->dep_fake_gap_lock_worker= this; + } + } + };); + // case: restart trx if temporary error, see @slave_worker_ends_group if (unlikely(trans_retries && current_event_index == 0)) { + DBUG_EXECUTE_IF("dbug.dep_fake_gap_lock_on_insert", { + if (this == c_rli->dep_fake_gap_lock_worker) + { + c_rli->dep_fake_gap_lock_worker= nullptr; + c_rli->dep_fake_gap_lock.unlock(); + } + };); ev= begin_event; continue; 
} @@ -99,6 +126,14 @@ bool Dependency_slave_worker::execute_group() ev= ev->next(); } + DBUG_EXECUTE_IF("dbug.dep_fake_gap_lock_on_insert", { + if (this == c_rli->dep_fake_gap_lock_worker) + { + c_rli->dep_fake_gap_lock_worker= nullptr; + c_rli->dep_fake_gap_lock.unlock(); + } + };); + // case: in case of error rollback if commit ordering is enabled if (unlikely(err && commit_order_mngr)) { diff --git a/sql/log_event.cc b/sql/log_event.cc index a2a4c0a031fb..58802fc99f0d 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -930,7 +930,7 @@ void Log_event::prepare_dep(Relay_log_info *rli, get_type_str(), rli->get_group_master_log_name(), rli->get_group_master_log_pos()); - rli->dep_sync_group= true; + rli->set_dep_sync_group(true); ev->is_begin_event= true; } } @@ -3307,9 +3307,9 @@ bool Log_event::schedule_dep(Relay_log_info *rli) // we execute the trx in isolation if num_dbs is greater than one and if // OVER_MAX_DBS_IN_EVENT_MTS is set - rli->dep_sync_group= rli->dep_sync_group || - num_dbs == OVER_MAX_DBS_IN_EVENT_MTS || - (rli->dbs_accessed_by_group.size() > 1); + rli->set_dep_sync_group( + rli->dep_sync_group || num_dbs == OVER_MAX_DBS_IN_EVENT_MTS || + (rli->dbs_accessed_by_group.size() > 1)); } // when the number of events in a group is greater than max worker queue @@ -3320,7 +3320,7 @@ bool Log_event::schedule_dep(Relay_log_info *rli) if (unlikely( rli->num_events_in_current_group >= rli->mts_slave_worker_queue_len_max)) { - rli->dep_sync_group= true; + rli->set_dep_sync_group(true); } if (unlikely(rli->dep_sync_group)) @@ -3397,7 +3397,7 @@ bool Log_event::schedule_dep(Relay_log_info *rli) { if (wait_for_workers_to_finish(rli) == -1) DBUG_RETURN(false); - rli->dep_sync_group= false; + rli->set_dep_sync_group(false); } #ifndef DBUG_OFF @@ -3459,11 +3459,12 @@ Log_event::handle_terminal_dep_event(Relay_log_info *rli, if (ev->is_end_event) { - // Populate key->last trx penultimate event in the key lookup + // Populate key->last trx penultimate/end event 
in the key lookup // - // NOTE: We store the end event for a single event trx + // NOTE: We store the end event in STMT mode and penultimate event in TBL + // mode. We always store end event for single event trxs. // - // Why store penultimate event instead of end event? + // Why store penultimate event instead of end event in TBL mode? // This is to improve perf for TBL mode. When we depend on the end event of // trx we basically wait for it to commit. In TBL mode we might end up // waiting for a trx that we don't have any row conflits with. That's why we @@ -3472,13 +3473,19 @@ Log_event::handle_terminal_dep_event(Relay_log_info *rli, // If there are conflicts we expect row locks to kick in and in that case // we'll automatically wait for the conflicting trx to commit. // + // Note that this is not required for STMT mode since we'll already be + // depending on trxs based on actual row conflicts. So we depend on the end + // event directly. + // // Why penultimate though? Why not just depend on the conflicting row event? // This is done to support trx retries. On secondaries we're allowed to // retry trx on temprary errors like lock wait timeouts. Depending on // penultimate event allows the trx we depend on to retry execution, // otherwise we'll end up taking the row lock as soon as the row we depend // on is executed which can create deadlock if commit ordering is enabled. - auto to_add= rli->prev_event ? rli->prev_event : ev; + auto to_add= + rli->mts_dependency_replication == DEP_RPL_TABLE && rli->prev_event ? 
+ rli->prev_event : ev; mysql_mutex_lock(&rli->dep_key_lookup_mutex); if (!to_add->finalized()) { @@ -5313,7 +5320,7 @@ void Query_log_event::prepare_dep(Relay_log_info *rli, } } - rli->dep_sync_group= true; + rli->set_dep_sync_group(true); } DBUG_VOID_RETURN; @@ -12654,7 +12661,7 @@ void Rows_log_event::prepare_dep(Relay_log_info *rli, m_keylist.size() + rli->keys_accessed_by_group.size() > rli->mts_dependency_max_keys)) { - rli->dep_sync_group= true; + rli->set_dep_sync_group(true); m_keylist.clear(); rli->keys_accessed_by_group.clear(); } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e86cf6d700e4..3dfe94e5194a 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -582,6 +582,7 @@ ulonglong opt_slave_dump_thread_wait_sleep_usec; my_bool rpl_wait_for_semi_sync_ack; std::atomic slave_lag_sla_misses{0}; ulonglong opt_slave_lag_sla_seconds; +std::atomic slave_commit_order_deadlocks{0}; my_bool opt_safe_user_create = 0; my_bool opt_show_slave_auth_info; my_bool opt_log_slave_updates= 0; @@ -10410,6 +10411,19 @@ static int show_slave_dependency_next_waits(THD *thd, SHOW_VAR *var, char *buff) return 0; } +static int show_slave_dependency_num_syncs(THD *thd, SHOW_VAR *var, char *buff) +{ + if (active_mi && active_mi->rli && active_mi->rli->mts_dependency_replication) + { + var->type= SHOW_LONGLONG; + var->value= buff; + *((ulonglong *)buff)= (ulonglong) active_mi->rli->num_syncs.load(); + } + else + var->type= SHOW_UNDEF; + return 0; +} + static int show_slave_before_image_inconsistencies(THD *thd, SHOW_VAR *var, char *buff) { @@ -10441,6 +10455,19 @@ static int show_slave_retried_trans(THD *thd, SHOW_VAR *var, char *buff) return 0; } +static int show_slave_commit_order_deadlocks(THD *thd, SHOW_VAR *var, char *buf) +{ + if (active_mi && active_mi->rli) + { + var->type= SHOW_LONGLONG; + var->value= buf; + *((longlong *)buf)= slave_commit_order_deadlocks.load(); + } + else + var->type= SHOW_UNDEF; + return 0; +} + static int show_slave_received_heartbeats(THD *thd, SHOW_VAR 
*var, char *buff) { if (active_mi) @@ -11299,6 +11326,7 @@ SHOW_VAR status_vars[]= { {"Slave_open_temp_tables", (char*) &slave_open_temp_tables, SHOW_INT}, #ifdef HAVE_REPLICATION {"Slave_retried_transactions",(char*) &show_slave_retried_trans, SHOW_FUNC}, + {"Slave_commit_order_deadlocks",(char*) &show_slave_commit_order_deadlocks, SHOW_FUNC}, {"Slave_heartbeat_period", (char*) &show_heartbeat_period, SHOW_FUNC}, {"Slave_received_heartbeats",(char*) &show_slave_received_heartbeats, SHOW_FUNC}, {"Slave_lag_sla_misses", (char*) &show_slave_lag_sla_misses, SHOW_FUNC}, @@ -11311,6 +11339,7 @@ SHOW_VAR status_vars[]= { {"Slave_dependency_in_flight", (char*) &show_slave_dependency_in_flight, SHOW_FUNC}, {"Slave_dependency_begin_waits", (char*) &show_slave_dependency_begin_waits, SHOW_FUNC}, {"Slave_dependency_next_waits", (char*) &show_slave_dependency_next_waits, SHOW_FUNC}, + {"Slave_dependency_num_syncs", (char*) &show_slave_dependency_num_syncs, SHOW_FUNC}, {"Slave_before_image_inconsistencies", (char*) &show_slave_before_image_inconsistencies, SHOW_FUNC}, {"Slave_high_priority_ddl_executed", (char *)&slave_high_priority_ddl_executed, SHOW_LONGLONG}, {"Slave_high_priority_ddl_killed_connections", (char *)&slave_high_priority_ddl_killed_connections, SHOW_LONGLONG}, diff --git a/sql/mysqld.h b/sql/mysqld.h index efd14561ea98..aad5d6add383 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -338,6 +338,7 @@ extern ulonglong opt_slave_dump_thread_wait_sleep_usec; extern my_bool rpl_wait_for_semi_sync_ack; extern std::atomic slave_lag_sla_misses; extern ulonglong opt_slave_lag_sla_seconds; +extern std::atomic slave_commit_order_deadlocks; extern ulong slave_exec_mode_options; extern ulong slave_use_idempotent_for_recovery_options; extern ulong slave_run_triggers_for_rbr; diff --git a/sql/rpl_reporting.cc b/sql/rpl_reporting.cc index 0e7421ff206a..a045eb91ce18 100644 --- a/sql/rpl_reporting.cc +++ b/sql/rpl_reporting.cc @@ -63,7 +63,7 @@ int 
Slave_reporting_capability::has_temporary_error(THD *thd, error or not. This is currently the case for Incident_log_event, which sets no message. */ - if (is_fatal_error || !thd->is_error()) + if (is_fatal_error || (!thd->is_error() && error_arg == 0)) DBUG_RETURN(0); error= (error_arg == 0)? thd->get_stmt_da()->sql_errno() : error_arg; diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 596b98c87b85..0a90fa086fee 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -1016,6 +1016,13 @@ class Relay_log_info : public Rpl_info { commit_order_mngr= mngr; } + + bool found_order_commit_deadlock() const + { + return m_order_commit_deadlock.load(); + } + void report_order_commit_deadlock() { m_order_commit_deadlock= true; } + void reset_order_commit_deadlock() { m_order_commit_deadlock= false; } #endif virtual bool get_skip_unique_check() @@ -1050,6 +1057,7 @@ class Relay_log_info : public Rpl_info corrdinator's order manager. */ Commit_order_manager* commit_order_mngr; + std::atomic m_order_commit_deadlock{false}; /** Delay slave SQL thread by this amount, compared to master (in @@ -1185,6 +1193,19 @@ class Relay_log_info : public Rpl_info // Statistics std::atomic begin_event_waits{0}; std::atomic next_event_waits{0}; + std::atomic num_syncs{0}; + +#ifndef DBUG_OFF + std::mutex dep_fake_gap_lock; + Slave_worker* dep_fake_gap_lock_worker = nullptr; +#endif + + void set_dep_sync_group(bool val) + { + dep_sync_group= val; + if (dep_sync_group) + ++num_syncs; + } bool enqueue_dep( const std::shared_ptr &begin_event) diff --git a/sql/rpl_rli_pdb.cc b/sql/rpl_rli_pdb.cc index 314c0105d93c..dfe68216aad6 100644 --- a/sql/rpl_rli_pdb.cc +++ b/sql/rpl_rli_pdb.cc @@ -269,6 +269,7 @@ int Slave_worker::init_worker(Relay_log_info * rli, ulong i) Slave_job_item empty= {NULL}; c_rli= rli; + reset_order_commit_deadlock(); set_commit_order_manager(c_rli->get_commit_order_manager()); if (rli_init_info(false) || @@ -1430,7 +1431,8 @@ void Slave_worker::slave_worker_ends_group(Log_event* ev, int 
&error, else { bool silent = false; - if (has_temporary_error(info_thd, 0, &silent) && + int err = found_order_commit_deadlock() ? ER_LOCK_DEADLOCK : 0; + if (has_temporary_error(info_thd, err, &silent) && trans_retries < slave_trans_retries) { if (last_current_event_index < current_event_index) @@ -1442,6 +1444,7 @@ void Slave_worker::slave_worker_ends_group(Log_event* ev, int &error, trans_retries++; error = 0; // Reset the error to avoid worker thread reporting an error. temporary_error = true; + reset_order_commit_deadlock(); cleanup_context(info_thd, 1); DBUG_VOID_RETURN; } diff --git a/sql/rpl_rli_pdb.h b/sql/rpl_rli_pdb.h index 5e8d78d1627e..d23717632ae1 100644 --- a/sql/rpl_rli_pdb.h +++ b/sql/rpl_rli_pdb.h @@ -518,6 +518,12 @@ class Slave_worker : public Relay_log_info return c_rli ? c_rli->get_rbr_column_type_mismatch_whitelist() : nullptr; } + ulonglong sequence_number() + { + Slave_job_group* ptr_g= c_rli->gaq->get_job_group(gaq_index); + return ptr_g->total_seqno; + } + protected: virtual void do_report(loglevel level, int err_code, diff --git a/sql/rpl_slave_commit_order_manager.cc b/sql/rpl_slave_commit_order_manager.cc index 7c0db1f8e91e..67be9461b713 100644 --- a/sql/rpl_slave_commit_order_manager.cc +++ b/sql/rpl_slave_commit_order_manager.cc @@ -15,6 +15,8 @@ #include "rpl_slave_commit_order_manager.h" +#include "rpl_mi.h" +#include "rpl_rli.h" #include "rpl_rli_pdb.h" // Slave_worker #include "mysqld.h" // key_commit_order_manager_mutex .. 
@@ -83,7 +85,14 @@ bool Commit_order_manager::wait_for_its_turn(Slave_worker *worker, &old_stage); while (queue_front(db) != worker->id) + { + if (unlikely(worker->found_order_commit_deadlock())) + { + thd->EXIT_COND(&old_stage); + DBUG_RETURN(true); + } mysql_cond_wait(cond, &m_queue_mutex); + } m_workers[worker->id].status= OCS_SIGNAL; @@ -140,3 +149,14 @@ void Commit_order_manager::report_rollback(Slave_worker *worker) DBUG_VOID_RETURN; } +void Commit_order_manager::report_deadlock(Slave_worker *worker) +{ + DBUG_ENTER("Commit_order_manager::report_deadlock"); + mysql_mutex_lock(&m_queue_mutex); + ++slave_commit_order_deadlocks; + worker->report_order_commit_deadlock(); + mysql_cond_signal(&m_workers[worker->id].cond); + mysql_mutex_unlock(&m_queue_mutex); + DBUG_VOID_RETURN; +} + diff --git a/sql/rpl_slave_commit_order_manager.h b/sql/rpl_slave_commit_order_manager.h index e92df40d806c..c19b1a0355e2 100644 --- a/sql/rpl_slave_commit_order_manager.h +++ b/sql/rpl_slave_commit_order_manager.h @@ -18,6 +18,7 @@ #include "my_global.h" #include "rpl_rli_pdb.h" // get_thd_worker +#include "rpl_mi.h" class Commit_order_manager @@ -75,6 +76,7 @@ class Commit_order_manager } void report_deadlock(Slave_worker *worker); + private: enum order_commit_status { @@ -152,4 +154,91 @@ class Commit_order_manager } }; + +#ifdef HAVE_REPLICATION +/** + Check if order commit deadlock happens. + Worker1(trx1) Worker2(trx2) + ============= ============= + ... ... + Engine acquires lock A + ... Engine acquires lock A(waiting for + trx1 to release it. + COMMIT(waiting for + trx2 to commit first). + Currently, there are two corner cases can cause the deadlock. 
+ - Case 1 + CREATE TABLE t1(c1 INT PRIMARY KEY, c2 INT, INDEX(c2)) ENGINE = InnoDB; + INSERT INTO t1 VALUES(1, NULL),(2, 2), (3, NULL), (4, 4), (5, NULL), (6, 6) + INSERT INTO t1 VALUES(7, NULL); + DELETE FROM t1 WHERE c2 <= 3; + - Case 2 + ANALYZE TABLE t1; + INSERT INTO t2 SELECT * FROM mysql.innodb_table_stats + Since this is not a real lock deadlock, it cannot be handled by the engine. + The slave needs to handle it separately. + Worker1(trx1) Worker2(trx2) + ============= ============= + ... ... + Engine acquires lock A + ... Engine acquires lock A. + 1. found trx1 is holding the lock. + 2. report the lock wait to server code by + calling thd_report_row_lock_wait(). + Then this function is called to check + if it causes an order commit deadlock. + Report the deadlock to worker1. + 3. waiting for trx1 to release it. + COMMIT(waiting for + trx2 to commit first). + Found the deadlock flag set + by worker2 and then + return with ER_LOCK_DEADLOCK. + Rollback the transaction + Get lock A and go ahead. + ... + Retry the transaction + To conclude, transaction A, which is waiting for transaction B to commit + and is holding a lock which is required by transaction B, will be rolled back + and retried later. + @param[in] thd_self The THD object of the current session, which is acquiring + a lock held by another session. + @param[in] thd_wait_for The THD object of a session which is holding + a lock being acquired by the current session. 
+*/ +inline void commit_order_manager_check_deadlock(THD* thd_self, + THD *thd_wait_for) +{ + DBUG_ENTER("commit_order_manager_check_deadlock"); + + Slave_worker *self_w= get_thd_worker(thd_self); + Slave_worker *wait_for_w= get_thd_worker(thd_wait_for); + Commit_order_manager *mngr= self_w->get_commit_order_manager(); + + /* Check if both workers are working for the same channel */ + if (mngr != NULL && self_w->c_rli == wait_for_w->c_rli && + wait_for_w->sequence_number() > self_w->sequence_number()) + { + DBUG_ASSERT(self_w->c_rli->mts_dependency_replication != DEP_RPL_TABLE); + if (self_w->c_rli && + self_w->c_rli->mts_dependency_replication == DEP_RPL_TABLE) + { + // NO_LINT_DEBUG + sql_print_error("InnoDB lock wait callback fired in TBL mode!"); + } + + DBUG_PRINT("info", ("Found slave order commit deadlock")); + // NO_LINT_DEBUG + sql_print_warning( + "Commit order deadlock between: self = %u (worker id %lu), waiting for " + "= %u (worker id %lu). Will rollback and retry trx on thread %u.", + thd_self->thread_id(), self_w->id, thd_wait_for->thread_id(), + wait_for_w->id, thd_wait_for->thread_id()); + mngr->report_deadlock(wait_for_w); + } + DBUG_VOID_RETURN; +} + +#endif //HAVE_REPLICATION + #endif /*RPL_SLAVE_COMMIT_ORDER_MANAGER*/ diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 17a3f264f2df..223ea5856fd2 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -75,6 +75,10 @@ #endif #include #include +#ifdef HAVE_REPLICATION +#include "rpl_rli_pdb.h" // Slave_worker +#include "rpl_slave_commit_order_manager.h" +#endif #ifdef TARGET_OS_LINUX #include @@ -6895,3 +6899,34 @@ void thd_add_response_attr( tracker->mark_as_changed(thd, &key, &value); } } + +#ifndef EMBEDDED_LIBRARY +/** + Interface for Engine to report row lock conflict. + The caller should guarantee thd_wait_for does not be freed, when it is + called. 
+*/ +extern "C" +void thd_report_row_lock_wait(THD* self, THD *wait_for) +{ + DBUG_ENTER("thd_report_row_lock_wait"); + + DBUG_EXECUTE_IF("report_row_lock_wait", { + const char act[]= "now signal signal.reached wait_for signal.done"; + DBUG_ASSERT(opt_debug_sync_timeout > 0); + DBUG_ASSERT(!debug_sync_set_action(self, STRING_WITH_LEN(act))); + };); + + if (unlikely(self != NULL && wait_for != NULL && + is_mts_worker(self) && is_mts_worker(wait_for))) + commit_order_manager_check_deadlock(self, wait_for); + + DBUG_VOID_RETURN; +} +#else +extern "C" +void thd_report_row_lock_wait(THD* self, THD *thd_wait_for) +{ + return; +} +#endif diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 367eeb612b85..1591568f8703 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -197,6 +197,7 @@ static char* innobase_log_arch_dir = NULL; #endif /* UNIV_LOG_ARCHIVE */ static my_bool innobase_use_checksums = TRUE; static my_bool innobase_locks_unsafe_for_binlog = FALSE; +static my_bool innobase_enable_row_lock_wait_callback = FALSE; static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_large_prefix = FALSE; @@ -4181,6 +4182,8 @@ innobase_init( row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout; srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog; + srv_enable_row_lock_wait_callback = + (ibool) innobase_enable_row_lock_wait_callback; if (innobase_locks_unsafe_for_binlog) { ut_print_timestamp(stderr); fprintf(stderr, @@ -17986,6 +17989,11 @@ static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binl "Force InnoDB to not use next-key locking, to use only row-level locking.", NULL, NULL, FALSE); +static MYSQL_SYSVAR_BOOL(enable_row_lock_wait_callback, innobase_enable_row_lock_wait_callback, + PLUGIN_VAR_OPCMDARG, + "Enables a callback which fires when locks are enqueued 
(thd_report_row_lock_wait)", + NULL, NULL, TRUE); + #ifdef UNIV_LOG_ARCHIVE static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -19046,6 +19054,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(large_prefix), MYSQL_SYSVAR(force_load_corrupted), MYSQL_SYSVAR(locks_unsafe_for_binlog), + MYSQL_SYSVAR(enable_row_lock_wait_callback), MYSQL_SYSVAR(lock_wait_timeout), #ifdef UNIV_LOG_ARCHIVE MYSQL_SYSVAR(log_arch_dir), diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 579104f39a8b..c8b121447c24 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -284,6 +284,9 @@ extern ulint srv_max_file_format_at_startup; on duplicate key checking and foreign key checking */ extern ibool srv_locks_unsafe_for_binlog; +/** Should we call thd_report_row_lock_wait() when a lock request is queued? */ +extern ibool srv_enable_row_lock_wait_callback; + /** Sort buffer size in index creation */ extern ulong srv_sort_buf_size; /** Maximum modification log file size for online index creation */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 51aac09b902c..a0759c2d2018 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -24,6 +24,7 @@ Created 5/7/1996 Heikki Tuuri *******************************************************/ #define LOCK_MODULE_IMPLEMENTATION +#include #include "lock0lock.h" #include "lock0priv.h" @@ -2313,7 +2314,7 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ - } else if (lock_rec_other_has_conflicting( + } else if (const lock_t* wait_for = lock_rec_other_has_conflicting( static_cast(mode), block, heap_no, trx)) { @@ -2326,9 +2327,13 @@ lock_rec_lock_slow( err = DB_FAILED_TO_LOCK_REC_NOWAIT; else if (x_mode == LOCK_X_SKIP_LOCKED) err = DB_FAILED_TO_LOCK_REC_SKIP_LOCKED; - else + else { err = 
lock_rec_enqueue_waiting( mode, block, heap_no, index, thr); + if (err == DB_LOCK_WAIT && likely(srv_enable_row_lock_wait_callback)) + thd_report_row_lock_wait(current_thd, + wait_for->trx->mysql_thd); + } } else if (!impl) { /* Set the requested lock on the record, note that @@ -6183,7 +6188,7 @@ lock_rec_insert_check_and_lock( had to wait for their insert. Both had waiting gap type lock requests on the successor, which produced an unnecessary deadlock. */ - if (lock_rec_other_has_conflicting( + if (const lock_t* wait_for = lock_rec_other_has_conflicting( static_cast( LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION), block, next_rec_heap_no, trx)) { @@ -6194,6 +6199,9 @@ lock_rec_insert_check_and_lock( err = lock_rec_enqueue_waiting( LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, block, next_rec_heap_no, index, thr); + if (err == DB_LOCK_WAIT && likely(srv_enable_row_lock_wait_callback)) + thd_report_row_lock_wait(current_thd, + wait_for->trx->mysql_thd); trx_mutex_exit(trx); } else { diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 724164e49f59..4594d5495fdb 100755 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -140,6 +140,8 @@ UNIV_INTERN my_bool high_level_read_only; /** Place locks to records only i.e. do not use next-key locking except on duplicate key checking and foreign key checking */ UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; +/** Should we call thd_report_row_lock_wait() when a lock request is queued? 
*/ +UNIV_INTERN ibool srv_enable_row_lock_wait_callback = FALSE; /** Sort buffer size in index creation */ UNIV_INTERN ulong srv_sort_buf_size = 1048576; /** Maximum modification log file size for online index creation */ diff --git a/xtrabackup/src/xtrabackup.cc b/xtrabackup/src/xtrabackup.cc index c2255aa34735..acae738d7a90 100755 --- a/xtrabackup/src/xtrabackup.cc +++ b/xtrabackup/src/xtrabackup.cc @@ -443,6 +443,7 @@ my_bool innobase_use_checksums = TRUE; my_bool innobase_use_large_pages = FALSE; my_bool innobase_file_per_table = FALSE; my_bool innobase_locks_unsafe_for_binlog = FALSE; +my_bool innobase_enable_row_lock_wait_callback = FALSE; my_bool innobase_rollback_on_timeout = FALSE; my_bool innobase_create_status_file = FALSE; my_bool innobase_adaptive_hash_index = TRUE;