From 7987b1691cb30c34c7ef3b0fa2c3f9924f99acb9 Mon Sep 17 00:00:00 2001 From: Mark Hannum Date: Fri, 22 Nov 2024 06:07:05 -0500 Subject: [PATCH] Fix reverse-conn / tranlog Signed-off-by: Mark Hannum --- bdb/phys_rep_lsn.c | 5 +- berkdb/rep/rep_record.c | 6 +- db/db_tunables.c | 2 +- db/db_tunables.h | 6 +- db/phys_rep.c | 16 +++-- sqlite/ext/comdb2/tranlog.c | 2 + tests/phys_rep_tiered.test/lrl.options | 1 + tests/phys_rep_tiered.test/runit | 62 ++++++++++++++++++- tests/tunables.test/t00_all_tunables.expected | 4 +- 9 files changed, 86 insertions(+), 18 deletions(-) diff --git a/bdb/phys_rep_lsn.c b/bdb/phys_rep_lsn.c index 10302ab158..2a9b03c077 100644 --- a/bdb/phys_rep_lsn.c +++ b/bdb/phys_rep_lsn.c @@ -506,7 +506,8 @@ LOG_INFO find_match_lsn(void *in_bdb_state, cdb2_hndl_tp *repl_db, LOG_INFO star physrep_logmsg(LOGMSG_FATAL, "Require elect-highest-committed-gen on source- exiting\n"); exit(1); } - physrep_logmsg(LOGMSG_ERROR, "Require elect-highest-committed-gen source\n"); + physrep_logmsg(LOGMSG_ERROR, "Require elect-highest-committed-gen source {%d:%d}\n", info.file, + info.offset); } else { info.gen = *gen; } @@ -564,7 +565,7 @@ int physrep_bdb_wait_for_seqnum(bdb_state_type *bdb_state, DB_LSN *lsn, void *da return 0; } - seqnum_type seqnum; + seqnum_type seqnum = {0}; seqnum.lsn.file = lsn->file; seqnum.lsn.offset = lsn->offset; // seqnum.issue_time = ? diff --git a/berkdb/rep/rep_record.c b/berkdb/rep/rep_record.c index 286df2fd8f..5060d25975 100644 --- a/berkdb/rep/rep_record.c +++ b/berkdb/rep/rep_record.c @@ -8408,9 +8408,8 @@ __rep_verify_match(dbenv, rp, savetime, online) MUTEX_UNLOCK(dbenv, db_rep->rep_mutexp); } - /* Masters must run full recovery */ int i_am_master = F_ISSET(rep, REP_F_MASTER); - if (gbl_rep_skip_recovery && !i_am_master && log_compare(&dbenv->prev_commit_lsn, &rp->lsn) <= 0) { + if (gbl_rep_skip_recovery && !i_am_master && dbenv->prev_commit_lsn.file > 0 && log_compare(&dbenv->prev_commit_lsn, &rp->lsn) <= 0) { DB_TXNREGION *region; region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary; dbenv->wrlock_recovery_lock(dbenv, __func__, __LINE__); @@ -8467,7 +8466,7 @@ __rep_verify_match(dbenv, rp, savetime, online) /* Recovery cleanup */ if (dbenv->rep_recovery_cleanup) - dbenv->rep_recovery_cleanup(dbenv, &trunclsn, i_am_master /* 0 */); + dbenv->rep_recovery_cleanup(dbenv, &trunclsn, i_am_master); dbenv->unlock_recovery_lock(dbenv, __func__, __LINE__); @@ -8485,6 +8484,7 @@ __rep_verify_match(dbenv, rp, savetime, online) logmsg(LOGMSG_WARN, "skip-recovery cannot skip, prev-commit=[%d:%d] trunc-lsn=[%d:%d]\n", dbenv->prev_commit_lsn.file, dbenv->prev_commit_lsn.offset, rp->lsn.file, rp->lsn.offset); } + ZERO_LSN(dbenv->prev_commit_lsn); if ((ret = __rep_dorecovery(dbenv, &rp->lsn, &trunclsn, online, &undid_schema_change)) != 0) { Pthread_mutex_unlock(&apply_lk); diff --git a/db/db_tunables.c b/db/db_tunables.c index 948b13a3c8..1b69c6dd35 100644 --- a/db/db_tunables.c +++ b/db/db_tunables.c @@ -451,7 +451,7 @@ extern int gbl_physrep_keepalive_freq_sec; extern int gbl_physrep_max_candidates; extern int gbl_physrep_max_pending_replicants; extern int gbl_physrep_reconnect_penalty; -extern int gbl_physrep_register_interval; +extern int gbl_physrep_reconnect_interval; extern int gbl_physrep_shuffle_host_list; extern int gbl_physrep_ignore_queues; extern int gbl_physrep_max_rollback; diff --git a/db/db_tunables.h b/db/db_tunables.h index 5665a5b437..9c86a192de 100644 --- a/db/db_tunables.h +++ b/db/db_tunables.h @@ -1873,7 +1873,7 @@ REGISTER_TUNABLE("match_on_ckp", /* physical replication */ REGISTER_TUNABLE("blocking_physrep", - "Physical replicant blocks on select. (Default: false)", + "Physical replicant blocks on select. (Default: off)", TUNABLE_BOOLEAN, &gbl_blocking_physrep, 0, NULL, NULL, NULL, NULL); REGISTER_TUNABLE("tranlog_default_timeout", "Default timeout for tranlog queries. (Default: 30)", TUNABLE_INTEGER, @@ -1932,8 +1932,8 @@ REGISTER_TUNABLE("physrep_reconnect_penalty", "Physrep wait seconds before retry to the same node. (Default: 5)", TUNABLE_INTEGER, &gbl_physrep_reconnect_penalty, 0, NULL, NULL, NULL, NULL); -REGISTER_TUNABLE("physrep_register_interval", "Interval for physical replicant re-registration. (Default: 600)", - TUNABLE_INTEGER, &gbl_physrep_register_interval, 0, NULL, NULL, NULL, NULL); +REGISTER_TUNABLE("physrep_reconnect_interval", "Reconnect interval for physical replicants (Default: 600)", + TUNABLE_INTEGER, &gbl_physrep_reconnect_interval, 0, NULL, NULL, NULL, NULL); REGISTER_TUNABLE("physrep_shuffle_host_list", "Shuffle the host list returned by register_replicant() " "before connecting to the hosts. (Default: OFF)", diff --git a/db/phys_rep.c b/db/phys_rep.c index 69ed44f68e..83288a4482 100644 --- a/db/phys_rep.c +++ b/db/phys_rep.c @@ -54,7 +54,7 @@ typedef struct DB_Connection { } while (0) int gbl_physrep_debug = 0; -int gbl_physrep_register_interval = 600; // force re-registration every 10 mins +int gbl_physrep_reconnect_interval = 600; // force re-registration every 10 mins int gbl_physrep_reconnect_penalty = 0; int gbl_blocking_physrep = 0; int gbl_physrep_fanout = 8; @@ -102,6 +102,7 @@ static size_t repl_dbs_sz; reverse_conn_handle_tp *rev_conn_hndl = NULL; static int last_register; +static int repl_db_connect_time; static int add_replicant_host(char *hostname, char *dbname); static void dump_replicant_hosts(void); @@ -191,6 +192,8 @@ static void set_repl_db_connected(char *dbname, char *host) gbl_physrep_repl_name = dbname; gbl_physrep_repl_host = host; repl_db_connected = 1; + repl_db_connect_time = comdb2_time_epoch(); + physrep_logmsg(LOGMSG_USER, "Physical replicant is now replicating from %s@%s\n", dbname, host); } static void set_repl_db_disconnected() @@ -1271,7 +1274,7 @@ static void *physrep_worker(void *args) } if (repl_db_connected && - (force_registration() || (((now = time(NULL)) - last_register) > gbl_physrep_register_interval))) { + (force_registration() || (((now = time(NULL)) - repl_db_connect_time) > gbl_physrep_reconnect_interval))) { close_repl_connection(repl_db_cnct, repl_db, __func__, __LINE__); if (gbl_physrep_debug) { physrep_logmsg(LOGMSG_USER, "%s:%d: Forcing re-registration\n", __func__, __LINE__); @@ -1394,9 +1397,6 @@ static void *physrep_worker(void *args) cdb2_close(repl_metadb); } - physrep_logmsg(LOGMSG_USER, "Physical replicant is now replicating from %s@%s\n", - repl_db_cnct->dbname, repl_db_cnct->hostname); - if (do_truncate && repl_db) { info = get_last_lsn(thedb->bdb_env); prev_info = handle_truncation(repl_db, info); @@ -1481,6 +1481,12 @@ static void *physrep_worker(void *args) goto repl_loop; } + /* Check reconnect */ + if (force_registration() || ((now = time(NULL)) - repl_db_connect_time) > gbl_physrep_reconnect_interval) { + close_repl_connection(repl_db_cnct, repl_db, __func__, __LINE__); + goto repl_loop; + } + int update_regck = gbl_physrep_update_registry_interval; if (repl_db_connected && update_regck > 0 && (comdb2_time_epoch() - last_update_registry) > update_regck) { update_registry_periodic(repl_db_cnct->dbname, repl_db_cnct->hostname); diff --git a/sqlite/ext/comdb2/tranlog.c b/sqlite/ext/comdb2/tranlog.c index 5742c3a50b..ca11a766fe 100644 --- a/sqlite/ext/comdb2/tranlog.c +++ b/sqlite/ext/comdb2/tranlog.c @@ -588,6 +588,8 @@ static int tranlogColumn( if (pCur->data.data) LOGCOPY_32(&rectype, pCur->data.data); + normalize_rectype(&rectype); + if (rectype == DB___txn_regop_gen){ generation = get_generation_from_regop_gen_record(pCur->data.data); } diff --git a/tests/phys_rep_tiered.test/lrl.options b/tests/phys_rep_tiered.test/lrl.options index 2e3b01d54a..02c8303e6e 100644 --- a/tests/phys_rep_tiered.test/lrl.options +++ b/tests/phys_rep_tiered.test/lrl.options @@ -10,3 +10,4 @@ ctrace_dbdir 1 allow_lua_print 1 reverse_hosts_v2 1 physrep_keepalive_v2 1 +rep_skip_recovery 1 diff --git a/tests/phys_rep_tiered.test/runit b/tests/phys_rep_tiered.test/runit index 4f9381f16e..c9e0fae662 100755 --- a/tests/phys_rep_tiered.test/runit +++ b/tests/phys_rep_tiered.test/runit @@ -656,6 +656,7 @@ ctrace_dbdir 1 allow_lua_print 1 physrep_update_registry_interval 1 physrep_keepalive_v2 1 +rep_skip_recovery 1 END $COMDB2_EXE ${_dbname} --create --lrl ${_dbdir}/${_dbname}.lrl --pidfile ${_dbdir}/${_dbname}.pid >> ${logFile} 2>&1 @@ -702,6 +703,7 @@ ctrace_dbdir 1 allow_lua_print 1 physrep_update_registry_interval 1 physrep_keepalive_v2 1 +rep_skip_recovery 1 END scp ${tmpdir}/${_dbname}.lrl ${node}:${_dbdir}/${_dbname}.lrl ssh ${node} "$COMDB2_EXE ${_dbname} --create --lrl ${_dbdir}/${_dbname}.lrl --pidfile ${_dbdir}/${_dbname}.pid" >> ${logFile} 2>&1 < /dev/null @@ -1204,6 +1206,34 @@ function test_overlap fi } +# Simple test to verify that skip-recovery ran against physical replicants +function verify_skip_recovery +{ + for node in $CLUSTER ; do + logFile=$TESTDIR/logs/${REPL_DBNAME_PREFIX}_${node}.log + x=$(egrep "skip-recovery truncate" $logFile | wc -l) + if [[ "$x" == "0" ]]; then + cleanFailExit "skip-recovery not found in $logFile" + fi + logFile=$TESTDIR/logs/${REPL_CLUS_DBNAME}.${node}.log + x=$(egrep "skip-recovery truncate" $logFile | wc -l) + if [[ "$x" == "0" ]]; then + cleanFailExit "skip-recovery not found in $logFile" + fi + done +} + +# Verify the reverse-connection / register_interval fix +# When broken, the first REPL node would re-register 200+ times +function verify_revconn_fix +{ + logFile=$TESTDIR/logs/${REPL_DBNAME_PREFIX}_${firstNode}.log + x=$(egrep "Reverse connected" $logFile | wc -l) + if [[ "$x" -gt 80 ]]; then + cleanFailExit "Reverse connection loop in $logFile" + fi +} + # Use message traps to verify overlap logic function match_overlap { @@ -1228,6 +1258,19 @@ function match_overlap return 0 } +# Verify that we are showing generation in comdb2-transaction-logs +function verify_generation +{ + x=$(cdb2sql --tabs ${CDB2_OPTIONS} $DBNAME default "select count(*) from comdb2_transaction_logs where generation is not null") + if [[ $? != 0 ]]; then + cleanFailExit "Failed reading comdb2-transactions-logs" + fi + + if [[ $x == 0 ]]; then + cleanFailExit "Generation not shown in comdb2_transaction_logs" + fi +} + # Call register-replicant directly- look at the list of machines returned .. # Update the metadb info on the returned nodes to put them out of range, and # verify that they are not returned the next time @@ -1414,15 +1457,20 @@ function testcase_preamble function run_tests { + testcase="verify_generation" + testcase_preamble $testcase + verify_generation + testcase_finish $testcase + testcase="phys_rep_nomatch" testcase_preamble $testcase phys_rep_nomatch ${REPL_META_DBNAME} ${REPL_META_HOST} - testcase_finsh $testcase + testcase_finish $testcase testcase="tranlog_timeout" testcase_preamble $testcase tranlog_timeout ${REPL_META_DBNAME} ${REPL_META_HOST} - testcase_finsh $testcase + testcase_finish $testcase testcase="revconn_latency $lastNode" testcase_preamble $testcase @@ -1475,6 +1523,16 @@ function run_tests testcase_preamble $testcase match_overlap testcase_finish $testcase + + testcase="verify_skip_recovery" + testcase_preamble $testcase + verify_skip_recovery + testcase_finish $testcase + + testcase="verify_revconn_fix" + testcase_preamble $testcase + verify_revconn_fix + testcase_finish $testcase } run_tests diff --git a/tests/tunables.test/t00_all_tunables.expected b/tests/tunables.test/t00_all_tunables.expected index 573c550a1b..f038981bd1 100644 --- a/tests/tunables.test/t00_all_tunables.expected +++ b/tests/tunables.test/t00_all_tunables.expected @@ -69,7 +69,7 @@ (name='blobmem_sz_thresh_kb', description='Sets the threshold (in KB) above which blobs are allocated by the blob allocator. (Default: 0)', type='INTEGER', value='-1', read_only='Y') (name='blobstripe', description='', type='BOOLEAN', value='ON', read_only='Y') (name='blocking_latches', description='Block on latch rather than deadlock', type='BOOLEAN', value='OFF', read_only='N') -(name='blocking_physrep', description='Physical replicant blocks on select. (Default: false)', type='BOOLEAN', value='OFF', read_only='N') +(name='blocking_physrep', description='Physical replicant blocks on select. (Default: off)', type='BOOLEAN', value='OFF', read_only='N') (name='broadcast_check_rmtpol', description='Check rmtpol before sending triggers', type='BOOLEAN', value='ON', read_only='N') (name='broken_max_rec_sz', description='', type='INTEGER', value='0', read_only='Y') (name='broken_num_parser', description='', type='BOOLEAN', value='OFF', read_only='Y') @@ -712,8 +712,8 @@ (name='physrep_max_rollback', description='Maximum logs physrep can rollback. (Default: 0)', type='INTEGER', value='0', read_only='N') (name='physrep_metadb_host', description='List of physical replication metadb cluster hosts.', type='STRING', value=NULL, read_only='Y') (name='physrep_metadb_name', description='Physical replication metadb cluster name.', type='STRING', value=NULL, read_only='Y') +(name='physrep_reconnect_interval', description='Reconnect interval for physical replicants (Default: 600)', type='INTEGER', value='600', read_only='N') (name='physrep_reconnect_penalty', description='Physrep wait seconds before retry to the same node. (Default: 5)', type='INTEGER', value='0', read_only='N') -(name='physrep_register_interval', description='Interval for physical replicant re-registration. (Default: 600)', type='INTEGER', value='600', read_only='N') (name='physrep_repl_host', description='Current physrep host.', type='STRING', value=NULL, read_only='Y') (name='physrep_repl_name', description='Current physrep parent.', type='STRING', value=NULL, read_only='Y') (name='physrep_revconn_check_interval', description='Physrep recheck revconn interval. (Default: 60)', type='INTEGER', value='60', read_only='N')