Skip to content

Commit

Permalink
Fix reverse-conn / tranlog
Browse files Browse the repository at this point in the history
Signed-off-by: Mark Hannum <[email protected]>
  • Loading branch information
markhannum committed Nov 22, 2024
1 parent 43e0ecc commit ae9cab4
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 18 deletions.
5 changes: 3 additions & 2 deletions bdb/phys_rep_lsn.c
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,8 @@ LOG_INFO find_match_lsn(void *in_bdb_state, cdb2_hndl_tp *repl_db, LOG_INFO star
physrep_logmsg(LOGMSG_FATAL, "Require elect-highest-committed-gen on source- exiting\n");
exit(1);
}
physrep_logmsg(LOGMSG_ERROR, "Require elect-highest-committed-gen source\n");
physrep_logmsg(LOGMSG_ERROR, "Require elect-highest-committed-gen source {%d:%d}\n", info.file,
info.offset);
} else {
info.gen = *gen;
}
Expand Down Expand Up @@ -564,7 +565,7 @@ int physrep_bdb_wait_for_seqnum(bdb_state_type *bdb_state, DB_LSN *lsn, void *da
return 0;
}

seqnum_type seqnum;
seqnum_type seqnum = {0};
seqnum.lsn.file = lsn->file;
seqnum.lsn.offset = lsn->offset;
// seqnum.issue_time = ?
Expand Down
6 changes: 3 additions & 3 deletions berkdb/rep/rep_record.c
Original file line number Diff line number Diff line change
Expand Up @@ -8408,9 +8408,8 @@ __rep_verify_match(dbenv, rp, savetime, online)
MUTEX_UNLOCK(dbenv, db_rep->rep_mutexp);
}

/* Masters must run full recovery */
int i_am_master = F_ISSET(rep, REP_F_MASTER);
if (gbl_rep_skip_recovery && !i_am_master && log_compare(&dbenv->prev_commit_lsn, &rp->lsn) <= 0) {
if (gbl_rep_skip_recovery && !i_am_master && dbenv->prev_commit_lsn.file > 0 && log_compare(&dbenv->prev_commit_lsn, &rp->lsn) <= 0) {
DB_TXNREGION *region;
region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary;
dbenv->wrlock_recovery_lock(dbenv, __func__, __LINE__);
Expand Down Expand Up @@ -8467,7 +8466,7 @@ __rep_verify_match(dbenv, rp, savetime, online)

/* Recovery cleanup */
if (dbenv->rep_recovery_cleanup)
dbenv->rep_recovery_cleanup(dbenv, &trunclsn, i_am_master /* 0 */);
dbenv->rep_recovery_cleanup(dbenv, &trunclsn, i_am_master);

dbenv->unlock_recovery_lock(dbenv, __func__, __LINE__);

Expand All @@ -8485,6 +8484,7 @@ __rep_verify_match(dbenv, rp, savetime, online)
logmsg(LOGMSG_WARN, "skip-recovery cannot skip, prev-commit=[%d:%d] trunc-lsn=[%d:%d]\n",
dbenv->prev_commit_lsn.file, dbenv->prev_commit_lsn.offset, rp->lsn.file, rp->lsn.offset);
}
ZERO_LSN(dbenv->prev_commit_lsn);
if ((ret = __rep_dorecovery(dbenv, &rp->lsn, &trunclsn, online,
&undid_schema_change)) != 0) {
Pthread_mutex_unlock(&apply_lk);
Expand Down
2 changes: 1 addition & 1 deletion db/db_tunables.c
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ extern int gbl_physrep_keepalive_freq_sec;
extern int gbl_physrep_max_candidates;
extern int gbl_physrep_max_pending_replicants;
extern int gbl_physrep_reconnect_penalty;
extern int gbl_physrep_register_interval;
extern int gbl_physrep_reconnect_interval;
extern int gbl_physrep_shuffle_host_list;
extern int gbl_physrep_ignore_queues;
extern int gbl_physrep_max_rollback;
Expand Down
6 changes: 3 additions & 3 deletions db/db_tunables.h
Original file line number Diff line number Diff line change
Expand Up @@ -1873,7 +1873,7 @@ REGISTER_TUNABLE("match_on_ckp",

/* physical replication */
REGISTER_TUNABLE("blocking_physrep",
"Physical replicant blocks on select. (Default: false)",
"Physical replicant blocks on select. (Default: off)",
TUNABLE_BOOLEAN, &gbl_blocking_physrep, 0, NULL, NULL, NULL,
NULL);
REGISTER_TUNABLE("tranlog_default_timeout", "Default timeout for tranlog queries. (Default: 30)", TUNABLE_INTEGER,
Expand Down Expand Up @@ -1932,8 +1932,8 @@ REGISTER_TUNABLE("physrep_reconnect_penalty",
"Physrep wait seconds before retry to the same node. (Default: 5)",
TUNABLE_INTEGER, &gbl_physrep_reconnect_penalty, 0, NULL, NULL,
NULL, NULL);
REGISTER_TUNABLE("physrep_register_interval", "Interval for physical replicant re-registration. (Default: 600)",
TUNABLE_INTEGER, &gbl_physrep_register_interval, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_reconnect_interval", "Reconnect interval for physical replicants (Default: 600)",
TUNABLE_INTEGER, &gbl_physrep_reconnect_interval, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_shuffle_host_list",
"Shuffle the host list returned by register_replicant() "
"before connecting to the hosts. (Default: OFF)",
Expand Down
16 changes: 11 additions & 5 deletions db/phys_rep.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ typedef struct DB_Connection {
} while (0)

int gbl_physrep_debug = 0;
int gbl_physrep_register_interval = 600; // force re-registration every 10 mins
int gbl_physrep_reconnect_interval = 600; // force re-registration every 10 mins
int gbl_physrep_reconnect_penalty = 0;
int gbl_blocking_physrep = 0;
int gbl_physrep_fanout = 8;
Expand Down Expand Up @@ -102,6 +102,7 @@ static size_t repl_dbs_sz;
reverse_conn_handle_tp *rev_conn_hndl = NULL;

static int last_register;
static int repl_db_connect_time;

static int add_replicant_host(char *hostname, char *dbname);
static void dump_replicant_hosts(void);
Expand Down Expand Up @@ -191,6 +192,8 @@ static void set_repl_db_connected(char *dbname, char *host)
gbl_physrep_repl_name = dbname;
gbl_physrep_repl_host = host;
repl_db_connected = 1;
repl_db_connect_time = comdb2_time_epoch();
physrep_logmsg(LOGMSG_USER, "Physical replicant is now replicating from %s@%s\n", dbname, host);
}

static void set_repl_db_disconnected()
Expand Down Expand Up @@ -1271,7 +1274,7 @@ static void *physrep_worker(void *args)
}

if (repl_db_connected &&
(force_registration() || (((now = time(NULL)) - last_register) > gbl_physrep_register_interval))) {
(force_registration() || (((now = time(NULL)) - repl_db_connect_time) > gbl_physrep_reconnect_interval))) {
close_repl_connection(repl_db_cnct, repl_db, __func__, __LINE__);
if (gbl_physrep_debug) {
physrep_logmsg(LOGMSG_USER, "%s:%d: Forcing re-registration\n", __func__, __LINE__);
Expand Down Expand Up @@ -1394,9 +1397,6 @@ static void *physrep_worker(void *args)
cdb2_close(repl_metadb);
}

physrep_logmsg(LOGMSG_USER, "Physical replicant is now replicating from %s@%s\n",
repl_db_cnct->dbname, repl_db_cnct->hostname);

if (do_truncate && repl_db) {
info = get_last_lsn(thedb->bdb_env);
prev_info = handle_truncation(repl_db, info);
Expand Down Expand Up @@ -1481,6 +1481,12 @@ static void *physrep_worker(void *args)
goto repl_loop;
}

/* Check reconnect */
if (force_registration() || ((now = time(NULL)) - repl_db_connect_time) > gbl_physrep_reconnect_interval) {
close_repl_connection(repl_db_cnct, repl_db, __func__, __LINE__);
goto repl_loop;
}

int update_regck = gbl_physrep_update_registry_interval;
if (repl_db_connected && update_regck > 0 && (comdb2_time_epoch() - last_update_registry) > update_regck) {
update_registry_periodic(repl_db_cnct->dbname, repl_db_cnct->hostname);
Expand Down
2 changes: 2 additions & 0 deletions sqlite/ext/comdb2/tranlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,8 @@ static int tranlogColumn(
if (pCur->data.data)
LOGCOPY_32(&rectype, pCur->data.data);

normalize_rectype(&rectype);

if (rectype == DB___txn_regop_gen){
generation = get_generation_from_regop_gen_record(pCur->data.data);
}
Expand Down
1 change: 1 addition & 0 deletions tests/phys_rep_tiered.test/lrl.options
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ ctrace_dbdir 1
allow_lua_print 1
reverse_hosts_v2 1
physrep_keepalive_v2 1
rep_skip_recovery 1
63 changes: 61 additions & 2 deletions tests/phys_rep_tiered.test/runit
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,7 @@ ctrace_dbdir 1
allow_lua_print 1
physrep_update_registry_interval 1
physrep_keepalive_v2 1
rep_skip_recovery 1
END

$COMDB2_EXE ${_dbname} --create --lrl ${_dbdir}/${_dbname}.lrl --pidfile ${_dbdir}/${_dbname}.pid >> ${logFile} 2>&1
Expand Down Expand Up @@ -702,6 +703,7 @@ ctrace_dbdir 1
allow_lua_print 1
physrep_update_registry_interval 1
physrep_keepalive_v2 1
rep_skip_recovery 1
END
scp ${tmpdir}/${_dbname}.lrl ${node}:${_dbdir}/${_dbname}.lrl
ssh ${node} "$COMDB2_EXE ${_dbname} --create --lrl ${_dbdir}/${_dbname}.lrl --pidfile ${_dbdir}/${_dbname}.pid" >> ${logFile} 2>&1 < /dev/null
Expand Down Expand Up @@ -1204,6 +1206,34 @@ function test_overlap
fi
}

# Simple test to verify that skip-recovery ran against physical replicants
function verify_skip_recovery
{
for node in $CLUSTER ; do
logFile=$TESTDIR/logs/${REPL_DBNAME_PREFIX}_${node}.log
x=$(egrep "skip-recovery truncate" $logFile | wc -l)
if [[ "$x" == "0" ]]; then
cleanFailExit "skip-recovery not found in $logFile"
fi
logFile=$TESTDIR/logs/${REPL_CLUS_DBNAME}.${node}.log
x=$(egrep "skip-recovery truncate" $logFile | wc -l)
if [[ "$x" == "0" ]]; then
cleanFailExit "skip-recovery not found in $logFile"
fi
done
}

# Verify the reverse-connection / register_interval fix
# When broken, the first REPL node would re-register 200+ times
function verify_revconn_fix
{
logFile=$TESTDIR/logs/${REPL_DBNAME_PREFIX}_${firstNode}.log
x=$(egrep "Reverse connected" $logFile | wc -l)
if [[ "$x" -gt 80 ]]; then
cleanFailExit "Reverse connection loop in $logFile"
fi
}

# Use message traps to verify overlap logic
function match_overlap
{
Expand All @@ -1228,6 +1258,19 @@ function match_overlap
return 0
}

# Verify that we are showing generation in comdb2-transaction-logs
function verify_generation
{
x=$(cdb2sql --tabs ${CDB2_OPTIONS} $DBNAME default "select count(*) from comdb2_transaction_logs where generation is not null")
if [[ $? != 0 ]]; then
cleanFailExit "Failed reading comdb2-transactions-logs"
fi

if [[ $x == 0 ]]; then
cleanFailExit "Generation not shown in comdb2_transaction_logs"
fi
}

# Call register-replicant directly- look at the list of machines returned ..
# Update the metadb info on the returned nodes to put them out of range, and
# verify that they are not returned the next time
Expand Down Expand Up @@ -1414,15 +1457,20 @@ function testcase_preamble

function run_tests
{
testcase="verify_generation"
testcase_preamble $testcase
verify_generation
testcase_finish $testcase

testcase="phys_rep_nomatch"
testcase_preamble $testcase
phys_rep_nomatch ${REPL_META_DBNAME} ${REPL_META_HOST}
testcase_finsh $testcase
testcase_finish $testcase

testcase="tranlog_timeout"
testcase_preamble $testcase
tranlog_timeout ${REPL_META_DBNAME} ${REPL_META_HOST}
testcase_finsh $testcase
testcase_finish $testcase

testcase="revconn_latency $lastNode"
testcase_preamble $testcase
Expand Down Expand Up @@ -1475,6 +1523,17 @@ function run_tests
testcase_preamble $testcase
match_overlap
testcase_finish $testcase

# TODO: skip-recovery seems to cause corruption for physreps- disable for now
#testcase="verify_skip_recovery"
#testcase_preamble $testcase
#verify_skip_recovery
#testcase_finish $testcase

testcase="verify_revconn_fix"
testcase_preamble $testcase
verify_revconn_fix
testcase_finish $testcase
}

run_tests
Expand Down
4 changes: 2 additions & 2 deletions tests/tunables.test/t00_all_tunables.expected
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
(name='blobmem_sz_thresh_kb', description='Sets the threshold (in KB) above which blobs are allocated by the blob allocator. (Default: 0)', type='INTEGER', value='-1', read_only='Y')
(name='blobstripe', description='', type='BOOLEAN', value='ON', read_only='Y')
(name='blocking_latches', description='Block on latch rather than deadlock', type='BOOLEAN', value='OFF', read_only='N')
(name='blocking_physrep', description='Physical replicant blocks on select. (Default: false)', type='BOOLEAN', value='OFF', read_only='N')
(name='blocking_physrep', description='Physical replicant blocks on select. (Default: off)', type='BOOLEAN', value='OFF', read_only='N')
(name='broadcast_check_rmtpol', description='Check rmtpol before sending triggers', type='BOOLEAN', value='ON', read_only='N')
(name='broken_max_rec_sz', description='', type='INTEGER', value='0', read_only='Y')
(name='broken_num_parser', description='', type='BOOLEAN', value='OFF', read_only='Y')
Expand Down Expand Up @@ -712,8 +712,8 @@
(name='physrep_max_rollback', description='Maximum logs physrep can rollback. (Default: 0)', type='INTEGER', value='0', read_only='N')
(name='physrep_metadb_host', description='List of physical replication metadb cluster hosts.', type='STRING', value=NULL, read_only='Y')
(name='physrep_metadb_name', description='Physical replication metadb cluster name.', type='STRING', value=NULL, read_only='Y')
(name='physrep_reconnect_interval', description='Reconnect interval for physical replicants (Default: 600)', type='INTEGER', value='600', read_only='N')
(name='physrep_reconnect_penalty', description='Physrep wait seconds before retry to the same node. (Default: 5)', type='INTEGER', value='0', read_only='N')
(name='physrep_register_interval', description='Interval for physical replicant re-registration. (Default: 600)', type='INTEGER', value='600', read_only='N')
(name='physrep_repl_host', description='Current physrep host.', type='STRING', value=NULL, read_only='Y')
(name='physrep_repl_name', description='Current physrep parent.', type='STRING', value=NULL, read_only='Y')
(name='physrep_revconn_check_interval', description='Physrep recheck revconn interval. (Default: 60)', type='INTEGER', value='60', read_only='N')
Expand Down

0 comments on commit ae9cab4

Please sign in to comment.