Skip to content

Commit

Permalink
br: refactor test to use wait checkpoint method (pingcap#57612)
Browse files Browse the repository at this point in the history
  • Loading branch information
Tristan1900 authored Nov 27, 2024
1 parent 0374ae2 commit 9cc4a20
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 207 deletions.
37 changes: 2 additions & 35 deletions br/tests/br_encryption/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,39 +59,6 @@ insert_additional_data() {
done
}

# Wait until the checkpoint of the log backup task is newer than the moment
# this function started polling.
#
# Arguments:
#   $1 - optional task name; when omitted, $TASK_NAME is used.
# Globals read:
#   PD_ADDR   - address passed to `run_br --pd`.
#   TASK_NAME - default task name.
#   TEST_NAME - only interpolated into failure messages.
# Side effects:
#   Each status query redirects run_br's stderr to ./br.log, truncating it.
# Result:
#   Returns 0 as soon as the reported checkpoint exceeds the reference
#   timestamp. On failure it calls `exit 1`, which ends the current shell
#   rather than returning to the caller. Failure means one of:
#     - the reference timestamp could not be computed,
#     - the status output carried no positive integer checkpoint,
#     - the checkpoint was still behind after 51 polls, 10 seconds apart.
wait_log_checkpoint_advance() {
  local task_name=${1:-$TASK_NAME}
  # Declared separately from their assignments so that `local` does not hide
  # the exit status of the command substitutions, and so that none of these
  # names leak into (or clobber) the caller's variables.
  local current_ts log_backup_status checkpoint_ts
  local i=0

  echo "wait for log checkpoint to advance"
  sleep 10

  # Reference timestamp: epoch milliseconds shifted left by 18 bits. The
  # checkpoint is compared against this value directly below.
  # NOTE(review): presumably this matches the task's checkpoint encoding - confirm.
  current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)") || current_ts=""
  case "$current_ts" in
    '' | *[!0-9]*)
      # Without a valid reference every comparison below would fail and the
      # loop would only give up after all retries with a misleading message.
      echo "TEST: [$TEST_NAME] failed to compute the current ts!"
      exit 1
      ;;
  esac
  echo "current ts: $current_ts"

  while true; do
    # A failed status query is not fatal here: it yields no usable checkpoint
    # and is reported by the numeric check below.
    log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd "$PD_ADDR" log status --task-name "$task_name" --json 2>br.log) || true
    echo "log backup status: $log_backup_status"

    # The filter prints .[0].checkpoint only when .[0].last_errors is empty;
    # otherwise it prints nothing, leaving checkpoint_ts empty.
    checkpoint_ts=$(printf '%s\n' "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') || true
    echo "checkpoint ts: $checkpoint_ts"

    # `[ ... -gt 0 ]` errors out (silenced) for empty or non-integer values,
    # so this doubles as the "is it a positive number" check.
    if ! [ "$checkpoint_ts" -gt 0 ] 2>/dev/null; then
      echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!"
      exit 1
    fi

    if [ "$checkpoint_ts" -gt "$current_ts" ]; then
      echo "the checkpoint has advanced"
      return 0
    fi

    echo "the checkpoint hasn't advanced"
    i=$((i + 1))
    if [ "$i" -gt 50 ]; then
      echo 'the checkpoint lag is too large'
      exit 1
    fi
    sleep 10
  done
}

calculate_checksum() {
local db=$1
local checksum=$(run_sql "USE $db; ADMIN CHECKSUM TABLE $TABLE;" | awk '/CHECKSUM/{print $2}')
Expand Down Expand Up @@ -170,7 +137,7 @@ run_backup_restore_test() {
checksum_ori[${i}]=$(calculate_checksum "$DB${i}") || { echo "Failed to calculate checksum after insertion"; exit 1; }
done

wait_log_checkpoint_advance || { echo "Failed to wait for log checkpoint"; exit 1; }
. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME || { echo "Failed to wait for log checkpoint"; exit 1; }

# sanity check: pause still works
run_br log pause --task-name $TASK_NAME --pd $PD_ADDR || { echo "Failed to pause log backup"; exit 1; }
Expand Down Expand Up @@ -270,7 +237,7 @@ test_backup_encrypted_restore_unencrypted() {
# Insert additional test data
insert_additional_data "insert_after_full_backup" || { echo "Failed to insert additional data"; exit 1; }

wait_log_checkpoint_advance || { echo "Failed to wait for log checkpoint"; exit 1; }
. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME || { echo "Failed to wait for log checkpoint"; exit 1; }


# Stop and clean the cluster
Expand Down
36 changes: 3 additions & 33 deletions br/tests/br_pitr/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ CUR=$(cd `dirname $0`; pwd)
# const value
PREFIX="pitr_backup" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*.
res_file="$TEST_DIR/sql_res.$TEST_NAME.txt"
TASK_NAME="br_pitr"

# start a new cluster
echo "restart a services"
Expand All @@ -38,7 +39,7 @@ echo "prepare_delete_range_count: $prepare_delete_range_count"

# start the log backup task
echo "start log task"
run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log"
run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log"

# run snapshot backup
echo "run snapshot backup"
Expand Down Expand Up @@ -70,39 +71,8 @@ incremental_delete_range_count=$(run_sql "select count(*) DELETE_RANGE_CNT from
echo "incremental_delete_range_count: $incremental_delete_range_count"

# wait checkpoint advance
echo "wait checkpoint advance"
sleep 10
current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)")
echo "current ts: $current_ts"
i=0
while true; do
# extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty
log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log)
echo "log backup status: $log_backup_status"
checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end')
echo "checkpoint ts: $checkpoint_ts"

# check whether the checkpoint ts is a number
if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then
# check whether the checkpoint has advanced
if [ $checkpoint_ts -gt $current_ts ]; then
echo "the checkpoint has advanced"
break
fi
# the checkpoint hasn't advanced
echo "the checkpoint hasn't advanced"
i=$((i+1))
if [ "$i" -gt 50 ]; then
echo 'the checkpoint lag is too large'
exit 1
fi
sleep 10
else
# unknown status, maybe somewhere is wrong
echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!"
exit 1
fi
done
. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME

# dump some info from upstream cluster
# ...
Expand Down
38 changes: 3 additions & 35 deletions br/tests/br_pitr_failpoint/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
set -eu
. run_services
CUR=$(cd `dirname $0`; pwd)
TASK_NAME="br_pitr_failpoint"

# const value
PREFIX="pitr_backup_failpoint" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*.
Expand All @@ -42,7 +43,7 @@ sql_pid=$!

# start the log backup task
echo "start log task"
run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log"
run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log"

# wait until the index creation is running
retry_cnt=0
Expand Down Expand Up @@ -121,42 +122,9 @@ check_contains "Column_name: y"
check_contains "Column_name: z"

# wait checkpoint advance
echo "wait checkpoint advance"
sleep 10
current_ts=$(echo $(($(date +%s%3N) << 18)))
echo "current ts: $current_ts"
i=0
while true; do
# extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty
log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>/dev/null)
echo "log backup status: $log_backup_status"
checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end')
echo "checkpoint ts: $checkpoint_ts"

# check whether the checkpoint ts is a number
if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then
# check whether the checkpoint has advanced
if [ $checkpoint_ts -gt $current_ts ]; then
echo "the checkpoint has advanced"
break
fi
# the checkpoint hasn't advanced
echo "the checkpoint hasn't advanced"
i=$((i+1))
if [ "$i" -gt 50 ]; then
echo 'the checkpoint lag is too large'
exit 1
fi
sleep 10
else
# unknown status, maybe somewhere is wrong
echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!"
exit 1
fi
done
. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME

# start a new cluster
echo "restart a services"
restart_services

# PITR restore - 1
Expand Down
39 changes: 4 additions & 35 deletions br/tests/br_pitr_gc_safepoint/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ CUR=$(cd `dirname $0`; pwd)
# const value
PREFIX="pitr_backup" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*.
res_file="$TEST_DIR/sql_res.$TEST_NAME.txt"
TASK_NAME="br_pitr_gc_safepoint"

# start a new cluster
echo "restart a services"
restart_services

# start the log backup task
echo "start log task"
run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log"
run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log"

# prepare the data
echo "prepare the data"
Expand All @@ -41,41 +42,9 @@ prepare_delete_range_count=$(run_sql "select count(*) DELETE_RANGE_CNT from (sel
echo "prepare_delete_range_count: $prepare_delete_range_count"

# wait checkpoint advance
echo "wait checkpoint advance"
sleep 10
current_ts=$(echo $(($(date +%s%3N) << 18)))
echo "current ts: $current_ts"
i=0
while true; do
# extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty
log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log)
echo "log backup status: $log_backup_status"
checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end')
echo "checkpoint ts: $checkpoint_ts"
. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME"

# check whether the checkpoint ts is a number
if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then
# check whether the checkpoint has advanced
if [ $checkpoint_ts -gt $current_ts ]; then
echo "the checkpoint has advanced"
break
fi
# the checkpoint hasn't advanced
echo "the checkpoint hasn't advanced"
i=$((i+1))
if [ "$i" -gt 50 ]; then
echo 'the checkpoint lag is too large'
exit 1
fi
sleep 10
else
# unknown status, maybe somewhere is wrong
echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!"
exit 1
fi
done

run_br --pd $PD_ADDR log pause --task-name integration_test
run_br --pd $PD_ADDR log pause --task-name $TASK_NAME

safe_point=$(run_pd_ctl -u https://$PD_ADDR service-gc-safepoint)

Expand Down
38 changes: 3 additions & 35 deletions br/tests/br_restore_checkpoint/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ CUR=$(cd `dirname $0`; pwd)
PREFIX="checkpoint" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*.
DB=$TEST_NAME
res_file="$TEST_DIR/sql_res.$TEST_NAME.txt"
TASK_NAME="br_restore_checkpoint"

# start a new cluster
echo "restart a services"
restart_services

# prepare snapshot data
Expand All @@ -37,7 +37,7 @@ run_sql "INSERT INTO $DB.tbl2 values (2, 'b');"

# start the log backup task
echo "start log task"
run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log"
run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log"

# run snapshot backup
echo "run snapshot backup"
Expand All @@ -53,41 +53,9 @@ run_sql "INSERT INTO $DB.tbl3 values (33, 'cc');"

# wait checkpoint advance
echo "wait checkpoint advance"
sleep 10
current_ts=$(echo $(($(date +%s%3N) << 18)))
echo "current ts: $current_ts"
i=0
while true; do
# extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty
log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log)
echo "log backup status: $log_backup_status"
checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end')
echo "checkpoint ts: $checkpoint_ts"

# check whether the checkpoint ts is a number
if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then
# check whether the checkpoint has advanced
if [ $checkpoint_ts -gt $current_ts ]; then
echo "the checkpoint has advanced"
break
fi
# the checkpoint hasn't advanced
echo "the checkpoint hasn't advanced"
i=$((i+1))
if [ "$i" -gt 50 ]; then
echo 'the checkpoint lag is too large'
exit 1
fi
sleep 10
else
# unknown status, maybe somewhere is wrong
echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!"
exit 1
fi
done
. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME

# start a new cluster
echo "restart a services"
restart_services

# PITR but failed in the snapshot restore stage
Expand Down
51 changes: 51 additions & 0 deletions br/tests/br_test_utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/sh
#
# Copyright 2024 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -eux

# Block until the checkpoint of a log backup task has moved past "now".
#
# Arguments:
#   $1 - name of the log backup task; falls back to $TASK_NAME when omitted
#        or empty.
# Globals read:
#   PD_ADDR   - PD endpoint handed to `run_br --pd`.
#   TASK_NAME - default task name (see $1).
#   TEST_NAME - only used in failure messages.
# Side effects:
#   The stderr of every `run_br ... log status` call is written to ./br.log
#   (the file is truncated on each poll).
# Exit behaviour:
#   Returns 0 once the checkpoint is greater than the reference timestamp.
#   Calls `exit 1` - terminating the current shell, not just this function -
#   when the reference timestamp cannot be computed, when no positive integer
#   checkpoint can be read from the status output, or when the checkpoint is
#   still behind after 51 polls spaced 10 seconds apart. Run it in a subshell
#   if the caller needs to survive a failure.
wait_log_checkpoint_advance() {
  local task_name=${1:-$TASK_NAME}
  # Keep every working variable local (the loop counter included) and assign
  # separately from `local`, so command failures are not masked and the
  # sourcing script's own variables are never overwritten.
  local current_ts log_backup_status checkpoint_ts
  local i=0

  echo "wait for log checkpoint to advance for task: $task_name"
  sleep 10

  # Reference timestamp: epoch milliseconds shifted left by 18 bits; the
  # checkpoint reported by the task is compared against it as-is.
  # NOTE(review): presumably this mirrors the checkpoint's encoding - confirm.
  current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)") || current_ts=""
  case "$current_ts" in
    '' | *[!0-9]*)
      # Fail fast: with an unusable reference the comparison below can never
      # succeed and the loop would burn through every retry first.
      echo "TEST: [$TEST_NAME] failed to compute the current ts!"
      exit 1
      ;;
  esac
  echo "current ts: $current_ts"

  while true; do
    # Extract the checkpoint ts of the log backup task. A failing status
    # query is tolerated here and surfaces as an empty checkpoint below.
    log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd "$PD_ADDR" log status --task-name "$task_name" --json 2>br.log) || true
    echo "log backup status: $log_backup_status"

    # If the task reports any error, the filter emits nothing and the
    # checkpoint ts stays empty.
    checkpoint_ts=$(printf '%s\n' "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') || true
    echo "checkpoint ts: $checkpoint_ts"

    # Check whether the checkpoint ts is a positive integer: the test errors
    # out (silenced) on empty or non-numeric input and is then treated as false.
    if ! [ "$checkpoint_ts" -gt 0 ] 2>/dev/null; then
      echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!"
      exit 1
    fi

    if [ "$checkpoint_ts" -gt "$current_ts" ]; then
      echo "the checkpoint has advanced"
      return 0
    fi

    echo "the checkpoint hasn't advanced"
    i=$((i + 1))
    if [ "$i" -gt 50 ]; then
      echo 'the checkpoint lag is too large'
      exit 1
    fi
    sleep 10
  done
}
Loading

0 comments on commit 9cc4a20

Please sign in to comment.