From 245ed7eed225a93571ca1a2359aafa55d6f20da9 Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Fri, 21 Jul 2023 10:16:53 +0100 Subject: [PATCH 1/2] backupccl: possibly deflake TestBackupRestoreAppend In CI we've seen this test fail with: backup_test.go:670: error scanning '&{ 0xc019c5b580}': pq: restart transaction: TransactionRetryWithProtoRefreshError: TransactionRetryError: retry txn (RETRY_SERIALIZABLE - failed preemptive refresh): "sql txn" meta={id=77054b51 key=/Table/121/1 pri=0.03587869 epo=0 ts=1689916873.568949604,1 min=1689916873.436580640,0 seq=1000} lock=true stat=PENDING rts=1689916873.436580640,0 wto=false gul=1689916873.936580640,0 The `RETURNING` clauses on these two `UPDATE` statements prevent automatic transaction retries. Here we wrap the queries in an explicit transaction with a retry loop which should prevent the test failure test, assuming that the update isn't so contended it won't ever complete. I am unable to stress this enough locally to reproduce this error. Probably Fixes #107330 Epic: none Release note: None --- pkg/ccl/backupccl/backup_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 2b9942504cdb..4d4a863e9ee5 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -671,15 +671,18 @@ func TestBackupRestoreAppend(t *testing.T) { sqlDB.ExpectErr(t, "A full backup cannot be written to \"/subdir\", a user defined subdirectory", "BACKUP INTO $4 IN ($1, $2, $3) AS OF SYSTEM TIME "+tsBefore, append(test.collectionsWithSubdir, specifiedSubdir)...) - sqlDB.QueryRow(t, "UPDATE data.bank SET balance = 100 RETURNING cluster_logical_timestamp()").Scan(&ts1) + sqlDB.RunWithRetriableTxn(t, func(txn *gosql.Tx) error { + return txn.QueryRow("UPDATE data.bank SET balance = 100 RETURNING cluster_logical_timestamp()").Scan(&ts1) + }) sqlDB.Exec(t, "BACKUP INTO LATEST IN ($1, $2, $3) AS OF SYSTEM TIME "+ts1, test.collections...) // Append to latest again, just to prove we can append to an appended one and // that appended didn't e.g. mess up LATEST. sqlDB.QueryRow(t, "SELECT cluster_logical_timestamp()").Scan(&ts1again) sqlDB.Exec(t, "BACKUP INTO LATEST IN ($1, $2, $3) AS OF SYSTEM TIME "+ts1again, test.collections...) - - sqlDB.QueryRow(t, "UPDATE data.bank SET balance = 200 RETURNING cluster_logical_timestamp()").Scan(&ts2) + sqlDB.RunWithRetriableTxn(t, func(txn *gosql.Tx) error { + return txn.QueryRow("UPDATE data.bank SET balance = 200 RETURNING cluster_logical_timestamp()").Scan(&ts2) + }) rowsTS2 := sqlDB.QueryStr(t, "SELECT * from data.bank ORDER BY id") // Start a new full-backup in the collection version. From 7f60f22b095c4883268931a4f3d66e7a81b59669 Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Fri, 21 Jul 2023 11:51:07 +0100 Subject: [PATCH 2/2] backupccl: skip TestBackupRestoreJobTagAndLabel under race This test fails under race when running in a tenant occasionally. Requires further investigation. Informs: #107336 Release note: None --- pkg/ccl/backupccl/backup_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 2b9942504cdb..7e28baa56b04 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -219,6 +219,7 @@ func TestBackupRestoreMultiNodeRemote(t *testing.T) { func TestBackupRestoreJobTagAndLabel(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + skip.UnderRaceWithIssue(t, 107336) const numAccounts = 1000 ctx := context.Background()