Skip to content

Commit

Permalink
Merge #70299
Browse files Browse the repository at this point in the history
70299: sql: TestTenantTempTableCleanup design leads to stress failures r=fqazi a=fqazi

Fixes: #70292

Previously, TestTenantTempTableCleanup was relying on
waiting on a certain amount of time to ensure that the
temporary object cleaner ran. This was inadequate because
there is potential for the temporary object cleaner to
run slower under stress. To address this, this patch
updates the test to ensure exact timing using channels
and testing knobs.

Release note: None

Co-authored-by: Faizan Qazi <[email protected]>
  • Loading branch information
craig[bot] and fqazi committed Sep 20, 2021
2 parents 5fb1f14 + d521967 commit d927893
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 33 deletions.
2 changes: 1 addition & 1 deletion pkg/ccl/testccl/sqlccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ go_test(
"//pkg/util/randutil",
"//pkg/util/retry",
"//pkg/util/stop",
"//pkg/util/timeutil",
"//pkg/util/syncutil",
"@com_github_cockroachdb_errors//:errors",
"@com_github_stretchr_testify//require",
],
Expand Down
122 changes: 90 additions & 32 deletions pkg/ccl/testccl/sqlccl/temp_table_clean_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ package sqlccl_test

import (
"context"
"reflect"
"testing"
"time"

Expand All @@ -23,7 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
)

func TestTenantTempTableCleanup(t *testing.T) {
Expand All @@ -38,22 +37,80 @@ func TestTenantTempTableCleanup(t *testing.T) {
// nodes death.
slinstance.DefaultTTL.Override(ctx, &settings.SV, 5*time.Second)
slinstance.DefaultHeartBeat.Override(ctx, &settings.SV, time.Second)

// Knob state is used to track when temporary object clean up
// is executed.
var (
knobState struct {
syncutil.Mutex
paused bool
finishCh chan struct{}
}
knobStateLocked = func(f func()) {
knobState.Lock()
defer knobState.Unlock()
f()
}
// Enables pausing of temp object cleanups
// until signaled.
pause = func() {
knobStateLocked(func() {
knobState.paused, knobState.finishCh = true, make(chan struct{})
})
}
// Disables pausing of temp object cleanups.
unpause = func() {
knobStateLocked(func() {
close(knobState.finishCh)
knobState.paused, knobState.finishCh = false, nil
})
}
getPaused = func() (paused bool) {
knobStateLocked(func() { paused = knobState.paused })
return paused
}
getFinishCh = func() (finishCh chan struct{}) {
knobStateLocked(func() { finishCh = knobState.finishCh })
return finishCh
}
// Waits for temp object cleanup, we intentionally
// wait for two cycles which will guarantee that any
// temp object is destroyed.
waitForCleanup = func() {
getFinishCh() <- struct{}{}
getFinishCh() <- struct{}{}
}
)
tenantTempKnobSettings := base.TestingKnobs{
SQLExecutor: &sql.ExecutorTestingKnobs{
OnTempObjectsCleanupDone: func() {
// If pausing is disabled, we don't need
// to inform anyone.
if !getPaused() {
return
}
// Indicate that a cleanup has occurred.
<-getFinishCh()
},
},
}
tc := serverutils.StartNewTestCluster(
t, 3 /* numNodes */, base.TestClusterArgs{ReplicationMode: base.ReplicationManual,
ServerArgs: base.TestServerArgs{
Settings: settings,
}},
},
},
)
log.TestingClearServerIdentifiers()
tenantStoppers := []*stop.Stopper{stop.NewStopper(), stop.NewStopper()}
_, tenantPrimaryDB := serverutils.StartTenant(t, tc.Server(0),
base.TestTenantArgs{
TenantID: serverutils.TestTenantID(),
Settings: settings,
Stopper: tenantStoppers[0],
TenantID: serverutils.TestTenantID(),
Settings: settings,
TestingKnobs: tenantTempKnobSettings,
Stopper: tenantStoppers[0],
})
tenantSQL := sqlutils.MakeSQLRunner(tenantPrimaryDB)
pause()

// Sanity: Temporary table clean up works fine, we are going
// to start two nodes and close the kill one of
Expand All @@ -78,50 +135,51 @@ func TestTenantTempTableCleanup(t *testing.T) {
})
tenantSecondSQL.Exec(t, "SET experimental_enable_temp_tables = 'on'")
tenantSecondSQL.Exec(t, "CREATE TEMP TABLE temp_table2 (x INT PRIMARY KEY, y INT);")
waitForCleanup()
tenantSecondSQL.CheckQueryResults(t, "SELECT table_name FROM [SHOW TABLES]",
[][]string{
{"temp_table"},
{"temp_table2"},
})
// Stop the second node, we should one be left with a single temp table.
tenantStoppers[1].Stop(ctx)
// Session should expire in 5 seconds, but
// we will wait for up to 15 seconds.
{
tEnd := timeutil.Now().Add(time.Second * 15)
found := false
lastResult := [][]string{}
expectedResult := [][]string{
// Session should expire in 5 seconds, wait for
// two clean up cycles just in case, so that we have
// stable timing.
waitForCleanup()
tenantSQL.CheckQueryResults(t, "SELECT table_name FROM [SHOW TABLES]",
[][]string{
{"temp_table"},
}
for tEnd.After(timeutil.Now()) {
lastResult = tenantSQL.QueryStr(t, "SELECT table_name FROM [SHOW TABLES]")
if reflect.DeepEqual(lastResult, expectedResult) {
found = true
break
}
}
if !found {
log.Fatalf(ctx, "temporary table was not correctly dropped, expected %v and got %v", expectedResult, lastResult)
}
}

})
// Disable our hook to allow the database to be
// brought down.
unpause()
// Close the primary DB, there should be no temporary
// tables left.
tenantPrimaryDB.Close()
tenantStoppers[0].Stop(ctx)
// Enable our hook to allow the database to be
// brought up.
pause()
// Prevent a logging assertion that the server ID is initialized multiple times.
log.TestingClearServerIdentifiers()
// Once we restart the tenant, no sessions should exist
// so all temporary tables should be cleaned up.
tenantStoppers[0] = stop.NewStopper()
_, tenantPrimaryDB = serverutils.StartTenant(t, tc.Server(0),
base.TestTenantArgs{
Existing: true,
TenantID: serverutils.TestTenantID(),
Settings: settings,
Stopper: tenantStoppers[0]})
Existing: true,
TenantID: serverutils.TestTenantID(),
Settings: settings,
TestingKnobs: tenantTempKnobSettings,
Stopper: tenantStoppers[0]})
tenantSQL = sqlutils.MakeSQLRunner(tenantPrimaryDB)
waitForCleanup()
tenantSQL.CheckQueryResultsRetry(t, "SELECT table_name FROM [SHOW TABLES]",
[][]string{})
// Disable our hook to allow the database to be
// brought stopped.
unpause()
// Bring down the tenants.
tenantStoppers[0].Stop(ctx)
tc.Stopper().Stop(ctx)
}

0 comments on commit d927893

Please sign in to comment.