Skip to content

Commit

Permalink
gcjob: handle missing tables when refreshing status
Browse files Browse the repository at this point in the history
Fixes #50344.

Release note (bug fix): Fixed a bug whereby gc jobs for tables dropped as
part of a DROP DATABASE CASCADE might never complete.
  • Loading branch information
ajwerner authored and pbardea committed Sep 11, 2020
1 parent f990441 commit 53bc491
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 11 deletions.
5 changes: 5 additions & 0 deletions pkg/sql/gcjob/gc_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ func (r schemaChangeGCResumer) Resume(
return nil
}
expired, earliestDeadline = refreshTables(ctx, execCfg, remainingTables, tableDropTimes, indexDropTimes, r.jobID, progress)

if isDoneGC(progress) {
return nil
}

timerDuration := time.Until(earliestDeadline)
if expired {
timerDuration = 0
Expand Down
31 changes: 20 additions & 11 deletions pkg/sql/gcjob/refresh_statuses.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
)

var maxDeadline = timeutil.Unix(0, math.MaxInt64)

// refreshTables updates the status of tables/indexes that are waiting to be
// GC'd.
// It returns whether or not any index/table has expired and the duration until
Expand All @@ -43,25 +46,24 @@ func refreshTables(
jobID int64,
progress *jobspb.SchemaChangeGCProgress,
) (expired bool, earliestDeadline time.Time) {
earliestDeadline = timeutil.Unix(0, math.MaxInt64)

earliestDeadline = maxDeadline
var haveAnyMissing bool
for _, tableID := range tableIDs {
tableHasExpiredElem, deadline := updateStatusForGCElements(
tableHasExpiredElem, tableIsMissing, deadline := updateStatusForGCElements(
ctx,
execCfg,
tableID,
tableDropTimes, indexDropTimes,
progress,
)
if tableHasExpiredElem {
expired = true
}
expired = expired || tableHasExpiredElem
haveAnyMissing = haveAnyMissing || tableIsMissing
if deadline.Before(earliestDeadline) {
earliestDeadline = deadline
}
}

if expired {
if expired || haveAnyMissing {
persistProgress(ctx, execCfg, jobID, progress)
}

Expand All @@ -72,15 +74,17 @@ func refreshTables(
// are waiting for GC. If the table is waiting for GC then the status of the table
// will be updated.
// It returns whether any indexes or the table have expired as well as the time
// until the next index expires if there are any more to drop.
// until the next index expires if there are any more to drop. It also returns
// whether the table descriptor is missing indicating that it was gc'd by
// another job, in which case the progress will have been updated.
func updateStatusForGCElements(
ctx context.Context,
execCfg *sql.ExecutorConfig,
tableID sqlbase.ID,
tableDropTimes map[sqlbase.ID]int64,
indexDropTimes map[sqlbase.IndexID]int64,
progress *jobspb.SchemaChangeGCProgress,
) (expired bool, timeToNextTrigger time.Time) {
) (expired, missing bool, timeToNextTrigger time.Time) {
defTTL := execCfg.DefaultZoneConfig.GC.TTLSeconds
cfg := execCfg.Gossip.GetSystemConfig()
protectedtsCache := execCfg.ProtectedTimestampProvider
Expand Down Expand Up @@ -124,11 +128,16 @@ func updateStatusForGCElements(

return nil
}); err != nil {
if errors.Is(err, sqlbase.ErrDescriptorNotFound) {
log.Warningf(ctx, "table %d not found, marking as GC'd", tableID)
markTableGCed(ctx, tableID, progress)
return false, true, maxDeadline
}
log.Warningf(ctx, "error while calculating GC time for table %d, err: %+v", tableID, err)
return false, earliestDeadline
return false, false, maxDeadline
}

return expired, earliestDeadline
return expired, false, earliestDeadline
}

// updateTableStatus sets the status the table to DELETING if the GC TTL has
Expand Down
67 changes: 67 additions & 0 deletions pkg/sql/gcjob_test/gc_job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/gcjob"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/jobutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -230,3 +232,68 @@ func TestSchemaChangeGCJob(t *testing.T) {
}
}
}

func TestSchemaChangeGCJobTableGCdWhileWaitingForExpiration(t *testing.T) {
defer leaktest.AfterTest(t)()

defer func(oldAdoptInterval, oldGCInterval time.Duration) {
jobs.DefaultAdoptInterval = oldAdoptInterval
}(jobs.DefaultAdoptInterval, gcjob.MaxSQLGCInterval)
jobs.DefaultAdoptInterval = 100 * time.Millisecond

// We're going to drop a table then manually delete it, then update the
// database zone config and ensure the job finishes successfully.
s, db, kvDB := serverutils.StartServer(t, base.TestServerArgs{})
ctx := context.Background()
defer s.Stopper().Stop(ctx)
sqlDB := sqlutils.MakeSQLRunner(db)

// Note: this is to avoid a common failure during shutdown when a range
// merge runs concurrently with node shutdown leading to a panic due to
// pebble already being closed. See #51544.
sqlDB.Exec(t, "SET CLUSTER SETTING kv.range_merge.queue_enabled = false")

sqlDB.Exec(t, "CREATE DATABASE db")
sqlDB.Exec(t, "CREATE TABLE db.foo ()")
var dbID, tableID descpb.ID
sqlDB.QueryRow(t, `
SELECT parent_id, table_id
FROM crdb_internal.tables
WHERE database_name = $1 AND name = $2;
`, "db", "foo").Scan(&dbID, &tableID)
sqlDB.Exec(t, "DROP TABLE db.foo")

// Now we should be able to find our GC job
var jobID int64
var status jobs.Status
sqlDB.QueryRow(t, `
SELECT job_id, status
FROM crdb_internal.jobs
WHERE description LIKE 'GC for DROP TABLE db.public.foo';
`).Scan(&jobID, &status)
require.Equal(t, jobs.StatusRunning, status)

// Manually delete the table.
require.NoError(t, kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
nameKey := sqlbase.MakeNameMetadataKey(keys.SystemSQLCodec, dbID, keys.PublicSchemaID, "foo")
if err := txn.Del(ctx, nameKey); err != nil {
return err
}
descKey := sqlbase.MakeDescMetadataKey(keys.SystemSQLCodec, tableID)
return txn.Del(ctx, descKey)
}))
// Update the GC TTL to tickle the job to refresh the status and discover that
// it has been removed. Use a SucceedsSoon to deal with races between setting
// the zone config and when the job subscribes to the zone config.
var i int
testutils.SucceedsSoon(t, func() error {
i++
sqlDB.Exec(t, "ALTER DATABASE db CONFIGURE ZONE USING gc.ttlseconds = 60 * 60 * 25 + $1", i)
var status jobs.Status
sqlDB.QueryRow(t, "SELECT status FROM [SHOW JOB $1]", jobID).Scan(&status)
if status != jobs.StatusSucceeded {
return errors.Errorf("job status %v != %v", status, jobs.StatusSucceeded)
}
return nil
})
}

0 comments on commit 53bc491

Please sign in to comment.