Skip to content

Commit

Permalink
backupccl,spanconfig,kvserver: ExportRequest noops on ephemeral ranges
Browse files Browse the repository at this point in the history
This change is the first of two changes that get us to the goal of backup
ignoring ephemeral table row data, and not holding up GC on these ranges.

This change does a few things:

- It sets up the transport of the ephemeral bit set on a table descriptor
via `ALTER TABLE ... SET EPHEMERAL DATA`, to the span configuration applied
in KV.

- It teaches ExportRequest on a range marked as ephemeral to return
an empty ExportResponse. In this way, a backup processor will receive no row
data to back up for an ephemeral table.

- A follow-up change will also teach the SQLTranslator
to not populate the protected timestamp field on the SpanConfig for ephemeral
tables. This way, a long-running backup will not hold up GC on such high-churn
tables. With no protection on ephemeral ranges, it is possible that an
ExportRequest targeting an ephemeral range has a StartTime
below the range's GCThreshold. To avoid the returned BatchTimestampBeforeGCError
from failing the backup we decorate the error with information about the
range being ephemeral and handle the error in the backup processor.

Informs: cockroachdb#73536

Release note (sql change): BACKUP of a table marked as `ephemeral` via
`ALTER TABLE ... SET EPHEMERAL DATA` will no longer back up that table's row
data. The backup will continue to back up the table's descriptor and related
metadata, and so on restore we will end up with an empty version of the backed
up table.
  • Loading branch information
adityamaru committed Jan 24, 2022
1 parent 54ac240 commit 9439375
Show file tree
Hide file tree
Showing 12 changed files with 294 additions and 2 deletions.
9 changes: 9 additions & 0 deletions pkg/ccl/backupccl/backup_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,15 @@ func runBackupProcessor(
if errors.HasType(exportRequestErr, (*contextutil.TimeoutError)(nil)) {
return errors.Wrap(exportRequestErr, "export request timeout")
}
// BatchTimestampBeforeGCError is returned if the ExportRequest
// attempts to read below the range's GC threshold.
if batchTimestampBeforeGCError, ok := pErr.GetDetail().(*roachpb.BatchTimestampBeforeGCError); ok {
// If the range we are exporting is marked as ephemeral, we do not
// want to back this up and so it is safe to ignore the error.
if batchTimestampBeforeGCError.EphemeralData {
continue
}
}
return errors.Wrapf(exportRequestErr, "exporting %s", span.span)
}

Expand Down
113 changes: 113 additions & 0 deletions pkg/ccl/backupccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/spanconfig"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv"
Expand Down Expand Up @@ -9191,3 +9192,115 @@ func TestBackupRestoreSeparateIncrementalPrefix(t *testing.T) {
sqlDB.Exec(t, "DROP DATABASE inc_fkdb;")
}
}

// TestEphemeralBackupAndRestore backs up data.foo from one cluster and
// restores it into a second, empty cluster. While the table is marked via
// `ALTER TABLE ... SET EPHEMERAL DATA` the restored table is expected to hold
// no rows; once the table is unmarked the restored table is expected to hold
// all 10 rows again.
func TestEphemeralBackupAndRestore(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	_, sqlDB, iodir, cleanupFn := backupRestoreTestSetupWithParams(t, singleNode, 10,
		InitManualReplication, base.TestClusterArgs{
			ServerArgs: base.TestServerArgs{
				Knobs: base.TestingKnobs{
					JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(), // speeds up test
					SpanConfig: &spanconfig.TestingKnobs{
						SQLWatcherCheckpointNoopsEveryDurationOverride: 100 * time.Millisecond,
					},
				},
			},
		})
	defer cleanupFn()

	// The second cluster shares iodir with the first so that it can read the
	// backups written by sqlDB.
	_, restoreDB, cleanup := backupRestoreTestSetupEmpty(t, singleNode, iodir, InitManualReplication,
		base.TestClusterArgs{
			ServerArgs: base.TestServerArgs{
				Knobs: base.TestingKnobs{
					JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(), // speeds up test
				},
			},
		})
	defer cleanup()
	restoreDB.Exec(t, `CREATE DATABASE data`)

	sqlDB.Exec(t, `SET CLUSTER SETTING kv.rangefeed.enabled = true`)
	sqlDB.Exec(t, `SET CLUSTER SETTING kv.closed_timestamp.target_duration = '100ms'`)

	sqlDB.Exec(t, `CREATE TABLE data.foo (id INT, INDEX bar(id))`)
	sqlDB.Exec(t, `INSERT INTO data.foo select * from generate_series(1,10)`)

	// checkNumRestoredRows backs up data.foo into a per-attempt subdirectory of
	// backupDir, restores it into restoreDB, and returns an error if the
	// restored row count differs from expectedRows. The restored table is
	// dropped on the way out so that the next attempt can restore again.
	checkNumRestoredRows := func(backupDir string, retryAttempt, expectedRows int) error {
		backupPath := fmt.Sprintf("%s/%d", backupDir, retryAttempt)
		defer func() {
			restoreDB.Exec(t, `DROP TABLE IF EXISTS data.foo`)
		}()

		sqlDB.Exec(t, `BACKUP TABLE data.foo TO $1`, backupPath)
		restoreDB.Exec(t, `RESTORE TABLE data.foo FROM $1`, backupPath)

		res := restoreDB.QueryStr(t, `SELECT count(*) FROM data.foo`)
		numRows, err := strconv.Atoi(res[0][0])
		require.NoError(t, err)
		if numRows != expectedRows {
			return errors.Newf("expected %d rows, but found %d", expectedRows, numRows)
		}
		return nil
	}

	// Set table to ephemeral and back it up. The ExportRequest should be a noop
	// and backup no data.
	sqlDB.Exec(t, `ALTER TABLE data.foo SET EPHEMERAL DATA`)

	// retryAttempt is bumped after every attempt (in both retry loops) so that
	// each backup is written to a fresh path.
	// NOTE(review): the retry presumably exists because the span config change
	// reaches KV asynchronously - confirm.
	var retryAttempt int
	testutils.SucceedsSoon(t, func() error {
		defer func() {
			retryAttempt++
		}()
		return checkNumRestoredRows(LocalFoo, retryAttempt, 0)
	})

	// Set table to non-ephemeral and backup the table once again. This time the
	// restored table should have all the rows. The table name is fully
	// qualified, like every other statement in this test, so that the statement
	// does not depend on the session's current database.
	sqlDB.Exec(t, `ALTER TABLE data.foo SET NOT EPHEMERAL DATA`)

	nonEphemeralBackupDir := fmt.Sprintf("%s/nonephemeral", LocalFoo)
	testutils.SucceedsSoon(t, func() error {
		defer func() {
			retryAttempt++
		}()
		return checkNumRestoredRows(nonEphemeralBackupDir, retryAttempt, 10)
	})
}

// TestEphemeralExportRequestBelowGCThreshold runs a BACKUP against a server
// whose request filter rejects the first batch carrying an Export request with
// a BatchTimestampBeforeGCError that has EphemeralData set. Every later batch
// is let through untouched. The BACKUP statement is expected to complete
// despite the injected error.
func TestEphemeralExportRequestBelowGCThreshold(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	ctx := context.Background()

	// injected flips to true once the error has been handed out.
	var injected atomic.Value
	injected.Store(false)

	failFirstExport := func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error {
		if _, isExport := ba.GetArg(roachpb.Export); !isExport || injected.Load().(bool) {
			return nil
		}
		injected.Store(true)
		return roachpb.NewError(&roachpb.BatchTimestampBeforeGCError{
			Timestamp:     hlc.Timestamp{},
			Threshold:     hlc.Timestamp{},
			EphemeralData: true,
		})
	}

	externalDir, removeDir := testutils.TempDir(t)
	defer removeDir()

	params := base.TestServerArgs{
		ExternalIODir: externalDir,
		Knobs: base.TestingKnobs{
			JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(),
			Store: &kvserver.StoreTestingKnobs{
				TestingRequestFilter: failFirstExport,
			},
		},
	}

	srv, conn, _ := serverutils.StartServer(t, params)
	defer srv.Stopper().Stop(ctx)

	runner := sqlutils.MakeSQLRunner(conn)
	runner.Exec(t, "CREATE TABLE foo (i INT PRIMARY KEY)")
	runner.Exec(t, "BACKUP TABLE foo TO $1", LocalFoo)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
exec-sql
CREATE DATABASE db;
CREATE TABLE db.t1();
CREATE TABLE db.t2();
----

query-sql
SELECT id FROM system.namespace WHERE name='t1'
----
56

query-sql
SELECT id FROM system.namespace WHERE name='t2'
----
57

# We only expect there to be span config entries for tables t1 and t2.
translate database=db
----
/Table/5{6-7} range default
/Table/5{7-8} range default

# Alter table t1 to mark its data ephemeral.
exec-sql
ALTER TABLE db.t1 SET EPHEMERAL DATA
----

translate database=db
----
/Table/5{6-7} ephemeral_data=true
/Table/5{7-8} range default

# Translating the tables in the database individually should result in the same
# config as above.

translate database=db table=t1
----
/Table/5{6-7} ephemeral_data=true

translate database=db table=t2
----
/Table/5{7-8} range default

# Alter table t1 to unmark its data ephemeral.
exec-sql
ALTER TABLE db.t1 SET NOT EPHEMERAL DATA
----

translate database=db
----
/Table/5{6-7} range default
/Table/5{7-8} range default
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
exec-sql
CREATE DATABASE db;
CREATE TABLE db.t1();
CREATE TABLE db.t2();
----

query-sql
SELECT id FROM system.namespace WHERE name='t1'
----
56

query-sql
SELECT id FROM system.namespace WHERE name='t2'
----
57

# We only expect there to be span config entries for tables t1 and t2.
translate database=db
----
/Tenant/10/Table/5{6-7} range default
/Tenant/10/Table/5{7-8} range default

# Alter table t1 to mark its data ephemeral.
exec-sql
ALTER TABLE db.t1 SET EPHEMERAL DATA
----

translate database=db
----
/Tenant/10/Table/5{6-7} ephemeral_data=true
/Tenant/10/Table/5{7-8} range default

# Translating the tables in the database individually should result in the same
# config as above.

translate database=db table=t1
----
/Tenant/10/Table/5{6-7} ephemeral_data=true

translate database=db table=t2
----
/Tenant/10/Table/5{7-8} range default

# Alter table t1 to unmark its data ephemeral.
exec-sql
ALTER TABLE db.t1 SET NOT EPHEMERAL DATA
----

translate database=db
----
/Tenant/10/Table/5{6-7} range default
/Tenant/10/Table/5{7-8} range default
12 changes: 12 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_export.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
Expand Down Expand Up @@ -110,6 +111,17 @@ func evalExport(
}
evalExportSpan.RecordStructured(&evalExportTrace)

// Tables with ephemeral data are expected to be configured with a short GC
// TTL. Additionally, backup excludes such ephemeral tables from being
// protected from GC when writing its ProtectedTimestamp record. The
// ExportRequest is likely to find its target data has been GC'ed at this
// point, and so if the range being exported is marked as ephemeral, we do not
// want to send back any row data to be backed up.
if cArgs.EvalCtx.IsEphemeralData() {
log.Infof(ctx, "[%s, %s) marked as ephemeral, returning empty ExportResponse", args.Key, args.EndKey)
return result.Result{}, nil
}

if !args.ReturnSST {
return result.Result{}, errors.New("ReturnSST is required")
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/batcheval/eval_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ type EvalContext interface {
GetLastSplitQPS() float64

GetGCThreshold() hlc.Timestamp
IsEphemeralData() bool
GetLastReplicaGCTimestamp(context.Context) (hlc.Timestamp, error)
GetLease() (roachpb.Lease, roachpb.Lease)
GetRangeInfo(context.Context) roachpb.RangeInfo
Expand Down Expand Up @@ -231,6 +232,9 @@ func (m *mockEvalCtxImpl) CanCreateTxnRecord(
func (m *mockEvalCtxImpl) GetGCThreshold() hlc.Timestamp {
return m.GCThreshold
}
// IsEphemeralData mirrors the IsEphemeralData method of the EvalContext
// interface. The mock unconditionally reports false.
func (m *mockEvalCtxImpl) IsEphemeralData() bool {
return false
}
func (m *mockEvalCtxImpl) GetLastReplicaGCTimestamp(context.Context) (hlc.Timestamp, error) {
panic("unimplemented")
}
Expand Down
16 changes: 14 additions & 2 deletions pkg/kv/kvserver/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,17 @@ func (r *Replica) GetGCThreshold() hlc.Timestamp {
return *r.mu.state.GCThreshold
}

// IsEphemeralData returns whether the replica is marked as ephemeral, i.e. the
// value of r.mu.conf.EphemeralData. It holds r.mu for reading while it looks
// the value up.
func (r *Replica) IsEphemeralData() bool {
	r.mu.RLock()
	defer r.mu.RUnlock()
	return r.isEphemeralRLocked()
}

// isEphemeralRLocked returns the same value as IsEphemeralData without taking
// any lock itself. As the RLocked suffix indicates, the caller is expected to
// already hold r.mu, at least for reading.
func (r *Replica) isEphemeralRLocked() bool {
	return r.mu.conf.EphemeralData
}

// Version returns the replica version.
func (r *Replica) Version() roachpb.Version {
if r.mu.state.Version == nil {
Expand Down Expand Up @@ -1503,8 +1514,9 @@ func (r *Replica) checkTSAboveGCThresholdRLocked(
return nil
}
return &roachpb.BatchTimestampBeforeGCError{
Timestamp: ts,
Threshold: threshold,
Timestamp: ts,
Threshold: threshold,
EphemeralData: r.isEphemeralRLocked(),
}
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/kv/kvserver/replica_eval_context_span.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ func (rec SpanSetReplicaEvalContext) GetGCThreshold() hlc.Timestamp {
return rec.i.GetGCThreshold()
}

// IsEphemeralData returns whether the replica is marked as ephemeral. The
// answer comes straight from the wrapped rec.i.
func (rec SpanSetReplicaEvalContext) IsEphemeralData() bool {
return rec.i.IsEphemeralData()
}

// String implements Stringer.
func (rec SpanSetReplicaEvalContext) String() string {
return rec.i.String()
Expand Down
3 changes: 3 additions & 0 deletions pkg/roachpb/errors.proto
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,9 @@ message IntegerOverflowError {
message BatchTimestampBeforeGCError {
optional util.hlc.Timestamp Timestamp = 1 [(gogoproto.nullable) = false];
optional util.hlc.Timestamp Threshold = 2 [(gogoproto.nullable) = false];
// EphemeralData is set to true if the request is targeting a range that has
// been marked as ephemeral via `ALTER TABLE ... SET EPHEMERAL DATA`.
optional bool ephemeral_data = 3 [(gogoproto.nullable) = false];
}

// An IntentMissingError indicates that a QueryIntent request expected
Expand Down
8 changes: 8 additions & 0 deletions pkg/roachpb/span_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,14 @@ message SpanConfig {
// preferred option to least. The first preference that an existing replica of
// a range matches will take priority for the lease.
repeated LeasePreference lease_preferences = 9 [(gogoproto.nullable) = false];
// EphemeralData specifies if the range has been marked as ephemeral.
// Ephemeral data is assumed to have a short GC.TTL, and is therefore handled
// specially for certain operations that rely on reading revisions, e.g.
// BACKUP. Refer to the comment above `TableDescriptor.ephemeral` in
// pkg/sql/catalog/descpb/structured.proto.
bool ephemeral_data = 10;

// Next ID: 11
}

// SpanConfigEntry ties a span to its corresponding config.
Expand Down
19 changes: 19 additions & 0 deletions pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,23 @@ func (s *SQLTranslator) generateSpanConfigurationsForNamedZone(
return entries, nil
}

// setEphemeralDataForTable stamps every span config entry with the ephemeral
// bit reported by the table descriptor.
//
// The slice that `entries` points to is modified in place.
func setEphemeralDataForTable(entries *[]roachpb.SpanConfigEntry, desc catalog.TableDescriptor) {
	spanConfigs := *entries
	for idx := range spanConfigs {
		spanConfigs[idx].Config.EphemeralData = desc.IsEphemeral()
	}
}

// hydrateSpanConfigurationsForTable fills in the fields of a table's span
// configurations that do not come from the table's zone configuration.
// Currently that is only the ephemeral-data bit.
//
// `desc` is asserted to be a catalog.TableDescriptor, and the slice that
// `entries` points to is modified in place.
func hydrateSpanConfigurationsForTable(
	entries *[]roachpb.SpanConfigEntry, desc catalog.Descriptor,
) {
	tableDesc := desc.(catalog.TableDescriptor)
	setEphemeralDataForTable(entries, tableDesc)
}

// generateSpanConfigurationsForTable generates the span configurations
// corresponding to the given tableID. It uses a transactional view of
// system.zones and system.descriptors to do so.
Expand Down Expand Up @@ -279,6 +296,7 @@ func (s *SQLTranslator) generateSpanConfigurationsForTable(
})
}

hydrateSpanConfigurationsForTable(&entries, desc)
return entries, nil

// TODO(irfansharif): There's an attack vector here that we haven't
Expand Down Expand Up @@ -363,6 +381,7 @@ func (s *SQLTranslator) generateSpanConfigurationsForTable(
},
)
}
hydrateSpanConfigurationsForTable(&entries, desc)
return entries, nil
}

Expand Down
Loading

0 comments on commit 9439375

Please sign in to comment.