roachtest: add backup-restore/small-ranges #112356

Merged
83 changes: 62 additions & 21 deletions pkg/cmd/roachtest/tests/backup_restore_roundtrip.go
@@ -30,29 +30,66 @@ import (
"github.com/cockroachdb/errors"
)

var (
// maxRangeSizeBytes defines the possible non-default maximum range sizes
// (the default is 512 MiB) that may be set for all user databases.
maxRangeSizeBytes = []int64{4 << 20 /* 4 MiB */, 32 << 20 /* 32 MiB */, 128 << 20 /* 128 MiB */}

// systemSettingsScaledOnRangeSize defines the cluster settings that
// should scale in proportion to the range size. For example, if the range
// size is halved, all the values of these cluster settings should also be
// halved.
systemSettingsScaledOnRangeSize = []string{
"backup.restore_span.target_size",
"bulkio.backup.file_size",
"kv.bulk_sst.target_size",
}
)
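
For illustration only (not part of the PR diff), a minimal sketch of the proportional scaling described above, assuming the 512 MiB default range size and a hypothetical metamorphic pick of 32 MiB: each listed setting is multiplied by 32/512, so a 384 MiB backup.restore_span.target_size would shrink to 24 MiB.

// Sketch: proportional scaling of a range-size-dependent setting (assumed values).
package main

import "fmt"

func main() {
	const defaultRangeSize int64 = 512 << 20 // 512 MiB default max range size
	const pickedRangeSize int64 = 32 << 20   // hypothetical metamorphic pick
	ratio := float64(pickedRangeSize) / float64(defaultRangeSize) // 1/16
	// backup.restore_span.target_size: 384 MiB * 1/16 = 24 MiB
	fmt.Println(int64(float64(384<<20) * ratio)) // prints 25165824 bytes
}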

const numFullBackups = 5

type roundTripSpecs struct {
name string
metamorphicRangeSize bool
}

func registerBackupRestoreRoundTrip(r registry.Registry) {
// backup-restore/round-trip tests that a round trip of creating a backup and
// restoring the created backup creates the same objects.
r.Add(registry.TestSpec{
Name: "backup-restore/round-trip",
Timeout: 8 * time.Hour,
Owner: registry.OwnerDisasterRecovery,
Cluster: r.MakeClusterSpec(4),
EncryptionSupport: registry.EncryptionMetamorphic,
RequiresLicense: true,
CompatibleClouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
Run: backupRestoreRoundTrip,
})

for _, sp := range []roundTripSpecs{
{
name: "backup-restore/round-trip",
metamorphicRangeSize: false,
},
{
name: "backup-restore/small-ranges",
metamorphicRangeSize: true,
},
} {
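// Copy the loop variable so the Run closure below captures this iteration's spec.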
sp := sp
r.Add(registry.TestSpec{
Name: sp.name,
Timeout: 4 * time.Hour,
Owner: registry.OwnerDisasterRecovery,
Cluster: r.MakeClusterSpec(4),
EncryptionSupport: registry.EncryptionMetamorphic,
RequiresLicense: true,
CompatibleClouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
backupRestoreRoundTrip(ctx, t, c, sp.metamorphicRangeSize)
},
})
}
}

func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster) {
// backup-restore/round-trip tests that a round trip of creating a backup and
// restoring the created backup creates the same objects.
func backupRestoreRoundTrip(
ctx context.Context, t test.Test, c cluster.Cluster, metamorphicRangeSize bool,
) {
if c.Spec().Cloud != spec.GCE {
t.Skip("uses gs://cockroachdb-backup-testing; see https://github.com/cockroachdb/cockroach/issues/105968")
}

pauseProbability := 0.2
roachNodes := c.Range(1, c.Spec().NodeCount-1)
workloadNode := c.Node(c.Spec().NodeCount)
@@ -62,7 +99,11 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
// Upload binaries and start cluster.
uploadVersion(ctx, t, c, c.All(), clusterupgrade.MainVersion)

c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true)), roachNodes)
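// Assumption: COCKROACH_MIN_RANGE_MAX_BYTES=1 lowers the enforced floor on
// range_max_bytes so the small metamorphic range sizes can actually be applied.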
envOption := install.EnvOption([]string{
"COCKROACH_MIN_RANGE_MAX_BYTES=1",
})

c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes)
m := c.NewMonitor(ctx, roachNodes)

m.Go(func(ctx context.Context) error {
Expand All @@ -77,24 +118,25 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
if err != nil {
return err
}

tables, err := testUtils.loadTablesForDBs(ctx, t.L(), testRNG, dbs...)
if err != nil {
return err
}

d, err := newBackupRestoreTestDriver(ctx, t, c, testUtils, roachNodes, dbs, tables)
if err != nil {
return err
}

if err := testUtils.setShortJobIntervals(ctx, testRNG); err != nil {
return err
}
if err := testUtils.setClusterSettings(ctx, t.L(), testRNG); err != nil {
return err
}

if metamorphicRangeSize {
if err := testUtils.setMaxRangeSizeAndDependentSettings(ctx, t, testRNG, dbs); err != nil {
return err
}
}
stopBackgroundCommands, err := runBackgroundWorkload()
if err != nil {
return err
@@ -146,7 +188,6 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
}
}
}

stopBackgroundCommands()
return nil
})
58 changes: 58 additions & 0 deletions pkg/cmd/roachtest/tests/mixed_version_backup.go
@@ -41,6 +41,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/jobutils"
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/retry"
@@ -143,6 +144,14 @@
"kv.bulk_io_write.max_rate": {"250MiB", "500MiB", "2TiB"},
"kv.bulk_sst.max_allowed_overage": {"16MiB", "256MiB"},
"kv.bulk_sst.target_size": {"4MiB", "64MiB", "128MiB"},
// The default is currently 384 MB, which was set to be about 75% of a
// range's worth of data. This configuration will reduce the size of this
// setting to test restore_span_covering correctness, at the cost of a
// performance dip.
//
// Note that a size of 0 indicates that target_size will not be used while
// constructing restore span entries.
"backup.restore_span.target_size": {"0 B", "4 MiB", "32 MiB", "128 MiB"},
}

systemSettingNames = func() []string {
@@ -1285,6 +1294,53 @@ func (u *CommonTestUtils) loadTablesForDBs(
return allTables, nil
}

// setMaxRangeSizeAndDependentSettings chooses a random maximum range size
// from maxRangeSizeBytes and scales the cluster settings in
// systemSettingsScaledOnRangeSize such that rangeSize/settingValue remains the
// same.
func (u *CommonTestUtils) setMaxRangeSizeAndDependentSettings(
ctx context.Context, t test.Test, rng *rand.Rand, dbs []string,
) error {
const defaultRangeMinBytes = 1024
const defaultRangeSize int64 = 512 << 20

rangeSize := maxRangeSizeBytes[rng.Intn(len(maxRangeSizeBytes))]
t.L().Printf("Set max range rangeSize to %s", humanizeutil.IBytes(rangeSize))

scale := func(current int64) int64 {
currentF := float64(current)
ratio := float64(rangeSize) / float64(defaultRangeSize)
return int64(currentF * ratio)
}
for _, dbName := range dbs {
query := fmt.Sprintf("ALTER DATABASE %s CONFIGURE ZONE USING range_max_bytes=%d, range_min_bytes=%d",
dbName, rangeSize, defaultRangeMinBytes)
if err := u.Exec(ctx, rng, query); err != nil {
return err
}
}

for _, setting := range systemSettingsScaledOnRangeSize {
var humanizedCurrentValue string
if err := u.QueryRow(ctx, rng, fmt.Sprintf("SHOW CLUSTER SETTING %s", setting)).Scan(&humanizedCurrentValue); err != nil {
return err
}
currentValue, err := humanizeutil.ParseBytes(humanizedCurrentValue)
if err != nil {
return err
}
newValue := scale(currentValue)
t.L().Printf("changing cluster setting %s from %s to %s", setting, humanizedCurrentValue, humanizeutil.IBytes(newValue))
stmt := fmt.Sprintf("SET CLUSTER SETTING %s = '%d'", setting, newValue)
if err := u.Exec(ctx, rng, stmt); err != nil {
return err
}
}
// Ensure ranges have been properly replicated.
_, dbConn := u.RandomDB(rng, u.roachNodes)
return WaitFor3XReplication(ctx, t, dbConn)
}

// setClusterSettings may set up to numCustomSettings cluster settings
// as defined in `systemSettingValues`. The system settings changed
// are logged. This function should be called *before* the upgrade
@@ -1573,6 +1629,7 @@ func (d *BackupRestoreTestDriver) computeTableContents(
return err
}
result[j] = contents
l.Printf("loaded contents for %s", table)
return nil
})
}
@@ -2131,6 +2188,7 @@ func (bc *backupCollection) verifyBackupCollection(
restoredContents, err := d.computeTableContents(
ctx, l, rng, restoredTables, bc.contents, "", /* timestamp */
)

if err != nil {
return fmt.Errorf("backup %s: error loading restored contents: %w", bc.name, err)
}