From e8bd19754af366ce2d98cb908b89c97b1a935a3e Mon Sep 17 00:00:00 2001
From: Michael Butler
Date: Fri, 13 Oct 2023 16:15:05 -0400
Subject: [PATCH] roachtest: add backup-restore/small-ranges

This patch adds a new backup-restore roachtest variant that reduces the
default range size for user databases in the backup-restore/round-trip
roachtest to simulate a larger cluster, in terms of range count, at
smaller data sizes. In addition, the roachtest scales down a few cluster
settings so that the ratio of rangeSize/clusterSetting remains constant.

This patch should allow us to recreate a roachtest workload that can
simulate the conditions that led to #109483, but at smaller data sizes.

Informs #109483

Release note: None
---
 .../tests/backup_restore_roundtrip.go         | 83 ++++++++++++++-----
 .../roachtest/tests/mixed_version_backup.go   | 58 +++++++++++++
 2 files changed, 120 insertions(+), 21 deletions(-)

diff --git a/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go b/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go
index c97ed1f967f1..4644f048c43e 100644
--- a/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go
+++ b/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go
@@ -30,29 +30,66 @@ import (
 	"github.com/cockroachdb/errors"
 )
 
+var (
+	// maxRangeSizeBytes defines the possible non-default (the default is 512 MiB)
+	// maximum range sizes that may get set for all user databases.
+	maxRangeSizeBytes = []int64{4 << 20 /* 4 MiB */, 32 << 20 /* 32 MiB */, 128 << 20}
+
+	// systemSettingsScaledOnRangeSize defines the cluster settings that
+	// should scale in proportion to the range size. For example, if the range
+	// size is halved, all the values of these cluster settings should also be
+	// halved.
+	systemSettingsScaledOnRangeSize = []string{
+		"backup.restore_span.target_size",
+		"bulkio.backup.file_size",
+		"kv.bulk_sst.target_size",
+	}
+)
+
 const numFullBackups = 5
 
+type roundTripSpecs struct {
+	name                 string
+	metamorphicRangeSize bool
+}
+
 func registerBackupRestoreRoundTrip(r registry.Registry) {
-	// backup-restore/round-trip tests that a round trip of creating a backup and
-	// restoring the created backup create the same objects.
-	r.Add(registry.TestSpec{
-		Name:              "backup-restore/round-trip",
-		Timeout:           8 * time.Hour,
-		Owner:             registry.OwnerDisasterRecovery,
-		Cluster:           r.MakeClusterSpec(4),
-		EncryptionSupport: registry.EncryptionMetamorphic,
-		RequiresLicense:   true,
-		CompatibleClouds:  registry.AllExceptAWS,
-		Suites:            registry.Suites(registry.Nightly),
-		Run:               backupRestoreRoundTrip,
-	})
+
+	for _, sp := range []roundTripSpecs{
+		{
+			name:                 "backup-restore/round-trip",
+			metamorphicRangeSize: false,
+		},
+		{
+			name:                 "backup-restore/small-ranges",
+			metamorphicRangeSize: true,
+		},
+	} {
+		sp := sp
+		r.Add(registry.TestSpec{
+			Name:              sp.name,
+			Timeout:           4 * time.Hour,
+			Owner:             registry.OwnerDisasterRecovery,
+			Cluster:           r.MakeClusterSpec(4),
+			EncryptionSupport: registry.EncryptionMetamorphic,
+			RequiresLicense:   true,
+			CompatibleClouds:  registry.AllExceptAWS,
+			Suites:            registry.Suites(registry.Nightly),
+			Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
+				backupRestoreRoundTrip(ctx, t, c, sp.metamorphicRangeSize)
+			},
+		})
+	}
 }
 
-func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster) {
+// backup-restore/round-trip tests that a round trip of creating a backup and
+// restoring the created backup creates the same objects.
+func backupRestoreRoundTrip(
+	ctx context.Context, t test.Test, c cluster.Cluster, metamorphicRangeSize bool,
+) {
 	if c.Spec().Cloud != spec.GCE {
 		t.Skip("uses gs://cockroachdb-backup-testing; see https://github.com/cockroachdb/cockroach/issues/105968")
 	}
-
 	pauseProbability := 0.2
 	roachNodes := c.Range(1, c.Spec().NodeCount-1)
 	workloadNode := c.Node(c.Spec().NodeCount)
@@ -62,7 +99,11 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
 
 	// Upload binaries and start cluster.
 	uploadVersion(ctx, t, c, c.All(), clusterupgrade.MainVersion)
-	c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true)), roachNodes)
+	envOption := install.EnvOption([]string{
+		"COCKROACH_MIN_RANGE_MAX_BYTES=1",
+	})
+
+	c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes)
 
 	m := c.NewMonitor(ctx, roachNodes)
 	m.Go(func(ctx context.Context) error {
@@ -77,24 +118,25 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
 		if err != nil {
 			return err
 		}
-
 		tables, err := testUtils.loadTablesForDBs(ctx, t.L(), testRNG, dbs...)
 		if err != nil {
 			return err
 		}
-
 		d, err := newBackupRestoreTestDriver(ctx, t, c, testUtils, roachNodes, dbs, tables)
 		if err != nil {
 			return err
 		}
-
 		if err := testUtils.setShortJobIntervals(ctx, testRNG); err != nil {
 			return err
 		}
 		if err := testUtils.setClusterSettings(ctx, t.L(), testRNG); err != nil {
 			return err
 		}
-
+		if metamorphicRangeSize {
+			if err := testUtils.setMaxRangeSizeAndDependentSettings(ctx, t, testRNG, dbs); err != nil {
+				return err
+			}
+		}
 		stopBackgroundCommands, err := runBackgroundWorkload()
 		if err != nil {
 			return err
@@ -146,7 +188,6 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
 				}
 			}
 		}
-
 		stopBackgroundCommands()
 		return nil
 	})
diff --git a/pkg/cmd/roachtest/tests/mixed_version_backup.go b/pkg/cmd/roachtest/tests/mixed_version_backup.go
index 00f4ed6cc52e..a6dde7950b03 100644
--- a/pkg/cmd/roachtest/tests/mixed_version_backup.go
+++ b/pkg/cmd/roachtest/tests/mixed_version_backup.go
@@ -41,6 +41,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
 	"github.com/cockroachdb/cockroach/pkg/testutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils/jobutils"
+	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
 	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
 	"github.com/cockroachdb/cockroach/pkg/util/randutil"
 	"github.com/cockroachdb/cockroach/pkg/util/retry"
@@ -143,6 +144,14 @@ var (
 		"kv.bulk_io_write.max_rate":       {"250MiB", "500MiB", "2TiB"},
 		"kv.bulk_sst.max_allowed_overage": {"16MiB", "256MiB"},
 		"kv.bulk_sst.target_size":         {"4MiB", "64MiB", "128MiB"},
+		// The default is currently 384 MB, which was set to be about 75% of a
+		// range's worth of data. This configuration will reduce the size of this
+		// setting to test restore_span_covering correctness, at the cost of a
+		// performance dip.
+		//
+		// Note that a size of 0 indicates that target_size will not be used while
+		// constructing restore span entries.
+ "backup.restore_span.target_size": {"0 B", "4 MiB", "32 MiB", "128 MiB"}, } systemSettingNames = func() []string { @@ -1285,6 +1294,53 @@ func (u *CommonTestUtils) loadTablesForDBs( return allTables, nil } +// setMaxRangeSizeAndDependentSettings chooses a random default range size from +// maxRangeSize bytes and scales the cluster settings in +// systemSettingsScaledOnRangeSize such that rangeSize/settingValue remains the +// same. +func (u *CommonTestUtils) setMaxRangeSizeAndDependentSettings( + ctx context.Context, t test.Test, rng *rand.Rand, dbs []string, +) error { + const defaultRangeMinBytes = 1024 + const defaultRangeSize int64 = 512 << 20 + + rangeSize := maxRangeSizeBytes[rng.Intn(len(maxRangeSizeBytes))] + t.L().Printf("Set max range rangeSize to %s", humanizeutil.IBytes(rangeSize)) + + scale := func(current int64) int64 { + currentF := float64(current) + ratio := float64(rangeSize) / float64(defaultRangeSize) + return int64(currentF * ratio) + } + for _, dbName := range dbs { + query := fmt.Sprintf("ALTER DATABASE %s CONFIGURE ZONE USING range_max_bytes=%d, range_min_bytes=%d", + dbName, rangeSize, defaultRangeMinBytes) + if err := u.Exec(ctx, rng, query); err != nil { + return err + } + } + + for _, setting := range systemSettingsScaledOnRangeSize { + var humanizedCurrentValue string + if err := u.QueryRow(ctx, rng, fmt.Sprintf("SHOW CLUSTER SETTING %s", setting)).Scan(&humanizedCurrentValue); err != nil { + return err + } + currentValue, err := humanizeutil.ParseBytes(humanizedCurrentValue) + if err != nil { + return err + } + newValue := scale(currentValue) + t.L().Printf("changing cluster setting %s from %s to %s", setting, humanizedCurrentValue, humanizeutil.IBytes(newValue)) + stmt := fmt.Sprintf("SET CLUSTER SETTING %s = '%d'", setting, newValue) + if err := u.Exec(ctx, rng, stmt); err != nil { + return err + } + } + // Ensure ranges have been properly replicated. + _, dbConn := u.RandomDB(rng, u.roachNodes) + return WaitFor3XReplication(ctx, t, dbConn) +} + // setClusterSettings may set up to numCustomSettings cluster settings // as defined in `systemSettingValues`. The system settings changed // are logged. This function should be called *before* the upgrade @@ -1573,6 +1629,7 @@ func (d *BackupRestoreTestDriver) computeTableContents( return err } result[j] = contents + l.Printf("loaded contents for %s", table) return nil }) } @@ -2131,6 +2188,7 @@ func (bc *backupCollection) verifyBackupCollection( restoredContents, err := d.computeTableContents( ctx, l, rng, restoredTables, bc.contents, "", /* timestamp */ ) + if err != nil { return fmt.Errorf("backup %s: error loading restored contents: %w", bc.name, err) }