Merge #109221

109221: roachtest: provision 250 MB/s for restore tests on AWS r=pavelkalinnikov a=pavelkalinnikov The `restore/tpce/*` family of tests on AWS max out the default 125 MB/s EBS throughput. In contrast, similar tests in GCE provision for more throughput and [don't max it out](#107609 (comment)). This commit bumps the provisioned throughput from 125 MB/s to 250 MB/s in all `restore` tests on AWS, so that the tests don't work at the edge of overload. This both brings some parity between testing on GCE and AWS, and reduces likelihood of raft OOMs (which manifest more often when disk is overloaded). Fixes #107609 Touches #106248 Epic: none Release note: none Co-authored-by: Pavel Kalinnikov <[email protected]>
cockroachdb · Aug 22, 2023 · ab15d81 · ab15d81
2 parents 3217767 + 0c4a65b
commit ab15d81
Showing 1 changed file with 11 additions and 9 deletions.
diff --git a/pkg/cmd/roachtest/tests/restore.go b/pkg/cmd/roachtest/tests/restore.go
@@ -108,7 +108,7 @@ func registerRestore(r registry.Registry) {
 		PrometheusNameSpace, Subsystem: "restore", Name: "duration"}, []string{"test_name"})
 
 	withPauseSpecs := restoreSpecs{
-		hardware: makeHardwareSpecs(hardwareSpecs{}),
+		hardware: makeHardwareSpecs(hardwareSpecs{ebsThroughput: 250 /* MB/s */}),
 		backup: makeRestoringBackupSpecs(
 			backupSpecs{workload: tpceRestore{customers: 1000},
 				version: "v22.2.1"}),
@@ -265,7 +265,7 @@ func registerRestore(r registry.Registry) {
 
 	for _, sp := range []restoreSpecs{
 		{
-			hardware: makeHardwareSpecs(hardwareSpecs{}),
+			hardware: makeHardwareSpecs(hardwareSpecs{ebsThroughput: 250 /* MB/s */}),
 			backup:   makeRestoringBackupSpecs(backupSpecs{}),
 			timeout:  1 * time.Hour,
 			tags:     registry.Tags("aws"),
@@ -287,7 +287,7 @@ func registerRestore(r registry.Registry) {
 		{
 			// Benchmarks if per node throughput remains constant if the number of
 			// nodes doubles relative to default.
-			hardware: makeHardwareSpecs(hardwareSpecs{nodes: 8}),
+			hardware: makeHardwareSpecs(hardwareSpecs{nodes: 8, ebsThroughput: 250 /* MB/s */}),
 			backup:   makeRestoringBackupSpecs(backupSpecs{}),
 			timeout:  1 * time.Hour,
 			tags:     registry.Tags("aws"),
@@ -296,7 +296,7 @@ func registerRestore(r registry.Registry) {
 			// Benchmarks if per node throughput remains constant if the cluster
 			// is multi-region.
 			hardware: makeHardwareSpecs(hardwareSpecs{
-				nodes: 9,
+				nodes: 9, ebsThroughput: 250, /* MB/s */
 				zones: []string{"us-east-2b", "us-west-2b", "eu-west-1b"}}), // These zones are AWS-specific.
 			backup:  makeRestoringBackupSpecs(backupSpecs{cloud: spec.AWS}),
 			timeout: 90 * time.Minute,
@@ -305,15 +305,15 @@ func registerRestore(r registry.Registry) {
 		{
 			// Benchmarks if per node throughput doubles if the vcpu count doubles
 			// relative to default.
-			hardware: makeHardwareSpecs(hardwareSpecs{cpus: 16}),
+			hardware: makeHardwareSpecs(hardwareSpecs{cpus: 16, ebsThroughput: 250 /* MB/s */}),
 			backup:   makeRestoringBackupSpecs(backupSpecs{}),
 			timeout:  1 * time.Hour,
 			tags:     registry.Tags("aws"),
 		},
 		{
 			// Ensures we can restore a 48 length incremental chain.
 			// Also benchmarks per node throughput for a long chain.
-			hardware: makeHardwareSpecs(hardwareSpecs{}),
+			hardware: makeHardwareSpecs(hardwareSpecs{ebsThroughput: 250 /* MB/s */}),
 			backup:   makeRestoringBackupSpecs(backupSpecs{backupsIncluded: 48}),
 			timeout:  1 * time.Hour,
 			tags:     registry.Tags("aws"),
@@ -332,7 +332,8 @@ func registerRestore(r registry.Registry) {
 		},
 		{
 			// The weekly 32TB Restore test.
-			hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000}),
+			hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000,
+				ebsThroughput: 250 /* MB/s */}),
 			backup: makeRestoringBackupSpecs(backupSpecs{
 				version:  "v22.2.1",
 				workload: tpceRestore{customers: 2000000}}),
@@ -348,7 +349,8 @@ func registerRestore(r registry.Registry) {
 			// spans. Together with having a 400 incremental chain, this
 			// regression tests against the OOMs that we've seen in previous
 			// versions.
-			hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000}),
+			hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000,
+				ebsThroughput: 250 /* MB/s */}),
 			backup: makeRestoringBackupSpecs(backupSpecs{
 				version:          "v22.2.4",
 				workload:         tpceRestore{customers: 2000000},
@@ -377,7 +379,7 @@ func registerRestore(r registry.Registry) {
 		},
 		{
 			// A teeny weeny 15GB restore that could be used to bisect scale agnostic perf regressions.
-			hardware: makeHardwareSpecs(hardwareSpecs{}),
+			hardware: makeHardwareSpecs(hardwareSpecs{ebsThroughput: 250 /* MB/s */}),
 			backup: makeRestoringBackupSpecs(
 				backupSpecs{workload: tpceRestore{customers: 1000},
 					version: "v22.2.1"}),