Skip to content

Commit

Permalink
Merge #109221
Browse files Browse the repository at this point in the history
109221: roachtest: provision 250 MB/s for restore tests on AWS r=pavelkalinnikov a=pavelkalinnikov

The `restore/tpce/*` family of tests on AWS maxes out the default 125 MB/s EBS throughput. In contrast, similar tests on GCE provision more throughput and don't max it out (see the discussion in #107609).

This commit bumps the provisioned throughput from 125 MB/s to 250 MB/s in all `restore` tests on AWS, so that the tests don't work at the edge of overload.

This both brings some parity between testing on GCE and AWS, and reduces the likelihood of raft OOMs (which manifest more often when the disk is overloaded).

Fixes #107609
Touches #106248
Epic: none
Release note: none

Co-authored-by: Pavel Kalinnikov <[email protected]>
  • Loading branch information
craig[bot] and pav-kv committed Aug 22, 2023
2 parents 3217767 + 0c4a65b commit ab15d81
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions pkg/cmd/roachtest/tests/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func registerRestore(r registry.Registry) {
PrometheusNameSpace, Subsystem: "restore", Name: "duration"}, []string{"test_name"})

withPauseSpecs := restoreSpecs{
hardware: makeHardwareSpecs(hardwareSpecs{}),
hardware: makeHardwareSpecs(hardwareSpecs{ebsThroughput: 250 /* MB/s */}),
backup: makeRestoringBackupSpecs(
backupSpecs{workload: tpceRestore{customers: 1000},
version: "v22.2.1"}),
Expand Down Expand Up @@ -265,7 +265,7 @@ func registerRestore(r registry.Registry) {

for _, sp := range []restoreSpecs{
{
hardware: makeHardwareSpecs(hardwareSpecs{}),
hardware: makeHardwareSpecs(hardwareSpecs{ebsThroughput: 250 /* MB/s */}),
backup: makeRestoringBackupSpecs(backupSpecs{}),
timeout: 1 * time.Hour,
tags: registry.Tags("aws"),
Expand All @@ -287,7 +287,7 @@ func registerRestore(r registry.Registry) {
{
// Benchmarks if per node throughput remains constant if the number of
// nodes doubles relative to default.
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 8}),
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 8, ebsThroughput: 250 /* MB/s */}),
backup: makeRestoringBackupSpecs(backupSpecs{}),
timeout: 1 * time.Hour,
tags: registry.Tags("aws"),
Expand All @@ -296,7 +296,7 @@ func registerRestore(r registry.Registry) {
// Benchmarks if per node throughput remains constant if the cluster
// is multi-region.
hardware: makeHardwareSpecs(hardwareSpecs{
nodes: 9,
nodes: 9, ebsThroughput: 250, /* MB/s */
zones: []string{"us-east-2b", "us-west-2b", "eu-west-1b"}}), // These zones are AWS-specific.
backup: makeRestoringBackupSpecs(backupSpecs{cloud: spec.AWS}),
timeout: 90 * time.Minute,
Expand All @@ -305,15 +305,15 @@ func registerRestore(r registry.Registry) {
{
// Benchmarks if per node throughput doubles if the vcpu count doubles
// relative to default.
hardware: makeHardwareSpecs(hardwareSpecs{cpus: 16}),
hardware: makeHardwareSpecs(hardwareSpecs{cpus: 16, ebsThroughput: 250 /* MB/s */}),
backup: makeRestoringBackupSpecs(backupSpecs{}),
timeout: 1 * time.Hour,
tags: registry.Tags("aws"),
},
{
// Ensures we can restore a 48 length incremental chain.
// Also benchmarks per node throughput for a long chain.
hardware: makeHardwareSpecs(hardwareSpecs{}),
hardware: makeHardwareSpecs(hardwareSpecs{ebsThroughput: 250 /* MB/s */}),
backup: makeRestoringBackupSpecs(backupSpecs{backupsIncluded: 48}),
timeout: 1 * time.Hour,
tags: registry.Tags("aws"),
Expand All @@ -332,7 +332,8 @@ func registerRestore(r registry.Registry) {
},
{
// The weekly 32TB Restore test.
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000}),
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000,
ebsThroughput: 250 /* MB/s */}),
backup: makeRestoringBackupSpecs(backupSpecs{
version: "v22.2.1",
workload: tpceRestore{customers: 2000000}}),
Expand All @@ -348,7 +349,8 @@ func registerRestore(r registry.Registry) {
// spans. Together with having a 400 incremental chain, this
// regression tests against the OOMs that we've seen in previous
// versions.
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000}),
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000,
ebsThroughput: 250 /* MB/s */}),
backup: makeRestoringBackupSpecs(backupSpecs{
version: "v22.2.4",
workload: tpceRestore{customers: 2000000},
Expand Down Expand Up @@ -377,7 +379,7 @@ func registerRestore(r registry.Registry) {
},
{
// A teeny weeny 15GB restore that could be used to bisect scale agnostic perf regressions.
hardware: makeHardwareSpecs(hardwareSpecs{}),
hardware: makeHardwareSpecs(hardwareSpecs{ebsThroughput: 250 /* MB/s */}),
backup: makeRestoringBackupSpecs(
backupSpecs{workload: tpceRestore{customers: 1000},
version: "v22.2.1"}),
Expand Down

0 comments on commit ab15d81

Please sign in to comment.