Skip to content

Commit

Permalink
roachtest: require perf. tests to opt in via TestSpec.Benchmark
Browse files Browse the repository at this point in the history
Previously, roachtests which benchmark performance (cf. correctness)
were indistinguishable from correctness tests. That is, a performance
test is like any other test with the exception of _optionally_ writing
stats.json under 'Test.PerfArtifactsDir'; these artifacts are
automatically exported to a gcs bucket, used in conjunction with the
roachperf dashboard.

Having no direct way to distinguish a performance test from a correctness
test has several challenges. E.g., performance tests may require a specific
machine type or architecture; background workloads like incremental backup
may cause a performance regression; new metamorphic configurations like
arm64 and fips may require a "bake-in" time before performance tests
can be enabled. In future, the test runner may make specialized
decisions (e.g., don't reuse a cluster) when executing a performance
test. Thus, we need a (standard) mechanism to enumerate all performance tests.
Given their specific requirements, the test author must explicitly opt in,
by setting TestSpec.Benchmark to 'true'.

This PR applies the above change retroactively, i.e., setting 'TestSpec.Benchmark'
for all _known_ performance tests, including those which _assert_ on performance
instead of exporting stats.json.
It also fixes `roachtest list --bench` and `roachtest bench`, which were
out-of-date, albeit not actively used.

Epic: none
Release note: None
  • Loading branch information
srosenberg committed Jun 1, 2023
1 parent e2b704d commit 0df3a03
Show file tree
Hide file tree
Showing 30 changed files with 198 additions and 156 deletions.
4 changes: 2 additions & 2 deletions pkg/cmd/roachtest/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func TestShouldPost(t *testing.T) {
{false, 1, "token", "master", true, ""},
}

reg := makeTestRegistry(spec.GCE, "", "", false)
reg := makeTestRegistry(spec.GCE, "", "", false, false)
for _, c := range testCases {
t.Setenv("GITHUB_API_TOKEN", c.envGithubAPIToken)
t.Setenv("TC_BUILD_BRANCH", c.envTcBuildBranch)
Expand Down Expand Up @@ -146,7 +146,7 @@ func TestCreatePostRequest(t *testing.T) {
{true, false, true, false, otherErr, false, false, nil},
}

reg := makeTestRegistry(spec.GCE, "", "", false)
reg := makeTestRegistry(spec.GCE, "", "", false, false)
for _, c := range testCases {
clusterSpec := reg.MakeClusterSpec(1)

Expand Down
28 changes: 11 additions & 17 deletions pkg/cmd/roachtest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,10 @@ Examples:
roachtest list tag:owner-kv,weekly tag:aws
`,
RunE: func(_ *cobra.Command, args []string) error {
r := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg)
if !listBench {
tests.RegisterTests(&r)
} else {
tests.RegisterBenchmarks(&r)
}
r := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg, listBench)
tests.RegisterTests(&r)

matchedTests := r.List(context.Background(), args)
matchedTests := r.List(args)
for _, test := range matchedTests {
var skip string
if test.Skip != "" && !runSkipped {
Expand Down Expand Up @@ -258,7 +254,7 @@ runner itself.
clusterID: clusterID,
versionsBinaryOverride: versionsBinaryOverride,
enableFIPS: enableFIPS,
})
}, false /* benchOnly */)
},
}

Expand All @@ -282,7 +278,7 @@ runner itself.
if literalArtifacts == "" {
literalArtifacts = artifacts
}
return runTests(tests.RegisterBenchmarks, cliCfg{
return runTests(tests.RegisterTests, cliCfg{
args: args,
count: count,
cpuQuota: cpuQuota,
Expand All @@ -296,7 +292,7 @@ runner itself.
clusterID: clusterID,
versionsBinaryOverride: versionsBinaryOverride,
enableFIPS: enableFIPS,
})
}, true /* benchOnly */)
},
}

Expand Down Expand Up @@ -404,11 +400,11 @@ type cliCfg struct {
enableFIPS bool
}

func runTests(register func(registry.Registry), cfg cliCfg) error {
func runTests(register func(registry.Registry), cfg cliCfg, benchOnly bool) error {
if cfg.count <= 0 {
return fmt.Errorf("--count (%d) must by greater than 0", cfg.count)
}
r := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg)
r := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg, benchOnly)

// actual registering of tests
// TODO: don't register if we can't run on the specified registry cloud
Expand Down Expand Up @@ -448,7 +444,7 @@ func runTests(register func(registry.Registry), cfg cliCfg) error {
return err
}

tests := testsToRun(context.Background(), r, filter)
tests := testsToRun(r, filter)
n := len(tests)
if n*cfg.count < cfg.parallelism {
// Don't spin up more workers than necessary. This has particular
Expand Down Expand Up @@ -594,10 +590,8 @@ func testRunnerLogger(
return l, teeOpt
}

func testsToRun(
ctx context.Context, r testRegistryImpl, filter *registry.TestFilter,
) []registry.TestSpec {
tests, tagMismatch := r.GetTests(ctx, filter)
func testsToRun(r testRegistryImpl, filter *registry.TestFilter) []registry.TestSpec {
tests, tagMismatch := r.GetTests(filter)

var notSkipped []registry.TestSpec
for _, s := range tests {
Expand Down
8 changes: 8 additions & 0 deletions pkg/cmd/roachtest/registry/test_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ type TestSpec struct {
// associated cluster expires. The timeout is always truncated to 10m before
// the test's cluster expires.
Timeout time.Duration
// Denotes whether the test is a roachperf benchmark. If true, the test is expected, but not required, to produce
// artifacts in Test.PerfArtifactsDir(), which get exported to the roachperf dashboard
// (see getPerfArtifacts() in test_runner.go).
// N.B. performance tests may choose to _assert_ on latency, throughput, or some other perf. metric, _without_
// exporting artifacts to the dashboard. E.g., see 'registerKVContention' and 'verifyTxnPerSecond'.
// N.B. performance tests may have different requirements than correctness tests, e.g., machine type/architecture.
// Thus, they must be opted into explicitly via this field.
Benchmark bool
// Tags is a set of tags associated with the test that allow grouping
// tests. If no tags are specified, the set ["default"] is automatically
// given.
Expand Down
16 changes: 11 additions & 5 deletions pkg/cmd/roachtest/test_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
package main

import (
"context"
"fmt"
"os"
"regexp"
Expand Down Expand Up @@ -43,11 +42,13 @@ type testRegistryImpl struct {
snapshotPrefixes map[string]struct{}

promRegistry *prometheus.Registry
// benchOnly is true iff the registry is being used to run benchmarks only.
benchOnly bool
}

// makeTestRegistry constructs a testRegistryImpl and configures it with opts.
func makeTestRegistry(
cloud string, instanceType string, zones string, preferSSD bool,
cloud string, instanceType string, zones string, preferSSD bool, benchOnly bool,
) testRegistryImpl {
return testRegistryImpl{
cloud: cloud,
Expand All @@ -57,6 +58,7 @@ func makeTestRegistry(
m: make(map[string]*registry.TestSpec),
snapshotPrefixes: make(map[string]struct{}),
promRegistry: prometheus.NewRegistry(),
benchOnly: benchOnly,
}
}

Expand All @@ -66,6 +68,10 @@ func (r *testRegistryImpl) Add(spec registry.TestSpec) {
fmt.Fprintf(os.Stderr, "test %s already registered\n", spec.Name)
os.Exit(1)
}
if r.benchOnly && !spec.Benchmark {
// Skip non-benchmarks.
return
}
if spec.SnapshotPrefix != "" {
for existingPrefix := range r.snapshotPrefixes {
if strings.HasPrefix(existingPrefix, spec.SnapshotPrefix) {
Expand Down Expand Up @@ -160,7 +166,7 @@ func (r *testRegistryImpl) PromFactory() promauto.Factory {
// Skipped tests are included, and tests that don't match their minVersion spec
// are also included but marked as skipped.
func (r testRegistryImpl) GetTests(
ctx context.Context, filter *registry.TestFilter,
filter *registry.TestFilter,
) ([]registry.TestSpec, []registry.TestSpec) {
var tests []registry.TestSpec
var tagMismatch []registry.TestSpec
Expand All @@ -183,9 +189,9 @@ func (r testRegistryImpl) GetTests(
}

// List lists tests that match one of the filters.
func (r testRegistryImpl) List(ctx context.Context, filters []string) []registry.TestSpec {
func (r testRegistryImpl) List(filters []string) []registry.TestSpec {
filter := registry.NewTestFilter(filters, true)
tests, _ := r.GetTests(ctx, filter)
tests, _ := r.GetTests(filter)
sort.Slice(tests, func(i, j int) bool { return tests[i].Name < tests[j].Name })
return tests
}
4 changes: 2 additions & 2 deletions pkg/cmd/roachtest/test_registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (

func TestMakeTestRegistry(t *testing.T) {
testutils.RunTrueAndFalse(t, "preferSSD", func(t *testing.T, preferSSD bool) {
r := makeTestRegistry(spec.AWS, "foo", "zone123", preferSSD)
r := makeTestRegistry(spec.AWS, "foo", "zone123", preferSSD, false)
require.Equal(t, preferSSD, r.preferSSD)
require.Equal(t, "zone123", r.zones)
require.Equal(t, "foo", r.instanceType)
Expand All @@ -48,7 +48,7 @@ func TestMakeTestRegistry(t *testing.T) {
// TestPrometheusMetricParser tests that the registry.PromSub()
// helper properly converts a string into a metric name that Prometheus can read.
func TestPrometheusMetricParser(t *testing.T) {
r := makeTestRegistry(spec.AWS, "foo", "zone123", true)
r := makeTestRegistry(spec.AWS, "foo", "zone123", true, false)
f := r.PromFactory()

rawName := "restore/nodes=4/duration"
Expand Down
4 changes: 3 additions & 1 deletion pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,9 @@ func (r *testRunner) runWorker(
}
} else {
// Upon success fetch the perf artifacts from the remote hosts.
getPerfArtifacts(ctx, l, c, t)
if t.spec.Benchmark {
getPerfArtifacts(ctx, l, c, t)
}
if debugMode == DebugKeepAlways {
c.Save(ctx, "cluster saved since --debug-always set", l)
c = nil
Expand Down
8 changes: 4 additions & 4 deletions pkg/cmd/roachtest/test_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ const defaultParallelism = 10

func mkReg(t *testing.T) testRegistryImpl {
t.Helper()
return makeTestRegistry(spec.GCE, "", "", false /* preferSSD */)
return makeTestRegistry(spec.GCE, "", "", false /* preferSSD */, false /* benchOnly */)
}

func TestMatchOrSkip(t *testing.T) {
Expand Down Expand Up @@ -248,7 +248,7 @@ type runnerTest struct {
func setupRunnerTest(t *testing.T, r testRegistryImpl, testFilters []string) *runnerTest {
ctx := context.Background()

tests := testsToRun(ctx, r, registry.NewTestFilter(testFilters, false))
tests := testsToRun(r, registry.NewTestFilter(testFilters, false))
cr := newClusterRegistry()

stopper := stop.NewStopper()
Expand Down Expand Up @@ -405,7 +405,7 @@ func TestRegistryPrepareSpec(t *testing.T) {
}
for _, c := range testCases {
t.Run("", func(t *testing.T) {
r := makeTestRegistry(spec.GCE, "", "", false /* preferSSD */)
r := makeTestRegistry(spec.GCE, "", "", false /* preferSSD */, false /* benchOnly */)
err := r.prepareSpec(&c.spec)
if !testutils.IsError(err, c.expectedErr) {
t.Fatalf("expected %q, but found %q", c.expectedErr, err.Error())
Expand Down Expand Up @@ -439,7 +439,7 @@ func runExitCodeTest(t *testing.T, injectedError error) error {
}
},
})
tests := testsToRun(ctx, r, registry.NewTestFilter(nil, false))
tests := testsToRun(r, registry.NewTestFilter(nil, false))
lopt := loggingOpt{
l: nilLogger(),
tee: logger.NoTee,
Expand Down
5 changes: 3 additions & 2 deletions pkg/cmd/roachtest/tests/allocation_bench.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,9 @@ func registerAllocationBench(r registry.Registry) {
func registerAllocationBenchSpec(r registry.Registry, allocSpec allocationBenchSpec) {
specOptions := []spec.Option{spec.CPU(allocSpec.cpus)}
r.Add(registry.TestSpec{
Name: fmt.Sprintf("allocbench/nodes=%d/cpu=%d/%s", allocSpec.nodes, allocSpec.cpus, allocSpec.load.desc),
Owner: registry.OwnerKV,
Name: fmt.Sprintf("allocbench/nodes=%d/cpu=%d/%s", allocSpec.nodes, allocSpec.cpus, allocSpec.load.desc),
Owner: registry.OwnerKV,
Benchmark: true,
Cluster: r.MakeClusterSpec(
allocSpec.nodes+1,
specOptions...,
Expand Down
31 changes: 17 additions & 14 deletions pkg/cmd/roachtest/tests/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,30 +148,33 @@ func registerAllocator(r registry.Registry) {
}

r.Add(registry.TestSpec{
Name: `replicate/up/1to3`,
Owner: registry.OwnerKV,
Cluster: r.MakeClusterSpec(4),
Leases: registry.MetamorphicLeases,
Name: `replicate/up/1to3`,
Owner: registry.OwnerKV,
Benchmark: true,
Cluster: r.MakeClusterSpec(4),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAllocator(ctx, t, c, 1, 10.0)
},
})
r.Add(registry.TestSpec{
Name: `replicate/rebalance/3to5`,
Owner: registry.OwnerKV,
Cluster: r.MakeClusterSpec(6),
Leases: registry.MetamorphicLeases,
Name: `replicate/rebalance/3to5`,
Owner: registry.OwnerKV,
Benchmark: true,
Cluster: r.MakeClusterSpec(6),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAllocator(ctx, t, c, 3, 42.0)
},
})
r.Add(registry.TestSpec{
Name: `replicate/wide`,
Owner: registry.OwnerKV,
Timeout: 10 * time.Minute,
Cluster: r.MakeClusterSpec(9, spec.CPU(1)),
Leases: registry.MetamorphicLeases,
Run: runWideReplication,
Name: `replicate/wide`,
Owner: registry.OwnerKV,
Benchmark: true,
Timeout: 10 * time.Minute,
Cluster: r.MakeClusterSpec(9, spec.CPU(1)),
Leases: registry.MetamorphicLeases,
Run: runWideReplication,
})
}

Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ func registerBackup(r registry.Registry) {
r.Add(registry.TestSpec{
Name: fmt.Sprintf("backup/2TB/%s", backup2TBSpec),
Owner: registry.OwnerDisasterRecovery,
Benchmark: true,
Cluster: backup2TBSpec,
EncryptionSupport: registry.EncryptionAlwaysDisabled,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/cdc.go
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,7 @@ func registerCDC(r registry.Registry) {
r.Add(registry.TestSpec{
Name: "cdc/initial-scan-only",
Owner: registry.OwnerCDC,
Benchmark: true,
Cluster: r.MakeClusterSpec(4, spec.CPU(16)),
RequiresLicense: true,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
Expand Down
19 changes: 11 additions & 8 deletions pkg/cmd/roachtest/tests/connection_latency.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,9 @@ func registerConnectionLatencyTest(r registry.Registry) {
// Single region test.
numNodes := 3
r.Add(registry.TestSpec{
Name: fmt.Sprintf("connection_latency/nodes=%d/certs", numNodes),
Owner: registry.OwnerSQLFoundations,
Name: fmt.Sprintf("connection_latency/nodes=%d/certs", numNodes),
Owner: registry.OwnerSQLFoundations,
Benchmark: true,
// Add one more node for load node.
Cluster: r.MakeClusterSpec(numNodes+1, spec.Zones(regionUsCentral)),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
Expand All @@ -134,18 +135,20 @@ func registerConnectionLatencyTest(r registry.Registry) {
loadNodes := numZones

r.Add(registry.TestSpec{
Name: fmt.Sprintf("connection_latency/nodes=%d/multiregion/certs", numMultiRegionNodes),
Owner: registry.OwnerSQLFoundations,
Cluster: r.MakeClusterSpec(numMultiRegionNodes+loadNodes, spec.Geo(), spec.Zones(geoZonesStr)),
Name: fmt.Sprintf("connection_latency/nodes=%d/multiregion/certs", numMultiRegionNodes),
Owner: registry.OwnerSQLFoundations,
Benchmark: true,
Cluster: r.MakeClusterSpec(numMultiRegionNodes+loadNodes, spec.Geo(), spec.Zones(geoZonesStr)),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runConnectionLatencyTest(ctx, t, c, numMultiRegionNodes, numZones, false /*password*/)
},
})

r.Add(registry.TestSpec{
Name: fmt.Sprintf("connection_latency/nodes=%d/multiregion/password", numMultiRegionNodes),
Owner: registry.OwnerSQLFoundations,
Cluster: r.MakeClusterSpec(numMultiRegionNodes+loadNodes, spec.Geo(), spec.Zones(geoZonesStr)),
Name: fmt.Sprintf("connection_latency/nodes=%d/multiregion/password", numMultiRegionNodes),
Owner: registry.OwnerSQLFoundations,
Benchmark: true,
Cluster: r.MakeClusterSpec(numMultiRegionNodes+loadNodes, spec.Geo(), spec.Zones(geoZonesStr)),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runConnectionLatencyTest(ctx, t, c, numMultiRegionNodes, numZones, true /*password*/)
},
Expand Down
Loading

0 comments on commit 0df3a03

Please sign in to comment.