roachtest: create perf artifacts concept and collect them
Before the roachtest rewrite in #30977, we used to collect the logs directory
from remote hosts unconditionally. We also used to write stats.json histogram
files to the logs directory. This combination enabled the collection of
histogram files for roachperf. Because the new roachtest does not
unconditionally copy logs to the test runner, we have not collected any perf
artifacts since that change.

This PR introduces a new concept, perf artifacts, which live in a separate
directory, the perfArtifactsDir. If a testSpec indicates that it
HasPerfArtifacts, then upon success that directory is copied from each node in
the cluster to the test runner's artifactsDir for that test.

This change also necessitates a small change to roachperf to look in this new
"perf" directory.

Fixes #38871.

Release note: None
ajwerner committed Jul 15, 2019
1 parent 02d52a6 commit 0492bb9
Showing 11 changed files with 76 additions and 38 deletions.
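
Every test file below follows the same two-step pattern: opt in by setting
HasPerfArtifacts on the testSpec, and point the workload's --histograms output
at perfArtifactsDir instead of logs/. A minimal sketch of that shape, assuming
an invented test name and workload flags (testSpec, makeClusterSpec,
perfArtifactsDir, and the cluster helpers are the ones defined in the diffs
below):

  r.Add(testSpec{
    Name:             "example/perf",      // invented name, not a test in this commit
    Cluster:          makeClusterSpec(4),  // e.g. 3 CockroachDB nodes + 1 load generator
    HasPerfArtifacts: true,                // on success, runner fetches perfArtifactsDir from every node
    Run: func(ctx context.Context, t *test, c *cluster) {
      // Write histograms under perfArtifactsDir ("perf") rather than logs/
      // so the runner knows where to find them when the test passes.
      cmd := fmt.Sprintf("./workload run kv --init"+
        " --histograms="+perfArtifactsDir+"/stats.json"+
        " --duration=10m {pgurl:1-%d}", c.spec.NodeCount-1)
      c.Run(ctx, c.Node(c.spec.NodeCount), cmd)
    },
  })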
5 changes: 3 additions & 2 deletions pkg/cmd/roachtest/indexes.go
@@ -25,7 +25,8 @@ func registerNIndexes(r *testRegistry, secondaryIndexes int) {
     Name:    fmt.Sprintf("indexes/%d/nodes=%d/multi-region", secondaryIndexes, nodes),
     Cluster: makeClusterSpec(nodes+1, cpu(16), geo(), zones(geoZonesStr)),
     // Uses CONFIGURE ZONE USING ... COPY FROM PARENT syntax.
-    MinVersion: `v19.1.0`,
+    MinVersion:       `v19.1.0`,
+    HasPerfArtifacts: true,
     Run: func(ctx context.Context, t *test, c *cluster) {
       firstAZ := geoZones[0]
       roachNodes := c.Range(1, nodes)
@@ -56,7 +57,7 @@ func registerNIndexes(r *testRegistry, secondaryIndexes int) {
       payload := " --payload=256"
       concurrency := ifLocal("", " --concurrency="+strconv.Itoa(nodes*32))
       duration := " --duration=" + ifLocal("10s", "30m")
-      runCmd := fmt.Sprintf("./workload run indexes --histograms=logs/stats.json"+
+      runCmd := fmt.Sprintf("./workload run indexes --histograms="+perfArtifactsDir+"/stats.json"+
        payload+concurrency+duration+" {pgurl%s}", gatewayNodes)
       c.Run(ctx, loadNode, runCmd)
       return nil
7 changes: 4 additions & 3 deletions pkg/cmd/roachtest/interleavedpartitioned.go
@@ -67,7 +67,7 @@ func registerInterleaved(r *testRegistry) {
   c.Run(ctx, cockroachEast.randNode(), cmdInit)

   duration := " --duration " + ifLocal("10s", "10m")
-  histograms := " --histograms logs/stats.json"
+  histograms := " --histograms=" + perfArtifactsDir + "/stats.json"

   createCmd := func(locality string, cockroachNodes nodeListOption) string {
     return fmt.Sprintf(
@@ -123,8 +123,9 @@
 }

 r.Add(testSpec{
-  Name:    "interleavedpartitioned",
-  Cluster: makeClusterSpec(12, geo(), zones("us-west1-b,us-east4-b,us-central1-a")),
+  Name:             "interleavedpartitioned",
+  Cluster:          makeClusterSpec(12, geo(), zones("us-west1-b,us-east4-b,us-central1-a")),
+  HasPerfArtifacts: true,
   Run: func(ctx context.Context, t *test, c *cluster) {
     runInterleaved(ctx, t, c,
       config{
15 changes: 8 additions & 7 deletions pkg/cmd/roachtest/kv.go
@@ -47,7 +47,7 @@ func registerKV(r *testRegistry) {
     splits := " --splits=1000"
     duration := " --duration=" + ifLocal("10s", "10m")
     readPercent := fmt.Sprintf(" --read-percent=%d", opts.readPercent)
-
+    histograms := " --histograms=" + perfArtifactsDir + "/stats.json"
     var batchSize string
     if opts.batchSize > 0 {
       batchSize = fmt.Sprintf(" --batch=%d", opts.batchSize)
@@ -64,9 +64,9 @@
       splits = "" // no splits
       sequential = " --sequential"
     }
-
-    cmd := fmt.Sprintf("./workload run kv --init --histograms=logs/stats.json"+
-      concurrency+splits+duration+readPercent+batchSize+blockSize+sequential+
+    t.ArtifactsDir()
+    cmd := fmt.Sprintf("./workload run kv --init"+
+      histograms+concurrency+splits+duration+readPercent+batchSize+blockSize+sequential+
       " {pgurl:1-%d}", nodes)
     c.Run(ctx, c.Node(nodes+1), cmd)
     return nil
@@ -139,9 +139,10 @@ func registerKV(r *testRegistry) {
 }

 r.Add(testSpec{
-  Name:       strings.Join(nameParts, "/"),
-  MinVersion: minVersion,
-  Cluster:    makeClusterSpec(opts.nodes+1, cpu(opts.cpus)),
+  Name:             strings.Join(nameParts, "/"),
+  MinVersion:       minVersion,
+  Cluster:          makeClusterSpec(opts.nodes+1, cpu(opts.cpus)),
+  HasPerfArtifacts: true,
   Run: func(ctx context.Context, t *test, c *cluster) {
     runKV(ctx, t, c, opts)
   },
7 changes: 4 additions & 3 deletions pkg/cmd/roachtest/ledger.go
@@ -19,8 +19,9 @@ func registerLedger(r *testRegistry) {
   const nodes = 6
   const azs = "us-central1-a,us-central1-b,us-central1-c"
   r.Add(testSpec{
-    Name:    fmt.Sprintf("ledger/nodes=%d/multi-az", nodes),
-    Cluster: makeClusterSpec(nodes+1, cpu(16), geo(), zones(azs)),
+    Name:             fmt.Sprintf("ledger/nodes=%d/multi-az", nodes),
+    Cluster:          makeClusterSpec(nodes+1, cpu(16), geo(), zones(azs)),
+    HasPerfArtifacts: true,
     Run: func(ctx context.Context, t *test, c *cluster) {
       roachNodes := c.Range(1, nodes)
       gatewayNodes := c.Range(1, nodes/3)
@@ -36,7 +37,7 @@
       concurrency := ifLocal("", " --concurrency="+fmt.Sprint(nodes*32))
       duration := " --duration=" + ifLocal("10s", "30m")

-      cmd := fmt.Sprintf("./workload run ledger --init --histograms=logs/stats.json"+
+      cmd := fmt.Sprintf("./workload run ledger --init --histograms="+perfArtifactsDir+"/stats.json"+
        concurrency+duration+" {pgurl%s}", gatewayNodes)
       c.Run(ctx, loadNode, cmd)
       return nil
9 changes: 6 additions & 3 deletions pkg/cmd/roachtest/network.go
@@ -136,7 +136,9 @@ func runNetworkTPCC(ctx context.Context, t *test, origC *cluster, nodes int) {
   }

   cmd := fmt.Sprintf(
-    "./workload run tpcc --warehouses=%d --wait=false --histograms=logs/stats.json --duration=%s {pgurl:2-%d}",
+    "./workload run tpcc --warehouses=%d --wait=false"+
+      " --histograms="+perfArtifactsDir+"/stats.json"+
+      " --duration=%s {pgurl:2-%d}",
     warehouses, duration, c.spec.NodeCount-1)
   return c.RunL(ctx, tpccL, workerNode, cmd)
 })
@@ -244,8 +246,9 @@ func registerNetwork(r *testRegistry) {
   },
 })
 r.Add(testSpec{
-  Name:    fmt.Sprintf("network/tpcc/nodes=%d", numNodes),
-  Cluster: makeClusterSpec(numNodes),
+  Name:             fmt.Sprintf("network/tpcc/nodes=%d", numNodes),
+  Cluster:          makeClusterSpec(numNodes),
+  HasPerfArtifacts: true,
   Run: func(ctx context.Context, t *test, c *cluster) {
     runNetworkTPCC(ctx, t, c, numNodes)
   },
9 changes: 5 additions & 4 deletions pkg/cmd/roachtest/queue.go
@@ -23,9 +23,10 @@ func registerQueue(r *testRegistry) {
   // One node runs the workload generator, all other nodes host CockroachDB.
   const numNodes = 2
   r.Add(testSpec{
-    Skip:    "https://github.com/cockroachdb/cockroach/issues/17229",
-    Name:    fmt.Sprintf("queue/nodes=%d", numNodes-1),
-    Cluster: makeClusterSpec(numNodes),
+    Skip:             "https://github.com/cockroachdb/cockroach/issues/17229",
+    Name:             fmt.Sprintf("queue/nodes=%d", numNodes-1),
+    Cluster:          makeClusterSpec(numNodes),
+    HasPerfArtifacts: true,
     Run: func(ctx context.Context, t *test, c *cluster) {
       runQueue(ctx, t, c)
     },
@@ -52,7 +53,7 @@ func runQueue(ctx context.Context, t *test, c *cluster) {
     init = " --init"
   }
   cmd := fmt.Sprintf(
-    "./workload run queue --histograms=logs/stats.json"+
+    "./workload run queue --histograms="+perfArtifactsDir+"/stats.json"+
      init+
      concurrency+
      duration+
13 changes: 13 additions & 0 deletions pkg/cmd/roachtest/test.go
@@ -66,10 +66,23 @@ type testSpec struct {
   // care about.
   UseIOBarrier bool

+  // HasPerfArtifacts is true if the test has perf artifacts in its
+  // perfArtifactsDir on remote nodes which should be retrieved when the test
+  // completes. Because tests control which node has the artifacts, we pull this
+  // directory from all of the nodes if it is non-empty.
+  // It will not cause an error if the perf artifacts directory does not exist.
+  HasPerfArtifacts bool
+
   // Run is the test function.
   Run func(ctx context.Context, t *test, c *cluster)
 }

+// perfArtifactsDir is the directory on cluster nodes in which perf artifacts
+// should be stored. If a test spec HasPerfArtifacts and passes, then the runner
+// will retrieve that directory from all nodes and copy it to the test artifacts
+// directory.
+const perfArtifactsDir = "perf"
+
 // matchOrSkip returns true if the filter matches the test. If the filter does
 // not match the test because the tag filter does not match, the test is
 // matched, but marked as skipped.
13 changes: 13 additions & 0 deletions pkg/cmd/roachtest/test_runner.go
@@ -491,6 +491,19 @@ func (r *testRunner) runWorker(
       if err != nil {
         return err
       }
+    } else if t.spec.HasPerfArtifacts {
+      // Fetch the perf artifacts from the remote hosts.
+      // If there's an error, oh well, don't do anything rash like fail the test
+      // which already passed.
+      //
+      // TODO(ajwerner): this Get on all nodes has the unfortunate side effect
+      // of logging an error in the test runner log for all of the nodes which
+      // do not have perf artifacts. Ideally we'd have the test tell us which
+      // nodes have the artifacts, or we'd go check explicitly, or we'd find a
+      // way for Get to not complain if a file does not exist.
+      if err := c.Get(ctx, l, perfArtifactsDir, t.artifactsDir+"/"+perfArtifactsDir); err != nil {
+        l.PrintfCtx(ctx, "failed to get perf artifacts: %v", err)
+      }
     }
   }
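
One possible shape for the TODO's last suggestion, tolerating missing
directories instead of logging an error for every node without artifacts. This
is a sketch only: it assumes Get surfaces a not-exist failure in a form that
os.IsNotExist can recognize, which this commit does not guarantee.

  if err := c.Get(ctx, l, perfArtifactsDir, t.artifactsDir+"/"+perfArtifactsDir); err != nil && !os.IsNotExist(err) {
    l.PrintfCtx(ctx, "failed to get perf artifacts: %v", err)
  }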
20 changes: 11 additions & 9 deletions pkg/cmd/roachtest/tpcc.go
@@ -167,7 +167,7 @@ func runTPCC(ctx context.Context, t *test, c *cluster, opts tpccOptions) {
   m.Go(func(ctx context.Context) error {
     t.WorkerStatus("running tpcc")
     cmd := fmt.Sprintf(
-      "./workload run tpcc --warehouses=%d --histograms=logs/stats.json "+
+      "./workload run tpcc --warehouses=%d --histograms="+perfArtifactsDir+"/stats.json "+
        opts.Extra+" --ramp=%s --duration=%s {pgurl:1-%d}",
       opts.Warehouses, rampDuration, opts.Duration, c.spec.NodeCount-1)
     c.Run(ctx, workloadNode, cmd)
@@ -238,9 +238,10 @@ func registerTPCC(r *testRegistry) {
     Name: "tpcc/headroom/" + headroomSpec.String(),
     // TODO(dan): Backfill tpccSupportedWarehouses and remove this "v2.1.0"
     // minimum on gce.
-    MinVersion: maxVersion("v2.1.0", maybeMinVersionForFixturesImport(cloud)),
-    Tags:       []string{`default`, `release_qualification`},
-    Cluster:    headroomSpec,
+    MinVersion:       maxVersion("v2.1.0", maybeMinVersionForFixturesImport(cloud)),
+    Tags:             []string{`default`, `release_qualification`},
+    Cluster:          headroomSpec,
+    HasPerfArtifacts: true,
     Run: func(ctx context.Context, t *test, c *cluster) {
       maxWarehouses := maxSupportedTPCCWarehouses(r.buildVersion, cloud, t.spec.Cluster)
       headroomWarehouses := int(float64(maxWarehouses) * 0.7)
@@ -551,10 +552,11 @@ func registerTPCCBenchSpec(r *testRegistry, b tpccBenchSpec) {
   nodes := makeClusterSpec(numNodes, opts...)

   r.Add(testSpec{
-    Name:       name,
-    Cluster:    nodes,
-    MinVersion: maybeMinVersionForFixturesImport(cloud),
-    Tags:       b.Tags,
+    Name:             name,
+    Cluster:          nodes,
+    MinVersion:       maybeMinVersionForFixturesImport(cloud),
+    Tags:             b.Tags,
+    HasPerfArtifacts: true,
     Run: func(ctx context.Context, t *test, c *cluster) {
       runTPCCBench(ctx, t, c, b)
     },
@@ -781,7 +783,7 @@ func runTPCCBench(ctx context.Context, t *test, c *cluster, b tpccBenchSpec) {
   }

   t.Status(fmt.Sprintf("running benchmark, warehouses=%d", warehouses))
-  histogramsPath := fmt.Sprintf("logs/warehouses=%d/stats.json", activeWarehouses)
+  histogramsPath := fmt.Sprintf("%s/warehouses=%d/stats.json", perfArtifactsDir, activeWarehouses)
   cmd := fmt.Sprintf("./workload run tpcc --warehouses=%d --active-warehouses=%d "+
     "--tolerate-errors --ramp=%s --duration=%s%s {pgurl%s} "+
     "--histograms=%s",
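
One detail worth noting in runTPCCBench above: each warehouse step writes its
own stats.json under a warehouses=%d subdirectory, so a passing run leaves a
small tree of histogram files inside the perf directory, roughly like this
(warehouse counts illustrative):

  perf/warehouses=1000/stats.json
  perf/warehouses=1200/stats.json
  perf/warehouses=1400/stats.json

Since the runner copies the whole directory, the nested files come along for
free.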
9 changes: 5 additions & 4 deletions pkg/cmd/roachtest/tpchbench.go
@@ -107,7 +107,7 @@ func runTPCHBench(ctx context.Context, t *test, c *cluster, b tpchBenchSpec) {
   cmd := fmt.Sprintf(
     "./workload run querybench --db=tpch --concurrency=1 --query-file=%s "+
      "--num-runs=%d --max-ops=%d --vectorized=%t {pgurl%s} "+
-     "--histograms=logs/stats.json --histograms-max-latency=%s",
+     "--histograms="+perfArtifactsDir+"/stats.json --histograms-max-latency=%s",
     filename,
     b.numRunsPerQuery,
     maxOps,
@@ -237,9 +237,10 @@ func registerTPCHBenchSpec(r *testRegistry, b tpchBenchSpec) {
   }

   r.Add(testSpec{
-    Name:       strings.Join(nameParts, "/"),
-    Cluster:    makeClusterSpec(numNodes),
-    MinVersion: minVersion,
+    Name:             strings.Join(nameParts, "/"),
+    Cluster:          makeClusterSpec(numNodes),
+    MinVersion:       minVersion,
+    HasPerfArtifacts: true,
     Run: func(ctx context.Context, t *test, c *cluster) {
       runTPCHBench(ctx, t, c, b)
     },
7 changes: 4 additions & 3 deletions pkg/cmd/roachtest/ycsb.go
@@ -30,7 +30,7 @@ func registerYCSB(r *testRegistry) {
     duration := " --duration=" + ifLocal("10s", "10m")
     cmd := fmt.Sprintf(
       "./workload run ycsb --init --record-count=1000000 --splits=100"+
-       " --workload=%s --concurrency=64 --histograms=logs/stats.json"+
+       " --workload=%s --concurrency=64 --histograms="+perfArtifactsDir+"/stats.json"+
        ramp+duration+" {pgurl:1-%d}",
       wl, nodes)
     c.Run(ctx, c.Node(nodes+1), cmd)
@@ -49,8 +49,9 @@ func registerYCSB(r *testRegistry) {
   }
   wl, cpus := wl, cpus
   r.Add(testSpec{
-    Name:    name,
-    Cluster: makeClusterSpec(4, cpu(cpus)),
+    Name:             name,
+    Cluster:          makeClusterSpec(4, cpu(cpus)),
+    HasPerfArtifacts: true,
     Run: func(ctx context.Context, t *test, c *cluster) {
       runYCSB(ctx, t, c, wl, cpus)
     },
