Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

roachtests: introduce admission-control/snapshot-overload #89191

Merged
merged 6 commits into from
Oct 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions pkg/cmd/roachprod/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,18 +235,14 @@ func initFlags() {
cachedHostsCmd.Flags().StringVar(&cachedHostsCluster,
"cluster", "", "print hosts matching cluster")

// TODO (msbutler): this flag should instead point to a relative file path that's checked into
// the repo, not some random URL.
grafanaStartCmd.Flags().StringVar(&grafanaConfig,
"grafana-config", "", "URL to grafana json config")

grafanaURLCmd.Flags().BoolVar(&grafanaurlOpen,
"open", false, "open the grafana dashboard url on the browser")

grafanaStopCmd.Flags().StringVar(&grafanaDumpDir, "dump-dir", "",
"the absolute path, on the machine running roachprod, to dump prometheus data to.\n"+
"In the dump-dir, the 'prometheus-docker-run.sh' script spins up a prometheus UI accessible on \n"+
" 0.0.0.0:9090. If dump-dir is empty, no data will get dumped.")
grafanaDumpCmd.Flags().StringVar(&grafanaDumpDir, "dump-dir", "",
"the absolute path to dump prometheus data to (use the contained 'prometheus-docker-run.sh' to visualize")

for _, cmd := range []*cobra.Command{createCmd, destroyCmd, extendCmd, logsCmd} {
cmd.Flags().StringVarP(&username, "username", "u", os.Getenv("ROACHPROD_USER"),
Expand Down
22 changes: 16 additions & 6 deletions pkg/cmd/roachprod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -900,10 +900,8 @@ var getProvidersCmd = &cobra.Command{

var grafanaStartCmd = &cobra.Command{
Use: `grafana-start <cluster>`,
Short: `spins up a prometheus and grafana instances on the last node in the cluster`,
Long: `spins up a prometheus and grafana instances on the highest numbered node in the cluster
and will scrape from all nodes in the cluster`,
Args: cobra.ExactArgs(1),
Short: `spins up a prometheus and grafana instance on the last node in the cluster`,
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
return roachprod.StartGrafana(context.Background(), roachprodLibraryLogger, args[0],
grafanaConfig, nil)
Expand All @@ -913,10 +911,21 @@ and will scrape from all nodes in the cluster`,
var grafanaStopCmd = &cobra.Command{
Use: `grafana-stop <cluster>`,
Short: `spins down prometheus and grafana instances on the last node in the cluster`,
Long: `spins down the prometheus and grafana instances on the last node in the cluster`,
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
return roachprod.StopGrafana(context.Background(), roachprodLibraryLogger, args[0], grafanaDumpDir)
return roachprod.StopGrafana(context.Background(), roachprodLibraryLogger, args[0], "")
}),
}

// grafanaDumpCmd is the `grafana-dump <cluster>` subcommand. It takes exactly
// one positional argument (the cluster) and hands it, together with the
// directory held in grafanaDumpDir, to roachprod.PrometheusSnapshot. The
// command refuses to run when grafanaDumpDir is empty.
var grafanaDumpCmd = &cobra.Command{
	Use:   `grafana-dump <cluster>`,
	Short: `dump prometheus data to the specified directory`,
	Args:  cobra.ExactArgs(1),
	Run: wrap(func(_ *cobra.Command, positional []string) error {
		// Without a destination directory there is nowhere to dump to.
		if len(grafanaDumpDir) == 0 {
			return errors.New("--dump-dir unspecified")
		}
		clusterName := positional[0]
		ctx := context.Background()
		return roachprod.PrometheusSnapshot(ctx, roachprodLibraryLogger, clusterName, grafanaDumpDir)
	}),
}

Expand Down Expand Up @@ -990,6 +999,7 @@ func main() {
getProvidersCmd,
grafanaStartCmd,
grafanaStopCmd,
grafanaDumpCmd,
grafanaURLCmd,
)
setBashCompletionFunction()
Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ func (t testWrapper) VersionsBinaryOverride() map[string]string {
panic("implement me")
}

// SkipInit is an unimplemented stub on testWrapper: calling it always panics
// with "implement me" and never returns a value.
func (t testWrapper) SkipInit() bool {
panic("implement me")
}

// Progress is an unimplemented stub on testWrapper: the argument f is ignored
// and the call always panics with "implement me".
func (t testWrapper) Progress(f float64) {
panic("implement me")
}
Expand Down
11 changes: 10 additions & 1 deletion pkg/cmd/roachtest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ func main() {
var literalArtifacts string
var httpPort int
var debugEnabled bool
var skipInit bool
var clusterID string
var count = 1
var versionsBinaryOverride map[string]string
Expand Down Expand Up @@ -221,6 +222,7 @@ runner itself.
count: count,
cpuQuota: cpuQuota,
debugEnabled: debugEnabled,
skipInit: skipInit,
httpPort: httpPort,
parallelism: parallelism,
artifactsDir: artifacts,
Expand Down Expand Up @@ -260,6 +262,7 @@ runner itself.
count: count,
cpuQuota: cpuQuota,
debugEnabled: debugEnabled,
skipInit: skipInit,
httpPort: httpPort,
parallelism: parallelism,
artifactsDir: artifacts,
Expand All @@ -284,6 +287,8 @@ runner itself.
&count, "count", 1, "the number of times to run each test")
cmd.Flags().BoolVarP(
&debugEnabled, "debug", "d", debugEnabled, "don't wipe and destroy cluster if test fails")
cmd.Flags().BoolVar(
&skipInit, "skip-init", false, "skip initialization step (imports, table creation, etc.) for tests that support it, useful when re-using clusters with --wipe=false")
cmd.Flags().IntVarP(
&parallelism, "parallelism", "p", parallelism, "number of tests to run in parallel")
cmd.Flags().StringVar(
Expand Down Expand Up @@ -351,6 +356,7 @@ type cliCfg struct {
count int
cpuQuota int
debugEnabled bool
skipInit bool
httpPort int
parallelism int
artifactsDir string
Expand Down Expand Up @@ -426,7 +432,10 @@ func runTests(register func(registry.Registry), cfg cliCfg) error {
CtrlC(ctx, l, cancel, cr)
err = runner.Run(
ctx, tests, cfg.count, cfg.parallelism, opt,
testOpts{versionsBinaryOverride: cfg.versionsBinaryOverride},
testOpts{
versionsBinaryOverride: cfg.versionsBinaryOverride,
skipInit: cfg.skipInit,
},
lopt, nil /* clusterAllocator */)

// Make sure we attempt to clean up. We run with a non-canceled ctx; the
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/test/test_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type Test interface {
// through all registered roachtests to change how they register the test.
Spec() interface{}
VersionsBinaryOverride() map[string]string
SkipInit() bool
Skip(args ...interface{})
Skipf(format string, args ...interface{})
Error(args ...interface{})
Expand Down
5 changes: 5 additions & 0 deletions pkg/cmd/roachtest/test_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ type testImpl struct {
//
// Version strings look like "20.1.4".
versionsBinaryOverride map[string]string
skipInit bool
}

// BuildVersion exposes the build version of the cluster
Expand All @@ -129,6 +130,10 @@ func (t *testImpl) VersionsBinaryOverride() map[string]string {
return t.versionsBinaryOverride
}

// SkipInit returns the value of the test's skipInit field.
func (t *testImpl) SkipInit() bool {
return t.skipInit
}

// Spec returns the TestSpec.
func (t *testImpl) Spec() interface{} {
return t.spec
Expand Down
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ func (c clustersOpt) validate() error {

// testOpts groups per-run options that the test runner copies onto each test
// it creates.
type testOpts struct {
// versionsBinaryOverride is a string-to-string map forwarded to each test.
// NOTE(review): presumably keyed by version string with a binary path as the
// value -- confirm against the flag that populates it.
versionsBinaryOverride map[string]string
// skipInit is forwarded to each test and exposed through its SkipInit method.
skipInit bool
}

// Run runs tests.
Expand Down Expand Up @@ -607,6 +608,7 @@ func (r *testRunner) runWorker(
artifactsSpec: artifactsSpec,
l: testL,
versionsBinaryOverride: topt.versionsBinaryOverride,
skipInit: topt.skipInit,
debug: debug,
}
// Now run the test.
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ go_library(
"activerecord_blocklist.go",
"admission_control.go",
"admission_control_multi_store_overload.go",
"admission_control_snapshot_overload.go",
"admission_control_tpcc_overload.go",
"allocator.go",
"alterpk.go",
Expand Down
17 changes: 17 additions & 0 deletions pkg/cmd/roachtest/tests/admission_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,25 @@ package tests
import "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"

func registerAdmission(r registry.Registry) {
// TODO(irfansharif): Can we write these tests using cgroups instead?
// Limiting CPU/bandwidth directly?

// TODO(irfansharif): Some of these tests hook into prometheus/grafana.
// It'd be nice to use the grafana annotations API to explicitly annotate
// the points at which we do cluster-level things, like set zone configs to
// trigger a round of snapshots.

// TODO(irfansharif): Integrate with probabilistic tracing machinery,
// capturing outliers automatically for later analysis.

// TODO(irfansharif): Look into clusterstats and what that emits to
// roachperf. Need to munge with histogram data to compute % test run spent
// over some latency threshold. Will be useful to track over time.

registerMultiStoreOverload(r)
registerSnapshotOverload(r)
registerTPCCOverload(r)

// TODO(irfansharif): Once registerMultiTenantFairness is unskipped and
// observed to be non-flaky for 3-ish months, transfer ownership to the AC
// group + re-home it here.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func registerMultiStoreOverload(r registry.Registry) {
}
// Defensive, since admission control is enabled by default. This test can
// fail if admission control is disabled.
SetAdmissionControl(ctx, t, c, true)
setAdmissionControl(ctx, t, c, true)
if _, err := db.ExecContext(ctx,
"SET CLUSTER SETTING kv.range_split.by_load_enabled = 'false'"); err != nil {
t.Fatalf("failed to disable load based splitting: %v", err)
Expand Down
Loading