Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
108916: tests: roachtest that creates overload when doing intent resolution r=irfansharif a=sumeerbhola

Informs cockroachdb#97108

Epic: CRDB-25458

Release note: None


Co-authored-by: sumeerbhola <[email protected]>
  • Loading branch information
craig[bot] and sumeerbhola committed Aug 18, 2023
2 parents 669b36d + 8f401f6 commit f3c92a0
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ go_library(
"admission_control_elastic_cdc.go",
"admission_control_index_backfill.go",
"admission_control_index_overload.go",
"admission_control_intent_resolution.go",
"admission_control_multi_store_overload.go",
"admission_control_multitenant_fairness.go",
"admission_control_snapshot_overload.go",
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/admission_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ func registerAdmission(r registry.Registry) {
registerIndexOverload(r)
registerIndexBackfill(r)
registerDatabaseDrop(r)
registerIntentResolutionOverload(r)
}
103 changes: 103 additions & 0 deletions pkg/cmd/roachtest/tests/admission_control_intent_resolution.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package tests

import (
"context"
gosql "database/sql"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/grafana"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/roachprod/prometheus"
"github.com/stretchr/testify/require"
)

// registerIntentResolutionOverload registers the
// admission-control/intent-resolution roachtest.
//
// This test sets up a 1 node CRDB cluster on an 8vCPU machine, runs a txn
// that creates a huge number of intents, and then commits to resolve those
// intents. When intent resolution is not subject to admission control, the
// LSM gets overloaded and has > 50 sub-levels.
//
// The test currently makes no assertions on metrics (see the TODO at the end
// of the workload goroutine); it only fails if cluster setup or one of the
// SQL statements fails, or if the test context is canceled.
func registerIntentResolutionOverload(r registry.Registry) {
	r.Add(registry.TestSpec{
		Name:      "admission-control/intent-resolution",
		Owner:     registry.OwnerAdmissionControl,
		Timeout:   time.Hour,
		Benchmark: true,
		// TODO(sumeer): Reduce to weekly after working well.
		// Tags:      registry.Tags(`weekly`),
		// Second node is solely for Prometheus.
		Cluster:         r.MakeClusterSpec(2, spec.CPU(8)),
		RequiresLicense: true,
		Leases:          registry.MetamorphicLeases,
		Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
			if c.Spec().NodeCount != 2 {
				t.Fatalf("expected 2 nodes, found %d", c.Spec().NodeCount)
			}
			// Nodes [1, crdbNodes] run CockroachDB; the last node hosts
			// Prometheus/Grafana.
			crdbNodes := c.Spec().NodeCount - 1
			promNode := crdbNodes + 1

			// NOTE(review): this reuses the changefeed admission control
			// dashboard; confirm that is the intended dashboard for this test.
			promCfg := &prometheus.Config{}
			promCfg.WithPrometheusNode(c.Node(promNode).InstallNodes()[0]).
				WithNodeExporter(c.Range(1, crdbNodes).InstallNodes()).
				WithCluster(c.Range(1, crdbNodes).InstallNodes()).
				WithGrafanaDashboardJSON(grafana.ChangefeedAdmissionControlGrafana)
			err := c.StartGrafana(ctx, t.L(), promCfg)
			require.NoError(t, err)

			c.Put(ctx, t.Cockroach(), "./cockroach", c.Range(1, crdbNodes))
			startOpts := option.DefaultStartOptsNoBackups()
			startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs,
				"--vmodule=io_load_listener=2")
			settings := install.MakeClusterSettings()
			c.Start(ctx, t.L(), startOpts, settings, c.Range(1, crdbNodes))
			setAdmissionControl(ctx, t, c, true)
			t.Status("running txn")
			m := c.NewMonitor(ctx, c.Range(1, crdbNodes))
			m.Go(func(ctx context.Context) error {
				db := c.Conn(ctx, t.L(), crdbNodes)
				defer db.Close()
				// Use the context-aware variant so that the statement is
				// abandoned if the test is canceled.
				_, err := db.ExecContext(ctx,
					`CREATE TABLE test_table(id integer PRIMARY KEY, t TEXT)`)
				if err != nil {
					return err
				}
				tx, err := db.BeginTx(ctx, &gosql.TxOptions{})
				if err != nil {
					return err
				}
				// A single large INSERT inside an explicit txn leaves one
				// intent per row until the txn commits.
				query := `INSERT INTO test_table(id, t) SELECT i, sha512(random()::text) FROM ` +
					`generate_series(0, 75000000) AS t(i);`
				_, err = tx.ExecContext(ctx, query)
				if err != nil {
					// Best-effort cleanup; the INSERT error is the one worth
					// reporting, so the rollback error is deliberately dropped.
					_ = tx.Rollback()
					return err
				}
				t.Status("intents created, committing txn")
				err = tx.Commit()
				if err != nil {
					return err
				}
				t.Status("sleeping for async intent resolution to complete")
				// Intents take ~10min to resolve, and we're padding by another 10min.
				// Wait on the context as well so that a canceled test does not
				// block here for the full duration.
				select {
				case <-ctx.Done():
					return ctx.Err()
				case <-time.After(20 * time.Minute):
				}
				t.Status("done sleeping")
				// TODO(sumeer): use prometheus client and StatCollector to ensure
				// that max(storage_l0_sublevels) is below the threshold of 20. Also
				// confirm that the intentcount metric has a very low value.
				return nil
			})
			m.Wait()
		},
	})
}

0 comments on commit f3c92a0

Please sign in to comment.