Skip to content

Commit

Permalink
storage: include version in benchmark fixture directory
Browse files Browse the repository at this point in the history
Some storage-related benchmarks leave fixtures around in `.gitignore`d
directories.

There is potential for fixtures from old versions to cause failures.
We are seeing some CI failures that would be explained by this (we're
not sure yet if it's possible).

In any case, it is better to include the version in the fixture name
to avid this (which can be a problem even locally).

The logging around the fixture location is also improved.

Informs cockroachdb#97061

Release note: None
Epic: none
  • Loading branch information
RaduBerinde committed Feb 14, 2023
1 parent 3b9d8cb commit 204ad97
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 19 deletions.
4 changes: 4 additions & 0 deletions pkg/ccl/storageccl/engineccl/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Do not add environment-specific entries here (see the top-level .gitignore
# for reasoning and alternatives).

# Old benchmark data.
mvcc_data
# New benchmark data.
mvcc_data_*
1 change: 1 addition & 0 deletions pkg/ccl/storageccl/engineccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ go_test(
"//pkg/base",
"//pkg/ccl/baseccl",
"//pkg/ccl/storageccl/engineccl/enginepbccl",
"//pkg/clusterversion",
"//pkg/keys",
"//pkg/roachpb",
"//pkg/settings/cluster",
Expand Down
14 changes: 12 additions & 2 deletions pkg/ccl/storageccl/engineccl/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"path/filepath"
"testing"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
Expand Down Expand Up @@ -43,10 +44,13 @@ import (
// The creation of the database is time consuming, so the caller can choose
// whether to use a temporary or permanent location.
func loadTestData(
dir string, numKeys, numBatches, batchTimeSpan, valueBytes int,
dirPrefix string, numKeys, numBatches, batchTimeSpan, valueBytes int,
) (storage.Engine, error) {
ctx := context.Background()

verStr := fmt.Sprintf("v%s", clusterversion.TestingBinaryVersion.String())
dir := fmt.Sprintf("%s_v%s_%d_%d_%d_%d", dirPrefix, verStr, numKeys, numBatches, batchTimeSpan, valueBytes)

exists := true
if _, err := os.Stat(dir); oserror.IsNotExist(err) {
exists = false
Expand All @@ -60,12 +64,18 @@ func loadTestData(
return nil, err
}

absPath, err := filepath.Abs(dir)
if err != nil {
absPath = dir
}

if exists {
log.Infof(context.Background(), "using existing test data: %s", absPath)
testutils.ReadAllFiles(filepath.Join(dir, "*"))
return eng, nil
}

log.Infof(context.Background(), "creating test data: %s", dir)
log.Infof(context.Background(), "creating test data: %s", absPath)

// Generate the same data every time.
rng := rand.New(rand.NewSource(1449168817))
Expand Down
17 changes: 14 additions & 3 deletions pkg/kv/kvserver/batcheval/cmd_refresh_range_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
Expand Down Expand Up @@ -210,6 +211,9 @@ func setupMVCCPebble(b testing.TB, dir string, lBaseMaxBytes int64, readOnly boo
func setupData(
ctx context.Context, b *testing.B, emk engineMaker, opts benchDataOptions,
) (storage.Engine, string) {
// Include the current version in the fixture name, or we may inadvertently
// run against a left-over fixture that is no longer supported.
verStr := fmt.Sprintf("v%s", clusterversion.TestingBinaryVersion.String())
orderStr := "linear"
if opts.randomKeyOrder {
orderStr = "random"
Expand All @@ -218,22 +222,29 @@ func setupData(
if opts.readOnlyEngine {
readOnlyStr = "_readonly"
}
loc := fmt.Sprintf("refresh_range_bench_data_%s%s_%d_%d_%d",
orderStr, readOnlyStr, opts.numKeys, opts.valueBytes, opts.lBaseMaxBytes)
loc := fmt.Sprintf("refresh_range_bench_data_%s_%s%s_%d_%d_%d",
verStr, orderStr, readOnlyStr, opts.numKeys, opts.valueBytes, opts.lBaseMaxBytes)

exists := true
if _, err := os.Stat(loc); oserror.IsNotExist(err) {
exists = false
} else if err != nil {
b.Fatal(err)
}

absPath, err := filepath.Abs(loc)
if err != nil {
absPath = loc
}

if exists {
log.Infof(ctx, "using existing refresh range benchmark data: %s", absPath)
testutils.ReadAllFiles(filepath.Join(loc, "*"))
return emk(b, loc, opts.lBaseMaxBytes, opts.readOnlyEngine), loc
}

eng := emk(b, loc, opts.lBaseMaxBytes, false)
log.Infof(ctx, "creating refresh range benchmark data: %s", loc)
log.Infof(ctx, "creating refresh range benchmark data: %s", absPath)

// Generate the same data every time.
rng := rand.New(rand.NewSource(1449168817))
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/rangefeed/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ go_test(
embed = [":rangefeed"],
deps = [
"//pkg/base",
"//pkg/clusterversion",
"//pkg/keys",
"//pkg/roachpb",
"//pkg/settings/cluster",
Expand Down
13 changes: 10 additions & 3 deletions pkg/kv/kvserver/rangefeed/catchup_scan_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
Expand Down Expand Up @@ -229,6 +230,7 @@ func setupMVCCPebble(b testing.TB, dir string, lBaseMaxBytes int64, readOnly boo
func setupData(
ctx context.Context, b *testing.B, emk engineMaker, opts benchDataOptions,
) (storage.Engine, string) {
verStr := fmt.Sprintf("v%s", clusterversion.TestingBinaryVersion.String())
orderStr := "linear"
if opts.randomKeyOrder {
orderStr = "random"
Expand All @@ -237,22 +239,27 @@ func setupData(
if opts.readOnlyEngine {
readOnlyStr = "_readonly"
}
loc := fmt.Sprintf("rangefeed_bench_data_%s%s_%d_%d_%d_%d",
orderStr, readOnlyStr, opts.numKeys, opts.valueBytes, opts.lBaseMaxBytes, opts.numRangeKeys)
loc := fmt.Sprintf("rangefeed_bench_data_%s_%s%s_%d_%d_%d_%d",
verStr, orderStr, readOnlyStr, opts.numKeys, opts.valueBytes, opts.lBaseMaxBytes, opts.numRangeKeys)
exists := true
if _, err := os.Stat(loc); oserror.IsNotExist(err) {
exists = false
} else if err != nil {
b.Fatal(err)
}

absPath, err := filepath.Abs(loc)
if err != nil {
absPath = loc
}
if exists {
log.Infof(ctx, "using existing refresh range benchmark data: %s", absPath)
testutils.ReadAllFiles(filepath.Join(loc, "*"))
return emk(b, loc, opts.lBaseMaxBytes, opts.readOnlyEngine), loc
}

eng := emk(b, loc, opts.lBaseMaxBytes, false)
log.Infof(ctx, "creating rangefeed benchmark data: %s", loc)
log.Infof(ctx, "creating rangefeed benchmark data: %s", absPath)

// Generate the same data every time.
rng := rand.New(rand.NewSource(1449168817))
Expand Down
16 changes: 13 additions & 3 deletions pkg/storage/bench_data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/errors/oserror"
"github.com/cockroachdb/pebble"
"github.com/cockroachdb/pebble/vfs"
"github.com/stretchr/testify/require"
)
Expand All @@ -38,7 +39,7 @@ type initialState interface {
Base() initialState

// Key returns a unique sequence of strings that uniquely identifies the
// represented initial condtions. Key is used as the cache key for reusing
// represented initial conditions. Key is used as the cache key for reusing
// databases computed by previous runs, so all configuration must be fully
// represented in Key's return value.
Key() []string
Expand All @@ -58,6 +59,14 @@ type engineWithLocation struct {
Location
}

// TODO(jackson): Tie this to the mapping in SetMinVersion.
var latestReleaseFormatMajorVersion = pebble.FormatPrePebblev1Marked // v22.2

var latestReleaseFormatMajorVersionOpt ConfigOption = func(cfg *engineConfig) error {
cfg.PebbleConfig.Opts.FormatMajorVersion = latestReleaseFormatMajorVersion
return nil
}

// getInitialStateEngine constructs an Engine with an initial database
// state necessary for a benchmark. The initial states are cached on the
// filesystem to avoid expensive reconstruction when possible. The return value
Expand Down Expand Up @@ -89,7 +98,7 @@ func getInitialStateEngine(

opts := append([]ConfigOption{
MustExist,
LatestReleaseFormatMajorVersion,
latestReleaseFormatMajorVersionOpt,
}, initial.ConfigOptions()...)

if !inMemory {
Expand Down Expand Up @@ -149,7 +158,7 @@ func buildInitialState(
e.Close()
buildFS = e.Location.fs
} else {
opts := append([]ConfigOption{LatestReleaseFormatMajorVersion}, initial.ConfigOptions()...)
opts := append([]ConfigOption{latestReleaseFormatMajorVersionOpt}, initial.ConfigOptions()...)

// Regardless of whether the initial conditions specify an in-memory engine
// or not, we build the conditions using an in-memory engine for
Expand Down Expand Up @@ -240,6 +249,7 @@ var _ initialState = mvccBenchData{}
func (d mvccBenchData) Key() []string {
key := []string{
"mvcc",
fmt.Sprintf("fmtver_%d", latestReleaseFormatMajorVersion),
fmt.Sprintf("numKeys_%d", d.numKeys),
fmt.Sprintf("numVersions_%d", d.numVersions),
fmt.Sprintf("valueBytes_%d", d.valueBytes),
Expand Down
8 changes: 0 additions & 8 deletions pkg/storage/open.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,6 @@ func Hook(hookFunc func(*base.StorageConfig) error) ConfigOption {
}
}

// LatestReleaseFormatMajorVersion opens the database already upgraded to the
// latest release's format major version.
var LatestReleaseFormatMajorVersion ConfigOption = func(cfg *engineConfig) error {
// TODO(jackson): Tie the below to the mapping in SetMinVersion.
cfg.PebbleConfig.Opts.FormatMajorVersion = pebble.FormatPrePebblev1Marked // v22.2
return nil
}

// If enables the given option if enable is true.
func If(enable bool, opt ConfigOption) ConfigOption {
if enable {
Expand Down

0 comments on commit 204ad97

Please sign in to comment.