Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
96094: roachtest: unskip fuse disk-stall roachtest variant  r=jbowens a=nicktrav

**roachtest: unskip fuse disk-stall roachtest variant**

The `fuse` variant of the `disk-stalled` roachtest was skipped in
#95865.

Reenable the skipped variant, updating it to make use of our forked
version of `charybdefs`. This fork includes a patch that allows for
specifying a delay time for syscalls, making it possible to simulate a
complete disk stall. Previously, delay times were limited to 50ms, which
meant that the detection time had to be even lower (e.g. 40ms), which
was not representative of how Cockroach is configured in practice.

Fixes #95874.

Fixes #95815.

Fixes #96410.

Fixes #95886.

Release note: None.

96130: pkg/compose: bootstrap CCL for TestComposeCompare r=ZhouXing19,rail a=srosenberg

The nightly TestComposeCompare integration test uses a randomized SQL workload (a dialect denoted by sqlsmith.PostgresMode) to compare the results against Postgres. Additionally, it runs a second instance of CockroachDB using a series of mutators, e.g., randgen.StatisticsMutator, intended to alter table statistics, indexes, etc., but not the actual table data. Thus, all three data sources are expected to return the same result, for a given SQL query, modulo ignored SQL errors.

Some of the generated SQL may require CCL as can be seen from a number of previous test failures. This change bootstraps an environment variable (COCKROACH_DEV_LICENSE), similarly to roachprod and roachtest. The environment variable is passed via CI; it's also in our dev. environment, including gceworker.
The test aborts in case COCKROACH_DEV_LICENSE is empty.

Epic: none

Informs: #82867

Release note: None

96378: builtins: check_consistency now requires admin role r=rafiss a=e-mbrown

Resolve #88224

`crdb_internal.check_consistency` is an expensive operation that previously could be invoked by regular users. Now admin role is required.

Release note: None

Co-authored-by: Nick Travers <[email protected]>
Co-authored-by: Stan Rosenberg <[email protected]>
Co-authored-by: e-mbrown <[email protected]>
  • Loading branch information
4 people committed Feb 6, 2023
4 parents a0f8d9b + 6ed2fb0 + 186422f + 7aff21d commit 84bb156
Show file tree
Hide file tree
Showing 11 changed files with 110 additions and 34 deletions.
1 change: 1 addition & 0 deletions build/teamcity/cockroach/nightlies/compose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ $BAZCI --artifacts_dir=$ARTIFACTS_DIR -- \
"--sandbox_writable_path=$ARTIFACTS_DIR" \
"--test_tmpdir=$ARTIFACTS_DIR" \
--test_env=GO_TEST_WRAP_TESTV=1 \
--test_env=COCKROACH_DEV_LICENSE=$COCKROACH_DEV_LICENSE \
--test_arg -cockroach --test_arg $COCKROACH \
--test_arg -compare --test_arg $COMPAREBIN \
--test_timeout=1800 || exit_status=$?
Expand Down
67 changes: 44 additions & 23 deletions pkg/cmd/roachtest/tests/disk_stall.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import (
"github.com/stretchr/testify/require"
)

const maxSyncDur = 10 * time.Second

// registerDiskStalledDetection registers the disk stall test.
func registerDiskStalledDetection(r registry.Registry) {
stallers := map[string]func(test.Test, cluster.Cluster) diskStaller{
Expand All @@ -48,10 +50,10 @@ func registerDiskStalledDetection(r registry.Registry) {
r.Add(registry.TestSpec{
Name: fmt.Sprintf("disk-stalled/%s", name),
Owner: registry.OwnerStorage,
Cluster: r.MakeClusterSpec(4),
Cluster: r.MakeClusterSpec(4, spec.ReuseNone()),
Timeout: 20 * time.Minute,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runDiskStalledDetection(ctx, t, c, makeStaller(t, c))
runDiskStalledDetection(ctx, t, c, makeStaller(t, c), true /* doStall */)
},
// Encryption is implemented within the virtual filesystem layer,
// just like disk-health monitoring. It's important to exercise
Expand All @@ -69,31 +71,34 @@ func registerDiskStalledDetection(r registry.Registry) {
stallLogDir := stallLogDir
stallDataDir := stallDataDir
r.Add(registry.TestSpec{
Skip: "#95886",
SkipDetails: "The test current only induces a 50us disk-stall, which cannot be reliably detected.",
Name: fmt.Sprintf(
"disk-stalled/fuse/log=%t,data=%t",
stallLogDir, stallDataDir,
),
Owner: registry.OwnerStorage,
Cluster: r.MakeClusterSpec(4),
Cluster: r.MakeClusterSpec(4, spec.ReuseNone()),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runDiskStalledDetection(ctx, t, c, &fuseDiskStaller{
t: t,
c: c,
stallLogs: stallLogDir,
stallData: stallDataDir,
})
}, stallLogDir || stallDataDir /* doStall */)
},
EncryptionSupport: registry.EncryptionMetamorphic,
})
}
}
}

func runDiskStalledDetection(ctx context.Context, t test.Test, c cluster.Cluster, s diskStaller) {
const maxSyncDur = 10 * time.Second
func runDiskStalledDetection(
ctx context.Context, t test.Test, c cluster.Cluster, s diskStaller, doStall bool,
) {
startOpts := option.DefaultStartOpts()
startOpts.RoachprodOpts.ExtraArgs = []string{
"--store", s.DataDir(),
"--log", fmt.Sprintf(`{sinks: {stderr: {filter: INFO}}, file-defaults: {dir: "%s"}}`, s.LogDir()),
}
startSettings := install.MakeClusterSettings()
startSettings.Env = append(startSettings.Env,
"COCKROACH_AUTO_BALLAST=false",
Expand All @@ -109,11 +114,13 @@ func runDiskStalledDetection(ctx context.Context, t test.Test, c cluster.Cluster
c.Start(ctx, t.L(), startOpts, startSettings, c.Range(1, 3))

// Assert the process monotonic times are as expected.
start, ok := getProcessStartMonotonic(ctx, t, c, 1)
var ok bool
var start, exit time.Duration
start, ok = getProcessStartMonotonic(ctx, t, c, 1)
if !ok {
t.Fatal("unable to retrieve process start time; did Cockroach not start?")
}
if exit, exitOk := getProcessExitMonotonic(ctx, t, c, 1); exitOk {
if exit, ok = getProcessExitMonotonic(ctx, t, c, 1); ok && exit > 0 {
t.Fatalf("process has an exit monotonic time of %d; did Cockroach already exit?", exit)
}

Expand Down Expand Up @@ -165,7 +172,9 @@ func runDiskStalledDetection(ctx context.Context, t test.Test, c cluster.Cluster
t.L().PrintfCtx(ctx, "%.2f transactions completed before stall", totalTxnsPreStall)

t.Status("inducing write stall")
m.ExpectDeath()
if doStall {
m.ExpectDeath()
}
s.Stall(ctx, c.Node(1))
defer s.Unstall(ctx, c.Node(1))

Expand All @@ -181,8 +190,10 @@ func runDiskStalledDetection(ctx context.Context, t test.Test, c cluster.Cluster
t.Status("pinging SQL connection to n1")
err := n1Conn.PingContext(ctx)
t.L().PrintfCtx(ctx, "pinging n1's connection: %s", err)
if err == nil {
if doStall && err == nil {
t.Fatal("connection to n1 is still alive")
} else if !doStall && err != nil {
t.Fatalf("connection to n1 is dead: %s", err)
}
}

Expand Down Expand Up @@ -223,22 +234,29 @@ func runDiskStalledDetection(ctx context.Context, t test.Test, c cluster.Cluster
// Unstall the stalled node. It should be able to be reaped.
s.Unstall(ctx, c.Node(1))
time.Sleep(1 * time.Second)
exit, ok := getProcessExitMonotonic(ctx, t, c, 1)
if !ok {
t.Fatalf("unable to retrieve process exit time; stall went undetected")
exit, ok = getProcessExitMonotonic(ctx, t, c, 1)
if doStall {
if !ok {
t.Fatalf("unable to retrieve process exit time; stall went undetected")
}
t.L().PrintfCtx(ctx, "node exited at %s after test start\n", exit-start)
} else if ok && exit > 0 {
t.Fatal("no stall induced, but process exited")
}
t.L().PrintfCtx(ctx, "node exited at %s after test start\n", exit-start)

// Shut down the nodes, allowing any devices to be unmounted during cleanup.
c.Stop(ctx, t.L(), option.DefaultStopOpts(), c.Range(1, 3))
}

func getProcessStartMonotonic(
ctx context.Context, t test.Test, c cluster.Cluster, nodeID int,
) (time.Duration, bool) {
) (since time.Duration, ok bool) {
return getProcessMonotonicTimestamp(ctx, t, c, nodeID, "ActiveEnterTimestampMonotonic")
}

func getProcessExitMonotonic(
ctx context.Context, t test.Test, c cluster.Cluster, nodeID int,
) (time.Duration, bool) {
) (since time.Duration, ok bool) {
return getProcessMonotonicTimestamp(ctx, t, c, nodeID, "ActiveExitTimestampMonotonic")
}

Expand All @@ -262,7 +280,7 @@ func getProcessMonotonicTimestamp(
t.Fatalf("unable to parse monotonic timestamp %q: %s", parts[1], err)
}
if u == 0 {
return 0, false
return 0, true
}
return time.Duration(u) * time.Microsecond, true
}
Expand All @@ -287,7 +305,7 @@ func (s *dmsetupDiskStaller) device() string { return getDevice(s.t, s.c.Spec())

func (s *dmsetupDiskStaller) Setup(ctx context.Context) {
dev := s.device()
s.c.Run(ctx, s.c.All(), `sudo umount /mnt/data1`)
s.c.Run(ctx, s.c.All(), `sudo umount -f /mnt/data1 || true`)
s.c.Run(ctx, s.c.All(), `sudo dmsetup remove_all`)
s.c.Run(ctx, s.c.All(), `echo "0 $(sudo blockdev --getsz `+dev+`) linear `+dev+` 0" | `+
`sudo dmsetup create data1`)
Expand Down Expand Up @@ -377,7 +395,7 @@ func (s *cgroupDiskStaller) setThroughput(
))
}

// fuseDiskStaller uses a FUSE filesystem (charybdefs) to insert an artifical
// fuseDiskStaller uses a FUSE filesystem (charybdefs) to insert an artificial
// delay on all I/O.
type fuseDiskStaller struct {
t test.Test
Expand Down Expand Up @@ -417,11 +435,14 @@ func (s *fuseDiskStaller) Setup(ctx context.Context) {
}

func (s *fuseDiskStaller) Cleanup(ctx context.Context) {
s.c.Run(ctx, s.c.All(), "sudo umount {store-dir}/faulty")
s.c.Run(ctx, s.c.All(), "sudo umount -f {store-dir}/faulty || true")
}

func (s *fuseDiskStaller) Stall(ctx context.Context, nodes option.NodeListOption) {
s.c.Run(ctx, nodes, "charybdefs-nemesis --delay")
// Stall for 2x the max sync duration. The tool expects an integer
// representing the delay time, in microseconds.
stallMicros := (2 * maxSyncDur).Microseconds()
s.c.Run(ctx, nodes, "charybdefs-nemesis", "--delay", strconv.FormatInt(stallMicros, 10))
}

func (s *fuseDiskStaller) Unstall(ctx context.Context, nodes option.NodeListOption) {
Expand Down
5 changes: 4 additions & 1 deletion pkg/compose/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ go_test(
embed = [":compose"],
gotags = ["compose"],
tags = ["integration"],
deps = ["//pkg/build/bazel"],
deps = [
"//pkg/build/bazel",
"//pkg/util/envutil",
],
)

get_x_data(name = "get_x_data")
2 changes: 2 additions & 0 deletions pkg/compose/compare/compare/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ go_test(
"//pkg/internal/sqlsmith",
"//pkg/sql/randgen",
"//pkg/testutils",
"//pkg/util/envutil",
"//pkg/util/randutil",
"@com_github_jackc_pgx_v4//:pgx",
"@com_github_stretchr_testify//require",
],
)

Expand Down
11 changes: 11 additions & 0 deletions pkg/compose/compare/compare/compare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package compare
import (
"context"
"flag"
"fmt"
"os"
"path/filepath"
"testing"
Expand All @@ -28,8 +29,10 @@ import (
"github.com/cockroachdb/cockroach/pkg/internal/sqlsmith"
"github.com/cockroachdb/cockroach/pkg/sql/randgen"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/jackc/pgx/v4"
"github.com/stretchr/testify/require"
)

var (
Expand All @@ -38,6 +41,10 @@ var (
)

func TestCompare(t *testing.T) {
// N.B. randomized SQL workload performed by this test may require CCL
var license = envutil.EnvOrDefaultString("COCKROACH_DEV_LICENSE", "")
require.NotEmptyf(t, license, "COCKROACH_DEV_LICENSE must be set")

uris := map[string]struct {
addr string
init []string
Expand All @@ -57,13 +64,17 @@ func TestCompare(t *testing.T) {
"cockroach1": {
addr: "postgresql://root@cockroach1:26257/postgres?sslmode=disable",
init: []string{
"SET CLUSTER SETTING cluster.organization = 'Cockroach Labs - Production Testing'",
fmt.Sprintf("SET CLUSTER SETTING enterprise.license = '%s'", license),
"drop database if exists postgres",
"create database postgres",
},
},
"cockroach2": {
addr: "postgresql://root@cockroach2:26257/postgres?sslmode=disable",
init: []string{
"SET CLUSTER SETTING cluster.organization = 'Cockroach Labs - Production Testing'",
fmt.Sprintf("SET CLUSTER SETTING enterprise.license = '%s'", license),
"drop database if exists postgres",
"create database postgres",
},
Expand Down
2 changes: 2 additions & 0 deletions pkg/compose/compare/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ services:
- "${COCKROACH_PATH}:/cockroach/cockroach"
test:
image: ubuntu:xenial-20170214
environment:
- COCKROACH_DEV_LICENSE=$COCKROACH_DEV_LICENSE
# compare.test is a binary built by the pkg/compose/prepare.sh in non-bazel builds
command: /compare/compare.test -each ${EACH} -test.run ${TESTS} -artifacts ${ARTIFACTS}
depends_on:
Expand Down
2 changes: 2 additions & 0 deletions pkg/compose/compose_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"time"

"github.com/cockroachdb/cockroach/pkg/build/bazel"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
)

var (
Expand Down Expand Up @@ -113,6 +114,7 @@ func TestComposeCompare(t *testing.T) {
fmt.Sprintf("COCKROACH_PATH=%s", cockroachBin),
fmt.Sprintf("COMPARE_DIR_PATH=%s", compareDir),
fmt.Sprintf("ARTIFACTS=%s", *flagArtifacts),
fmt.Sprintf("COCKROACH_DEV_LICENSE=%s", envutil.EnvOrDefaultString("COCKROACH_DEV_LICENSE", "")),
}
out, err := cmd.CombinedOutput()
if err != nil {
Expand Down
40 changes: 32 additions & 8 deletions pkg/roachprod/install/cockroach.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,14 @@ func (c *SyncedCluster) generateStartCmd(
if err != nil {
return "", err
}
keyCmd, err := c.generateKeyCmd(ctx, l, node, startOpts)
if err != nil {
return "", err
}

return execStartTemplate(startTemplateData{
LogDir: c.LogDir(node),
KeyCmd: c.generateKeyCmd(node, startOpts),
KeyCmd: keyCmd,
EnvVars: append(append([]string{
fmt.Sprintf("ROACHPROD=%s", c.roachprodEnvValue(node)),
"GOTRACEBACK=crash",
Expand Down Expand Up @@ -552,16 +556,18 @@ func (c *SyncedCluster) generateStartArgs(
args = append(args, c.generateStartFlagsSQL()...)
}

args = append(args, startOpts.ExtraArgs...)

// Argument template expansion is node specific (e.g. for {store-dir}).
e := expander{
node: node,
}
for _, arg := range startOpts.ExtraArgs {
for i, arg := range args {
expandedArg, err := e.expand(ctx, l, c, arg)
if err != nil {
return nil, err
}
args = append(args, expandedArg)
args[i] = expandedArg
}

return args, nil
Expand All @@ -585,9 +591,14 @@ func (c *SyncedCluster) generateStartFlagsKV(node Node, startOpts StartOpts) []s
args = append(args, `--store`,
fmt.Sprintf(`path=%s,attrs=store%d:node%d:node%dstore%d`, storeDir, i, node, node, i))
}
} else {
} else if startOpts.ExtraArgs[idx] == "--store=" {
// The flag and path were provided together. Strip the flag prefix.
storeDir := strings.TrimPrefix(startOpts.ExtraArgs[idx], "--store=")
storeDirs = append(storeDirs, storeDir)
} else {
// Else, the store flag and path were specified as separate arguments. The
// path is the subsequent arg.
storeDirs = append(storeDirs, startOpts.ExtraArgs[idx+1])
}

if startOpts.EncryptedStores {
Expand Down Expand Up @@ -710,9 +721,11 @@ func (c *SyncedCluster) generateInitCmd(node Node) string {
return initCmd
}

func (c *SyncedCluster) generateKeyCmd(node Node, startOpts StartOpts) string {
func (c *SyncedCluster) generateKeyCmd(
ctx context.Context, l *logger.Logger, node Node, startOpts StartOpts,
) (string, error) {
if !startOpts.EncryptedStores {
return ""
return "", nil
}

var storeDirs []string
Expand All @@ -721,9 +734,14 @@ func (c *SyncedCluster) generateKeyCmd(node Node, startOpts StartOpts) string {
storeDir := c.NodeDir(node, i)
storeDirs = append(storeDirs, storeDir)
}
} else {
} else if startOpts.ExtraArgs[storeArgIdx] == "--store=" {
// The flag and path were provided together. Strip the flag prefix.
storeDir := strings.TrimPrefix(startOpts.ExtraArgs[storeArgIdx], "--store=")
storeDirs = append(storeDirs, storeDir)
} else {
// Else, the store flag and path were specified as separate arguments. The
// path is the subsequent arg.
storeDirs = append(storeDirs, startOpts.ExtraArgs[storeArgIdx+1])
}

// Command to create the store key.
Expand All @@ -735,7 +753,13 @@ func (c *SyncedCluster) generateKeyCmd(node Node, startOpts StartOpts) string {
openssl rand -out %[1]s/aes-128.key 48;
fi;`, storeDir)
}
return keyCmd.String()

e := expander{node: node}
expanded, err := e.expand(ctx, l, c, keyCmd.String())
if err != nil {
return "", err
}
return expanded, nil
}

func (c *SyncedCluster) useStartSingleNode() bool {
Expand Down
3 changes: 1 addition & 2 deletions pkg/roachprod/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ sudo service cassandra stop;
sudo rm -rf "${charybde_dir}" "${nemesis_path}" /usr/local/bin/charybdefs{,-nemesis}
sudo mkdir -p "${charybde_dir}"
sudo chmod 777 "${charybde_dir}"
# TODO(bilal): Change URL back to scylladb/charybdefs once https://github.com/scylladb/charybdefs/pull/28 is merged.
git clone --depth 1 "https://github.com/itsbilal/charybdefs.git" "${charybde_dir}"
git clone --depth 1 --branch crl "https://github.com/cockroachdb/charybdefs.git" "${charybde_dir}"
cd "${charybde_dir}"
thrift -r --gen cpp server.thrift
Expand Down
Loading

0 comments on commit 84bb156

Please sign in to comment.