Skip to content

Commit

Permalink
Merge #26542
Browse files Browse the repository at this point in the history
26542: roachtest: quiescence with dead node r=petermattis a=tschottdorf

This test currently fails, at least in remote mode (curiously in local
mode it works, at least on my laptop).

```
--- FAIL: kv/quiescence/nodes=3 [unstable] (1258.49s)
        kv.go:160: QPS dropped by more than 80%: from 2669.32 to 1582.14
```

Fixing this test is the goal for #9446.

Release note: None

Co-authored-by: Tobias Schottdorf <[email protected]>
  • Loading branch information
craig[bot] and tbg committed Jun 8, 2018
2 parents f762b26 + 4aedb39 commit 020ce2c
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 0 deletions.
99 changes: 99 additions & 0 deletions pkg/cmd/roachtest/kv.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ package main
import (
"context"
"fmt"

"time"

"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

func registerKV(r *registry) {
Expand Down Expand Up @@ -72,6 +76,101 @@ func registerKV(r *registry) {
}
}

func registerKVQuiescenceDead(r *registry) {
r.Add(testSpec{
Name: "kv/quiescence/nodes=3",
Nodes: nodes(4),
Stable: false, // added 6/7/2018
Run: func(ctx context.Context, t *test, c *cluster) {
if !c.isLocal() {
c.RemountNoBarrier(ctx)
}

nodes := c.nodes - 1
c.Put(ctx, cockroach, "./cockroach", c.Range(1, nodes))
c.Put(ctx, workload, "./workload", c.Node(nodes+1))
c.Start(ctx, c.Range(1, nodes))

run := func(cmd string, lastDown bool) {
n := nodes
if lastDown {
n--
}
m := newMonitor(ctx, c, c.Range(1, n))
m.Go(func(ctx context.Context) error {
t.WorkerStatus(cmd)
defer t.WorkerStatus()
return c.RunE(ctx, c.Node(nodes+1), cmd)
})
m.Wait()
}

db := c.Conn(ctx, 1)
defer db.Close()

for {
fullReplicated := false
if err := db.QueryRow(
"SELECT min(array_length(replicas, 1)) >= 3 FROM crdb_internal.ranges",
).Scan(&fullReplicated); err != nil {
t.Fatal(err)
}
if fullReplicated {
break
}
time.Sleep(time.Second)
}

qps := func(f func()) float64 {

numInserts := func() float64 {
var v float64
if err := db.QueryRowContext(
ctx, `SELECT value FROM crdb_internal.node_metrics WHERE name = 'sql.insert.count'`,
).Scan(&v); err != nil {
t.Fatal(err)
}
return v
}

tBegin := timeutil.Now()
before := numInserts()
f()
after := numInserts()
return (after - before) / timeutil.Since(tBegin).Seconds()
}

const kv = "./workload run kv --duration=10m --read-percent=0"

// Initialize the database with ~10k ranges so that the absence of
// quiescence hits hard once a node goes down.
run("./workload run kv --init --max-ops=1 --splits 10000 --concurrency 100 {pgurl:1}", false)
run(kv+" --seed 0 {pgurl:1}", true) // warm-up
// Measure qps with all nodes up (i.e. with quiescence).
qpsAllUp := qps(func() {
run(kv+" --seed 1 {pgurl:1}", true)
})
// Gracefully shut down third node (doesn't matter whether it's graceful or not).
c.Run(ctx, c.Node(nodes), "./cockroach quit --insecure --port {pgport:3}")
c.Stop(ctx, c.Node(nodes))
// Measure qps with node down (i.e. without quiescence).
qpsOneDown := qps(func() {
// Use a different seed to make sure it's not just stepping into the
// other earlier kv invocation's footsteps.
run(kv+" --seed 2 {pgurl:1}", true)
})

if minFrac, actFrac := 0.8, qpsOneDown/qpsAllUp; actFrac < minFrac {
t.Fatalf(
"QPS dropped from %.2f to %.2f (factor of %.2f, min allowed %.2f)",
qpsAllUp, qpsOneDown, actFrac, minFrac,
)
}
c.l.printf("QPS went from %.2f to %2.f with one node down\n", qpsAllUp, qpsOneDown)
},
})
}

func registerKVSplits(r *registry) {
r.Add(testSpec{
Name: "kv/splits/nodes=3",
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func registerTests(r *registry) {
registerKV(r)
registerKVScalability(r)
registerKVSplits(r)
registerKVQuiescenceDead(r)
registerLargeRange(r)
registerRestore(r)
registerRoachmart(r)
Expand Down

0 comments on commit 020ce2c

Please sign in to comment.