kvserver: fix testing knob cast type
Previously, the wrong object was being cast, which caused the type
assertion for NodeLivenessTestingKnobs to fail. As a result, the testing
knob was never attached in some test cases. This patch ensures the
correct object is cast. This uncovered a bug where a stale range
descriptor was returned in setup functions, so this commit also patches
that by having `checkReplicaCount` take a pointer to a range descriptor.
Lastly, with these fixes in place, we found that a race between the
`replicateQueue`'s snapshots and the `raftSnapshotQueue`'s causes the
`TestReplicateQueueDeadNonVoters` test to time out. Issue #76996 was
filed to harden this test; it is skipped for the time being.

Release note: None
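For context, the failure mode behind the first fix can be shown with a minimal, self-contained Go sketch. The types below are simplified stand-ins for base.TestingKnobs and kvserver.NodeLivenessTestingKnobs, not the real CockroachDB definitions: a two-value type assertion against the wrong interface field is not a compile-time error, it just yields ok == false, so the knob is silently dropped.

	package main

	import "fmt"

	// Simplified stand-in for kvserver.NodeLivenessTestingKnobs.
	type NodeLivenessTestingKnobs struct {
		StorePoolNodeLivenessFn func() string
	}

	// Simplified stand-in for the knobs container: each knob slot is an
	// empty interface, so asserting on the wrong slot still compiles.
	type TestingKnobs struct {
		Store        interface{}
		NodeLiveness interface{}
	}

	func main() {
		knobs := TestingKnobs{
			NodeLiveness: NodeLivenessTestingKnobs{
				StorePoolNodeLivenessFn: func() string { return "test override" },
			},
		}

		// Buggy shape: assert on the Store slot. The two-value form does
		// not panic; it just reports ok == false, and the override is
		// silently ignored.
		if _, ok := knobs.Store.(NodeLivenessTestingKnobs); !ok {
			fmt.Println("knob never attached (the bug)")
		}

		// Fixed shape: assert on the NodeLiveness slot, mirroring the
		// server.go change below.
		if k, ok := knobs.NodeLiveness.(NodeLivenessTestingKnobs); ok &&
			k.StorePoolNodeLivenessFn != nil {
			fmt.Println(k.StorePoolNodeLivenessFn()) // "test override"
		}
	}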
amygao9 committed Feb 24, 2022
1 parent 24d886c commit e9d856e
Showing 2 changed files with 19 additions and 11 deletions.
28 changes: 18 additions & 10 deletions pkg/kv/kvserver/replicate_queue_test.go
@@ -388,15 +388,15 @@ func TestReplicateQueueUpAndDownReplicateNonVoters(t *testing.T) {
 func checkReplicaCount(
 	ctx context.Context,
 	tc *testcluster.TestCluster,
-	rangeDesc roachpb.RangeDescriptor,
+	rangeDesc *roachpb.RangeDescriptor,
 	voterCount, nonVoterCount int,
 ) (bool, error) {
 	err := forceScanOnAllReplicationQueues(tc)
 	if err != nil {
 		log.Infof(ctx, "store.ForceReplicationScanAndProcess() failed with: %s", err)
 		return false, err
 	}
-	rangeDesc, err = tc.LookupRange(rangeDesc.StartKey.AsRawKey())
+	*rangeDesc, err = tc.LookupRange(rangeDesc.StartKey.AsRawKey())
 	if err != nil {
 		return false, err
 	}
@@ -433,15 +433,18 @@ func TestReplicateQueueDecommissioningNonVoters(t *testing.T) {
 			},
 		},
 	)
+	_, err := tc.ServerConn(0).Exec(
+		`SET CLUSTER SETTING server.failed_reservation_timeout='1ms'`)
+	require.NoError(t, err)
 
 	scratchKey := tc.ScratchRange(t)
 	scratchRange := tc.LookupRangeOrFatal(t, scratchKey)
-	_, err := tc.ServerConn(0).Exec(
+	_, err = tc.ServerConn(0).Exec(
 		`ALTER RANGE DEFAULT CONFIGURE ZONE USING num_replicas = 3, num_voters = 1`,
 	)
 	require.NoError(t, err)
 	require.Eventually(t, func() bool {
-		ok, err := checkReplicaCount(ctx, tc, scratchRange, 1 /* voterCount */, 2 /* nonVoterCount */)
+		ok, err := checkReplicaCount(ctx, tc, &scratchRange, 1 /* voterCount */, 2 /* nonVoterCount */)
 		if err != nil {
 			log.Errorf(ctx, "error checking replica count: %s", err)
 			return false
@@ -466,7 +469,7 @@ func TestReplicateQueueDecommissioningNonVoters(t *testing.T) {
 		tc.Server(0).Decommission(ctx, livenesspb.MembershipStatus_DECOMMISSIONING, beforeNodeIDs))
 
 	require.Eventually(t, func() bool {
-		ok, err := checkReplicaCount(ctx, tc, scratchRange, 1 /* voterCount */, 2 /* nonVoterCount */)
+		ok, err := checkReplicaCount(ctx, tc, &scratchRange, 1 /* voterCount */, 2 /* nonVoterCount */)
 		if err != nil {
 			log.Errorf(ctx, "error checking replica count: %s", err)
 			return false
@@ -520,7 +523,7 @@ func TestReplicateQueueDecommissioningNonVoters(t *testing.T) {
 	// replicateQueue on and ensure that these redundant non-voters are removed.
 	tc.ToggleReplicateQueues(true)
 	require.Eventually(t, func() bool {
-		ok, err := checkReplicaCount(ctx, tc, scratchRange, 1 /* voterCount */, 0 /* nonVoterCount */)
+		ok, err := checkReplicaCount(ctx, tc, &scratchRange, 1 /* voterCount */, 0 /* nonVoterCount */)
 		if err != nil {
 			log.Errorf(ctx, "error checking replica count: %s", err)
 			return false
@@ -535,6 +538,7 @@ func TestReplicateQueueDecommissioningNonVoters(t *testing.T) {
 func TestReplicateQueueDeadNonVoters(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
+	skip.WithIssue(t, 76996)
 	skip.UnderRace(t, "takes a long time or times out under race")
 
 	ctx := context.Background()
@@ -564,15 +568,19 @@ func TestReplicateQueueDeadNonVoters(t *testing.T) {
 			},
 		},
 	)
+	_, err := tc.ServerConn(0).Exec(
+		`SET CLUSTER SETTING server.failed_reservation_timeout='1ms'`)
+	require.NoError(t, err)
 
 	// Setup a scratch range on a test cluster with 2 non-voters and 1 voter.
 	scratchKey := tc.ScratchRange(t)
 	scratchRange := tc.LookupRangeOrFatal(t, scratchKey)
-	_, err := tc.ServerConn(0).Exec(
+	_, err = tc.ServerConn(0).Exec(
 		`ALTER RANGE DEFAULT CONFIGURE ZONE USING num_replicas = 3, num_voters = 1`,
 	)
 	require.NoError(t, err)
 	require.Eventually(t, func() bool {
-		ok, err := checkReplicaCount(ctx, tc, scratchRange, 1 /* voterCount */, 2 /* nonVoterCount */)
+		ok, err := checkReplicaCount(ctx, tc, &scratchRange, 1 /* voterCount */, 2 /* nonVoterCount */)
 		if err != nil {
 			log.Errorf(ctx, "error checking replica count: %s", err)
 			return false
@@ -607,7 +615,7 @@ func TestReplicateQueueDeadNonVoters(t *testing.T) {
 	beforeNodeIDs := getNonVoterNodeIDs(scratchRange)
 	markDead(beforeNodeIDs)
 	require.Eventually(t, func() bool {
-		ok, err := checkReplicaCount(ctx, tc, scratchRange, 1 /* voterCount */, 2 /* nonVoterCount */)
+		ok, err := checkReplicaCount(ctx, tc, &scratchRange, 1 /* voterCount */, 2 /* nonVoterCount */)
 		if err != nil {
 			log.Errorf(ctx, "error checking replica count: %s", err)
 			return false
@@ -658,7 +666,7 @@ func TestReplicateQueueDeadNonVoters(t *testing.T) {
 
 	toggleReplicationQueues(tc, true)
 	require.Eventually(t, func() bool {
-		ok, err := checkReplicaCount(ctx, tc, scratchRange, 1 /* voterCount */, 0 /* nonVoterCount */)
+		ok, err := checkReplicaCount(ctx, tc, &scratchRange, 1 /* voterCount */, 0 /* nonVoterCount */)
 		if err != nil {
 			log.Errorf(ctx, "error checking replica count: %s", err)
 			return false
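The checkReplicaCount change above hinges on Go's pass-by-value semantics for structs. A minimal sketch (toy type, not the real roachpb.RangeDescriptor) of why the original signature kept handing retries a stale descriptor:

	package main

	import "fmt"

	// Toy stand-in for roachpb.RangeDescriptor.
	type RangeDescriptor struct{ Generation int }

	// Old shape: rangeDesc is a copy, so the refreshed descriptor is lost
	// when the function returns; every retry starts from the stale value.
	func refreshByValue(rangeDesc RangeDescriptor) {
		rangeDesc = RangeDescriptor{Generation: rangeDesc.Generation + 1}
	}

	// New shape: writing through the pointer updates the caller's copy,
	// so each retry observes the freshly looked-up descriptor.
	func refreshByPointer(rangeDesc *RangeDescriptor) {
		*rangeDesc = RangeDescriptor{Generation: rangeDesc.Generation + 1}
	}

	func main() {
		d := RangeDescriptor{Generation: 1}
		refreshByValue(d)
		fmt.Println(d.Generation) // 1: caller still holds the stale descriptor
		refreshByPointer(&d)
		fmt.Println(d.Generation) // 2: caller sees the refresh
	}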
2 changes: 1 addition & 1 deletion pkg/server/server.go
@@ -418,7 +418,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
 	registry.AddMetricStruct(nodeLiveness.Metrics())
 
 	nodeLivenessFn := kvserver.MakeStorePoolNodeLivenessFunc(nodeLiveness)
-	if nodeLivenessKnobs, ok := cfg.TestingKnobs.Store.(*kvserver.NodeLivenessTestingKnobs); ok &&
+	if nodeLivenessKnobs, ok := cfg.TestingKnobs.NodeLiveness.(kvserver.NodeLivenessTestingKnobs); ok &&
 		nodeLivenessKnobs.StorePoolNodeLivenessFn != nil {
 		nodeLivenessFn = nodeLivenessKnobs.StorePoolNodeLivenessFn
 	}
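Note that the fix changes two things at once: the field being asserted (NodeLiveness instead of Store) and the asserted type (the value type instead of *kvserver.NodeLivenessTestingKnobs). The latter matters too; a quick sketch with a toy type:

	package main

	import "fmt"

	type Knobs struct{ Fn func() }

	func main() {
		var slot interface{} = Knobs{} // knob stored as a value

		_, okPtr := slot.(*Knobs) // pointer assertion: fails for a stored value
		_, okVal := slot.(Knobs)  // value assertion: succeeds
		fmt.Println(okPtr, okVal) // false true
	}

So even with the right field, asserting the pointer type would still have silently dropped a knob stored as a value.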
