Skip to content

Commit

Permalink
storage: deflake TestNodeLivenessStatusMap
Browse files Browse the repository at this point in the history
Prior to this patch, this test would fail under `stressrace` after a few
dozen iterations. The root cause was the invalid call to
`t.Parallel()`, which this patch removes.

Additionally, this patch adapts TimeUntilStoreDead for each test case
to avoid flakes, and removes a previous hack obviated by this
simplification.

Release note: None

Co-authored-by: Tobias Schottdorf <[email protected]>
  • Loading branch information
knz and tbg committed Apr 24, 2019
1 parent d50ebea commit af6a6e8
Showing 1 changed file with 15 additions and 17 deletions.
32 changes: 15 additions & 17 deletions pkg/storage/node_liveness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -855,21 +855,23 @@ func TestNodeLivenessStatusMap(t *testing.T) {
// See what comes up in the status.
callerNodeLiveness := firstServer.GetNodeLiveness()

type expectedStatus struct {
type testCase struct {
nodeID roachpb.NodeID
expectedStatus storagepb.NodeLivenessStatus
}
testData := []expectedStatus{

// Below we're going to check that all statuses converge and stabilize
// to a known situation.
testData := []testCase{
{liveNodeID, storagepb.NodeLivenessStatus_LIVE},
{deadNodeID, storagepb.NodeLivenessStatus_DEAD},
{decommissioningNodeID, storagepb.NodeLivenessStatus_DECOMMISSIONING},
{removedNodeID, storagepb.NodeLivenessStatus_DECOMMISSIONED},
}

for _, test := range testData {
t.Run(test.expectedStatus.String(), func(t *testing.T) {
t.Run(fmt.Sprintf("n%d->%s", test.nodeID, test.expectedStatus), func(t *testing.T) {
nodeID, expectedStatus := test.nodeID, test.expectedStatus
t.Parallel()

testutils.SucceedsSoon(t, func() error {
// Ensure that dead nodes are quickly recognized as dead by
Expand All @@ -882,21 +884,17 @@ func TestNodeLivenessStatusMap(t *testing.T) {
storage.TimeUntilStoreDead.Override(&firstServer.ClusterSettings().SV,
storage.TestTimeUntilStoreDead)

log.Infof(ctx, "checking expected status for node %d", nodeID)
log.Infof(ctx, "checking expected status (%s) for node %d", expectedStatus, nodeID)
nodeStatuses := callerNodeLiveness.GetLivenessStatusMap()
if st, ok := nodeStatuses[nodeID]; !ok {
return fmt.Errorf("%s node not in statuses", expectedStatus)
} else {
if st != expectedStatus {
if expectedStatus == storagepb.NodeLivenessStatus_DECOMMISSIONING && st == storagepb.NodeLivenessStatus_DECOMMISSIONED {
// Server somehow shut down super-fast. Tolerating the mismatch.
return nil
}
return fmt.Errorf("unexpected status: got %s, expected %s",
st, expectedStatus)
}
st, ok := nodeStatuses[nodeID]
if !ok {
return errors.Errorf("node %d: not in statuses\n", nodeID)
}
if st != expectedStatus {
return errors.Errorf("node %d: unexpected status: got %s, expected %s\n",
nodeID, st, expectedStatus,
)
}
log.Infof(ctx, "node %d status ok", nodeID)
return nil
})
})
Expand Down

0 comments on commit af6a6e8

Please sign in to comment.