diff --git a/pkg/cmd/roachtest/decommission.go b/pkg/cmd/roachtest/decommission.go
index 31ab6c44060d..885881dca6a9 100644
--- a/pkg/cmd/roachtest/decommission.go
+++ b/pkg/cmd/roachtest/decommission.go
@@ -309,10 +309,15 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 		Multiplier:     2,
 	}
 
-	// Partially decommission then recommission a random node, from another
+	// Partially decommission then recommission n1, from another
 	// random node. Run a couple of status checks while doing so.
+	// We hard-code n1 to guard against the hypothetical case in which
+	// targetNode has no replicas (yet) to begin with, which would make
+	// it permanently decommissioned even with `--wait=none`.
+	// We can choose runNode freely (including choosing targetNode) since
+	// the decommission process won't succeed.
 	{
-		targetNode, runNode := h.getRandNode(), h.getRandNode()
+		targetNode, runNode := h.nodeIDs[0], h.getRandNode()
 		t.l.Printf("partially decommissioning n%d from n%d\n", targetNode, runNode)
 		o, err := h.decommission(ctx, c.Node(targetNode), runNode,
 			"--wait=none", "--format=csv")
@@ -379,25 +384,36 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 
 	// Check to see that operators aren't able to decommission into
 	// availability. We'll undo the attempted decommissioning event by
-	// recommissioning the targeted nodes.
+	// recommissioning the targeted nodes. Note that the decommission
+	// attempt will not result in any individual nodes being marked
+	// as decommissioned, as this would only happen if *all* targeted
+	// nodes report no replicas, which is impossible since we're targeting
+	// the whole cluster here.
 	{
 		// Attempt to decommission all the nodes.
 		{
-			runNode := h.getRandNode()
-			t.l.Printf("attempting to decommission all nodes from n%d\n", runNode)
-			o, err := h.decommission(ctx, c.All(), runNode,
-				"--wait=none", "--format=csv")
-			if err != nil {
-				t.Fatalf("decommission failed: %v", err)
-			}
+			// NB: the retry loop here is mostly silly, but since some of the fields
+			// below are updated asynchronously, we may for example find an 'active'
+			// node instead of a 'decommissioning' one.
+			if err := retry.WithMaxAttempts(ctx, retryOpts, 50, func() error {
+				runNode := h.getRandNode()
+				t.l.Printf("attempting to decommission all nodes from n%d\n", runNode)
+				o, err := h.decommission(ctx, c.All(), runNode,
+					"--wait=none", "--format=csv")
+				if err != nil {
+					t.Fatalf("decommission failed: %v", err)
+				}
 
-			exp := [][]string{decommissionHeader}
-			for i := 1; i <= c.spec.NodeCount; i++ {
-				rowRegex := []string{strconv.Itoa(i), "true", `\d+`, "true", "decommissioning", "false"}
-				exp = append(exp, rowRegex)
-			}
-			if err := h.matchCSV(o, exp); err != nil {
-				t.Fatalf("decommission failed: %v", err)
+				exp := [][]string{decommissionHeader}
+				for i := 1; i <= c.spec.NodeCount; i++ {
+					rowRegex := []string{strconv.Itoa(i), "true", `\d+`, "true", "decommissioning", "false"}
+					exp = append(exp, rowRegex)
+				}
+				if err := h.matchCSV(o, exp); err != nil {
+					t.Fatalf("decommission failed: %v", err)
+				}
+				return nil
+			}); err != nil {
+				t.Fatal(err)
 			}
 		}
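
// Aside: a minimal, standalone sketch of the retry-until-stable pattern the
// hunk above introduces. Nothing below comes from the roachtest harness;
// withMaxAttempts and fetchStatus are hypothetical stand-ins for
// retry.WithMaxAttempts and for running `cockroach node decommission` and
// parsing its CSV output.
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// withMaxAttempts retries fn with capped exponential backoff until it returns
// nil or the attempt budget is exhausted.
func withMaxAttempts(ctx context.Context, n int, fn func() error) error {
	backoff := time.Second
	var err error
	for i := 0; i < n; i++ {
		if err = fn(); err == nil {
			return nil
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(backoff):
		}
		if backoff *= 2; backoff > 5*time.Second {
			backoff = 5 * time.Second
		}
	}
	return fmt.Errorf("all %d attempts failed, last error: %w", n, err)
}

func main() {
	// Simulate the asynchronously-updated membership fields: the first couple
	// of status calls still report 'active' before settling on 'decommissioning'.
	attempts := 0
	fetchStatus := func() string {
		attempts++
		if attempts < 3 {
			return "active"
		}
		return "decommissioning"
	}

	err := withMaxAttempts(context.Background(), 50, func() error {
		if s := fetchStatus(); s != "decommissioning" {
			return errors.New("node still " + s)
		}
		return nil
	})
	fmt.Println("converged:", err == nil) // converged: true
}
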
@@ -468,12 +484,14 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 		}
 	}
 
-	// Fully recommission two random nodes, from a random node, randomly choosing
+	// Fully decommission two random nodes, from a random node, randomly choosing
 	// between using --wait={all,none}. We pin these two nodes to not re-use
 	// them for the block after, as they will have been fully decommissioned and
 	// by definition, non-operational.
 	decommissionedNodeA := h.getRandNode()
-	decommissionedNodeB := h.getRandNodeOtherThan(decommissionedNodeA)
+	h.blockFromRandNode(decommissionedNodeA)
+	decommissionedNodeB := h.getRandNode() // note A != B, since A is now blocked
+	h.blockFromRandNode(decommissionedNodeB)
 	{
 		targetNodeA, targetNodeB := decommissionedNodeA, decommissionedNodeB
 		if targetNodeB < targetNodeA {
@@ -516,20 +534,26 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 		t.Fatal(err)
 	}
 
-	// Check that even though two nodes are decommissioned, we still see
-	// them (since they remain live) in `node ls`.
+	// Check that the two decommissioned nodes vanish from `node ls` (since they
+	// will not remain live as the cluster refuses connections from them).
 	{
-		runNode = h.getRandNode()
-		t.l.Printf("checking that `node ls` (from n%d) shows all nodes\n", runNode)
-		o, err := execCLI(ctx, t, c, runNode, "node", "ls", "--format=csv")
-		if err != nil {
-			t.Fatalf("node-ls failed: %v", err)
-		}
-		exp := [][]string{{"id"}}
-		for i := 1; i <= c.spec.NodeCount; i++ {
-			exp = append(exp, []string{strconv.Itoa(i)})
-		}
-		if err := h.matchCSV(o, exp); err != nil {
+		if err := retry.WithMaxAttempts(ctx, retry.Options{}, 50, func() error {
+			runNode = h.getRandNode()
+			t.l.Printf("checking that `node ls` (from n%d) shows n-2 nodes\n", runNode)
+			o, err := execCLI(ctx, t, c, runNode, "node", "ls", "--format=csv")
+			if err != nil {
+				t.Fatalf("node-ls failed: %v", err)
+			}
+			exp := [][]string{{"id"}}
+			for i := 1; i <= c.spec.NodeCount; i++ {
+				if _, ok := h.randNodeBlocklist[i]; ok {
+					// Decommissioned, so should go away in due time.
+					continue
+				}
+				exp = append(exp, []string{strconv.Itoa(i)})
+			}
+			return h.matchCSV(o, exp)
+		}); err != nil {
 			t.Fatal(err)
 		}
 	}
@@ -537,19 +561,24 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 	// Ditto for `node status`.
 	{
 		runNode = h.getRandNode()
-		t.l.Printf("checking that `node status` (from n%d) shows all nodes\n", runNode)
+		t.l.Printf("checking that `node status` (from n%d) shows only live nodes\n", runNode)
 		o, err := execCLI(ctx, t, c, runNode, "node", "status", "--format=csv")
 		if err != nil {
 			t.Fatalf("node-status failed: %v", err)
 		}
 		numCols := h.getCsvNumCols(o)
-		colRegex := []string{}
-		for i := 1; i <= c.spec.NodeCount; i++ {
-			colRegex = append(colRegex, strconv.Itoa(i))
-		}
-		exp := h.expectIDsInStatusOut(colRegex, numCols)
-		if err := h.matchCSV(o, exp); err != nil {
+		if err := retry.WithMaxAttempts(ctx, retry.Options{}, 50, func() error {
+			colRegex := []string{}
+			for i := 1; i <= c.spec.NodeCount; i++ {
+				if _, ok := h.randNodeBlocklist[i]; ok {
+					continue // decommissioned
+				}
+				colRegex = append(colRegex, strconv.Itoa(i))
+			}
+			exp := h.expectIDsInStatusOut(colRegex, numCols)
+			return h.matchCSV(o, exp)
+		}); err != nil {
 			t.Fatal(err)
 		}
 	}
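
// Aside: the checks above all funnel through h.matchCSV(o, exp), where every
// cell of exp is a regular expression that must match the corresponding cell
// of the CSV output. Below is a standalone sketch of that idea, not the
// harness's actual implementation; the header row and column set are only
// illustrative.
package main

import (
	"encoding/csv"
	"fmt"
	"regexp"
	"strings"
)

// matchCSV parses input as CSV and matches every cell against the
// corresponding fully-anchored regexp in exp.
func matchCSV(input string, exp [][]string) error {
	rows, err := csv.NewReader(strings.NewReader(input)).ReadAll()
	if err != nil {
		return err
	}
	if len(rows) != len(exp) {
		return fmt.Errorf("expected %d rows, got %d", len(exp), len(rows))
	}
	for i, row := range rows {
		if len(row) != len(exp[i]) {
			return fmt.Errorf("row %d: expected %d cells, got %d", i, len(exp[i]), len(row))
		}
		for j, cell := range row {
			re, err := regexp.Compile("^" + exp[i][j] + "$")
			if err != nil {
				return err
			}
			if !re.MatchString(cell) {
				return fmt.Errorf("row %d, col %d: %q does not match %q", i, j, cell, exp[i][j])
			}
		}
	}
	return nil
}

func main() {
	out := "id,is_live,replicas\n3,true,12"
	exp := [][]string{
		{"id", "is_live", "replicas"},
		{"3", "true", `\d+`}, // `\d+` tolerates any replica count
	}
	fmt.Println(matchCSV(out, exp)) // <nil>
}
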
@@ -579,8 +608,10 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 
 		exp := [][]string{
 			decommissionHeader,
-			{strconv.Itoa(targetNodeA), "true", "0", "true", "decommissioned", "false"},
-			{strconv.Itoa(targetNodeB), "true", "0", "true", "decommissioned", "false"},
+			// NB: the "false" is liveness. We waited above for these nodes to
+			// vanish from `node ls`, so they are definitely not live at this point.
+			{strconv.Itoa(targetNodeA), "false", "0", "true", "decommissioned", "false"},
+			{strconv.Itoa(targetNodeB), "false", "0", "true", "decommissioned", "false"},
 			decommissionFooter,
 		}
 		if err := h.matchCSV(o, exp); err != nil {
@@ -599,6 +630,9 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 		if _, err := h.recommission(ctx, c.Nodes(targetNodeA, targetNodeB), runNode); err == nil {
 			t.Fatalf("expected recommission to fail")
 		}
+		// Now stop+wipe them for good to keep the logs simple and the dead node detector happy.
+		c.Stop(ctx, c.Nodes(targetNodeA, targetNodeB))
+		c.Wipe(ctx, c.Nodes(targetNodeA, targetNodeB))
 	}
 }
 
@@ -621,7 +655,7 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 	// everything will seem dead to the allocator at all times, so
 	// nothing will ever happen.
 	func() {
-		db := c.Conn(ctx, 1)
+		db := c.Conn(ctx, h.getRandNode())
 		defer db.Close()
 		const stmt = "SET CLUSTER SETTING server.time_until_store_dead = '1m15s'"
 		if _, err := db.ExecContext(ctx, stmt); err != nil {
@@ -634,11 +668,14 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 	// wiping it below. Roachprod attempts to initialize a cluster when
 	// starting a "fresh" first node (without an existing bootstrap marker
 	// on disk, which we happen to also be wiping away).
-	targetNode := h.getRandNodeOtherThan(decommissionedNodeA, decommissionedNodeB, firstNodeID)
+	targetNode := h.getRandNodeOtherThan(firstNodeID)
+	h.blockFromRandNode(targetNode)
 	t.l.Printf("intentionally killing n%d to later decommission it when down\n", targetNode)
 	c.Stop(ctx, c.Node(targetNode))
 
-	runNode := h.getRandNodeOtherThan(targetNode)
+	// Pick a runNode that is still in commission and will
+	// remain so (or it won't be able to contact the cluster).
+	runNode := h.getRandNode()
 	t.l.Printf("decommissioning n%d (from n%d) in absentia\n", targetNode, runNode)
 	if _, err := h.decommission(ctx, c.Node(targetNode), runNode,
 		"--wait=all", "--format=csv"); err != nil {
@@ -676,14 +713,15 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 	// Check that (at least after a bit) the node disappears from `node
 	// ls` because it is decommissioned and not live.
 	if err := retry.WithMaxAttempts(ctx, retryOpts, 50, func() error {
-		runNode := h.getRandNodeOtherThan(targetNode)
+		runNode := h.getRandNode()
 		o, err := execCLI(ctx, t, c, runNode, "node", "ls", "--format=csv")
 		if err != nil {
 			t.Fatalf("node-ls failed: %v", err)
 		}
 
 		var exp [][]string
-		for i := 1; i <= c.spec.NodeCount; i++ {
+		// We expect an entry for every node we haven't decommissioned yet.
+		for i := 1; i <= c.spec.NodeCount-len(h.randNodeBlocklist); i++ {
 			exp = append(exp, []string{fmt.Sprintf("[^%d]", targetNode)})
 		}
 
@@ -694,7 +732,7 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 
 	// Ditto for `node status`
	if err := retry.WithMaxAttempts(ctx, retryOpts, 50, func() error {
-		runNode := h.getRandNodeOtherThan(targetNode)
+		runNode := h.getRandNode()
 		o, err := execCLI(ctx, t, c, runNode, "node", "status", "--format=csv")
 		if err != nil {
 			t.Fatalf("node-status failed: %v", err)
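
// Aside: the `node ls` expectation above leans on node IDs being single
// digits: a fully-anchored cell regex like "[^3]" matches any one-character
// ID except "3", so a row for the decommissioned node fails the match. A
// small sketch of the trick; n3 as the decommissioned node is hypothetical.
package main

import (
	"fmt"
	"regexp"
)

func main() {
	targetNode := 3 // hypothetical decommissioned node
	re := regexp.MustCompile(fmt.Sprintf("^[^%d]$", targetNode))
	for _, id := range []string{"1", "2", "3", "4"} {
		fmt.Printf("n%s may appear in `node ls`: %t\n", id, re.MatchString(id))
	}
	// n1 true, n2 true, n3 false, n4 true
}
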
@@ -702,8 +740,7 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 
 		numCols := h.getCsvNumCols(o)
 		var expC []string
-		// We're checking for n-1 rows, where n is the node count.
-		for i := 1; i < c.spec.NodeCount; i++ {
+		for i := 1; i <= c.spec.NodeCount-len(h.randNodeBlocklist); i++ {
 			expC = append(expC, fmt.Sprintf("[^%d].*", targetNode))
 		}
 		exp := h.expectIDsInStatusOut(expC, numCols)
@@ -719,7 +756,7 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 	c.Stop(ctx, c.Node(targetNode))
 	c.Wipe(ctx, c.Node(targetNode))
 
-	joinNode := targetNode%c.spec.NodeCount + 1
+	joinNode := h.getRandNode()
 	joinAddr := c.InternalAddr(ctx, c.Node(joinNode))[0]
 	c.Start(ctx, t, c.Node(targetNode), startArgs(
 		fmt.Sprintf("-a=--join %s", joinAddr),
@@ -733,8 +770,17 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 		}
 		numCols := h.getCsvNumCols(o)
 		var expC []string
-		for i := 1; i <= c.spec.NodeCount; i++ {
-			expC = append(expC, fmt.Sprintf("[^%d].*", targetNode))
+		// The decommissioned nodes should all disappear. (We abuse the fact
+		// that node IDs are single-digit in this test.)
+		re := `[^`
+		for id := range h.randNodeBlocklist {
+			re += fmt.Sprint(id)
+		}
+		re += `].*`
+		// We expect all the decommissioned nodes to disappear, but we let
+		// one rejoin as a fresh node.
+		for i := 1; i <= c.spec.NodeCount-len(h.randNodeBlocklist)+1; i++ {
+			expC = append(expC, re)
 		}
 		exp := h.expectIDsInStatusOut(expC, numCols)
 		return h.matchCSV(o, exp)
@@ -748,7 +794,7 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
 	if err := retry.ForDuration(time.Minute, func() error {
 		// Verify the event log has recorded exactly one decommissioned or
 		// recommissioned event for each membership operation.
-		db := c.Conn(ctx, 1)
+		db := c.Conn(ctx, h.getRandNode())
 		defer db.Close()
 
 		rows, err := db.Query(`
@@ -837,6 +883,9 @@ type decommTestHelper struct {
 	t       *test
 	c       *cluster
 	nodeIDs []int
+	// randNodeBlocklist are the nodes that won't be returned from getRandNode().
+	// Populated via blockFromRandNode().
+	randNodeBlocklist map[int]struct{}
 }
 
 func newDecommTestHelper(t *test, c *cluster) *decommTestHelper {
@@ -845,9 +894,10 @@ func newDecommTestHelper(t *test, c *cluster) *decommTestHelper {
 		nodeIDs = append(nodeIDs, i)
 	}
 	return &decommTestHelper{
-		t:       t,
-		c:       c,
-		nodeIDs: nodeIDs,
+		t:                 t,
+		c:                 c,
+		nodeIDs:           nodeIDs,
+		randNodeBlocklist: map[int]struct{}{},
 	}
 }
 
@@ -1021,23 +1071,36 @@ func (h *decommTestHelper) expectIDsInStatusOut(ids []string, numCols int) [][]string {
 	return res
 }
 
+// blockFromRandNode ensures the node isn't returned from getRandNode{,OtherThan}
+// in the future.
+func (h *decommTestHelper) blockFromRandNode(id int) {
+	h.randNodeBlocklist[id] = struct{}{}
+}
+
+// getRandNode returns a random node that hasn't
+// been passed to blockFromRandNode() earlier.
 func (h *decommTestHelper) getRandNode() int {
-	return h.nodeIDs[rand.Intn(len(h.nodeIDs))]
+	return h.getRandNodeOtherThan()
 }
 
+// getRandNodeOtherThan is like getRandNode, but additionally
+// avoids returning the specified ids.
 func (h *decommTestHelper) getRandNodeOtherThan(ids ...int) int {
+	m := map[int]struct{}{}
+	for _, id := range ids {
+		m[id] = struct{}{}
+	}
+	for id := range h.randNodeBlocklist {
+		m[id] = struct{}{}
+	}
+	if len(m) == len(h.nodeIDs) {
+		h.t.Fatal("all nodes are blocked")
+	}
 	for {
-		cur := h.nodeIDs[rand.Intn(len(h.nodeIDs))]
-		inBlockList := false
-		for _, id := range ids {
-			if cur == id {
-				inBlockList = true
-			}
-		}
-		if inBlockList {
-			continue
+		id := h.nodeIDs[rand.Intn(len(h.nodeIDs))]
+		if _, ok := m[id]; !ok {
+			return id
 		}
-		return cur
 	}
 }
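
// Aside: a stripped-down, standalone version of the blocklist helper above,
// showing the intended semantics of blockFromRandNode/getRandNodeOtherThan
// outside the roachtest harness. The helper type and names below are local
// to this sketch, not the test's code.
package main

import (
	"fmt"
	"math/rand"
)

type helper struct {
	nodeIDs   []int
	blocklist map[int]struct{}
}

// block permanently removes id from the random-node pool.
func (h *helper) block(id int) { h.blocklist[id] = struct{}{} }

// randNodeOtherThan picks a random node that is neither blocked nor among
// ids; ok is false if no candidate remains (the real helper fatals instead).
func (h *helper) randNodeOtherThan(ids ...int) (node int, ok bool) {
	m := map[int]struct{}{}
	for _, id := range ids {
		m[id] = struct{}{}
	}
	for id := range h.blocklist {
		m[id] = struct{}{}
	}
	if len(m) >= len(h.nodeIDs) {
		return 0, false
	}
	for {
		id := h.nodeIDs[rand.Intn(len(h.nodeIDs))]
		if _, blocked := m[id]; !blocked {
			return id, true
		}
	}
}

func main() {
	h := &helper{nodeIDs: []int{1, 2, 3, 4, 5, 6}, blocklist: map[int]struct{}{}}
	h.block(2) // say n2 was fully decommissioned
	h.block(5) // ... and n5 too
	for i := 0; i < 3; i++ {
		if id, ok := h.randNodeOtherThan(1); ok { // additionally avoid n1
			fmt.Println("picked:", id) // never 1, 2, or 5
		}
	}
}
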