From 011d1fe593cc6a21f58909e2dee4b0a096d5cf45 Mon Sep 17 00:00:00 2001 From: davemay99 Date: Wed, 11 Nov 2020 18:26:55 -0500 Subject: [PATCH 1/7] operator debug - add client node filtering arguments --- command/operator_debug.go | 96 ++++++++++++++++++++++----- command/operator_debug_test.go | 115 +++++++++++++++++++++++++++++++-- 2 files changed, 190 insertions(+), 21 deletions(-) diff --git a/command/operator_debug.go b/command/operator_debug.go index 96f7f87033b..c0a04d30655 100644 --- a/command/operator_debug.go +++ b/command/operator_debug.go @@ -33,6 +33,8 @@ type OperatorDebugCommand struct { interval time.Duration logLevel string stale bool + maxNodes int + nodeClass string nodeIDs []string serverIDs []string consul *external @@ -69,9 +71,15 @@ Debug Options: -log-level= The log level to monitor. Defaults to DEBUG. + -max-nodes= + Cap the maximum number of client nodes included in the capture. Defaults to 10, set to 0 for unlimited. + -node-id=, Comma separated list of Nomad client node ids, to monitor for logs and include pprof - profiles. Accepts id prefixes. + profiles. Accepts id prefixes, and "all" to select all nodes (up to count = max-nodes). + + -node-class= + Filter client nodes based on node class. -server-id=, Comma separated list of Nomad server names, "leader", or "all" to monitor for logs and include pprof @@ -150,6 +158,8 @@ func (c *OperatorDebugCommand) AutocompleteFlags() complete.Flags { "-duration": complete.PredictAnything, "-interval": complete.PredictAnything, "-log-level": complete.PredictAnything, + "-max-nodes": complete.PredictAnything, + "-node-class": complete.PredictAnything, "-node-id": complete.PredictAnything, "-server-id": complete.PredictAnything, "-output": complete.PredictAnything, @@ -174,6 +184,8 @@ func (c *OperatorDebugCommand) Run(args []string) int { flags.StringVar(&duration, "duration", "2m", "") flags.StringVar(&interval, "interval", "2m", "") flags.StringVar(&c.logLevel, "log-level", "DEBUG", "") + flags.IntVar(&c.maxNodes, "max-nodes", 10, "") + flags.StringVar(&c.nodeClass, "node-class", "", "") flags.StringVar(&nodeIDs, "node-id", "", "") flags.StringVar(&serverIDs, "server-id", "", "") flags.BoolVar(&c.stale, "stale", false, "") @@ -232,27 +244,67 @@ func (c *OperatorDebugCommand) Run(args []string) int { return 1 } - // Resolve node prefixes + // Search all nodes If a node class is specified without a list of node id prefixes + if c.nodeClass != "" && nodeIDs == "" { + nodeIDs = "all" + } + + // Resolve client node id prefixes + nodesFound := 0 + nodeLookupFailCount := 0 + nodeCaptureCount := 0 + for _, id := range argNodes(nodeIDs) { - id = sanitizeUUIDPrefix(id) + if id == "all" { + // Capture from all nodes using empty prefix filter + id = "" + } else { + // Capture from nodes starting with prefix id + id = sanitizeUUIDPrefix(id) + } nodes, _, err := client.Nodes().PrefixList(id) if err != nil { c.Ui.Error(fmt.Sprintf("Error querying node info: %s", err)) return 1 } - // Return error if no nodes are found - if len(nodes) == 0 { + + // Increment fail count if no nodes are found + nodesFound = len(nodes) + if nodesFound == 0 { c.Ui.Error(fmt.Sprintf("No node(s) with prefix %q found", id)) - return 1 - } + nodeLookupFailCount++ + } else { + // Apply constraints to nodes found + for _, n := range nodes { + // Ignore nodes that do not match specified class + if c.nodeClass != "" && n.NodeClass != c.nodeClass { + continue + } - for _, n := range nodes { - c.nodeIDs = append(c.nodeIDs, n.ID) + // Add node to capture list + c.nodeIDs = append(c.nodeIDs, n.ID) + nodeCaptureCount++ + + // Stop looping when we reach the max + if c.maxNodes != 0 && nodeCaptureCount >= c.maxNodes { + break + } + } } } + // Return error if nodes were specified but none were found + if len(nodeIDs) > 0 && nodeCaptureCount == 0 { + c.Ui.Error(fmt.Sprintf("Failed to retrieve clients, 0 nodes found in list: %s", nodeIDs)) + return 1 + } + // Resolve servers members, err := client.Agent().Members() + // if err != nil { + // c.Ui.Error(fmt.Sprintf("Failed to retrieve server list -- check API address: %s", client.Address())) + // return 1 + // } c.writeJSON("version", "members.json", members, err) // We always write the error to the file, but don't range if no members found if serverIDs == "all" && members != nil { @@ -265,6 +317,8 @@ func (c *OperatorDebugCommand) Run(args []string) int { c.serverIDs = append(c.serverIDs, id) } } + serversFound := len(members.Members) + serverCaptureCount := len(c.serverIDs) // Return error if servers were specified but not found if len(serverIDs) > 0 && len(c.serverIDs) == 0 { @@ -282,12 +336,24 @@ func (c *OperatorDebugCommand) Run(args []string) int { c.timestamp = time.Now().UTC().Format(format) stamped := "nomad-debug-" + c.timestamp - c.Ui.Output("Starting debugger and capturing cluster data...") - c.Ui.Output(fmt.Sprintf("Capturing from servers: %v", c.serverIDs)) - c.Ui.Output(fmt.Sprintf("Capturing from client nodes: %v", c.nodeIDs)) - - c.Ui.Output(fmt.Sprintf(" Interval: '%s'", interval)) - c.Ui.Output(fmt.Sprintf(" Duration: '%s'", duration)) + // Display general info about the capture + c.Ui.Output("Starting debugger...") + c.Ui.Output("") + c.Ui.Output(fmt.Sprintf(" Servers: (%d/%d) %v", serverCaptureCount, serversFound, c.serverIDs)) + c.Ui.Output(fmt.Sprintf(" Clients: (%d/%d) %v", nodeCaptureCount, nodesFound, c.nodeIDs)) + if nodeCaptureCount == c.maxNodes { + c.Ui.Output(fmt.Sprintf(" Max node count reached (%d)", c.maxNodes)) + } + if nodeLookupFailCount > 0 { + c.Ui.Output(fmt.Sprintf("Client fail count: %v", nodeLookupFailCount)) + } + if c.nodeClass != "" { + c.Ui.Output(fmt.Sprintf(" Node Class: %s", c.nodeClass)) + } + c.Ui.Output(fmt.Sprintf(" Interval: %s", interval)) + c.Ui.Output(fmt.Sprintf(" Duration: %s", duration)) + c.Ui.Output("") + c.Ui.Output("Capturing cluster data...") // Create the output path var tmp string diff --git a/command/operator_debug_test.go b/command/operator_debug_test.go index f0806a6b892..961c24ce15f 100644 --- a/command/operator_debug_test.go +++ b/command/operator_debug_test.go @@ -6,6 +6,7 @@ import ( "testing" "time" + "github.com/hashicorp/nomad/command/agent" "github.com/hashicorp/nomad/testutil" "github.com/mitchellh/cli" "github.com/stretchr/testify/assert" @@ -34,10 +35,90 @@ func TestDebugUtils(t *testing.T) { require.Equal(t, "https://127.0.0.1:8500", e.addr("foo")) } +func TestDebug_NodeClass(t *testing.T) { + // Start test server and API client + srv, _, url := testServer(t, false, nil) + defer srv.Shutdown() + + // Wait for leadership to establish + testutil.WaitForLeader(t, srv.Agent.RPC) + + // Retrieve server RPC address to join clients + srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC + t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr) + + // Setup Client 1 (nodeclass = clienta) + agentConfFunc1 := func(c *agent.Config) { + c.Region = "global" + c.EnableDebug = true + c.Server.Enabled = false + c.Client.NodeClass = "clienta" + c.Client.Enabled = true + c.Client.Servers = []string{srvRPCAddr} + } + + // Start Client 1 + client1 := agent.NewTestAgent(t, "client1", agentConfFunc1) + defer client1.Shutdown() + + // Wait for the client to connect + testutil.WaitForClient(t, srv.Agent.Server(), client1.Agent.Client()) + t.Logf("[TEST] Client1 ready") + + // Setup Client 2 (nodeclass = clientb) + agentConfFunc2 := func(c *agent.Config) { + c.Region = "global" + c.EnableDebug = true + c.Server.Enabled = false + c.Client.NodeClass = "clientb" + c.Client.Enabled = true + c.Client.Servers = []string{srvRPCAddr} + } + + // Start Client 2 + client2 := agent.NewTestAgent(t, "client2", agentConfFunc2) + defer client2.Shutdown() + + // Wait for the client to connect + testutil.WaitForClient(t, srv.Agent.Server(), client2.Agent.Client()) + t.Logf("[TEST] Client2 ready") + + // Setup Client 3 (nodeclass = clienta) + agentConfFunc3 := func(c *agent.Config) { + c.Server.Enabled = false + c.EnableDebug = false + c.Client.NodeClass = "clienta" + c.Client.Servers = []string{srvRPCAddr} + } + + // Start Client 3 + client3 := agent.NewTestAgent(t, "client3", agentConfFunc3) + defer client3.Shutdown() + + // Wait for the client to connect + testutil.WaitForClient(t, srv.Agent.Server(), client3.Agent.Client()) + t.Logf("[TEST] Client3 ready") + + // Setup mock UI + ui := cli.NewMockUi() + cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} + + // Debug on client - node class = "clienta" + code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clienta", "-max-nodes", "2"}) + + assert.Equal(t, 0, code) // take note of failed return code, but continue to allow buffer content checks + require.Empty(t, ui.ErrorWriter.String(), "errorwriter should be empty") + require.Contains(t, ui.OutputWriter.String(), "Starting debugger") + require.Contains(t, ui.OutputWriter.String(), "Node Class: clienta") + + ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() +} + func TestDebugSuccesses(t *testing.T) { - t.Parallel() srv, _, url := testServer(t, false, nil) defer srv.Shutdown() + testutil.WaitForLeader(t, srv.Agent.RPC) ui := cli.NewMockUi() cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} @@ -47,20 +128,24 @@ func TestDebugSuccesses(t *testing.T) { // Debug on the leader code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "leader"}) assert.Equal(t, 0, code) // take note of failed return code, but continue to see why + assert.Empty(t, ui.ErrorWriter.String(), "errorwriter should be empty") require.Contains(t, ui.OutputWriter.String(), "Starting debugger") ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() // Debug on all servers code = cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "all"}) assert.Equal(t, 0, code) + require.Empty(t, ui.ErrorWriter.String(), "errorwriter should be empty") require.Contains(t, ui.OutputWriter.String(), "Starting debugger") ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() } func TestDebugFails(t *testing.T) { - t.Parallel() srv, _, url := testServer(t, false, nil) defer srv.Shutdown() + testutil.WaitForLeader(t, srv.Agent.RPC) ui := cli.NewMockUi() cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} @@ -68,22 +153,32 @@ func TestDebugFails(t *testing.T) { // Fails incorrect args code := cmd.Run([]string{"some", "bad", "args"}) require.Equal(t, 1, code) + ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() // Fails illegal node ids code = cmd.Run([]string{"-node-id", "foo:bar"}) require.Equal(t, 1, code) + ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() // Fails missing node ids - code = cmd.Run([]string{"-node-id", "abc,def"}) + code = cmd.Run([]string{"-node-id", "abc,def", "-duration", "250ms"}) require.Equal(t, 1, code) + ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() // Fails bad durations code = cmd.Run([]string{"-duration", "foo"}) require.Equal(t, 1, code) + ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() // Fails bad durations code = cmd.Run([]string{"-interval", "bar"}) require.Equal(t, 1, code) + ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() // Fails existing output format := "2006-01-02-150405Z" @@ -91,19 +186,27 @@ func TestDebugFails(t *testing.T) { path := filepath.Join(os.TempDir(), stamped) os.MkdirAll(path, 0755) defer os.Remove(path) - code = cmd.Run([]string{"-output", os.TempDir()}) + // short duration to prevent timeout + code = cmd.Run([]string{"-output", os.TempDir(), "-duration", "250ms"}) require.Equal(t, 2, code) + ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() // Fails bad address code = cmd.Run([]string{"-address", url + "bogus"}) - assert.Equal(t, 1, code) + assert.Equal(t, 1, code) // take note of failed return code, but continue to see why in the OutputWriter + require.NotContains(t, ui.OutputWriter.String(), "Starting debugger") + require.Contains(t, ui.ErrorWriter.String(), "invalid address") ui.OutputWriter.Reset() + ui.ErrorWriter.Reset() } func TestDebugCapturedFiles(t *testing.T) { - t.Parallel() + // NOTE: pprof tracing/profiling cannot be run in parallel + srv, _, url := testServer(t, false, nil) defer srv.Shutdown() + testutil.WaitForLeader(t, srv.Agent.RPC) ui := cli.NewMockUi() cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} From 36549e226a7e733c2aa9010f53bc69cde96f9ce1 Mon Sep 17 00:00:00 2001 From: davemay99 Date: Wed, 11 Nov 2020 18:32:10 -0500 Subject: [PATCH 2/7] add WaitForClient helper function --- testutil/wait.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/testutil/wait.go b/testutil/wait.go index ddc53139e68..7835ed49745 100644 --- a/testutil/wait.go +++ b/testutil/wait.go @@ -5,6 +5,8 @@ import ( "os" "time" + "github.com/hashicorp/nomad/client" + "github.com/hashicorp/nomad/nomad" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" testing "github.com/mitchellh/go-testing-interface" @@ -95,6 +97,24 @@ func WaitForLeader(t testing.T, rpc rpcFn) { }) } +// WaitForClient waits for a client to connect to the specified server +func WaitForClient(t testing.T, server *nomad.Server, client *client.Client) { + t.Helper() + WaitForResult(func() (bool, error) { + node, err := server.State().NodeByID(nil, client.NodeID()) + if err != nil { + return false, err + } + if node == nil { + return false, fmt.Errorf("no node") + } + + return node.Status == structs.NodeStatusReady, fmt.Errorf("wrong status: %s", node.Status) + }, func(err error) { + t.Fatalf("should have a client: %v", err) + }) +} + // WaitForVotingMembers blocks until autopilot promotes all server peers // to be voting members. // From 59efe2dd2fda87f97a100a841d8cc5b80d61aa59 Mon Sep 17 00:00:00 2001 From: davemay99 Date: Wed, 11 Nov 2020 19:18:40 -0500 Subject: [PATCH 3/7] use RPC in WaitForClient to avoid unnecessary imports --- command/operator_debug_test.go | 15 +++++++++------ testutil/wait.go | 23 +++++++++++++---------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/command/operator_debug_test.go b/command/operator_debug_test.go index 961c24ce15f..901773cf84b 100644 --- a/command/operator_debug_test.go +++ b/command/operator_debug_test.go @@ -62,8 +62,9 @@ func TestDebug_NodeClass(t *testing.T) { defer client1.Shutdown() // Wait for the client to connect - testutil.WaitForClient(t, srv.Agent.Server(), client1.Agent.Client()) - t.Logf("[TEST] Client1 ready") + client1NodeID := client1.Agent.Client().NodeID() + testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID) + t.Logf("[TEST] Client1 ready, id: %s", client1NodeID) // Setup Client 2 (nodeclass = clientb) agentConfFunc2 := func(c *agent.Config) { @@ -80,8 +81,9 @@ func TestDebug_NodeClass(t *testing.T) { defer client2.Shutdown() // Wait for the client to connect - testutil.WaitForClient(t, srv.Agent.Server(), client2.Agent.Client()) - t.Logf("[TEST] Client2 ready") + client2NodeID := client2.Agent.Client().NodeID() + testutil.WaitForClient(t, srv.Agent.RPC, client2NodeID) + t.Logf("[TEST] Client2 ready, id: %s", client2NodeID) // Setup Client 3 (nodeclass = clienta) agentConfFunc3 := func(c *agent.Config) { @@ -96,8 +98,9 @@ func TestDebug_NodeClass(t *testing.T) { defer client3.Shutdown() // Wait for the client to connect - testutil.WaitForClient(t, srv.Agent.Server(), client3.Agent.Client()) - t.Logf("[TEST] Client3 ready") + client3NodeID := client3.Agent.Client().NodeID() + testutil.WaitForClient(t, srv.Agent.RPC, client3NodeID) + t.Logf("[TEST] Client3 ready, id: %s", client3NodeID) // Setup mock UI ui := cli.NewMockUi() diff --git a/testutil/wait.go b/testutil/wait.go index 7835ed49745..2314e82c40a 100644 --- a/testutil/wait.go +++ b/testutil/wait.go @@ -5,8 +5,6 @@ import ( "os" "time" - "github.com/hashicorp/nomad/client" - "github.com/hashicorp/nomad/nomad" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" testing "github.com/mitchellh/go-testing-interface" @@ -97,21 +95,26 @@ func WaitForLeader(t testing.T, rpc rpcFn) { }) } -// WaitForClient waits for a client to connect to the specified server -func WaitForClient(t testing.T, server *nomad.Server, client *client.Client) { +// WaitForClient blocks until the client can be found +func WaitForClient(t testing.T, rpc rpcFn, nodeID string) { t.Helper() WaitForResult(func() (bool, error) { - node, err := server.State().NodeByID(nil, client.NodeID()) + req := structs.NodeSpecificRequest{ + NodeID: nodeID, + QueryOptions: structs.QueryOptions{Region: "global"}, + } + var out structs.SingleNodeResponse + + err := rpc("Node.GetNode", &req, &out) if err != nil { return false, err } - if node == nil { - return false, fmt.Errorf("no node") + if out.Node == nil { + return false, fmt.Errorf("node not found") } - - return node.Status == structs.NodeStatusReady, fmt.Errorf("wrong status: %s", node.Status) + return out.Node.Status == structs.NodeStatusReady, nil }, func(err error) { - t.Fatalf("should have a client: %v", err) + t.Fatalf("failed to find node: %v", err) }) } From 8fb695084712280c04adc376da4478f492419a52 Mon Sep 17 00:00:00 2001 From: davemay99 Date: Wed, 11 Nov 2020 19:43:04 -0500 Subject: [PATCH 4/7] guard against nil values --- command/operator_debug.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/command/operator_debug.go b/command/operator_debug.go index c0a04d30655..59cdad4adc6 100644 --- a/command/operator_debug.go +++ b/command/operator_debug.go @@ -301,10 +301,10 @@ func (c *OperatorDebugCommand) Run(args []string) int { // Resolve servers members, err := client.Agent().Members() - // if err != nil { - // c.Ui.Error(fmt.Sprintf("Failed to retrieve server list -- check API address: %s", client.Address())) - // return 1 - // } + if err != nil { + c.Ui.Error(fmt.Sprintf("Failed to retrieve server list; err: %v", err)) + return 1 + } c.writeJSON("version", "members.json", members, err) // We always write the error to the file, but don't range if no members found if serverIDs == "all" && members != nil { @@ -317,11 +317,19 @@ func (c *OperatorDebugCommand) Run(args []string) int { c.serverIDs = append(c.serverIDs, id) } } - serversFound := len(members.Members) - serverCaptureCount := len(c.serverIDs) + + serversFound := 0 + serverCaptureCount := 0 + + if members != nil { + serversFound = len(members.Members) + } + if c.serverIDs != nil { + serverCaptureCount = len(c.serverIDs) + } // Return error if servers were specified but not found - if len(serverIDs) > 0 && len(c.serverIDs) == 0 { + if len(serverIDs) > 0 && serverCaptureCount == 0 { c.Ui.Error(fmt.Sprintf("Failed to retrieve servers, 0 members found in list: %s", serverIDs)) return 1 } From 03764f346af9a4e7b902039b83709553ee7f9e9d Mon Sep 17 00:00:00 2001 From: davemay99 Date: Wed, 11 Nov 2020 21:47:23 -0500 Subject: [PATCH 5/7] move initialization up and shorten test duration --- command/operator_debug.go | 74 ++++++++++++++++++++-------------- command/operator_debug_test.go | 2 +- 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/command/operator_debug.go b/command/operator_debug.go index 59cdad4adc6..63f61f36ded 100644 --- a/command/operator_debug.go +++ b/command/operator_debug.go @@ -216,7 +216,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { return 1 } - // Parse the time durations + // Parse the capture duration d, err := time.ParseDuration(duration) if err != nil { c.Ui.Error(fmt.Sprintf("Error parsing duration: %s: %s", duration, err.Error())) @@ -224,6 +224,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { } c.duration = d + // Parse the capture interval i, err := time.ParseDuration(interval) if err != nil { c.Ui.Error(fmt.Sprintf("Error parsing interval: %s: %s", interval, err.Error())) @@ -231,6 +232,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { } c.interval = i + // Verify there are no extra arguments args = flags.Args() if l := len(args); l != 0 { c.Ui.Error("This command takes no arguments") @@ -238,6 +240,41 @@ func (c *OperatorDebugCommand) Run(args []string) int { return 1 } + // Initialize capture variables and structs + c.manifest = make([]string, 0) + ctx, cancel := context.WithCancel(context.Background()) + c.ctx = ctx + c.cancel = cancel + c.trap() + + // Generate timestamped file name + format := "2006-01-02-150405Z" + c.timestamp = time.Now().UTC().Format(format) + stamped := "nomad-debug-" + c.timestamp + + // Create the output directory + var tmp string + if output != "" { + // User specified output directory + tmp = filepath.Join(output, stamped) + _, err := os.Stat(tmp) + if !os.IsNotExist(err) { + c.Ui.Error("Output directory already exists") + return 2 + } + } else { + // Generate temp directory + tmp, err = ioutil.TempDir(os.TempDir(), stamped) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error creating tmp directory: %s", err.Error())) + return 2 + } + defer os.RemoveAll(tmp) + } + + c.collectDir = tmp + + // Create an instance of the API client client, err := c.Meta.Client() if err != nil { c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err.Error())) @@ -334,16 +371,6 @@ func (c *OperatorDebugCommand) Run(args []string) int { return 1 } - c.manifest = make([]string, 0) - ctx, cancel := context.WithCancel(context.Background()) - c.ctx = ctx - c.cancel = cancel - c.trap() - - format := "2006-01-02-150405Z" - c.timestamp = time.Now().UTC().Format(format) - stamped := "nomad-debug-" + c.timestamp - // Display general info about the capture c.Ui.Output("Starting debugger...") c.Ui.Output("") @@ -363,39 +390,23 @@ func (c *OperatorDebugCommand) Run(args []string) int { c.Ui.Output("") c.Ui.Output("Capturing cluster data...") - // Create the output path - var tmp string - if output != "" { - tmp = filepath.Join(output, stamped) - _, err := os.Stat(tmp) - if !os.IsNotExist(err) { - c.Ui.Error("Output directory already exists") - return 2 - } - } else { - tmp, err = ioutil.TempDir(os.TempDir(), stamped) - if err != nil { - c.Ui.Error(fmt.Sprintf("Error creating tmp directory: %s", err.Error())) - return 2 - } - defer os.RemoveAll(tmp) - } - - c.collectDir = tmp - + // Start collecting data err = c.collect(client) if err != nil { c.Ui.Error(fmt.Sprintf("Error collecting data: %s", err.Error())) return 2 } + // Write index json/html manifest files c.writeManifest() + // Exit before archive if output directory was specified if output != "" { c.Ui.Output(fmt.Sprintf("Created debug directory: %s", c.collectDir)) return 0 } + // Create archive tarball archiveFile := stamped + ".tar.gz" err = TarCZF(archiveFile, tmp, stamped) if err != nil { @@ -403,6 +414,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { return 2 } + // Final output with name of tarball c.Ui.Output(fmt.Sprintf("Created debug archive: %s", archiveFile)) return 0 } diff --git a/command/operator_debug_test.go b/command/operator_debug_test.go index 901773cf84b..3e335fa7250 100644 --- a/command/operator_debug_test.go +++ b/command/operator_debug_test.go @@ -190,7 +190,7 @@ func TestDebugFails(t *testing.T) { os.MkdirAll(path, 0755) defer os.Remove(path) // short duration to prevent timeout - code = cmd.Run([]string{"-output", os.TempDir(), "-duration", "250ms"}) + code = cmd.Run([]string{"-output", os.TempDir(), "-duration", "50ms"}) require.Equal(t, 2, code) ui.OutputWriter.Reset() ui.ErrorWriter.Reset() From aaefc248a95ac6e6ce766542d6b698e292a0bf44 Mon Sep 17 00:00:00 2001 From: davemay99 Date: Thu, 12 Nov 2020 10:47:34 -0500 Subject: [PATCH 6/7] cleanup nodeLookupFailCount logic --- command/operator_debug.go | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/command/operator_debug.go b/command/operator_debug.go index 63f61f36ded..6a640134b34 100644 --- a/command/operator_debug.go +++ b/command/operator_debug.go @@ -310,22 +310,23 @@ func (c *OperatorDebugCommand) Run(args []string) int { if nodesFound == 0 { c.Ui.Error(fmt.Sprintf("No node(s) with prefix %q found", id)) nodeLookupFailCount++ - } else { - // Apply constraints to nodes found - for _, n := range nodes { - // Ignore nodes that do not match specified class - if c.nodeClass != "" && n.NodeClass != c.nodeClass { - continue - } + continue + } - // Add node to capture list - c.nodeIDs = append(c.nodeIDs, n.ID) - nodeCaptureCount++ + // Apply constraints to nodes found + for _, n := range nodes { + // Ignore nodes that do not match specified class + if c.nodeClass != "" && n.NodeClass != c.nodeClass { + continue + } - // Stop looping when we reach the max - if c.maxNodes != 0 && nodeCaptureCount >= c.maxNodes { - break - } + // Add node to capture list + c.nodeIDs = append(c.nodeIDs, n.ID) + nodeCaptureCount++ + + // Stop looping when we reach the max + if c.maxNodes != 0 && nodeCaptureCount >= c.maxNodes { + break } } } From 149d7d9c0592bf8a1e645604361558d6632f2f02 Mon Sep 17 00:00:00 2001 From: davemay99 Date: Thu, 12 Nov 2020 11:06:32 -0500 Subject: [PATCH 7/7] only display max node notice if we actually tried to capture nodes --- command/operator_debug.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/operator_debug.go b/command/operator_debug.go index 6a640134b34..34966fb5ca9 100644 --- a/command/operator_debug.go +++ b/command/operator_debug.go @@ -377,7 +377,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { c.Ui.Output("") c.Ui.Output(fmt.Sprintf(" Servers: (%d/%d) %v", serverCaptureCount, serversFound, c.serverIDs)) c.Ui.Output(fmt.Sprintf(" Clients: (%d/%d) %v", nodeCaptureCount, nodesFound, c.nodeIDs)) - if nodeCaptureCount == c.maxNodes { + if nodeCaptureCount > 0 && nodeCaptureCount == c.maxNodes { c.Ui.Output(fmt.Sprintf(" Max node count reached (%d)", c.maxNodes)) } if nodeLookupFailCount > 0 {