Skip to content

Commit

Permalink
debug: update defaults to commonly used values
Browse files Browse the repository at this point in the history
  • Loading branch information
davemay99 authored Mar 9, 2021
1 parent 03a1192 commit 83af6f5
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 54 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,6 @@ azure-hashistack.pem

# generated keys for e2e tests
e2e/terraform/keys/

# debug bundles
nomad-debug-*
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ BUG FIXES:
* ui: Fixed the rendering of interstitial components shown after processing a dynamic application sizing recommendation. [[GH-10094](https://github.com/hashicorp/nomad/pull/10094)]

IMPROVEMENTS:
* cli: Update defaults for `nomad operator debug` flags `-interval` and `-server-id` to match common usage. [[GH-10121](https://github.com/hashicorp/nomad/issues/10121)]
* consul/connect: Enable setting `local_bind_address` field on connect upstreams [[GH-6248](https://github.com/hashicorp/nomad/issues/6248)]

## 1.0.4 (February 24, 2021)
* driver/docker: Added support for optional extra container labels. [[GH-9885](https://github.com/hashicorp/nomad/issues/9885)]

Expand Down
102 changes: 59 additions & 43 deletions command/operator_debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,59 +55,26 @@ func (c *OperatorDebugCommand) Help() string {
helpText := `
Usage: nomad operator debug [options]
Build an archive containing Nomad cluster configuration and state, and Consul and Vault
status. Include logs and pprof profiles for selected servers and client nodes.
Build an archive containing Nomad cluster configuration and state, and Consul
and Vault status. Include logs and pprof profiles for selected servers and
client nodes.
If ACLs are enabled, this command will require a token with the 'node:read'
capability to run. In order to collect information, the token will also
require the 'agent:read' and 'operator:read' capabilities, as well as the
'list-jobs' capability for all namespaces. To collect pprof profiles the
token will also require 'agent:write', or enable_debug configuration set to true.
token will also require 'agent:write', or enable_debug configuration set to
true.
General Options:
` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + `
Debug Options:
-duration=<duration>
The duration of the log monitor command. Defaults to 2m.
-interval=<interval>
The interval between snapshots of the Nomad state. If unspecified, only one snapshot is
captured.
-log-level=<level>
The log level to monitor. Defaults to DEBUG.
-max-nodes=<count>
Cap the maximum number of client nodes included in the capture. Defaults to 10, set to 0 for unlimited.
-node-id=<node>,<node>
Comma separated list of Nomad client node ids, to monitor for logs and include pprof
profiles. Accepts id prefixes, and "all" to select all nodes (up to count = max-nodes).
-node-class=<node-class>
Filter client nodes based on node class.
-pprof-duration=<duration>
Duration for pprof collection. Defaults to 1s.
-server-id=<server>,<server>
Comma separated list of Nomad server names, "leader", or "all" to monitor for logs and include pprof
profiles.
-stale=<true|false>
If "false", the default, get membership data from the cluster leader. If the cluster is in
an outage unable to establish leadership, it may be necessary to get the configuration from
a non-leader server.
-output=<path>
Path to the parent directory of the output directory. If not specified, an archive is built
in the current directory.
Consul Options:
-consul-http-addr=<addr>
The address and port of the Consul HTTP agent. Overrides the CONSUL_HTTP_ADDR environment variable.
The address and port of the Consul HTTP agent. Overrides the
CONSUL_HTTP_ADDR environment variable.
-consul-token=<token>
Token used to query Consul. Overrides the CONSUL_HTTP_TOKEN environment
Expand All @@ -133,6 +100,8 @@ Debug Options:
Path to a directory of PEM encoded CA cert files to verify the Consul
certificate. Overrides the CONSUL_CAPATH environment variable.
Vault Options:
-vault-address=<addr>
The address and port of the Vault HTTP agent. Overrides the VAULT_ADDR
environment variable.
Expand All @@ -156,6 +125,47 @@ Debug Options:
-vault-ca-path=<path>
Path to a directory of PEM encoded CA cert files to verify the Vault
certificate. Overrides the VAULT_CAPATH environment variable.
Debug Options:
-duration=<duration>
The duration of the log monitor command. Defaults to 2m.
-interval=<interval>
The interval between snapshots of the Nomad state. Set interval equal to
duration to capture a single snapshot. Defaults to 30s.
-log-level=<level>
The log level to monitor. Defaults to DEBUG.
-max-nodes=<count>
Cap the maximum number of client nodes included in the capture. Defaults
to 10, set to 0 for unlimited.
-node-id=<node>,<node>
Comma separated list of Nomad client node ids to monitor for logs, API
outputs, and pprof profiles. Accepts id prefixes, and "all" to select all
nodes (up to count = max-nodes). Defaults to "all".
-node-class=<node-class>
Filter client nodes based on node class.
-pprof-duration=<duration>
Duration for pprof collection. Defaults to 1s.
-server-id=<server>,<server>
Comma separated list of Nomad server names to monitor for logs, API
outputs, and pprof profiles. Accepts server names, "leader", or "all".
Defaults to "all".
-stale=<true|false>
If "false", the default, get membership data from the cluster leader. If
the cluster is in an outage unable to establish leadership, it may be
necessary to get the configuration from a non-leader server.
-output=<path>
Path to the parent directory of the output directory. If not specified, an
archive is built in the current directory.
`
return strings.TrimSpace(helpText)
}
Expand Down Expand Up @@ -195,12 +205,12 @@ func (c *OperatorDebugCommand) Run(args []string) int {
var nodeIDs, serverIDs string

flags.StringVar(&duration, "duration", "2m", "")
flags.StringVar(&interval, "interval", "2m", "")
flags.StringVar(&interval, "interval", "30s", "")
flags.StringVar(&c.logLevel, "log-level", "DEBUG", "")
flags.IntVar(&c.maxNodes, "max-nodes", 10, "")
flags.StringVar(&c.nodeClass, "node-class", "", "")
flags.StringVar(&nodeIDs, "node-id", "", "")
flags.StringVar(&serverIDs, "server-id", "", "")
flags.StringVar(&serverIDs, "server-id", "all", "")
flags.BoolVar(&c.stale, "stale", false, "")
flags.StringVar(&output, "output", "", "")
flags.StringVar(&pprofDuration, "pprof-duration", "1s", "")
Expand Down Expand Up @@ -246,6 +256,12 @@ func (c *OperatorDebugCommand) Run(args []string) int {
}
c.interval = i

// Validate interval
if i.Seconds() > d.Seconds() {
c.Ui.Error(fmt.Sprintf("Error parsing interval: %s is greater than duration %s", interval, duration))
return 1
}

// Parse the pprof capture duration
pd, err := time.ParseDuration(pprofDuration)
if err != nil {
Expand Down
27 changes: 16 additions & 11 deletions command/operator_debug_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func TestDebug_NodeClass(t *testing.T) {
cases := testCases{
{
name: "address=api, node-class=clienta, max-nodes=2",
args: []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clienta", "-max-nodes", "2"},
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clienta", "-max-nodes", "2"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
Expand All @@ -132,7 +132,7 @@ func TestDebug_NodeClass(t *testing.T) {
},
{
name: "address=api, node-class=clientb, max-nodes=2",
args: []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clientb", "-max-nodes", "2"},
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clientb", "-max-nodes", "2"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
Expand Down Expand Up @@ -189,19 +189,19 @@ func TestDebug_ClientToServer(t *testing.T) {
var cases = testCases{
{
name: "testAgent api server",
args: []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all"},
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{"Created debug archive"},
},
{
name: "server address",
args: []string{"-address", addrServer, "-duration", "250ms", "-server-id", "all", "-node-id", "all"},
args: []string{"-address", addrServer, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{"Created debug archive"},
},
{
name: "client1 address - verify no SIGSEGV panic",
args: []string{"-address", addrClient1, "-duration", "250ms", "-server-id", "all", "-node-id", "all"},
args: []string{"-address", addrClient1, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{"Created debug archive"},
},
Expand All @@ -218,7 +218,7 @@ func TestDebug_SingleServer(t *testing.T) {
var cases = testCases{
{
name: "address=api, server-id=leader",
args: []string{"-address", url, "-duration", "250ms", "-server-id", "leader"},
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "leader"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
Expand All @@ -229,7 +229,7 @@ func TestDebug_SingleServer(t *testing.T) {
},
{
name: "address=api, server-id=all",
args: []string{"-address", url, "-duration", "250ms", "-server-id", "all"},
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
Expand Down Expand Up @@ -261,7 +261,7 @@ func TestDebug_Failures(t *testing.T) {
},
{
name: "Fails missing node ids",
args: []string{"-node-id", "abc,def", "-duration", "250ms"},
args: []string{"-node-id", "abc,def", "-duration", "250ms", "-interval", "250ms"},
expectedCode: 1,
},
{
Expand All @@ -274,6 +274,11 @@ func TestDebug_Failures(t *testing.T) {
args: []string{"-interval", "bar"},
expectedCode: 1,
},
{
name: "Fails intervals greater than duration",
args: []string{"-duration", "5m", "-interval", "10m"},
expectedCode: 1,
},
{
name: "Fails bad address",
args: []string{"-address", url + "bogus"},
Expand Down Expand Up @@ -307,7 +312,7 @@ func TestDebug_Bad_CSIPlugin_Names(t *testing.T) {
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

// Debug on the leader and all client nodes
code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "leader", "-node-id", "all", "-output", os.TempDir()})
code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "leader", "-node-id", "all", "-output", os.TempDir()})
assert.Equal(t, 0, code)

// Bad plugin name should be escaped before it reaches the sandbox test
Expand Down Expand Up @@ -391,7 +396,7 @@ func TestDebug_ExistingOutput(t *testing.T) {
os.MkdirAll(path, 0755)
defer os.Remove(path)

code := cmd.Run([]string{"-output", os.TempDir(), "-duration", "50ms"})
code := cmd.Run([]string{"-output", os.TempDir(), "-duration", "50ms", "-interval", "50ms"})
require.Equal(t, 2, code)
}

Expand All @@ -413,7 +418,7 @@ func TestDebug_Fail_Pprof(t *testing.T) {
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

// Debug on client - node class = "clienta"
code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "all"})
code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all"})

assert.Equal(t, 0, code) // Pprof failure isn't fatal
require.Contains(t, ui.OutputWriter.String(), "Starting debugger")
Expand Down

0 comments on commit 83af6f5

Please sign in to comment.