Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

connectivity: add --collect-sysdump-on-failure flag #1228

Merged
merged 2 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions connectivity/check/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/cilium/cilium-cli/connectivity/filters"
"github.com/cilium/cilium-cli/defaults"
"github.com/cilium/cilium-cli/sysdump"
)

// Action represents an individual action (e.g. a curl call) in a Scenario
Expand Down Expand Up @@ -178,9 +179,15 @@ func (a *Action) Run(f func(*Action)) {
a.printFlows(a.Source())
a.printFlows(a.Destination())
}
if a.failed && a.test.ctx.params.PauseOnFail {
a.Log("Pausing after action failure, press the Enter key to continue:")
fmt.Scanln()
if a.failed {
if a.test.ctx.params.PauseOnFail {
a.Log("Pausing after action failure, press the Enter key to continue:")
fmt.Scanln()
}

if a.test.ctx.params.CollectSysdumpOnFailure {
a.collectSysdump()
}
}
}

Expand Down Expand Up @@ -880,3 +887,15 @@ r:

a.Log()
}

// collectSysdump builds a sysdump collector from the connectivity test's
// Kubernetes client, the user-supplied sysdump options, the current time and
// the CLI version, and then runs it.
//
// Run invokes it after a failed action when the CollectSysdumpOnFailure
// parameter is set. Errors from creating or running the collector are
// reported through a.Failf rather than returned.
func (a *Action) collectSysdump() {
	client := a.test.ctx.K8sClient()
	opts := a.test.ctx.params.SysdumpOptions

	c, err := sysdump.NewCollector(client, opts, time.Now(), a.test.ctx.version)
	if err != nil {
		a.Failf("Failed to create sysdump collector: %v", err)
		return
	}

	runErr := c.Run()
	if runErr == nil {
		return
	}
	a.Failf("Failed to collect sysdump: %v", runErr)
}
4 changes: 4 additions & 0 deletions connectivity/check/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/cilium/cilium-cli/connectivity/filters"
"github.com/cilium/cilium-cli/k8s"
"github.com/cilium/cilium-cli/sysdump"
)

type Parameters struct {
Expand Down Expand Up @@ -55,6 +56,9 @@ type Parameters struct {
HelmValuesSecretName string

DeleteCiliumOnNodes []string

CollectSysdumpOnFailure bool
SysdumpOptions sysdump.Options
}

func (p Parameters) ciliumEndpointTimeout() time.Duration {
Expand Down
5 changes: 4 additions & 1 deletion connectivity/check/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ type ConnectivityTest struct {
// Parameters to the test suite, specified by the CLI user.
params Parameters

version string

// Clients for source and destination clusters.
clients *deploymentClients

Expand Down Expand Up @@ -155,14 +157,15 @@ func (ct *ConnectivityTest) failedActions() []*Action {
}

// NewConnectivityTest returns a new ConnectivityTest.
func NewConnectivityTest(client *k8s.Client, p Parameters) (*ConnectivityTest, error) {
func NewConnectivityTest(client *k8s.Client, p Parameters, version string) (*ConnectivityTest, error) {
if err := p.validate(); err != nil {
return nil, err
}

k := &ConnectivityTest{
client: client,
params: p,
version: version,
ciliumPods: make(map[string]Pod),
echoPods: make(map[string]Pod),
clientPods: make(map[string]Pod),
Expand Down
12 changes: 11 additions & 1 deletion internal/cli/cmd/connectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cilium/cilium-cli/connectivity"
"github.com/cilium/cilium-cli/connectivity/check"
"github.com/cilium/cilium-cli/defaults"
"github.com/cilium/cilium-cli/sysdump"
)

var errInternal = errors.New("encountered internal error, exiting")
Expand All @@ -37,6 +38,11 @@ func newCmdConnectivity() *cobra.Command {

var params = check.Parameters{
Writer: os.Stdout,
SysdumpOptions: sysdump.Options{
LargeSysdumpAbortTimeout: sysdump.DefaultLargeSysdumpAbortTimeout,
LargeSysdumpThreshold: sysdump.DefaultLargeSysdumpThreshold,
Writer: os.Stdout,
},
}
var tests []string

Expand Down Expand Up @@ -65,7 +71,7 @@ func newCmdConnectivityTest() *cobra.Command {
}

// Instantiate the test harness.
cc, err := check.NewConnectivityTest(k8sClient, params)
cc, err := check.NewConnectivityTest(k8sClient, params, Version)
if err != nil {
return err
}
Expand Down Expand Up @@ -144,5 +150,9 @@ func newCmdConnectivityTest() *cobra.Command {
cmd.Flags().StringVar(&params.JSONMockImage, "json-mock-image", defaults.ConnectivityCheckJSONMockImage, "Image path to use for json mock")
cmd.Flags().StringVar(&params.DNSTestServerImage, "dns-test-server-image", defaults.ConnectivityDNSTestServerImage, "Image path to use for CoreDNS")

cmd.Flags().BoolVar(&params.CollectSysdumpOnFailure, "collect-sysdump-on-failure", false, "Collect sysdump after a test fails")

initSysdumpFlags(cmd, &params.SysdumpOptions, "sysdump-")

return cmd
}
108 changes: 56 additions & 52 deletions internal/cli/cmd/sysdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,81 +43,85 @@ func newCmdSysdump() *cobra.Command {
},
}

cmd.Flags().StringVar(&sysdumpOptions.CiliumLabelSelector,
"cilium-label-selector", sysdump.DefaultCiliumLabelSelector,
initSysdumpFlags(cmd, &sysdumpOptions, "")

return cmd
}

func initSysdumpFlags(cmd *cobra.Command, options *sysdump.Options, optionPrefix string) {
cmd.Flags().StringVar(&options.CiliumLabelSelector,
optionPrefix+"cilium-label-selector", sysdump.DefaultCiliumLabelSelector,
"The labels used to target Cilium pods")
cmd.Flags().StringVar(&sysdumpOptions.CiliumNamespace,
"cilium-namespace", "",
cmd.Flags().StringVar(&options.CiliumNamespace,
optionPrefix+"cilium-namespace", "",
"The namespace Cilium is running in")
cmd.Flags().StringVar(&sysdumpOptions.CiliumOperatorNamespace,
"cilium-operator-namespace", "",
cmd.Flags().StringVar(&options.CiliumOperatorNamespace,
optionPrefix+"cilium-operator-namespace", "",
"The namespace Cilium operator is running in")
cmd.Flags().StringVar(&sysdumpOptions.CiliumDaemonSetSelector,
"cilium-daemon-set-label-selector", sysdump.DefaultCiliumLabelSelector,
cmd.Flags().StringVar(&options.CiliumDaemonSetSelector,
optionPrefix+"cilium-daemon-set-label-selector", sysdump.DefaultCiliumLabelSelector,
"The labels used to target Cilium daemon set")
cmd.Flags().StringVar(&sysdumpOptions.CiliumOperatorLabelSelector,
"cilium-operator-label-selector", sysdump.DefaultCiliumOperatorLabelSelector,
cmd.Flags().StringVar(&options.CiliumOperatorLabelSelector,
optionPrefix+"cilium-operator-label-selector", sysdump.DefaultCiliumOperatorLabelSelector,
"The labels used to target Cilium operator pods")
cmd.Flags().StringVar(&sysdumpOptions.ClustermeshApiserverLabelSelector,
"clustermesh-apiserver-label-selector", sysdump.DefaultClustermeshApiserverLabelSelector,
cmd.Flags().StringVar(&options.ClustermeshApiserverLabelSelector,
optionPrefix+"clustermesh-apiserver-label-selector", sysdump.DefaultClustermeshApiserverLabelSelector,
"The labels used to target 'clustermesh-apiserver' pods")
cmd.Flags().BoolVar(&sysdumpOptions.Debug,
"debug", sysdump.DefaultDebug,
cmd.Flags().BoolVar(&options.Debug,
optionPrefix+"debug", sysdump.DefaultDebug,
"Whether to enable debug logging")
cmd.Flags().StringArrayVar(&sysdumpOptions.ExtraLabelSelectors,
"extra-label-selectors", nil,
cmd.Flags().StringArrayVar(&options.ExtraLabelSelectors,
optionPrefix+"extra-label-selectors", nil,
"Optional set of labels selectors used to target additional pods for log collection.")
cmd.Flags().StringVar(&sysdumpOptions.HubbleLabelSelector,
"hubble-label-selector", sysdump.DefaultHubbleLabelSelector,
cmd.Flags().StringVar(&options.HubbleLabelSelector,
optionPrefix+"hubble-label-selector", sysdump.DefaultHubbleLabelSelector,
"The labels used to target Hubble pods")
cmd.Flags().Int64Var(&sysdumpOptions.HubbleFlowsCount,
"hubble-flows-count", sysdump.DefaultHubbleFlowsCount,
cmd.Flags().Int64Var(&options.HubbleFlowsCount,
optionPrefix+"hubble-flows-count", sysdump.DefaultHubbleFlowsCount,
"Number of Hubble flows to collect. Setting to zero disables collecting Hubble flows.")
cmd.Flags().DurationVar(&sysdumpOptions.HubbleFlowsTimeout,
"hubble-flows-timeout", sysdump.DefaultHubbleFlowsTimeout,
cmd.Flags().DurationVar(&options.HubbleFlowsTimeout,
optionPrefix+"hubble-flows-timeout", sysdump.DefaultHubbleFlowsTimeout,
"Timeout for collecting Hubble flows")
cmd.Flags().StringVar(&sysdumpOptions.HubbleRelayLabelSelector,
"hubble-relay-labels", sysdump.DefaultHubbleRelayLabelSelector,
cmd.Flags().StringVar(&options.HubbleRelayLabelSelector,
optionPrefix+"hubble-relay-labels", sysdump.DefaultHubbleRelayLabelSelector,
"The labels used to target Hubble Relay pods")
cmd.Flags().StringVar(&sysdumpOptions.HubbleUILabelSelector,
"hubble-ui-labels", sysdump.DefaultHubbleUILabelSelector,
cmd.Flags().StringVar(&options.HubbleUILabelSelector,
optionPrefix+"hubble-ui-labels", sysdump.DefaultHubbleUILabelSelector,
"The labels used to target Hubble UI pods")
cmd.Flags().Int64Var(&sysdumpOptions.LogsLimitBytes,
"logs-limit-bytes", sysdump.DefaultLogsLimitBytes,
cmd.Flags().Int64Var(&options.LogsLimitBytes,
optionPrefix+"logs-limit-bytes", sysdump.DefaultLogsLimitBytes,
"The limit on the number of bytes to retrieve when collecting logs")
cmd.Flags().DurationVar(&sysdumpOptions.LogsSinceTime,
"logs-since-time", sysdump.DefaultLogsSinceTime,
cmd.Flags().DurationVar(&options.LogsSinceTime,
optionPrefix+"logs-since-time", sysdump.DefaultLogsSinceTime,
"How far back in time to go when collecting logs")
cmd.Flags().StringVar(&sysdumpOptions.NodeList,
"node-list", sysdump.DefaultNodeList,
cmd.Flags().StringVar(&options.NodeList,
optionPrefix+"node-list", sysdump.DefaultNodeList,
"Comma-separated list of node IPs or names to filter pods for which to collect gops and logs")
cmd.Flags().StringVar(&sysdumpOptions.OutputFileName,
"output-filename", sysdump.DefaultOutputFileName,
cmd.Flags().StringVar(&options.OutputFileName,
optionPrefix+"output-filename", sysdump.DefaultOutputFileName,
"The name of the resulting file (without extension)\n'<ts>' can be used as the placeholder for the timestamp")
cmd.Flags().BoolVar(&sysdumpOptions.Quick,
"quick", sysdump.DefaultQuick,
cmd.Flags().BoolVar(&options.Quick,
optionPrefix+"quick", sysdump.DefaultQuick,
"Whether to enable quick mode (i.e. skip collection of 'cilium-bugtool' output and logs)")
cmd.Flags().IntVar(&sysdumpOptions.WorkerCount,
"worker-count", sysdump.DefaultWorkerCount,
cmd.Flags().IntVar(&options.WorkerCount,
optionPrefix+"worker-count", sysdump.DefaultWorkerCount,
"The number of workers to use\nNOTE: There is a lower bound requirement on the number of workers for the sysdump operation to be effective. Therefore, for low values, the actual number of workers may be adjusted upwards.")
cmd.Flags().StringArrayVar(&sysdumpOptions.CiliumBugtoolFlags,
"cilium-bugtool-flags", nil,
cmd.Flags().StringArrayVar(&options.CiliumBugtoolFlags,
optionPrefix+"cilium-bugtool-flags", nil,
"Optional set of flags to pass to cilium-bugtool command.")
cmd.Flags().BoolVar(&sysdumpOptions.DetectGopsPID,
"detect-gops-pid", false,
cmd.Flags().BoolVar(&options.DetectGopsPID,
optionPrefix+"detect-gops-pid", false,
"Whether to automatically detect the gops agent PID.")
cmd.Flags().StringVar(&sysdumpOptions.CNIConfigDirectory,
"cni-config-directory", sysdump.DefaultCNIConfigDirectory,
cmd.Flags().StringVar(&options.CNIConfigDirectory,
optionPrefix+"cni-config-directory", sysdump.DefaultCNIConfigDirectory,
"Directory where CNI configs are located")
cmd.Flags().StringVar(&sysdumpOptions.CNIConfigMapName,
"cni-configmap-name", sysdump.DefaultCNIConfigMapName,
cmd.Flags().StringVar(&options.CNIConfigMapName,
optionPrefix+"cni-configmap-name", sysdump.DefaultCNIConfigMapName,
"The name of the CNI config map")
cmd.Flags().StringVar(&sysdumpOptions.TetragonNamespace,
"tetragon-namespace", sysdump.DefaultTetragonNamespace,
cmd.Flags().StringVar(&options.TetragonNamespace,
optionPrefix+"tetragon-namespace", sysdump.DefaultTetragonNamespace,
"The namespace Tetragon is running in")
cmd.Flags().StringVar(&sysdumpOptions.TetragonLabelSelector,
"tetragon-label-selector", sysdump.DefaultTetragonLabelSelector,
cmd.Flags().StringVar(&options.TetragonLabelSelector,
optionPrefix+"tetragon-label-selector", sysdump.DefaultTetragonLabelSelector,
"The labels used to target Tetragon pods")

return cmd
}