diff --git a/changelog/v1.18.0-rc3/collect-more-artifacts-on-ci-failure.yaml b/changelog/v1.18.0-rc3/collect-more-artifacts-on-ci-failure.yaml new file mode 100644 index 00000000000..6e1cb3b66c0 --- /dev/null +++ b/changelog/v1.18.0-rc3/collect-more-artifacts-on-ci-failure.yaml @@ -0,0 +1,9 @@ +changelog: + - type: NON_USER_FACING + description: >- + Gloo Gateway controller metrics and xds/krt snapshots are now collected and included in + the test failure artifacts. + After encountering some test failures that proved difficult to debug without knowing more + about the state of the cluster, we have added additional artifacts to be collected when + a test fails. + This will help us to more easily diagnose the cause of test failures. diff --git a/pkg/utils/glooadminutils/admincli/client.go b/pkg/utils/glooadminutils/admincli/client.go index f49d994082d..7eee99091fa 100644 --- a/pkg/utils/glooadminutils/admincli/client.go +++ b/pkg/utils/glooadminutils/admincli/client.go @@ -14,6 +14,8 @@ import ( const ( InputSnapshotPath = "/snapshots/input" + xdsSnapshotPath = "/snapshots/xds" + krtSnapshotPath = "/snapshots/krt" ) // Client is a utility for executing requests against the Gloo Admin API @@ -84,6 +86,16 @@ func (c *Client) InputSnapshotCmd(ctx context.Context) cmdutils.Cmd { return c.Command(ctx, curl.WithPath(InputSnapshotPath)) } +// XdsSnapshotCmd returns the cmdutils.Cmd that can be run, and will execute a request against the XDS Snapshot path +func (c *Client) XdsSnapshotCmd(ctx context.Context) cmdutils.Cmd { + return c.Command(ctx, curl.WithPath(xdsSnapshotPath)) +} + +// KrtSnapshotCmd returns the cmdutils.Cmd that can be run, and will execute a request against the KRT Snapshot path +func (c *Client) KrtSnapshotCmd(ctx context.Context) cmdutils.Cmd { + return c.Command(ctx, curl.WithPath(krtSnapshotPath)) +} + // GetInputSnapshot returns the data that is available at the input snapshot endpoint func (c *Client) GetInputSnapshot(ctx context.Context) 
([]interface{}, error) { var outLocation threadsafe.Buffer diff --git a/test/helpers/kube_dump.go b/test/helpers/kube_dump.go index b04b9d88fac..059e88486ba 100644 --- a/test/helpers/kube_dump.go +++ b/test/helpers/kube_dump.go @@ -15,6 +15,8 @@ import ( "github.com/solo-io/go-utils/threadsafe" "github.com/solo-io/gloo/pkg/utils/kubeutils/kubectl" + "github.com/solo-io/gloo/pkg/utils/kubeutils/portforward" + "github.com/solo-io/gloo/pkg/utils/requestutils/curl" "github.com/onsi/ginkgo/v2" "github.com/solo-io/gloo/pkg/cliutil/install" @@ -22,18 +24,14 @@ import ( gateway_defaults "github.com/solo-io/gloo/projects/gateway/pkg/defaults" "github.com/solo-io/gloo/projects/gloo/pkg/defaults" - "github.com/solo-io/skv2/codegen/util" + "github.com/solo-io/gloo/projects/gloo/pkg/servers/admin" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -var ( - kubeOutDir = filepath.Join(util.GetModuleRoot(), "_output", "kube2e-artifacts") - envoyOutDir = filepath.Join(kubeOutDir, "envoy-dump") -) - -// StandardGlooDumpOnFail creates adump of the kubernetes state and certain envoy data from the admin interface when a test fails +// StandardGlooDumpOnFail creates adump of the kubernetes state and certain envoy data from +// the admin interface when a test fails. // Look at `KubeDumpOnFail` && `EnvoyDumpOnFail` for more details -func StandardGlooDumpOnFail(out io.Writer, proxies ...metav1.ObjectMeta) func() { +func StandardGlooDumpOnFail(outLog io.Writer, outDir string, proxies ...metav1.ObjectMeta) func() { return func() { var namespaces []string for _, proxy := range proxies { @@ -42,29 +40,31 @@ func StandardGlooDumpOnFail(out io.Writer, proxies ...metav1.ObjectMeta) func() } } - KubeDumpOnFail(out, namespaces...)() - EnvoyDumpOnFail(out, proxies...)() + KubeDumpOnFail(outLog, outDir, namespaces...)() + ControllerDumpOnFail(outLog, outDir, namespaces...)() + EnvoyDumpOnFail(outLog, outDir, proxies...)() + + fmt.Printf("Test failed. 
Logs and cluster state are available in %s\n", outDir) } } // KubeDumpOnFail creates a small dump of the kubernetes state when a test fails. // This is useful for debugging test failures. -// The dump is written to _output/kube2e-artifacts. // The dump includes: // - docker state // - process state // - kubernetes state // - logs from all pods in the given namespaces // - yaml representations of all solo.io CRs in the given namespaces -func KubeDumpOnFail(out io.Writer, namespaces ...string) func() { +func KubeDumpOnFail(outLog io.Writer, outDir string, namespaces ...string) func() { return func() { - setupOutDir(kubeOutDir) + setupOutDir(outDir) - recordDockerState(fileAtPath(filepath.Join(kubeOutDir, "docker-state.log"))) - recordProcessState(fileAtPath(filepath.Join(kubeOutDir, "process-state.log"))) - recordKubeState(fileAtPath(filepath.Join(kubeOutDir, "kube-state.log"))) + recordDockerState(fileAtPath(filepath.Join(outDir, "docker-state.log"))) + recordProcessState(fileAtPath(filepath.Join(outDir, "process-state.log"))) + recordKubeState(fileAtPath(filepath.Join(outDir, "kube-state.log"))) - recordKubeDump(namespaces...) + recordKubeDump(outDir, namespaces...) 
} } @@ -110,11 +110,49 @@ func recordKubeState(f *os.File) { defer f.Close() kubeCli := &install.CmdKubectl{} - kubeState, err := kubeCli.KubectlOut(nil, "get", "all", "-A") + kubeState, err := kubeCli.KubectlOut(nil, "get", "all", "-A", "-o", "wide") if err != nil { f.WriteString("*** Unable to get kube state ***\n") return } + + resourcesToGet := []string{ + // Kubernetes resources + "secrets", + // Kube GW API resources + "gateways.gateway.networking.k8s.io", + "gatewayclasses.gateway.networking.k8s.io", + "httproutes.gateway.networking.k8s.io", + "referencegrants.gateway.networking.k8s.io", + // GG Kube GW resources + "gatewayparameters.gateway.gloo.solo.io", + "listeneroptions.gateway.solo.io", // only implemented for kube gw as of now + "httplisteneroptions.gateway.solo.io", // only implemented for kube gw as of now + // GG Gloo resources + "graphqlapis.graphql.gloo.solo.io", + "proxies.gloo.solo.io", + "settings.gloo.solo.io", + "upstreamgroups.gloo.solo.io", + "upstreams.gloo.solo.io", + // GG Edge GW resources + "gateways.gateway.solo.io", + "httpgateways.gateway.solo.io", + "tcpgateways.gateway.solo.io", + "virtualservices.gateway.solo.io", + // Shared GW resources + "routeoptions.gateway.solo.io", + "virtualhostoptions.gateway.solo.io", + // Dataplane extensions resources + "authconfigs.enterprise.gloo.solo.io", + "ratelimitconfigs.ratelimit.solo.io", + } + + kubeResources, err := kubeCli.KubectlOut(nil, "get", strings.Join(resourcesToGet, ","), "-A", "-owide") + if err != nil { + f.WriteString("*** Unable to get kube resources ***. Reason: " + err.Error() + " \n") + return + } + // Describe everything to identify the reason for issues such as Pods, LoadBalancers stuck in pending state // (insufficient resources, unable to acquire an IP), etc. // Ie: More context around the output of the previous command `kubectl get all -A` @@ -123,28 +161,32 @@ func recordKubeState(f *os.File) { f.WriteString("*** Unable to get kube describe ***. 
Reason: " + err.Error() + " \n") return } + kubeEndpointsState, err := kubeCli.KubectlOut(nil, "get", "endpoints", "-A") if err != nil { f.WriteString("*** Unable to get endpoint state ***. Reason: " + err.Error() + " \n") return } + f.WriteString("*** Kube state ***\n") f.WriteString(string(kubeState) + "\n") + f.WriteString(string(kubeResources) + "\n") f.WriteString(string(kubeDescribe) + "\n") f.WriteString(string(kubeEndpointsState) + "\n") + f.WriteString("*** End Kube state ***\n") } -func recordKubeDump(namespaces ...string) { +func recordKubeDump(outDir string, namespaces ...string) { // for each namespace, create a namespace directory that contains... for _, ns := range namespaces { // ...a pod logs subdirectoy - if err := recordPods(filepath.Join(kubeOutDir, ns, "_pods"), ns); err != nil { + if err := recordPods(filepath.Join(outDir, ns, "_pods"), ns); err != nil { fmt.Printf("error recording pod logs: %f, \n", err) } // ...and a subdirectory for each solo.io CRD with non-zero resources - if err := recordCRs(filepath.Join(kubeOutDir, ns), ns); err != nil { + if err := recordCRs(filepath.Join(outDir, ns), ns); err != nil { fmt.Printf("error recording pod logs: %f, \n", err) } } @@ -283,18 +325,123 @@ func kubeList(namespace string, target string) ([]string, string, error) { return toReturn, "", nil } +// ControllerDumpOnFail creates a small dump of the controller state when a test fails. +// This is useful for debugging test failures. 
+func ControllerDumpOnFail(outLog io.Writer, outDir string, namespaces ...string) func() { + return func() { + for _, ns := range namespaces { + namespaceOutDir := filepath.Join(outDir, ns) + setupOutDir(namespaceOutDir) + + // Get the Gloo Gateway controller logs + controllerLogsFilePath := filepath.Join(namespaceOutDir, "controller.log") + controllerLogsFile, err := os.OpenFile(controllerLogsFilePath, + os.O_APPEND|os.O_CREATE|os.O_WRONLY, os.ModePerm) + if err != nil { + fmt.Printf("error opening controller log file: %v\n", err) + } + + controllerLogsCmd := kubectl.NewCli().WithReceiver(controllerLogsFile).Command(context.Background(), + "-n", ns, "logs", "deployment/gloo", "-c", "gloo", "--tail=1000") + err = controllerLogsCmd.Run().Cause() + if err != nil { + fmt.Printf("error running controller logs command: %v\n", err) + } + + // podStdOut := bytes.NewBuffer(nil) + // podStdErr := bytes.NewBuffer(nil) + + // Fetch the name of the Gloo Gateway controller pod + getGlooPodNameCmd := i.Actions.Kubectl().Command(ctx, "get", "pod", "-n", i.Metadata.InstallNamespace, + "--selector", "gloo=gloo", "--output", "jsonpath='{.items[0].metadata.name}'") + cmdErr := getGlooPodNameCmd.WithStdout(podStdOut).WithStderr(podStdErr).Run() + if cmdErr != nil { + i.Assertions.Require.NoError(cmdErr) + } + + // Clean up and check the output + glooPodName := strings.Trim(podStdOut.String(), "'") + if glooPodName == "" { + i.Assertions.Require.NoError(fmt.Errorf("failed to get the Gloo Gateway controller pod name: %s", + podStdErr.String())) + } + + // Get the metrics from the Gloo Gateway controller pod and write them to a file + metricsFilePath := filepath.Join(failureDir, "metrics.log") + metricsFile, err := os.OpenFile(metricsFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, os.ModePerm) + i.Assertions.Require.NoError(err) + + // Using an ephemeral debug pod fetch the metrics from the Gloo Gateway controller + metricsCmd := i.Actions.Kubectl().Command(ctx, "debug", "-n", 
i.Metadata.InstallNamespace, + "-it", "--image=curlimages/curl:7.83.1", glooPodName, "--", + "curl", "http://localhost:9091/metrics") + cmdErr = metricsCmd.WithStdout(metricsFile).WithStderr(metricsFile).Run() + if cmdErr != nil { + i.Assertions.Require.NoError(cmdErr) + } + + // Open a port-forward to the Gloo Gateway controller pod's admin port + portForwarder, err := i.Actions.Kubectl().StartPortForward(ctx, + portforward.WithDeployment("gloo", i.Metadata.InstallNamespace), + portforward.WithPorts(int(admin.AdminPort), int(admin.AdminPort)), + ) + i.Assertions.Require.NoError(err) + + defer func() { + portForwarder.Close() + portForwarder.WaitForStop() + }() + + adminClient := admincli.NewClient(). + WithReceiver(io.Discard). + WithCurlOptions( + curl.WithRetries(3, 0, 10), + curl.WithPort(int(admin.AdminPort)), + ) + + // Get krt snapshot from the Gloo Gateway controller pod and write it to a file + krtSnapshotFilePath := filepath.Join(failureDir, "krt_snapshot.log") + krtSnapshotFile, err := os.OpenFile(krtSnapshotFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, os.ModePerm) + i.Assertions.Require.NoError(err) + + cmdErr = adminClient.KrtSnapshotCmd(ctx). + WithStdout(krtSnapshotFile). + WithStderr(krtSnapshotFile). + Run() + if cmdErr != nil { + i.Assertions.Require.NoError(cmdErr) + } + + // Get xds snapshot from the Gloo Gateway controller pod and write it to a file + xdsSnapshotFilePath := filepath.Join(failureDir, "xds_snapshot.log") + xdsSnapshotFile, err := os.OpenFile(xdsSnapshotFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, os.ModePerm) + i.Assertions.Require.NoError(err) + + cmdErr = adminClient.XdsSnapshotCmd(ctx). + WithStdout(xdsSnapshotFile). + WithStderr(xdsSnapshotFile). + Run() + if cmdErr != nil { + i.Assertions.Require.NoError(cmdErr) + } + + } + } +} + // EnvoyDumpOnFail creates a small dump of the envoy admin interface when a test fails. // This is useful for debugging test failures. -// The dump is written to _output/envoy-dump. 
// The dump includes: // - config dump // - stats // - clusters // - listeners -func EnvoyDumpOnFail(_ io.Writer, proxies ...metav1.ObjectMeta) func() { +func EnvoyDumpOnFail(_ io.Writer, outDir string, proxies ...metav1.ObjectMeta) func() { return func() { - setupOutDir(envoyOutDir) for _, proxy := range proxies { + envoyOutDir := filepath.Join(outDir, proxy.Namespace, proxy.Name) + setupOutDir(envoyOutDir) + proxyName := proxy.GetName() if proxyName == "" { proxyName = gateway_defaults.GatewayProxyName @@ -304,13 +451,23 @@ func EnvoyDumpOnFail(_ io.Writer, proxies ...metav1.ObjectMeta) func() { proxyNamespace = defaults.GlooSystem } - adminCli, shutdown, _ := admincli.NewPortForwardedClient(context.Background(), fmt.Sprintf("deployment/%s", proxyName), proxyNamespace) + adminCli, shutdown, err := admincli.NewPortForwardedClient(context.Background(), + fmt.Sprintf("deployment/%s", proxyName), proxyNamespace) + if err != nil { + fmt.Printf("error creating admin cli: %v\n", err) + continue + } + defer shutdown() - adminCli.ConfigDumpCmd(context.Background(), nil).WithStdout(fileAtPath(filepath.Join(envoyOutDir, "config.log"))).Run().Cause() - adminCli.StatsCmd(context.Background()).WithStdout(fileAtPath(filepath.Join(envoyOutDir, "stats.log"))).Run().Cause() - adminCli.ClustersCmd(context.Background()).WithStdout(fileAtPath(filepath.Join(envoyOutDir, "clusters.log"))).Run().Cause() - adminCli.ListenersCmd(context.Background()).WithStdout(fileAtPath(filepath.Join(envoyOutDir, "listeners.log"))).Run().Cause() + adminCli.ConfigDumpCmd(context.Background(), nil). + WithStdout(fileAtPath(filepath.Join(envoyOutDir, "config.log"))).Run().Cause() + adminCli.StatsCmd(context.Background()). + WithStdout(fileAtPath(filepath.Join(envoyOutDir, "stats.log"))).Run().Cause() + adminCli.ClustersCmd(context.Background()). + WithStdout(fileAtPath(filepath.Join(envoyOutDir, "clusters.log"))).Run().Cause() + adminCli.ListenersCmd(context.Background()). 
+ WithStdout(fileAtPath(filepath.Join(envoyOutDir, "listeners.log"))).Run().Cause() } } } diff --git a/test/kube2e/gateway/gateway_suite_test.go b/test/kube2e/gateway/gateway_suite_test.go index f98a9db07b3..de60c581679 100644 --- a/test/kube2e/gateway/gateway_suite_test.go +++ b/test/kube2e/gateway/gateway_suite_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/solo-io/gloo/test/kubernetes/testutils/cluster" + "github.com/solo-io/skv2/codegen/util" kubetestclients "github.com/solo-io/gloo/test/kubernetes/testutils/clients" @@ -63,7 +64,10 @@ func StartTestHelper() { testHelper, err = kube2e.GetTestHelper(ctx, namespace) Expect(err).NotTo(HaveOccurred()) - skhelpers.RegisterPreFailHandler(helpers.StandardGlooDumpOnFail(GinkgoWriter, metav1.ObjectMeta{Namespace: testHelper.InstallNamespace})) + + outDir := filepath.Join(util.GetModuleRoot(), "_output", "kube2e-artifacts") + skhelpers.RegisterPreFailHandler(helpers.StandardGlooDumpOnFail(GinkgoWriter, outDir, + metav1.ObjectMeta{Namespace: testHelper.InstallNamespace})) kubeCli = kubectl.NewCli().WithReceiver(GinkgoWriter) diff --git a/test/kube2e/gloo/gloo_suite_test.go b/test/kube2e/gloo/gloo_suite_test.go index e8f13f8bc7f..206094c271f 100644 --- a/test/kube2e/gloo/gloo_suite_test.go +++ b/test/kube2e/gloo/gloo_suite_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/solo-io/gloo/test/kubernetes/testutils/cluster" + "github.com/solo-io/skv2/codegen/util" "github.com/solo-io/gloo/pkg/utils/kubeutils/kubectl" @@ -65,7 +66,10 @@ var _ = BeforeSuite(func() { testHelper, err = kube2e.GetTestHelper(ctx, namespace) Expect(err).NotTo(HaveOccurred()) testHelper.SetKubeCli(kubectl.NewCli().WithReceiver(GinkgoWriter)) - skhelpers.RegisterPreFailHandler(helpers.StandardGlooDumpOnFail(GinkgoWriter, metav1.ObjectMeta{Namespace: testHelper.InstallNamespace})) + + outDir := filepath.Join(util.GetModuleRoot(), "_output", "kube2e-artifacts") + skhelpers.RegisterPreFailHandler(helpers.StandardGlooDumpOnFail(GinkgoWriter, outDir, 
+ metav1.ObjectMeta{Namespace: testHelper.InstallNamespace})) // Allow skipping of install step for running multiple times if !glootestutils.ShouldSkipInstall() { diff --git a/test/kube2e/upgrade/upgrade_suite_test.go b/test/kube2e/upgrade/upgrade_suite_test.go index afed671e336..2650a226d30 100644 --- a/test/kube2e/upgrade/upgrade_suite_test.go +++ b/test/kube2e/upgrade/upgrade_suite_test.go @@ -52,7 +52,8 @@ var _ = BeforeSuite(func() { testHelper, err := kube2e.GetTestHelper(suiteCtx, namespace) Expect(err).NotTo(HaveOccurred()) - skhelpers.RegisterPreFailHandler(helpers.StandardGlooDumpOnFail(GinkgoWriter, + outDir := filepath.Join(util.GetModuleRoot(), "_output", "kube2e-artifacts") + skhelpers.RegisterPreFailHandler(helpers.StandardGlooDumpOnFail(GinkgoWriter, outDir, metav1.ObjectMeta{Namespace: "upgrade"}, metav1.ObjectMeta{Namespace: testHelper.InstallNamespace}, metav1.ObjectMeta{Namespace: "other-ns"})) 
diff --git a/test/kubernetes/e2e/test.go b/test/kubernetes/e2e/test.go index 7690d5cb474..943652c0b94 100644 --- a/test/kubernetes/e2e/test.go +++ b/test/kubernetes/e2e/test.go @@ -1,6 +1,7 @@ package e2e import ( + "bytes" "context" "errors" "fmt" @@ -8,10 +9,11 @@ import ( "os" "path/filepath" "runtime" - "strings" "testing" "time" + "github.com/solo-io/gloo/pkg/utils/kubeutils/kubectl" + "github.com/solo-io/gloo/test/helpers" "github.com/solo-io/gloo/test/kubernetes/testutils/actions" "github.com/solo-io/gloo/test/kubernetes/testutils/assertions" "github.com/solo-io/gloo/test/kubernetes/testutils/cluster" @@ -19,6 +21,7 @@ import ( "github.com/solo-io/gloo/test/kubernetes/testutils/helper" testruntime "github.com/solo-io/gloo/test/kubernetes/testutils/runtime" "github.com/solo-io/gloo/test/testutils" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // MustTestHelper returns the SoloTestHelper used for e2e tests @@ -227,7 +230,6 @@ func (i *TestInstallation) UninstallGlooGateway(ctx context.Context, uninstallFn // PreFailHandler is the function that is invoked if a test in the given TestInstallation fails func (i *TestInstallation) PreFailHandler(ctx context.Context) { - // This is a work in progress // The idea here is we want to accumulate ALL information about this TestInstallation into a single directory // That way we can upload it in CI, or inspect it locally @@ -240,80 +242,20 @@ func (i *TestInstallation) PreFailHandler(ctx context.Context) { i.Assertions.Require.NoError(err) } - glooLogFilePath := filepath.Join(failureDir, "gloo.log") - glooLogFile, err := os.OpenFile(glooLogFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, os.ModePerm) - i.Assertions.Require.NoError(err) - defer glooLogFile.Close() - - glooLogsCmd := i.Actions.Kubectl().Command(ctx, "logs", "-n", i.Metadata.InstallNamespace, "deployments/gloo") - _ = glooLogsCmd.WithStdout(glooLogFile).WithStderr(glooLogFile).Run() - - edgeGatewayLogFilePath := filepath.Join(failureDir, 
"edge_gateway.log") - edgeGatewayLogFile, err := os.OpenFile(edgeGatewayLogFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, os.ModePerm) + namespaces, err := i.Actions.Kubectl().Namespaces(ctx) i.Assertions.Require.NoError(err) - defer edgeGatewayLogFile.Close() - kubeGatewayLogFilePath := filepath.Join(failureDir, "kube_gateway.log") - kubeGatewayLogFile, err := os.OpenFile(kubeGatewayLogFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, os.ModePerm) - i.Assertions.Require.NoError(err) - defer kubeGatewayLogFile.Close() + proxies := []metav1.ObjectMeta{} + for _, ns := range namespaces { + proxies, err = fetchAndAddProxies(i.Actions.Kubectl(), ns, "gloo=kube-gateway", proxies) + i.Assertions.Require.NoError(err) - namespaces, err := i.Actions.Kubectl().Namespaces(ctx) - i.Assertions.Require.NoError(err) - for _, n := range namespaces { - edgeGatewayLogFile.WriteString(fmt.Sprintf("Logs for edge gateway proxies in namespace %s\n", n)) - edgeGatewayLogsCmd := i.Actions.Kubectl().Command(ctx, "logs", "--all-containers", "--namespace", n, "--prefix", "-l", "gloo=gateway-proxy") - _ = edgeGatewayLogsCmd.WithStdout(edgeGatewayLogFile).WithStderr(edgeGatewayLogFile).Run() - edgeGatewayLogFile.WriteString("----------------------------------------------------------------------------------------------------------\n") - - kubeGatewayLogFile.WriteString(fmt.Sprintf("Logs for kube gateway proxies in namespace %s\n", n)) - kubeGatewayLogsCmd := i.Actions.Kubectl().Command(ctx, "logs", "--all-containers", "--namespace", n, "--prefix", "-l", "gloo=kube-gateway") - _ = kubeGatewayLogsCmd.WithStdout(kubeGatewayLogFile).WithStderr(kubeGatewayLogFile).Run() - kubeGatewayLogFile.WriteString("----------------------------------------------------------------------------------------------------------\n") + proxies, err = fetchAndAddProxies(i.Actions.Kubectl(), ns, "gloo=gateway-proxy", proxies) + i.Assertions.Require.NoError(err) } - clusterStateFilePath := filepath.Join(failureDir, 
"cluster_state.log") - clusterStateFile, err := os.OpenFile(clusterStateFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, os.ModePerm) - i.Assertions.Require.NoError(err) - defer clusterStateFile.Close() - - kubectlGetAllCmd := i.Actions.Kubectl().Command(ctx, "get", "all", "-A", "-owide") - _ = kubectlGetAllCmd.WithStdout(clusterStateFile).WithStderr(clusterStateFile).Run() - clusterStateFile.WriteString("\n") - - resourcesToGet := []string{ - // Kubernetes resources - "secrets", - // Kube GW API resources - "gateways.gateway.networking.k8s.io", - "gatewayclasses.gateway.networking.k8s.io", - "httproutes.gateway.networking.k8s.io", - "referencegrants.gateway.networking.k8s.io", - // GG Kube GW resources - "gatewayparameters.gateway.gloo.solo.io", - "listeneroptions.gateway.solo.io", // only implemented for kube gw as of now - "httplisteneroptions.gateway.solo.io", // only implemented for kube gw as of now - // GG Gloo resources - "graphqlapis.graphql.gloo.solo.io", - "proxies.gloo.solo.io", - "settings.gloo.solo.io", - "upstreamgroups.gloo.solo.io", - "upstreams.gloo.solo.io", - // GG Edge GW resources - "gateways.gateway.solo.io", - "httpgateways.gateway.solo.io", - "tcpgateways.gateway.solo.io", - "virtualservices.gateway.solo.io", - // Shared GW resources - "routeoptions.gateway.solo.io", - "virtualhostoptions.gateway.solo.io", - // Dataplane extensions resources - "authconfigs.enterprise.gloo.solo.io", - "ratelimitconfigs.ratelimit.solo.io", - } - kubectlGetResourcesCmd := i.Actions.Kubectl().Command(ctx, "get", strings.Join(resourcesToGet, ","), "-A", "-owide") - _ = kubectlGetResourcesCmd.WithStdout(clusterStateFile).WithStderr(clusterStateFile).Run() - clusterStateFile.WriteString("\n") + // Dump the logs and state of the cluster + helpers.StandardGlooDumpOnFail(os.Stdout, failureDir, proxies...)() } // GeneratedFiles is a collection of files that are generated during the execution of a set of tests @@ -349,3 +291,30 @@ func MustGeneratedFiles(tmpDirId, 
clusterId string) GeneratedFiles { FailureDir: failureDir, } } + +func fetchAndAddProxies(kubectl *kubectl.Cli, namespace string, label string, + proxies []metav1.ObjectMeta) ([]metav1.ObjectMeta, error) { + + stdout := bytes.NewBuffer(nil) + stderr := bytes.NewBuffer(nil) + + // get all deployments in the namespace that match the given label selector + lookupProxiesCmd := kubectl.Command(context.Background(), "get", + "deployment", "-n", namespace, "--selector", label, + "--no-headers", "-o", `custom-columns=":metadata.name"`) + err := lookupProxiesCmd.WithStdout(stdout).WithStderr(stderr). + Run().Cause() + if err != nil { + return nil, fmt.Errorf("failed to get proxies: %w (%s)", err, stderr.String()) + } + + // iterate lines in the output and append to the list of proxies + for _, line := range bytes.Split(stdout.Bytes(), []byte("\n")) { + proxyName := string(bytes.TrimSpace(line)) + if proxyName != "" { + proxies = append(proxies, metav1.ObjectMeta{Namespace: namespace, Name: proxyName}) + } + } + + return proxies, nil +} diff --git a/test/kubernetes/e2e/tests/manifests/common-recommendations.yaml b/test/kubernetes/e2e/tests/manifests/common-recommendations.yaml index 2529898d53a..d3d707f480b 100644 --- a/test/kubernetes/e2e/tests/manifests/common-recommendations.yaml +++ b/test/kubernetes/e2e/tests/manifests/common-recommendations.yaml @@ -74,6 +74,8 @@ gloo: limits: cpu: 1000m memory: 10Gi + stats: + enabled: true # enable stats server for gloo so we can collect the metrics in CI # Configuration for the statically deployed gateway-proxy that ships by default with Gloo Gateway gatewayProxies: