diff --git a/pkg/discovery/nodes.go b/pkg/discovery/nodes.go index 3f74e7d45..ccdc7ce3d 100644 --- a/pkg/discovery/nodes.go +++ b/pkg/discovery/nodes.go @@ -17,15 +17,16 @@ limitations under the License. package discovery import ( + "context" "encoding/json" "os" "path" + "time" "github.com/heptio/sonobuoy/pkg/config" "github.com/sirupsen/logrus" v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" ) type nodeData struct { @@ -34,67 +35,74 @@ type nodeData struct { HealthzStatus int `json:"healthzStatus,omitempty"` } +// getNodeEndpoint returns the response from pinging a node endpoint +func getNodeEndpoint(client rest.Interface, nodeName, endpoint string) (rest.Result, error) { + // TODO(chuckha) make this timeout configurable + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(30*time.Second)) + defer cancel() + req := client. + Get(). + Context(ctx). + Resource("nodes"). + Name(nodeName). + SubResource("proxy"). + Suffix(endpoint) + + // Get the configz endpoint, put the result in the nodeData + result := req.Do() + if result.Error() != nil { + logrus.Warningf("Could not get %v endpoint for node %v: %v", endpoint, nodeName, result.Error()) + } + return result, result.Error() +} + // gatherNodeData collects non-resource information about a node through the // kubernetes API. That is, its `healthz` and `configz` endpoints, which are // not "resources" per se, although they are accessible through the apiserver. -func gatherNodeData(kubeClient kubernetes.Interface, cfg *config.Config) error { +func gatherNodeData(nodeNames []string, restclient rest.Interface, cfg *config.Config) error { logrus.Info("Collecting Node Configuration and Health...") - nodelist, err := kubeClient.CoreV1().Nodes().List(metav1.ListOptions{}) - if err != nil { - return err - } - - for _, node := range nodelist.Items { - // We hit the master on /api/v1/proxy/nodes/ to gather node - // information without having to reinvent auth - proxypath := "/api/v1/proxy/nodes/" + node.Name - restclient := kubeClient.CoreV1().RESTClient() - - out := path.Join(cfg.OutputDir(), HostsLocation, node.Name) - logrus.Infof("Creating host results for %v under %v\n", node.Name, out) - if err = os.MkdirAll(out, 0755); err != nil { + for _, name := range nodeNames { + // Create the output for each node + out := path.Join(cfg.OutputDir(), HostsLocation, name) + logrus.Infof("Creating host results for %v under %v\n", name, out) + if err := os.MkdirAll(out, 0755); err != nil { return err } - _, err = untypedQuery(out, "configz.json", func() (interface{}, error) { - var configz map[string]interface{} - - // Get the configz endpoint, put the result in the nodeData - request := restclient.Get().RequestURI(proxypath + "/configz") - if result, err := request.Do().Raw(); err == nil { - json.Unmarshal(result, &configz) - } else { - logrus.Warningf("Could not get configz endpoint for node %v: %v", node.Name, err) + _, err := untypedQuery(out, "configz.json", func() (interface{}, error) { + data := make(map[string]interface{}) + result, err := getNodeEndpoint(restclient, name, "configz") + if err != nil { + return data, err } - return configz, err + resultBytes, err := result.Raw() + if err != nil { + return data, err + } + json.Unmarshal(resultBytes, &data) + return data, err }) if err != nil { return err } _, err = untypedQuery(out, "healthz.json", func() (interface{}, error) { - // Since health is just an int, we wrap it in a JSON object that looks like - // `{"status":200}` - health := make(map[string]interface{}) - var healthstatus int - - // Get the healthz endpoint too. We care about the response code in this - // case, not the body. - request := restclient.Get().RequestURI(proxypath + "/healthz") - if result := request.Do(); result.Error() == nil { - result.StatusCode(&healthstatus) - health["status"] = healthstatus - } else { - logrus.Warningf("Could not get healthz endpoint for node %v: %v", node.Name, result.Error()) + data := make(map[string]interface{}) + result, err := getNodeEndpoint(restclient, name, "healthz") + if err != nil { + return data, err } - return health, err + var healthstatus int + result.StatusCode(&healthstatus) + data["status"] = healthstatus + return data, nil }) if err != nil { return err } } - return err + return nil } diff --git a/pkg/discovery/queries.go b/pkg/discovery/queries.go index 7d8a7d251..81a2a1aa0 100644 --- a/pkg/discovery/queries.go +++ b/pkg/discovery/queries.go @@ -17,6 +17,7 @@ limitations under the License. package discovery import ( + "fmt" "os" "path" "time" @@ -358,29 +359,44 @@ func QueryClusterResources(kubeClient kubernetes.Interface, recorder *QueryRecor return untypedQuery(cfg.OutputDir(), "serverversion.json", objqry) } timedQuery(recorder, "serverversion", "", query) + continue case "ServerGroups": objqry := func() (interface{}, error) { return kubeClient.Discovery().ServerGroups() } query := func() (time.Duration, error) { return untypedQuery(cfg.OutputDir(), "servergroups.json", objqry) } timedQuery(recorder, "servergroups", "", query) + continue case "Nodes": // cfg.Nodes configures whether users want to gather the Nodes resource in the // cluster, but we also use that option to guide whether we get node data such // as configz and healthz endpoints. + // TODO(chuckha) Use a separate configuration like NodeConfiguration for node configz/healthz to make + // this switch flow less confusing. "Nodes" is responsible for too much. + // NOTE: Node data collection is an aggregated time b/c propagating that detail back up // is odd and would pollute some of the output. + start := time.Now() - err := gatherNodeData(kubeClient, cfg) + nodeList, err := kubeClient.CoreV1().Nodes().List(metav1.ListOptions{}) + if err != nil { + errlog.LogError(fmt.Errorf("failed to get node list: %v", err)) + // Do not return or continue because we also want to query nodes as resources + break + } + nodeNames := make([]string, nodeList.Size()) + for i, node := range nodeList.Items { + nodeNames[i] = node.Name + } + err = gatherNodeData(nodeNames, kubeClient.CoreV1().RESTClient(), cfg) duration := time.Since(start) recorder.RecordQuery("Nodes", "", duration, err) - fallthrough - default: - lister := func() (runtime.Object, error) { return queryNonNsResource(resourceKind, kubeClient) } - query := func() (time.Duration, error) { return objListQuery(outdir+"/", resourceKind+".json", lister) } - timedQuery(recorder, resourceKind, "", query) + // do not continue because we want to now query nodes as resources } + lister := func() (runtime.Object, error) { return queryNonNsResource(resourceKind, kubeClient) } + query := func() (time.Duration, error) { return objListQuery(outdir+"/", resourceKind+".json", lister) } + timedQuery(recorder, resourceKind, "", query) } return nil