Skip to content

Commit

Permalink
Updates node configz and healthz endpoints
Browse files Browse the repository at this point in the history
Fixes #465

The proxy subresource was moved in k8s v1.10

Signed-off-by: Chuck Ha <[email protected]>
  • Loading branch information
Chuck Ha committed Jul 10, 2018
1 parent 3d46d23 commit 69abeb6
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 48 deletions.
92 changes: 50 additions & 42 deletions pkg/discovery/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ limitations under the License.
package discovery

import (
"context"
"encoding/json"
"os"
"path"
"time"

"github.com/heptio/sonobuoy/pkg/config"
"github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)

type nodeData struct {
Expand All @@ -34,67 +35,74 @@ type nodeData struct {
HealthzStatus int `json:"healthzStatus,omitempty"`
}

// getNodeEndpoint returns the response from pinging a node endpoint
func getNodeEndpoint(client rest.Interface, nodeName, endpoint string) (rest.Result, error) {
// TODO(chuckha) make this timeout configurable
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(30*time.Second))
defer cancel()
req := client.
Get().
Context(ctx).
Resource("nodes").
Name(nodeName).
SubResource("proxy").
Suffix(endpoint)

// Get the configz endpoint, put the result in the nodeData
result := req.Do()
if result.Error() != nil {
logrus.Warningf("Could not get %v endpoint for node %v: %v", endpoint, nodeName, result.Error())
}
return result, result.Error()
}

// gatherNodeData collects non-resource information about a node through the
// kubernetes API. That is, its `healthz` and `configz` endpoints, which are
// not "resources" per se, although they are accessible through the apiserver.
func gatherNodeData(kubeClient kubernetes.Interface, cfg *config.Config) error {
func gatherNodeData(nodeNames []string, restclient rest.Interface, cfg *config.Config) error {
logrus.Info("Collecting Node Configuration and Health...")

nodelist, err := kubeClient.CoreV1().Nodes().List(metav1.ListOptions{})
if err != nil {
return err
}

for _, node := range nodelist.Items {
// We hit the master on /api/v1/proxy/nodes/<node> to gather node
// information without having to reinvent auth
proxypath := "/api/v1/proxy/nodes/" + node.Name
restclient := kubeClient.CoreV1().RESTClient()

out := path.Join(cfg.OutputDir(), HostsLocation, node.Name)
logrus.Infof("Creating host results for %v under %v\n", node.Name, out)
if err = os.MkdirAll(out, 0755); err != nil {
for _, name := range nodeNames {
// Create the output for each node
out := path.Join(cfg.OutputDir(), HostsLocation, name)
logrus.Infof("Creating host results for %v under %v\n", name, out)
if err := os.MkdirAll(out, 0755); err != nil {
return err
}

_, err = untypedQuery(out, "configz.json", func() (interface{}, error) {
var configz map[string]interface{}

// Get the configz endpoint, put the result in the nodeData
request := restclient.Get().RequestURI(proxypath + "/configz")
if result, err := request.Do().Raw(); err == nil {
json.Unmarshal(result, &configz)
} else {
logrus.Warningf("Could not get configz endpoint for node %v: %v", node.Name, err)
_, err := untypedQuery(out, "configz.json", func() (interface{}, error) {
data := make(map[string]interface{})
result, err := getNodeEndpoint(restclient, name, "configz")
if err != nil {
return data, err
}

return configz, err
resultBytes, err := result.Raw()
if err != nil {
return data, err
}
json.Unmarshal(resultBytes, &data)
return data, err
})
if err != nil {
return err
}

_, err = untypedQuery(out, "healthz.json", func() (interface{}, error) {
// Since health is just an int, we wrap it in a JSON object that looks like
// `{"status":200}`
health := make(map[string]interface{})
var healthstatus int

// Get the healthz endpoint too. We care about the response code in this
// case, not the body.
request := restclient.Get().RequestURI(proxypath + "/healthz")
if result := request.Do(); result.Error() == nil {
result.StatusCode(&healthstatus)
health["status"] = healthstatus
} else {
logrus.Warningf("Could not get healthz endpoint for node %v: %v", node.Name, result.Error())
data := make(map[string]interface{})
result, err := getNodeEndpoint(restclient, name, "healthz")
if err != nil {
return data, err
}
return health, err
var healthstatus int
result.StatusCode(&healthstatus)
data["status"] = healthstatus
return data, nil
})
if err != nil {
return err
}
}

return err
return nil
}
28 changes: 22 additions & 6 deletions pkg/discovery/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package discovery

import (
"fmt"
"os"
"path"
"time"
Expand Down Expand Up @@ -358,29 +359,44 @@ func QueryClusterResources(kubeClient kubernetes.Interface, recorder *QueryRecor
return untypedQuery(cfg.OutputDir(), "serverversion.json", objqry)
}
timedQuery(recorder, "serverversion", "", query)
continue
case "ServerGroups":
objqry := func() (interface{}, error) { return kubeClient.Discovery().ServerGroups() }
query := func() (time.Duration, error) {
return untypedQuery(cfg.OutputDir(), "servergroups.json", objqry)
}
timedQuery(recorder, "servergroups", "", query)
continue
case "Nodes":
// cfg.Nodes configures whether users want to gather the Nodes resource in the
// cluster, but we also use that option to guide whether we get node data such
// as configz and healthz endpoints.

// TODO(chuckha) Use a separate configuration like NodeConfiguration for node configz/healthz to make
// this switch flow less confusing. "Nodes" is responsible for too much.

// NOTE: Node data collection is an aggregated time b/c propagating that detail back up
// is odd and would pollute some of the output.

start := time.Now()
err := gatherNodeData(kubeClient, cfg)
nodeList, err := kubeClient.CoreV1().Nodes().List(metav1.ListOptions{})
if err != nil {
errlog.LogError(fmt.Errorf("failed to get node list: %v", err))
// Do not return or continue because we also want to query nodes as resources
break
}
nodeNames := make([]string, nodeList.Size())
for i, node := range nodeList.Items {
nodeNames[i] = node.Name
}
err = gatherNodeData(nodeNames, kubeClient.CoreV1().RESTClient(), cfg)
duration := time.Since(start)
recorder.RecordQuery("Nodes", "", duration, err)
fallthrough
default:
lister := func() (runtime.Object, error) { return queryNonNsResource(resourceKind, kubeClient) }
query := func() (time.Duration, error) { return objListQuery(outdir+"/", resourceKind+".json", lister) }
timedQuery(recorder, resourceKind, "", query)
// do not continue because we want to now query nodes as resources
}
lister := func() (runtime.Object, error) { return queryNonNsResource(resourceKind, kubeClient) }
query := func() (time.Duration, error) { return objListQuery(outdir+"/", resourceKind+".json", lister) }
timedQuery(recorder, resourceKind, "", query)
}

return nil
Expand Down

0 comments on commit 69abeb6

Please sign in to comment.