Skip to content

Commit

Permalink
Merge pull request #312 from yguo0905/wait-apiserver
Browse files Browse the repository at this point in the history
Cherry pick #308 to v0.6: Support waiting for kube-apiserver to be ready with timout during NPD startup
  • Loading branch information
k8s-ci-robot authored Jul 15, 2019
2 parents df2bc3d + 938f2a8 commit 89ab58b
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 3 deletions.
19 changes: 19 additions & 0 deletions cmd/node_problem_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/golang/glog"
"github.com/spf13/pflag"

"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/node-problem-detector/cmd/options"
"k8s.io/node-problem-detector/pkg/custompluginmonitor"
"k8s.io/node-problem-detector/pkg/problemclient"
Expand Down Expand Up @@ -95,7 +96,25 @@ func main() {
startHTTPServer(p, npdo)
}

// This function may be blocked (until a timeout occurs) before
// kube-apiserver becomes ready.
glog.Infof("Waiting for kube-apiserver to be ready (timeout %v)...", npdo.APIServerWaitTimeout)
if err := waitForAPIServerReadyWithTimeout(c, npdo); err != nil {
glog.Warningf("kube-apiserver did not become ready: timed out on waiting for kube-apiserver to return the node object: %v", err)
}

if err := p.Run(); err != nil {
glog.Fatalf("Problem detector failed with error: %v", err)
}
}

func waitForAPIServerReadyWithTimeout(c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
return wait.PollImmediate(npdo.APIServerWaitInterval, npdo.APIServerWaitTimeout, func() (done bool, err error) {
// If NPD can get the node object from kube-apiserver, the server is
// ready and the RBAC permission is set correctly.
if _, err := c.GetNode(); err == nil {
return true, nil
}
return false, nil
})
}
9 changes: 9 additions & 0 deletions cmd/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"flag"
"fmt"
"os"
"time"

"net/url"

Expand All @@ -46,6 +47,12 @@ type NodeProblemDetectorOptions struct {
ServerPort int
// ServerAddress is the address to bind the node problem detector server.
ServerAddress string
// APIServerWaitTimeout is the timeout on waiting for kube-apiserver to be
// ready.
APIServerWaitTimeout time.Duration
// APIServerWaitInterval is the interval between the checks on the
// readiness of kube-apiserver.
APIServerWaitInterval time.Duration

// application options

Expand All @@ -65,6 +72,8 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
[]string{}, "List of paths to custom plugin monitor config files, comma separated.")
fs.StringVar(&npdo.ApiServerOverride, "apiserver-override",
"", "Custom URI used to connect to Kubernetes ApiServer")
fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.")
fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.")
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
fs.StringVar(&npdo.HostnameOverride, "hostname-override",
"", "Custom node name used to override hostname")
Expand Down
6 changes: 5 additions & 1 deletion pkg/problemclient/fake_problem_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
"reflect"
"sync"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
)

// FakeProblemClient is a fake problem client for debug.
Expand Down Expand Up @@ -92,3 +92,7 @@ func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.N
// Eventf does nothing now.
func (f *FakeProblemClient) Eventf(eventType string, source, reason, messageFmt string, args ...interface{}) {
}

func (f *FakeProblemClient) GetNode() (*v1.Node, error) {
return nil, fmt.Errorf("GetNode() not implemented")
}
11 changes: 9 additions & 2 deletions pkg/problemclient/problem_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/kubernetes/pkg/api/legacyscheme"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/clock"
Expand All @@ -47,6 +47,9 @@ type Client interface {
SetConditions(conditions []v1.NodeCondition) error
// Eventf reports the event.
Eventf(eventType string, source, reason, messageFmt string, args ...interface{})
// GetNode returns the Node object of the node on which the
// node-problem-detector runs.
GetNode() (*v1.Node, error)
}

type nodeProblemClient struct {
Expand Down Expand Up @@ -79,7 +82,7 @@ func NewClientOrDie(npdo *options.NodeProblemDetectorOptions) Client {
}

func (c *nodeProblemClient) GetConditions(conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
node, err := c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
node, err := c.GetNode()
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -116,6 +119,10 @@ func (c *nodeProblemClient) Eventf(eventType, source, reason, messageFmt string,
recorder.Eventf(c.nodeRef, eventType, reason, messageFmt, args...)
}

func (c *nodeProblemClient) GetNode() (*v1.Node, error) {
return c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
}

// generatePatch generates condition patch
func generatePatch(conditions []v1.NodeCondition) ([]byte, error) {
raw, err := json.Marshal(&conditions)
Expand Down

0 comments on commit 89ab58b

Please sign in to comment.