Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Remove PingWithTimeout test in node controller and manager
Browse files Browse the repository at this point in the history
sushrk committed Nov 29, 2024
1 parent b5699de commit 53da46b
Showing 4 changed files with 9 additions and 52 deletions.
31 changes: 1 addition & 30 deletions controllers/core/node_controller.go
Original file line number Diff line number Diff line change
@@ -15,21 +15,18 @@ package controllers

import (
"context"
"net/http"
"time"

"github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition"
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager"
"github.com/google/uuid"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -124,31 +121,5 @@ func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, healthzHandler *rcHe

func (r *NodeReconciler) Check() healthz.Checker {
r.Log.Info("Node controller's healthz subpath was added")
return func(req *http.Request) error {
// if the reconciler is not ready, using the simple ping to test
// this can test the referenced cached pod datastore
if !r.Conditions.GetPodDataStoreSyncStatus() {
r.Log.V(1).Info("***** node controller healthz enpoint tested Simple Ping *****")
return nil
}

err := rcHealthz.PingWithTimeout(func(c chan<- error) {
// when the reconciler is ready, testing the reconciler with a fake node request
pingRequest := &ctrl.Request{
NamespacedName: types.NamespacedName{
Namespace: corev1.NamespaceDefault,
Name: uuid.New().String(),
},
}

// expecting to 'return ctrl.Result{}, client.IgnoreNotFound(err)'
// IgnoreNotFound returns nil on NotFound errors.
// this can test the pod cached datastore and node cached datastore
_, rErr := r.Reconcile(r.Context, *pingRequest)
r.Log.V(1).Info("***** node controller healthz endpoint tested Reconcile *****")
c <- rErr
}, r.Log)

return err
}
return rcHealthz.SimplePing("node reconciler", r.Log)
}
2 changes: 1 addition & 1 deletion pkg/aws/ec2/api/wrapper.go
Original file line number Diff line number Diff line change
@@ -676,7 +676,7 @@ func (e *ec2Wrapper) DescribeNetworkInterfacesPages(input *ec2.DescribeNetworkIn
})
}
// Add jitter to avoid EC2 API throttling in the account
time.Sleep(wait.Jitter(500*time.Millisecond, 0.5))
time.Sleep(wait.Jitter(300*time.Millisecond, 0.5))
return true

}); err != nil {
10 changes: 5 additions & 5 deletions pkg/config/loader.go
Original file line number Diff line number Diff line change
@@ -49,15 +49,15 @@ const (
// Tested: 12 + 8 limits (not seeing significant degradation from 15+8)
// Larger number seems not make latency better than 12+8
UserServiceClientQPS = 12
UserServiceClientQPSBurst = 8
UserServiceClientQPSBurst = 18

// EC2 API QPS for instance service client
InstanceServiceClientQPS = 5
InstanceServiceClientBurst = 7
InstanceServiceClientQPS = 12
InstanceServiceClientBurst = 18

// API Server QPS
DefaultAPIServerQPS = 10
DefaultAPIServerBurst = 15
DefaultAPIServerQPS = 20
DefaultAPIServerBurst = 30
)

// LoadResourceConfig returns the Resource Configuration for all resources managed by the VPC Resource Controller. Currently
18 changes: 2 additions & 16 deletions pkg/node/manager/manager.go
Original file line number Diff line number Diff line change
@@ -16,7 +16,6 @@ package manager
import (
"errors"
"fmt"
"net/http"
"strings"
"sync"
"time"
@@ -30,7 +29,6 @@ import (
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/resource"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils"
asyncWorker "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/worker"
"github.com/google/uuid"
"github.com/samber/lo"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
@@ -559,18 +557,6 @@ func (m *manager) removeNodeSafe(nodeName string) {
}

func (m *manager) check() healthz.Checker {
// instead of using SimplePing, testing the node cache from manager makes the test more accurate
return func(req *http.Request) error {
err := rcHealthz.PingWithTimeout(func(c chan<- error) {
randomName := uuid.New().String()
_, found := m.GetNode(randomName)
m.Log.V(1).Info("health check tested ping GetNode to check on datastore cache in node manager successfully", "TesedNodeName", randomName, "NodeFound", found)
var ping interface{}
m.worker.SubmitJob(ping)
m.Log.V(1).Info("health check tested ping SubmitJob with a nil job to check on worker queue in node manager successfully")
c <- nil
}, m.Log)

return err
}
// Use SimplePing in v1.4.6-internal patch
return rcHealthz.SimplePing("node manager", m.Log)
}

0 comments on commit 53da46b

Please sign in to comment.