From bd090e0719a7404ad4510f9c91dae5c3611030e2 Mon Sep 17 00:00:00 2001
From: Yash Thakkar
Date: Mon, 18 Nov 2024 15:05:16 -0800
Subject: [PATCH] Updating Release 1.6 branch. (#494)

* remove global exclusion for G108,G114 and add nosec in code (#404)

* Update controller_auth_proxy_patch.yaml (#405)

Update the reference from gcr.io to registry.k8s.io

> kube-rbac-proxy is moving to registry.k8s.io/kubebuilder/kube-rbac-proxy (from gcr.io/kubebuilder/kube-rbac-proxy) because GCR is being sunset. We need to update these references.

* Fix log which causes panic (#407)

* Fix log which causes panic

* Consistent key name

* consistent naming

* updating ginkgo and gomega

* Bump github.com/prometheus/common from 0.51.1 to 0.53.0

Bumps [github.com/prometheus/common](https://github.com/prometheus/common) from 0.51.1 to 0.53.0.
- [Release notes](https://github.com/prometheus/common/releases)
- [Commits](https://github.com/prometheus/common/compare/v0.51.1...v0.53.0)

---
updated-dependencies:
- dependency-name: github.com/prometheus/common
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]

* Bump github.com/prometheus/client_model from 0.6.0 to 0.6.1 (#432)

Bumps [github.com/prometheus/client_model](https://github.com/prometheus/client_model) from 0.6.0 to 0.6.1.
- [Release notes](https://github.com/prometheus/client_model/releases)
- [Commits](https://github.com/prometheus/client_model/compare/v0.6.0...v0.6.1)

---
updated-dependencies:
- dependency-name: github.com/prometheus/client_model
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* Bump github.com/onsi/ginkgo/v2 from 2.17.2 to 2.19.0 (#431)

Bumps [github.com/onsi/ginkgo/v2](https://github.com/onsi/ginkgo) from 2.17.2 to 2.19.0.
- [Release notes](https://github.com/onsi/ginkgo/releases)
- [Changelog](https://github.com/onsi/ginkgo/blob/master/CHANGELOG.md)
- [Commits](https://github.com/onsi/ginkgo/compare/v2.17.2...v2.19.0)

---
updated-dependencies:
- dependency-name: github.com/onsi/ginkgo/v2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* QPS and burst adjustment (#436)

* readme update for events (#453)

* Set controller user-agent to vpc-resource-controller/git-version (#455)

* update user-agent string.

* Use AppName instead of ControllerName.

* Add security group pods scale test in ginkgo (#457)

* Add security group pods scale test in ginkgo

* Add instructions to run scale tests manually

* fix typo in README

* Passing page limit to cache config instead of override. (#452)

* passing page limit to cache config

* adding error log to optimized list watcher

* importing vpc pkg

* pods will requeue for reconcile if nodes are not managed and requested eni (#463)

* pod will requeue for reconcile if nodes are not managed and requested eni

* log statement change

* looping through all containers for eni requests

* adding ut for utils function

* add CNINode integration tests (#479)

* add CNINode integration tests

* address PR comments

* updating log statements

* add retry in VerifyCNINode

* Bump go.uber.org/zap from 1.26.0 to 1.27.0 (#480)

Bumps [go.uber.org/zap](https://github.com/uber-go/zap) from 1.26.0 to 1.27.0.
- [Release notes](https://github.com/uber-go/zap/releases)
- [Changelog](https://github.com/uber-go/zap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/uber-go/zap/compare/v1.26.0...v1.27.0)

---
updated-dependencies:
- dependency-name: go.uber.org/zap
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* increasing timeout for a few integration tests (#486)

* Skipping health check on nodes if EC2 returns throttling errors (#485)

* updating limits.go for supported ec2 instance type #491

* Bump github.com/samber/lo from 1.39.0 to 1.47.0 (#481)

Bumps [github.com/samber/lo](https://github.com/samber/lo) from 1.39.0 to 1.47.0.
- [Release notes](https://github.com/samber/lo/releases)
- [Commits](https://github.com/samber/lo/compare/v1.39.0...v1.47.0)

---
updated-dependencies:
- dependency-name: github.com/samber/lo
  dependency-type: direct:production
  update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --------- Signed-off-by: dependabot[bot] Co-authored-by: Sushmitha Ravikumar <58063229+sushrk@users.noreply.github.com> Co-authored-by: Senthil Kumaran Co-authored-by: Garvin Pang Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Hao Zhou --- .github/workflows/presubmit.yaml | 2 +- README.md | 11 +- .../default/controller_auth_proxy_patch.yaml | 2 +- controllers/core/node_controller.go | 6 + controllers/core/pod_controller.go | 7 +- controllers/core/pod_controller_test.go | 3 +- controllers/custom/builder.go | 11 +- controllers/custom/custom_controller.go | 27 +- go.mod | 26 +- go.sum | 53 ++- main.go | 6 +- .../pkg/node/manager/mock_manager.go | 14 + pkg/aws/vpc/limits.go | 362 +++++++++++++++++- pkg/config/loader.go | 11 +- pkg/node/manager/manager.go | 38 +- pkg/node/manager/manager_test.go | 33 ++ pkg/provider/branch/provider.go | 10 +- pkg/resource/introspect.go | 2 +- pkg/utils/errors.go | 1 + pkg/utils/helper.go | 15 + pkg/utils/helper_test.go | 100 +++++ pkg/utils/set.go | 12 + scripts/test/create-cluster-karpenter.sh | 147 +++++++ scripts/test/delete-cluster-karpenter.sh | 23 ++ scripts/test/run-integration-tests.sh | 1 + test/README.md | 43 ++- test/framework/framework.go | 63 +-- .../resource/aws/autoscaling/manager.go | 64 ++++ test/framework/resource/aws/ec2/manager.go | 58 ++- test/framework/resource/k8s/node/manager.go | 10 + test/framework/resource/k8s/node/wrapper.go | 67 +++- test/framework/utils/poll.go | 3 +- test/framework/utils/resource.go | 1 + .../integration/cninode/cninode_suite_test.go | 50 +++ test/integration/cninode/cninode_test.go | 106 +++++ .../perpodsg/perpodsg_suite_test.go | 29 +- test/integration/perpodsg/perpodsg_test.go | 23 -- test/integration/scale/pod_scale_test.go | 89 +++++ test/integration/scale/scale_suite_test.go | 57 +++ .../integration/windows/windows_suite_test.go | 2 +- test/integration/windows/windows_test.go | 4 +- 41 files changed, 1420 insertions(+), 172 deletions(-) create mode 100755 scripts/test/create-cluster-karpenter.sh create mode 100755 scripts/test/delete-cluster-karpenter.sh create mode 100644 test/framework/resource/aws/autoscaling/manager.go create mode 100644 test/integration/cninode/cninode_suite_test.go create mode 100644 test/integration/cninode/cninode_test.go create mode 100644 test/integration/scale/pod_scale_test.go create mode 100644 test/integration/scale/scale_suite_test.go diff --git a/.github/workflows/presubmit.yaml b/.github/workflows/presubmit.yaml index 0557b973..7fefded3 100644 --- a/.github/workflows/presubmit.yaml +++ b/.github/workflows/presubmit.yaml @@ -67,5 +67,5 @@ jobs: - name: Install `gosec` run: go install github.com/securego/gosec/v2/cmd/gosec@latest - name: Run Gosec Security Scanner - run: ~/go/bin/gosec -exclude-dir test -exclude-generated -severity medium -exclude=G108,G114 ./... + run: ~/go/bin/gosec -exclude-dir test -exclude-generated -severity medium ./... 
diff --git a/README.md b/README.md index a1d19d30..a2d10c61 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,16 @@ Controller running on EKS Control Plane for managing Branch & Trunk Network Interface for [Kubernetes Pod](https://kubernetes.io/docs/concepts/workloads/pods/) using the [Security Group for Pod](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) feature and IPv4 Address Management(IPAM) of [Windows Nodes](https://docs.aws.amazon.com/eks/latest/userguide/windows-support.html). -The controller broadcasts its version to nodes. Describing any node will provide the version information in node `Events`. The mapping between the controller's version and the cluster's platform version is also available in release notes. +The controller broadcasts its version to nodes. Describing any node will provide the version information in node `Events`. The mapping between the controller's version and the cluster's platform version is also available in release notes. Please be aware that kubernetes events last for one hour in general and you may have to check the version information events in newly created nodes. + +Version events example: +``` +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ControllerVersionNotice 2m58s vpc-resource-controller The node is managed by VPC resource controller version v1.4.9 + Normal NodeTrunkInitiated 2m55s vpc-resource-controller The node has trunk interface initialized successfully +``` ## Security Group for Pods diff --git a/config/default/controller_auth_proxy_patch.yaml b/config/default/controller_auth_proxy_patch.yaml index 14202b6a..017fa153 100644 --- a/config/default/controller_auth_proxy_patch.yaml +++ b/config/default/controller_auth_proxy_patch.yaml @@ -10,7 +10,7 @@ spec: spec: containers: - name: kube-rbac-proxy - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.5.0 + image: registry.k8s.io/kubebuilder/kube-rbac-proxy:v0.5.0 args: - "--secure-listen-address=0.0.0.0:8443" - "--upstream=http://127.0.0.1:8080/" diff --git a/controllers/core/node_controller.go b/controllers/core/node_controller.go index 7440939b..3f565b07 100644 --- a/controllers/core/node_controller.go +++ b/controllers/core/node_controller.go @@ -168,6 +168,12 @@ func (r *NodeReconciler) Check() healthz.Checker { return nil } + if r.Manager.SkipHealthCheck() { + // node manager observes EC2 error on processing node, pausing reconciler check to avoid stressing the system + r.Log.Info("due to EC2 error, node controller skips node reconciler health check for now") + return nil + } + err := rcHealthz.PingWithTimeout(func(c chan<- error) { // when the reconciler is ready, testing the reconciler with a fake node request pingRequest := &ctrl.Request{ diff --git a/controllers/core/pod_controller.go b/controllers/core/pod_controller.go index 03ab6dc1..fc8ab5c5 100644 --- a/controllers/core/pod_controller.go +++ b/controllers/core/pod_controller.go @@ -27,6 +27,7 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/resource" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" "github.com/google/uuid" "github.com/go-logr/logr" @@ -56,7 +57,7 @@ type PodReconciler struct { var ( PodRequeueRequest = ctrl.Result{Requeue: true, RequeueAfter: time.Second} - MaxPodConcurrentReconciles = 10 + MaxPodConcurrentReconciles = 20 ) // Reconcile handles 
create/update/delete event by delegating the request to the handler @@ -112,6 +113,10 @@ func (r *PodReconciler) Reconcile(request custom.Request) (ctrl.Result, error) { logger.V(1).Info("pod's node is not yet initialized by the manager, will retry", "Requested", request.NamespacedName.String(), "Cached pod name", pod.ObjectMeta.Name, "Cached pod namespace", pod.ObjectMeta.Namespace) return PodRequeueRequest, nil } else if !node.IsManaged() { + if utils.PodHasENIRequest(pod) { + r.Log.Info("pod's node is not managed, but has eni request, will retry", "Requested", request.NamespacedName.String(), "Cached pod name", pod.ObjectMeta.Name, "Cached pod namespace", pod.ObjectMeta.Namespace) + return PodRequeueRequest, nil + } logger.V(1).Info("pod's node is not managed, skipping pod event", "Requested", request.NamespacedName.String(), "Cached pod name", pod.ObjectMeta.Name, "Cached pod namespace", pod.ObjectMeta.Namespace) return ctrl.Result{}, nil } else if !node.IsReady() { diff --git a/controllers/core/pod_controller_test.go b/controllers/core/pod_controller_test.go index 12fed741..02883f4d 100644 --- a/controllers/core/pod_controller_test.go +++ b/controllers/core/pod_controller_test.go @@ -16,6 +16,7 @@ package controllers import ( "errors" "testing" + "time" "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/custom" mock_condition "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/condition" @@ -188,7 +189,7 @@ func TestPodReconciler_Reconcile_NonManaged(t *testing.T) { result, err := mock.PodReconciler.Reconcile(mockReq) assert.NoError(t, err) - assert.Equal(t, result, controllerruntime.Result{}) + assert.Equal(t, controllerruntime.Result{Requeue: true, RequeueAfter: time.Second}, result) } // TestPodReconciler_Reconcile_NoNodeAssigned tests that the request for a Pod with no Node assigned diff --git a/controllers/custom/builder.go b/controllers/custom/builder.go index 181c9bc8..232f1d42 100644 --- a/controllers/custom/builder.go +++ b/controllers/custom/builder.go @@ -113,15 +113,16 @@ func (b *Builder) Complete(reconciler Reconciler) (healthz.Checker, error) { workqueue.DefaultControllerRateLimiter(), b.options.Name) optimizedListWatch := newOptimizedListWatcher(b.ctx, b.clientSet.CoreV1().RESTClient(), - b.converter.Resource(), b.options.Namespace, b.options.PageLimit, b.converter) + b.converter.Resource(), b.options.Namespace, b.converter, b.log.WithName("listWatcher")) // Create the config for low level controller with the custom converter // list and watch config := &cache.Config{ - Queue: cache.NewDeltaFIFO(b.converter.Indexer, b.dataStore), - ListerWatcher: optimizedListWatch, - ObjectType: b.converter.ResourceType(), - FullResyncPeriod: b.options.ResyncPeriod, + Queue: cache.NewDeltaFIFO(b.converter.Indexer, b.dataStore), + ListerWatcher: optimizedListWatch, + WatchListPageSize: int64(b.options.PageLimit), + ObjectType: b.converter.ResourceType(), + FullResyncPeriod: b.options.ResyncPeriod, Process: func(obj interface{}, _ bool) error { // from oldest to newest for _, d := range obj.(cache.Deltas) { diff --git a/controllers/custom/custom_controller.go b/controllers/custom/custom_controller.go index b3bfeee5..df98b968 100644 --- a/controllers/custom/custom_controller.go +++ b/controllers/custom/custom_controller.go @@ -21,6 +21,7 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition" "github.com/go-logr/logr" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -178,23 +179,26 @@ func (c *CustomController) WaitForCacheSync(controller cache.Controller) { // newOptimizedListWatcher returns a list watcher with a custom list function that converts the // response for each page using the converter function and returns a general watcher -func newOptimizedListWatcher(ctx context.Context, restClient cache.Getter, resource string, namespace string, limit int, - converter Converter) *cache.ListWatch { +func newOptimizedListWatcher(ctx context.Context, restClient cache.Getter, resource string, namespace string, + converter Converter, log logr.Logger) *cache.ListWatch { listFunc := func(options metav1.ListOptions) (runtime.Object, error) { list, err := restClient.Get(). Namespace(namespace). Resource(resource). - // This needs to be done because just setting the limit using option's - // Limit is being overridden and the response is returned without pagination. VersionedParams(&metav1.ListOptions{ - Limit: int64(limit), + Limit: options.Limit, Continue: options.Continue, }, metav1.ParameterCodec). Do(ctx). Get() if err != nil { - return list, err + if statusErr, ok := err.(*apierrors.StatusError); ok { + log.Error(err, "List operation error", "code", statusErr.Status().Code) + } else { + log.Error(err, "List operation error") + } + return nil, err } // Strip down the the list before passing the paginated response back to // the pager function @@ -206,11 +210,20 @@ func newOptimizedListWatcher(ctx context.Context, restClient cache.Getter, resou // before storing the object in the data store. watchFunc := func(options metav1.ListOptions) (watch.Interface, error) { options.Watch = true - return restClient.Get(). + watch, err := restClient.Get(). Namespace(namespace). Resource(resource). VersionedParams(&options, metav1.ParameterCodec). 
Watch(ctx) + if err != nil { + if statusErr, ok := err.(*apierrors.StatusError); ok { + log.Error(err, "Watch operation error", "code", statusErr.Status().Code) + } else { + log.Error(err, "Watch operation error") + } + return nil, err + } + return watch, err } return &cache.ListWatch{ListFunc: listFunc, WatchFunc: watchFunc} } diff --git a/go.mod b/go.mod index bd7c9b41..02828cbf 100644 --- a/go.mod +++ b/go.mod @@ -9,14 +9,14 @@ require ( github.com/go-logr/zapr v1.3.0 github.com/golang/mock v1.6.0 github.com/google/uuid v1.6.0 - github.com/onsi/ginkgo/v2 v2.17.1 - github.com/onsi/gomega v1.31.1 + github.com/onsi/ginkgo/v2 v2.19.0 + github.com/onsi/gomega v1.33.1 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.19.0 - github.com/prometheus/client_model v0.6.0 - github.com/prometheus/common v0.52.2 + github.com/prometheus/client_model v0.6.1 + github.com/prometheus/common v0.53.0 github.com/stretchr/testify v1.9.0 - go.uber.org/zap v1.26.0 + go.uber.org/zap v1.27.0 golang.org/x/time v0.5.0 gomodules.xyz/jsonpatch/v2 v2.4.0 k8s.io/api v0.29.3 @@ -26,6 +26,7 @@ require ( ) require ( + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/google/gnostic-models v0.6.9-0.20230804172637-c7be7c783f49 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect @@ -42,13 +43,12 @@ require ( github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/swag v0.22.3 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20230323073829-e72429f035bd // indirect + github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect github.com/imdario/mergo v0.3.13 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -60,16 +60,16 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect - github.com/samber/lo v1.39.0 + github.com/samber/lo v1.47.0 github.com/spf13/pflag v1.0.5 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20231006140011-7918f672742d - golang.org/x/net v0.23.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/term v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.17.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect google.golang.org/appengine v1.6.8 // indirect google.golang.org/protobuf v1.33.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index eda6752f..1ac3bbd8 100644 --- 
a/go.sum +++ b/go.sum @@ -31,8 +31,8 @@ github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2Kv github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -52,8 +52,8 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk= -github.com/google/pprof v0.0.0-20230323073829-e72429f035bd/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= @@ -91,33 +91,32 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= -github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= -github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= -github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= 
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= -github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= -github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= -github.com/prometheus/common v0.52.2 h1:LW8Vk7BccEdONfrJBDffQGRtpSzi5CQaRZGtboOO2ck= -github.com/prometheus/common v0.52.2/go.mod h1:lrWtQx+iDfn2mbH5GUzlH9TSHyfZpHkSiG1W7y3sF2Q= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE= +github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= -github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc= +github.com/samber/lo v1.47.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= @@ -131,8 +130,8 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= -go.uber.org/zap v1.26.0/go.mod 
h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -150,8 +149,8 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI= golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -168,18 +167,18 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= -golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod 
h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -188,8 +187,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= -golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/main.go b/main.go index c034481b..51c070c1 100644 --- a/main.go +++ b/main.go @@ -17,7 +17,7 @@ import ( "flag" "fmt" "net/http" - _ "net/http/pprof" + _ "net/http/pprof" // #nosec G108 "os" "time" @@ -194,8 +194,7 @@ func main() { if enableProfiling { // To use the profiler - https://golang.org/pkg/net/http/pprof/ go func() { - setupLog.Info("starting profiler", - "error", http.ListenAndServe("localhost:6060", nil)) + setupLog.Info("starting profiler", "error", http.ListenAndServe("localhost:6060", nil)) // #nosec G114 }() } @@ -203,6 +202,7 @@ func main() { // Set the API Server QPS and Burst kubeConfig.QPS = config.DefaultAPIServerQPS kubeConfig.Burst = config.DefaultAPIServerBurst + kubeConfig.UserAgent = fmt.Sprintf("%s/%s", ec2API.AppName, version.GitVersion) setupLog.Info("starting the controller with leadership setting", "leader mode enabled", enableLeaderElection, diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/node/manager/mock_manager.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/node/manager/mock_manager.go index 092caf34..cf22bdfa 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/node/manager/mock_manager.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/node/manager/mock_manager.go @@ -102,6 +102,20 @@ func (mr *MockManagerMockRecorder) GetNode(arg0 interface{}) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNode", reflect.TypeOf((*MockManager)(nil).GetNode), arg0) } +// SkipHealthCheck mocks base method. +func (m *MockManager) SkipHealthCheck() bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SkipHealthCheck") + ret0, _ := ret[0].(bool) + return ret0 +} + +// SkipHealthCheck indicates an expected call of SkipHealthCheck. +func (mr *MockManagerMockRecorder) SkipHealthCheck() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SkipHealthCheck", reflect.TypeOf((*MockManager)(nil).SkipHealthCheck)) +} + // UpdateNode mocks base method. func (m *MockManager) UpdateNode(arg0 string) error { m.ctrl.T.Helper() diff --git a/pkg/aws/vpc/limits.go b/pkg/aws/vpc/limits.go index 387b1aa7..58649a5b 100644 --- a/pkg/aws/vpc/limits.go +++ b/pkg/aws/vpc/limits.go @@ -17,7 +17,7 @@ // so we can get this information at runtime. 
// Code generated by go generate; DO NOT EDIT. -// This file was generated at 2024-09-20T20:38:24Z +// This file was generated at 2024-11-12T06:00:34Z // WARNING: please add @ellistarn, @bwagner5, or @jonathan-innis from aws/karpenter to reviewers // if you are updating this file since Karpenter is depending on this file to calculate max pods. @@ -2770,6 +2770,186 @@ var Limits = map[string]*VPCLimits{ Hypervisor: "nitro", IsBareMetal: false, }, + "c8g.12xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 54, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.16xlarge": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.24xlarge": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.2xlarge": { + Interface: 4, + IPv4PerInterface: 15, + IsTrunkingCompatible: true, + BranchInterface: 38, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 4, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.48xlarge": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.4xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 54, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.8xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 54, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.large": { + Interface: 3, + IPv4PerInterface: 10, + IsTrunkingCompatible: true, + BranchInterface: 9, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 3, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.medium": { + Interface: 2, + IPv4PerInterface: 4, + IsTrunkingCompatible: true, + BranchInterface: 4, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 2, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "c8g.metal-24xl": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "", + IsBareMetal: true, + }, + "c8g.metal-48xl": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + 
MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "", + IsBareMetal: true, + }, + "c8g.xlarge": { + Interface: 4, + IPv4PerInterface: 15, + IsTrunkingCompatible: true, + BranchInterface: 18, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 4, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, "d2.2xlarge": { Interface: 4, IPv4PerInterface: 15, @@ -7580,6 +7760,186 @@ var Limits = map[string]*VPCLimits{ Hypervisor: "nitro", IsBareMetal: false, }, + "m8g.12xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 54, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.16xlarge": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.24xlarge": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.2xlarge": { + Interface: 4, + IPv4PerInterface: 15, + IsTrunkingCompatible: true, + BranchInterface: 38, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 4, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.48xlarge": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.4xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 54, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.8xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 54, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.large": { + Interface: 3, + IPv4PerInterface: 10, + IsTrunkingCompatible: true, + BranchInterface: 9, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 3, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.medium": { + Interface: 2, + IPv4PerInterface: 4, + IsTrunkingCompatible: true, + BranchInterface: 4, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 2, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "m8g.metal-24xl": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "", + IsBareMetal: true, + }, + "m8g.metal-48xl": { + Interface: 15, + IPv4PerInterface: 50, + 
IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "", + IsBareMetal: true, + }, + "m8g.xlarge": { + Interface: 4, + IPv4PerInterface: 15, + IsTrunkingCompatible: true, + BranchInterface: 18, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 4, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, "mac1.metal": { Interface: 8, IPv4PerInterface: 30, diff --git a/pkg/config/loader.go b/pkg/config/loader.go index 94f36ee0..833af06a 100644 --- a/pkg/config/loader.go +++ b/pkg/config/loader.go @@ -50,15 +50,16 @@ const ( // Tested: 12 + 8 limits (not seeing significant degradation from 15+8) // Larger number seems not make latency better than 12+8 UserServiceClientQPS = 12 - UserServiceClientQPSBurst = 8 + UserServiceClientQPSBurst = 18 // EC2 API QPS for instance service client - InstanceServiceClientQPS = 5 - InstanceServiceClientBurst = 7 + InstanceServiceClientQPS = 12 + InstanceServiceClientBurst = 18 // API Server QPS - DefaultAPIServerQPS = 10 - DefaultAPIServerBurst = 15 + // Use the same values as default client (https://github.com/kubernetes-sigs/controller-runtime/blob/main/pkg/client/config/config.go#L85) + DefaultAPIServerQPS = 20 + DefaultAPIServerBurst = 30 ) // LoadResourceConfig returns the Resource Configuration for all resources managed by the VPC Resource Controller. Currently diff --git a/pkg/node/manager/manager.go b/pkg/node/manager/manager.go index 2759e775..b6539877 100644 --- a/pkg/node/manager/manager.go +++ b/pkg/node/manager/manager.go @@ -57,6 +57,7 @@ type manager struct { worker asyncWorker.Worker conditions condition.Conditions controllerVersion string + stopHealthCheckAt time.Time } // Manager to perform operation on list of managed/un-managed node @@ -66,6 +67,7 @@ type Manager interface { UpdateNode(nodeName string) error DeleteNode(nodeName string) error CheckNodeForLeakedENIs(nodeName string) + SkipHealthCheck() bool } // AsyncOperation is operation on a node after the lock has been released. 
@@ -96,6 +98,8 @@ type AsyncOperationJob struct { nodeName string } +const pausingHealthCheckDuration = 10 * time.Minute + // NewNodeManager returns a new node manager func NewNodeManager(logger logr.Logger, resourceManager resource.ResourceManager, wrapper api.Wrapper, worker asyncWorker.Worker, conditions condition.Conditions, controllerVersion string, healthzHandler *rcHealthz.HealthzHandler) (Manager, error) { @@ -425,6 +429,10 @@ func (m *manager) performAsyncOperation(job interface{}) (ctrl.Result, error) { utils.SendNodeEventWithNodeName(m.wrapper.K8sAPI, asyncJob.nodeName, utils.VersionNotice, fmt.Sprintf("The node is managed by VPC resource controller version %s", m.controllerVersion), v1.EventTypeNormal, m.Log) err = asyncJob.node.InitResources(m.resourceManager) if err != nil { + if pauseHealthCheckOnError(err) && !m.SkipHealthCheck() { + m.setStopHealthCheck() + log.Info("node manager sets a pause on health check due to observing a EC2 error", "error", err.Error()) + } log.Error(err, "removing the node from cache as it failed to initialize") m.removeNodeSafe(asyncJob.nodeName) // if initializing node failed, we want to make this visible although the manager will retry @@ -565,12 +573,36 @@ func (m *manager) check() healthz.Checker { randomName := uuid.New().String() _, found := m.GetNode(randomName) m.Log.V(1).Info("health check tested ping GetNode to check on datastore cache in node manager successfully", "TesedNodeName", randomName, "NodeFound", found) - var ping interface{} - m.worker.SubmitJob(ping) - m.Log.V(1).Info("health check tested ping SubmitJob with a nil job to check on worker queue in node manager successfully") + if m.SkipHealthCheck() { + m.Log.Info("due to EC2 error, node manager skips node worker queue health check for now") + } else { + var ping interface{} + m.worker.SubmitJob(ping) + m.Log.V(1).Info("health check tested ping SubmitJob with a nil job to check on worker queue in node manager successfully") + } c <- nil }, m.Log) return err } } + +func (m *manager) SkipHealthCheck() bool { + m.lock.RLock() + defer m.lock.RUnlock() + + return time.Since(m.stopHealthCheckAt) < pausingHealthCheckDuration +} + +func (m *manager) setStopHealthCheck() { + m.lock.Lock() + defer m.lock.Unlock() + + m.stopHealthCheckAt = time.Now() +} + +func pauseHealthCheckOnError(err error) bool { + return lo.ContainsBy(utils.PauseHealthCheckErrors, func(e string) bool { + return strings.Contains(err.Error(), e) + }) +} diff --git a/pkg/node/manager/manager_test.go b/pkg/node/manager/manager_test.go index c8450927..a580f125 100644 --- a/pkg/node/manager/manager_test.go +++ b/pkg/node/manager/manager_test.go @@ -17,6 +17,7 @@ import ( "errors" "fmt" "testing" + "time" "github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1" rcV1alpha1 "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" @@ -684,6 +685,38 @@ func Test_performAsyncOperation_fail(t *testing.T) { assert.NoError(t, err) } +func Test_performAsyncOperation_fail_pausingHealthCheck(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + mock := NewMock(ctrl, map[string]node.Node{nodeName: managedNode}) + + job := AsyncOperationJob{ + node: mock.MockNode, + nodeName: nodeName, + op: Init, + } + + mock.MockNode.EXPECT().InitResources(mock.MockResourceManager).Return(&node.ErrInitResources{ + Err: errors.New("RequestLimitExceeded: Request limit exceeded.\n\tstatus code: 503, request id: 123-123-123-123-123"), + }).Times(2) + 
mock.MockK8sAPI.EXPECT().GetNode(nodeName).Return(v1Node, nil).Times(2) + mock.MockK8sAPI.EXPECT().BroadcastEvent(v1Node, utils.VersionNotice, fmt.Sprintf("The node is managed by VPC resource controller version %s", mock.Manager.controllerVersion), v1.EventTypeNormal).Times(2) + + _, err := mock.Manager.performAsyncOperation(job) + time.Sleep(time.Millisecond * 100) + assert.True(t, mock.Manager.SkipHealthCheck()) + assert.NotContains(t, mock.Manager.dataStore, nodeName) // It should be cleared from cache + assert.NoError(t, err) + + time.Sleep(time.Second * 2) + _, err = mock.Manager.performAsyncOperation(job) + assert.NoError(t, err) + time.Sleep(time.Millisecond * 100) + assert.True(t, mock.Manager.SkipHealthCheck()) + assert.True(t, time.Since(mock.Manager.stopHealthCheckAt) > time.Second*2 && time.Since(mock.Manager.stopHealthCheckAt) < time.Second*3) +} + // Test_isPodENICapacitySet test if the pod-eni capacity then true is returned func Test_isPodENICapacitySet(t *testing.T) { ctrl := gomock.NewController(t) diff --git a/pkg/provider/branch/provider.go b/pkg/provider/branch/provider.go index b52f9504..4028300b 100644 --- a/pkg/provider/branch/provider.go +++ b/pkg/provider/branch/provider.go @@ -246,7 +246,7 @@ func (b *branchENIProvider) DeleteNode(nodeName string) (ctrl.Result, error) { trunkENI.DeleteAllBranchENIs() b.removeTrunkFromCache(nodeName) - b.log.Info("de-initialized resource provider successfully", "node name", nodeName) + b.log.Info("de-initialized resource provider successfully", "nodeName", nodeName) return ctrl.Result{}, nil } @@ -276,7 +276,7 @@ func (b *branchENIProvider) ReconcileNode(nodeName string) bool { log := b.log.WithValues("node", nodeName) if !isPresent { // return true to set the node next clean up asap since we don't know why trunk is missing - log.Info("no trunk ENI is pointing to the given node", "NodeName", nodeName) + log.Info("no trunk ENI is pointing to the given node", "nodeName", nodeName) return true } podList, err := b.apiWrapper.PodAPI.ListPods(nodeName) @@ -288,7 +288,7 @@ func (b *branchENIProvider) ReconcileNode(nodeName string) bool { } foundLeakedENI := trunkENI.Reconcile(podList.Items) - log.Info("completed reconcile node cleanup on branch ENIs", "NodeName", nodeName) + log.Info("completed reconcile node cleanup on branch ENIs", "nodeName", nodeName) return foundLeakedENI } @@ -348,7 +348,7 @@ func (b *branchENIProvider) CreateAndAnnotateResources(podNamespace string, podN "Security Groups %v", securityGroups), v1.EventTypeNormal) } - log := b.log.WithValues("pod namespace", pod.Namespace, "pod name", pod.Name, "node name", pod.Spec.NodeName) + log := b.log.WithValues("pod namespace", pod.Namespace, "pod name", pod.Name, "nodeName", pod.Spec.NodeName) start := time.Now() trunkENI, isPresent := b.getTrunkFromCache(pod.Spec.NodeName) @@ -411,7 +411,7 @@ func (b *branchENIProvider) DeleteBranchUsedByPods(nodeName string, UID string) // trunk cache is local map with lock. 
it shouldn't return not found error if trunk exists // if the node's trunk is not found, we shouldn't retry // worst case we rely on node based clean up goroutines to clean branch ENIs up - b.log.Info("failed to find trunk ENI for the node %s", nodeName) + b.log.Info("failed to find trunk ENI for the node", "nodeName", nodeName) return ctrl.Result{}, nil } diff --git a/pkg/resource/introspect.go b/pkg/resource/introspect.go index c34a5cad..0e7f795a 100644 --- a/pkg/resource/introspect.go +++ b/pkg/resource/introspect.go @@ -46,7 +46,7 @@ func (i *IntrospectHandler) Start(_ context.Context) error { mux.HandleFunc(GetResourcesSummaryPath, i.ResourceSummaryHandler) // Should this be a fatal error? - err := http.ListenAndServe(i.BindAddress, mux) + err := http.ListenAndServe(i.BindAddress, mux) // #nosec G114 if err != nil { i.Log.Error(err, "failed to run introspect API") } diff --git a/pkg/utils/errors.go b/pkg/utils/errors.go index 0b89093d..a4458d42 100644 --- a/pkg/utils/errors.go +++ b/pkg/utils/errors.go @@ -23,6 +23,7 @@ var ( ErrInsufficientCidrBlocks = errors.New("InsufficientCidrBlocks: The specified subnet does not have enough free cidr blocks to satisfy the request") ErrMsgProviderAndPoolNotFound = "cannot find the instance provider and pool from the cache" NotRetryErrors = []string{InsufficientCidrBlocksReason} + PauseHealthCheckErrors = []string{"RequestLimitExceeded"} ) // ShouldRetryOnError returns true if the error is retryable, else returns false diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index 14fda665..2a17b685 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -21,6 +21,7 @@ import ( vpcresourcesv1beta1 "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/vpc" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" "github.com/aws/aws-sdk-go/aws/arn" @@ -232,3 +233,17 @@ func GetSourceAcctAndArn(roleARN, region, clusterName string) (string, string, s sourceArn := fmt.Sprintf("arn:%s:eks:%s:%s:cluster/%s", parsedArn.Partition, region, parsedArn.AccountID, clusterName) return parsedArn.AccountID, parsedArn.Partition, sourceArn, nil } + +// PodHasENIRequest will return true if first container of pod spec has request for eni indicating +// it needs trunk interface from vpc-rc +func PodHasENIRequest(pod *corev1.Pod) bool { + if pod == nil { + return false + } + for _, container := range pod.Spec.Containers { + if _, hasEniRequest := container.Resources.Requests[config.ResourceNamePodENI]; hasEniRequest { + return true + } + } + return false +} diff --git a/pkg/utils/helper_test.go b/pkg/utils/helper_test.go index aee3659b..2a603b75 100644 --- a/pkg/utils/helper_test.go +++ b/pkg/utils/helper_test.go @@ -18,9 +18,12 @@ import ( "github.com/samber/lo" "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" vpcresourcesv1beta1 "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" ) // TestRemoveDuplicatedSg tests if RemoveDuplicatedSg func works as expected. 
@@ -579,3 +582,100 @@ func TestGetSourceAcctAndArn_NoRegion(t *testing.T) { assert.Equal(t, "", part, "correct partiton should be retrieved") } + +func TestPodHasENIRequest(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + expected bool + }{ + { + name: "Pod with ENI request in first container", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + config.ResourceNamePodENI: resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + name: "Pod with multiple containers, no ENI request", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("100m"), + }, + }, + }, + { + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("128Mi"), + }, + }, + }, + { + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + }, + }, + }, + expected: false, + }, + { + name: "Pod without ENI request", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + expected: false, + }, + { + name: "Pod with empty containers", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{}, + }, + }, + expected: false, + }, + { + name: "Nil pod", + pod: nil, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := PodHasENIRequest(tt.pod) + if result != tt.expected { + t.Errorf("PodHasENIRequest() = %v, want %v", result, tt.expected) + } + }) + } +} diff --git a/pkg/utils/set.go b/pkg/utils/set.go index ab7a037e..326ffa6d 100644 --- a/pkg/utils/set.go +++ b/pkg/utils/set.go @@ -13,6 +13,10 @@ package utils +import ( + "github.com/aws/aws-sdk-go/service/ec2" +) + // Difference returns a-b, elements present in a and not in b func Difference[T comparable](a, b []T) (diff []T) { m := make(map[T]struct{}) @@ -35,3 +39,11 @@ func GetKeyValSlice(m map[string]string) (key []string, val []string) { } return } + +func GetTagKeyValueMap(tagSet []*ec2.Tag) map[string]string { + m := make(map[string]string) + for _, tag := range tagSet { + m[*tag.Key] = *tag.Value + } + return m +} diff --git a/scripts/test/create-cluster-karpenter.sh b/scripts/test/create-cluster-karpenter.sh new file mode 100755 index 00000000..07880452 --- /dev/null +++ b/scripts/test/create-cluster-karpenter.sh @@ -0,0 +1,147 @@ +#!/usr/bin/env bash + +# Create EKS cluster with Karpenter using eksctl +set -eo pipefail + +SCRIPTS_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) +source "$SCRIPTS_DIR/lib/common.sh" +check_is_installed eksctl +check_is_installed helm +check_is_installed aws + + +export KARPENTER_NAMESPACE="kube-system" +export KARPENTER_VERSION="1.0.1" +export K8S_VERSION="1.30" + +export AWS_PARTITION="aws" # if you are not using standard partitions, you may need to configure to aws-cn / aws-us-gov +export CLUSTER_NAME="${USER}-sgp-scaletest" +export AWS_DEFAULT_REGION="us-west-2" +export AWS_ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)" +export TEMPOUT="$(mktemp)" + +# Deploy CFN stack to enable Karpenter to create and manage nodes +echo "Deploying Karpenter CFN stack" +curl -fsSL 
https://raw.githubusercontent.com/aws/karpenter-provider-aws/v"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > "${TEMPOUT}" \ +&& aws cloudformation deploy \ + --stack-name "Karpenter-${CLUSTER_NAME}" \ + --template-file "${TEMPOUT}" \ + --capabilities CAPABILITY_NAMED_IAM \ + --parameter-overrides "ClusterName=${CLUSTER_NAME}" + +# Create EKS cluster +echo "Creating EKS cluster" +eksctl create cluster -f - < # Update the kube-config path +ginkgo -v --timeout 30m -- --cluster-kubeconfig=$KUBE_CONFIG_PATH --cluster-name=$CLUSTER_NAME --aws-region=$AWS_REGION --aws-vpc-id=$VPC_ID +``` + +##### 3. Delete EKS cluster and other resources. + +The below script uninstalls Karpenter on the clusters, deletes the CFN stack, and finally deletes the EKS cluster. +``` +./scripts/test/delete-cluster-karpenter.sh +``` + +References: +1. Karpenter Getting Started Guide: https://karpenter.sh/docs/getting-started/getting-started-with-karpenter/ \ No newline at end of file diff --git a/test/framework/framework.go b/test/framework/framework.go index f2f65be2..af650606 100644 --- a/test/framework/framework.go +++ b/test/framework/framework.go @@ -17,6 +17,7 @@ import ( eniConfig "github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1" cninode "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" sgp "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/aws/autoscaling" ec2Manager "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/aws/ec2" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/configmap" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/controller" @@ -42,21 +43,22 @@ import ( ) type Framework struct { - Options Options - K8sClient client.Client - ec2Client *ec2.EC2 - DeploymentManager deployment.Manager - PodManager pod.Manager - EC2Manager *ec2Manager.Manager - SAManager serviceaccount.Manager - NSManager namespace.Manager - SGPManager *sgpManager.Manager - SVCManager service.Manager - JobManager jobs.Manager - NodeManager node.Manager - ControllerManager controller.Manager - RBACManager rbac.Manager - ConfigMapManager configmap.Manager + Options Options + K8sClient client.Client + ec2Client *ec2.EC2 + DeploymentManager deployment.Manager + PodManager pod.Manager + EC2Manager *ec2Manager.Manager + SAManager serviceaccount.Manager + NSManager namespace.Manager + SGPManager *sgpManager.Manager + SVCManager service.Manager + JobManager jobs.Manager + NodeManager node.Manager + ControllerManager controller.Manager + RBACManager rbac.Manager + ConfigMapManager configmap.Manager + AutoScalingManager autoscaling.Manager } func New(options Options) *Framework { @@ -91,20 +93,21 @@ func New(options Options) *Framework { ec2 := ec2.New(sess, &aws.Config{Region: aws.String(options.AWSRegion)}) return &Framework{ - K8sClient: k8sClient, - ec2Client: ec2, - PodManager: pod.NewManager(k8sClient, k8sSchema, config), - DeploymentManager: deployment.NewManager(k8sClient), - EC2Manager: ec2Manager.NewManager(ec2, options.AWSVPCID), - SAManager: serviceaccount.NewManager(k8sClient, config), - NSManager: namespace.NewManager(k8sClient), - SGPManager: sgpManager.NewManager(k8sClient), - SVCManager: service.NewManager(k8sClient), - JobManager: jobs.NewManager(k8sClient), - NodeManager: 
node.NewManager(k8sClient), - ControllerManager: controller.NewManager(k8sClient), - RBACManager: rbac.NewManager(k8sClient), - ConfigMapManager: configmap.NewManager(k8sClient), - Options: options, + K8sClient: k8sClient, + ec2Client: ec2, + PodManager: pod.NewManager(k8sClient, k8sSchema, config), + DeploymentManager: deployment.NewManager(k8sClient), + EC2Manager: ec2Manager.NewManager(ec2, options.AWSVPCID), + SAManager: serviceaccount.NewManager(k8sClient, config), + NSManager: namespace.NewManager(k8sClient), + SGPManager: sgpManager.NewManager(k8sClient), + SVCManager: service.NewManager(k8sClient), + JobManager: jobs.NewManager(k8sClient), + NodeManager: node.NewManager(k8sClient), + ControllerManager: controller.NewManager(k8sClient), + RBACManager: rbac.NewManager(k8sClient), + ConfigMapManager: configmap.NewManager(k8sClient), + AutoScalingManager: autoscaling.NewManager(sess), + Options: options, } } diff --git a/test/framework/resource/aws/autoscaling/manager.go b/test/framework/resource/aws/autoscaling/manager.go new file mode 100644 index 00000000..4f3f1730 --- /dev/null +++ b/test/framework/resource/aws/autoscaling/manager.go @@ -0,0 +1,64 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package autoscaling + +import ( + "fmt" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/autoscaling" + "github.com/aws/aws-sdk-go/service/autoscaling/autoscalingiface" +) + +type Manager interface { + DescribeAutoScalingGroup(autoScalingGroupName string) ([]*autoscaling.Group, error) + UpdateAutoScalingGroup(asgName string, desiredSize, minSize, maxSize int64) error +} + +type defaultManager struct { + autoscalingiface.AutoScalingAPI +} + +func NewManager(session *session.Session) Manager { + return &defaultManager{ + AutoScalingAPI: autoscaling.New(session), + } +} + +func (d defaultManager) DescribeAutoScalingGroup(autoScalingGroupName string) ([]*autoscaling.Group, error) { + describeAutoScalingGroupIp := &autoscaling.DescribeAutoScalingGroupsInput{ + AutoScalingGroupNames: aws.StringSlice([]string{autoScalingGroupName}), + } + asg, err := d.AutoScalingAPI.DescribeAutoScalingGroups(describeAutoScalingGroupIp) + if err != nil { + return nil, err + } + if len(asg.AutoScalingGroups) == 0 { + return nil, fmt.Errorf("failed to find asg %s", autoScalingGroupName) + } + + return asg.AutoScalingGroups, nil +} + +func (d defaultManager) UpdateAutoScalingGroup(asgName string, desiredSize, minSize, maxSize int64) error { + updateASGInput := &autoscaling.UpdateAutoScalingGroupInput{ + AutoScalingGroupName: aws.String(asgName), + DesiredCapacity: aws.Int64(desiredSize), + MaxSize: aws.Int64(maxSize), + MinSize: aws.Int64(minSize), + } + _, err := d.AutoScalingAPI.UpdateAutoScalingGroup(updateASGInput) + return err +} diff --git a/test/framework/resource/aws/ec2/manager.go b/test/framework/resource/aws/ec2/manager.go index bbfa69d4..be6d0c3a 100644 --- 
a/test/framework/resource/aws/ec2/manager.go
+++ b/test/framework/resource/aws/ec2/manager.go
@@ -17,8 +17,8 @@ import (
 "context"
 "fmt"
+ "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/vpc"
 "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils"
-
 "github.com/aws/aws-sdk-go/aws"
 "github.com/aws/aws-sdk-go/aws/awserr"
 "github.com/aws/aws-sdk-go/service/ec2"
@@ -240,3 +240,59 @@ func (d *Manager) GetPrivateIPv4AddressAndPrefix(instanceID string) ([]string, [
 return secondaryIPAddresses, ipV4Prefixes, err
 }
+
+func (d *Manager) CreateAndAttachNetworkInterface(subnetID, instanceID, instanceType string) (string, error) {
+ createENIOp, err := d.ec2Client.CreateNetworkInterface(&ec2.CreateNetworkInterfaceInput{
+ SubnetId: aws.String(subnetID),
+ Description: aws.String("VPC-Resource-Controller integration test ENI"),
+ })
+ if err != nil {
+ return "", err
+ }
+ nwInterfaceID := *createENIOp.NetworkInterface.NetworkInterfaceId
+ // for test just use the max index - 2 (as trunk maybe attached to max index)
+ indexID := vpc.Limits[instanceType].NetworkCards[0].MaximumNetworkInterfaces - 2
+ _, err = d.ec2Client.AttachNetworkInterface(&ec2.AttachNetworkInterfaceInput{
+ InstanceId: aws.String(instanceID),
+ NetworkInterfaceId: aws.String(nwInterfaceID),
+ DeviceIndex: aws.Int64(indexID),
+ })
+ return nwInterfaceID, err
+}
+
+func (d *Manager) TerminateInstances(instanceID string) error {
+ _, err := d.ec2Client.TerminateInstances(&ec2.TerminateInstancesInput{
+ InstanceIds: []*string{&instanceID},
+ })
+ return err
+}
+
+func (d *Manager) DescribeNetworkInterface(nwInterfaceID string) error {
+ _, err := d.ec2Client.DescribeNetworkInterfaces(&ec2.DescribeNetworkInterfacesInput{
+ NetworkInterfaceIds: []*string{&nwInterfaceID},
+ })
+ return err
+}
+func (d *Manager) DeleteNetworkInterface(nwInterfaceID string) error {
+ _, err := d.ec2Client.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{
+ NetworkInterfaceId: aws.String(nwInterfaceID),
+ })
+ return err
+}
+func (d *Manager) ReCreateSG(securityGroupName string, ctx context.Context) (string, error) {
+ groupID, err := d.GetSecurityGroupID(securityGroupName)
+ // If the security group already exists, no error will be returned
+ // We need to delete the security group in this case so ingress/egress
+ // rules from the last run don't interfere with the current test
+ if err == nil {
+ if err = d.DeleteSecurityGroup(ctx, groupID); err != nil {
+ return "", err
+ }
+ }
+ // If error is not nil, then the security group doesn't exist, and we need
+ // to create a new one
+ if groupID, err = d.CreateSecurityGroup(securityGroupName); err != nil {
+ return "", err
+ }
+ return groupID, nil
+}
diff --git a/test/framework/resource/k8s/node/manager.go b/test/framework/resource/k8s/node/manager.go
index 6b3f42c9..c69c74c3 100644
--- a/test/framework/resource/k8s/node/manager.go
+++ b/test/framework/resource/k8s/node/manager.go
@@ -15,6 +15,7 @@ package node
 import (
 "context"
+ "strings"
 cninode "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1"
 "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils"
@@ -32,6 +33,7 @@ type Manager interface {
 GetNodeList() (*v1.NodeList, error)
 GetCNINode(node *v1.Node) (*cninode.CNINode, error)
 GetCNINodeList() (*cninode.CNINodeList, error)
+ GetInstanceID(node *v1.Node) string
 }
 type defaultManager struct {
@@ -117,3 +119,11 @@ func (d *defaultManager) GetNodeList() (*v1.NodeList, error) {
 err :=
d.k8sClient.List(context.TODO(), list) return list, err } + +func (d *defaultManager) GetInstanceID(node *v1.Node) string { + if node.Spec.ProviderID != "" { + id := strings.Split(node.Spec.ProviderID, "/") + return id[len(id)-1] + } + return "" +} diff --git a/test/framework/resource/k8s/node/wrapper.go b/test/framework/resource/k8s/node/wrapper.go index e9cff281..a486d63a 100644 --- a/test/framework/resource/k8s/node/wrapper.go +++ b/test/framework/resource/k8s/node/wrapper.go @@ -15,30 +15,71 @@ package node import ( "context" + "fmt" + cninode "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/samber/lo" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/wait" ) -func GetNodeAndWaitTillCapacityPresent(manager Manager, ctx context.Context, os string, expectedResource string) *v1.NodeList { - +func GetNodeAndWaitTillCapacityPresent(manager Manager, os string, expectedResource string) *v1.NodeList { observedNodeList := &v1.NodeList{} var err error - err = wait.Poll(utils.PollIntervalShort, utils.ResourceCreationTimeout, func() (bool, error) { - By("checking nodes have capacity present") - observedNodeList, err = manager.GetNodesWithOS(os) - Expect(err).ToNot(HaveOccurred()) - for _, node := range observedNodeList.Items { - _, found := node.Status.Allocatable[v1.ResourceName(expectedResource)] - if !found { - return false, nil + err = wait.PollUntilContextTimeout(context.Background(), utils.PollIntervalShort, utils.ResourceCreationTimeout, true, + func(ctx context.Context) (bool, error) { + By("checking nodes have capacity present") + observedNodeList, err = manager.GetNodesWithOS(os) + Expect(err).ToNot(HaveOccurred()) + for _, node := range observedNodeList.Items { + _, found := node.Status.Allocatable[v1.ResourceName(expectedResource)] + if !found { + return false, nil + } } - } - return true, nil - }) + return true, nil + }) Expect(err).ToNot(HaveOccurred()) return observedNodeList } + +// VerifyCNINode checks if the number of CNINodes is equal to number of nodes in the cluster, and verifies 1:1 mapping between CNINode and Node objects +// Returns nil if count and 1:1 mapping exists, else returns error +func VerifyCNINode(manager Manager) error { + var cniNodeList *cninode.CNINodeList + var nodeList *v1.NodeList + var err error + By("checking number of CNINodes match number of nodes in the cluster") + err = wait.PollUntilContextTimeout(context.Background(), utils.PollIntervalShort, utils.PollTimeout, true, + func(ctx context.Context) (bool, error) { + if cniNodeList, err = manager.GetCNINodeList(); err != nil { + return false, nil + } + if nodeList, err = manager.GetNodeList(); err != nil { + return false, nil + } + if len(nodeList.Items) != len(cniNodeList.Items) { + return false, nil + } + return true, nil + }) + if err != nil { + return fmt.Errorf("number of CNINodes does not match number of nodes in the cluster") + } + By("checking CNINode list matches node list") + nameMatched := true + for _, node := range nodeList.Items { + if !lo.ContainsBy(cniNodeList.Items, func(cniNode cninode.CNINode) bool { + return cniNode.Name == node.Name + }) { + nameMatched = false + } + } + if !nameMatched { + return fmt.Errorf("CNINode list does not match node list") + } + return nil +} diff --git a/test/framework/utils/poll.go b/test/framework/utils/poll.go index cb343b16..ec62f251 
100644 --- a/test/framework/utils/poll.go +++ b/test/framework/utils/poll.go @@ -19,9 +19,10 @@ const ( PollIntervalShort = 2 * time.Second PollIntervalMedium = 10 * time.Second PollIntervalLong = 20 * time.Second + PollTimeout = 30 * time.Second // ResourceCreationTimeout is the number of seconds till the controller waits // for the resource creation to complete - ResourceCreationTimeout = 120 * time.Second + ResourceCreationTimeout = 180 * time.Second // Windows Container Images are much larger in size and pulling them the first // time takes much longer, so have higher timeout for Windows Pod to be Ready WindowsPodsCreationTimeout = 240 * time.Second diff --git a/test/framework/utils/resource.go b/test/framework/utils/resource.go index 6a4e6614..6e7eab43 100644 --- a/test/framework/utils/resource.go +++ b/test/framework/utils/resource.go @@ -15,4 +15,5 @@ package utils const ( ResourceNamePrefix = "vpc-resource-controller-integration-" + TestNameSpace = "test-ns" ) diff --git a/test/integration/cninode/cninode_suite_test.go b/test/integration/cninode/cninode_suite_test.go new file mode 100644 index 00000000..83411fbb --- /dev/null +++ b/test/integration/cninode/cninode_suite_test.go @@ -0,0 +1,50 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cninode_test + +import ( + "testing" + + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/node" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestCNINode(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "CNINode Test Suite") +} + +var frameWork *framework.Framework +var _ = BeforeSuite(func() { + By("creating a framework") + frameWork = framework.New(framework.GlobalOptions) + + By("verify at least 2 nodes are available") + nodeList, err := frameWork.NodeManager.GetNodeList() + Expect(err).ToNot(HaveOccurred()) + Expect(len(nodeList.Items)).To(BeNumerically(">", 1)) + + By("verify CNINode count") + err = node.VerifyCNINode(frameWork.NodeManager) + Expect(err).ToNot(HaveOccurred()) +}) + +// Verify CNINode count before and after test remains same +var _ = AfterSuite(func() { + By("verify CNINode count") + err := node.VerifyCNINode(frameWork.NodeManager) + Expect(err).ToNot(HaveOccurred()) +}) diff --git a/test/integration/cninode/cninode_test.go b/test/integration/cninode/cninode_test.go new file mode 100644 index 00000000..8173a348 --- /dev/null +++ b/test/integration/cninode/cninode_test.go @@ -0,0 +1,106 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. 
This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cninode_test + +import ( + "context" + + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/node" + testUtils "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/util/wait" +) + +var _ = Describe("[CANARY]CNINode test", func() { + Describe("CNINode count verification on adding or removing node", func() { + var oldDesiredSize int64 + var oldMinSize int64 + var oldMaxSize int64 + var newSize int64 + var asgName string + BeforeEach(func() { + By("getting autoscaling group name") + asgName = ListNodesAndGetAutoScalingGroupName() + asg, err := frameWork.AutoScalingManager.DescribeAutoScalingGroup(asgName) + Expect(err).ToNot(HaveOccurred()) + oldDesiredSize = *asg[0].DesiredCapacity + oldMinSize = *asg[0].MinSize + oldMaxSize = *asg[0].MaxSize + }) + AfterEach(func() { + By("restoring ASG desiredCapacity, minSize, maxSize after test") + err := frameWork.AutoScalingManager.UpdateAutoScalingGroup(asgName, oldDesiredSize, oldMinSize, oldMaxSize) + Expect(err).ToNot(HaveOccurred()) + Expect(WaitTillNodeSizeUpdated(int(oldDesiredSize))).Should(Succeed()) + }) + + Context("when new node is added", func() { + It("it should create new CNINode", func() { + newSize = oldDesiredSize + 1 + // Update ASG to set desiredSize + By("adding new node") + err := frameWork.AutoScalingManager.UpdateAutoScalingGroup(asgName, newSize, oldMinSize, newSize) + Expect(err).ToNot(HaveOccurred()) + Expect(WaitTillNodeSizeUpdated(int(newSize))).Should(Succeed()) + Expect(node.VerifyCNINode(frameWork.NodeManager)).Should(Succeed()) + }) + }) + Context("when existing node is removed", func() { + It("it should delete CNINode", func() { + newSize = oldDesiredSize - 1 + // Update ASG to set new minSize and new maxSize + By("removing existing node") + err := frameWork.AutoScalingManager.UpdateAutoScalingGroup(asgName, newSize, newSize, oldMaxSize) + Expect(err).ToNot(HaveOccurred()) + Expect(WaitTillNodeSizeUpdated(int(newSize))).Should(Succeed()) + Expect(node.VerifyCNINode(frameWork.NodeManager)).Should(Succeed()) + }) + }) + }) +}) + +func ListNodesAndGetAutoScalingGroupName() string { + By("getting instance details") + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeList.Items).ToNot(BeEmpty()) + instanceID := frameWork.NodeManager.GetInstanceID(&nodeList.Items[0]) + Expect(instanceID).ToNot(BeEmpty()) + instance, err := frameWork.EC2Manager.GetInstanceDetails(instanceID) + Expect(err).ToNot(HaveOccurred()) + tags := utils.GetTagKeyValueMap(instance.Tags) + val, ok := tags["aws:autoscaling:groupName"] + Expect(ok).To(BeTrue()) + return val +} + +// Verifies (linux) node size is updated after ASG is updated +func WaitTillNodeSizeUpdated(desiredSize int) error { + By("waiting till node list is updated") + err := wait.PollUntilContextTimeout(context.Background(), testUtils.PollIntervalShort, testUtils.ResourceCreationTimeout, true, + func(ctx context.Context) (bool, error) { + nodes, err := 
frameWork.NodeManager.GetNodesWithOS(config.OSLinux) // since we are only updating the linux ASG in the test + if err != nil { + return false, nil + } + if len(nodes.Items) != desiredSize { + return false, nil + } + return true, nil + }) + return err +} diff --git a/test/integration/perpodsg/perpodsg_suite_test.go b/test/integration/perpodsg/perpodsg_suite_test.go index c31c6003..584e6b55 100644 --- a/test/integration/perpodsg/perpodsg_suite_test.go +++ b/test/integration/perpodsg/perpodsg_suite_test.go @@ -47,33 +47,18 @@ var _ = BeforeSuite(func() { ctx = context.Background() verify = verifier.NewPodVerification(frameWork, ctx) - securityGroupID1 = reCreateSGIfAlreadyExists(utils.ResourceNamePrefix + "sg-1") - securityGroupID2 = reCreateSGIfAlreadyExists(utils.ResourceNamePrefix + "sg-2") + securityGroupID1, err = frameWork.EC2Manager.ReCreateSG(utils.ResourceNamePrefix+"sg-1", ctx) + Expect(err).ToNot(HaveOccurred()) + securityGroupID2, err = frameWork.EC2Manager.ReCreateSG(utils.ResourceNamePrefix+"sg-2", ctx) + Expect(err).ToNot(HaveOccurred()) - nodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, ctx, "linux", + nodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, "linux", config.ResourceNamePodENI) + err = node.VerifyCNINode(frameWork.NodeManager) + Expect(err).ToNot(HaveOccurred()) }) var _ = AfterSuite(func() { Expect(frameWork.EC2Manager.DeleteSecurityGroup(ctx, securityGroupID1)).To(Succeed()) Expect(frameWork.EC2Manager.DeleteSecurityGroup(ctx, securityGroupID2)).To(Succeed()) }) - -func reCreateSGIfAlreadyExists(securityGroupName string) string { - groupID, err := frameWork.EC2Manager.GetSecurityGroupID(securityGroupName) - // If the security group already exists, no error will be returned - // We need to delete the security Group in this case so ingres/egress - // rules from last run don't interfere with the current test - if err == nil { - By("deleting the older security group" + groupID) - err = frameWork.EC2Manager.DeleteSecurityGroup(ctx, groupID) - Expect(err).ToNot(HaveOccurred()) - } - // If error is not nil, then the Security Group doesn't exists, we need - // to create new rule - By("creating a new security group with name " + securityGroupName) - groupID, err = frameWork.EC2Manager.CreateSecurityGroup(securityGroupName) - Expect(err).ToNot(HaveOccurred()) - - return groupID -} diff --git a/test/integration/perpodsg/perpodsg_test.go b/test/integration/perpodsg/perpodsg_test.go index 4c34ddf8..ef8f51e4 100644 --- a/test/integration/perpodsg/perpodsg_test.go +++ b/test/integration/perpodsg/perpodsg_test.go @@ -37,29 +37,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -var _ = Describe("CNINode Veification", func() { - Describe("verify CNINode mapping to nodes", func() { - Context("when nodes are ready", func() { - It("should have same number of CNINode no matter which mode", func() { - cniNodes, err := frameWork.NodeManager.GetCNINodeList() - Expect(err).NotTo(HaveOccurred()) - nodes, err := frameWork.NodeManager.GetNodeList() - Expect(err).NotTo(HaveOccurred()) - Expect(len(nodes.Items)).To(Equal(len(cniNodes.Items))) - nameMatched := true - for _, node := range nodes.Items { - if !lo.ContainsBy(cniNodes.Items, func(cniNode cninode.CNINode) bool { - return cniNode.Name == node.Name - }) { - nameMatched = false - } - } - Expect(nameMatched).To(BeTrue()) - }) - }) - }) -}) - var _ = Describe("Branch ENI Pods", func() { var ( securityGroupPolicy *v1beta1.SecurityGroupPolicy diff --git 
a/test/integration/scale/pod_scale_test.go b/test/integration/scale/pod_scale_test.go new file mode 100644 index 00000000..aff3a4e9 --- /dev/null +++ b/test/integration/scale/pod_scale_test.go @@ -0,0 +1,89 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package scale_test + +import ( + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/manifest" + deploymentWrapper "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/deployment" + sgpWrapper "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/sgp" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/apps/v1" +) + +var _ = Describe("Security group per pod scale test", func() { + var ( + sgpLabelKey string + sgpLabelValue string + securityGroups []string + securityGroupPolicy *v1beta1.SecurityGroupPolicy + err error + ) + + BeforeEach(func() { + sgpLabelKey = "role" + sgpLabelValue = "db" + securityGroups = []string{securityGroupID} + }) + + JustBeforeEach(func() { + // create SGP + securityGroupPolicy, err = manifest.NewSGPBuilder(). + Namespace(namespace). + PodMatchLabel(sgpLabelKey, sgpLabelValue). + SecurityGroup(securityGroups).Build() + Expect(err).NotTo(HaveOccurred()) + }) + + JustAfterEach(func() { + By("deleting security group policy") + err = frameWork.SGPManager.DeleteAndWaitTillSecurityGroupIsDeleted(ctx, securityGroupPolicy) + Expect(err).NotTo(HaveOccurred()) + }) + + Describe("creating deployment", func() { + var deployment *v1.Deployment + + JustBeforeEach(func() { + deployment = manifest.NewDefaultDeploymentBuilder(). + Namespace(namespace). + Replicas(1000). + PodLabel(sgpLabelKey, sgpLabelValue).Build() + }) + + JustAfterEach(func() { + By("deleting the deployment") + err = frameWork.DeploymentManager.DeleteAndWaitUntilDeploymentDeleted(ctx, deployment) + Expect(err).ToNot(HaveOccurred()) + time.Sleep(time.Minute) // allow time for pods to terminate + }) + + Context("when deployment is created", func() { + It("should have all the pods running", MustPassRepeatedly(3), func() { + start := time.Now() + sgpWrapper.CreateSecurityGroupPolicy(frameWork.K8sClient, ctx, securityGroupPolicy) + deploymentWrapper. + CreateAndWaitForDeploymentToStart(frameWork.DeploymentManager, ctx, deployment) + duration := time.Since(start) + verify.VerifyNetworkingOfAllPodUsingENI(namespace, sgpLabelKey, sgpLabelValue, + securityGroups) + Expect(duration.Minutes()).To(BeNumerically("<", 5.5)) + }) + }) + }) + +}) diff --git a/test/integration/scale/scale_suite_test.go b/test/integration/scale/scale_suite_test.go new file mode 100644 index 00000000..4c0e528c --- /dev/null +++ b/test/integration/scale/scale_suite_test.go @@ -0,0 +1,57 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). 
You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package scale_test + +import ( + "context" + "testing" + + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" + verifier "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/verify" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var frameWork *framework.Framework +var verify *verifier.PodVerification +var ctx context.Context +var securityGroupID string +var err error +var namespace = "podsg-scale-" + utils.TestNameSpace + +func TestScale(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Scale Test Suite") +} + +var _ = BeforeSuite(func() { + By("creating a framework") + frameWork = framework.New(framework.GlobalOptions) + ctx = context.Background() + verify = verifier.NewPodVerification(frameWork, ctx) + + // create test namespace + Expect(frameWork.NSManager.CreateNamespace(ctx, namespace)).To(Succeed()) + // create test security group + securityGroupID, err = frameWork.EC2Manager.ReCreateSG(utils.ResourceNamePrefix+"sg", ctx) + Expect(err).ToNot(HaveOccurred()) +}) + +var _ = AfterSuite(func() { + // delete test namespace + Expect(frameWork.NSManager.DeleteAndWaitTillNamespaceDeleted(ctx, namespace)).To(Succeed()) + // delete test security group + Expect(frameWork.EC2Manager.DeleteSecurityGroup(ctx, securityGroupID)).To(Succeed()) +}) diff --git a/test/integration/windows/windows_suite_test.go b/test/integration/windows/windows_suite_test.go index 60b147d1..bebd99eb 100644 --- a/test/integration/windows/windows_suite_test.go +++ b/test/integration/windows/windows_suite_test.go @@ -67,7 +67,7 @@ var _ = BeforeSuite(func() { } By("getting the list of Windows node") - windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, ctx, "windows", + windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, "windows", config.ResourceNameIPAddress) By("getting the instance ID for the first node") diff --git a/test/integration/windows/windows_test.go b/test/integration/windows/windows_test.go index dd96965b..ea85c479 100644 --- a/test/integration/windows/windows_test.go +++ b/test/integration/windows/windows_test.go @@ -228,7 +228,7 @@ var _ = Describe("Windows Integration Test", func() { } JustBeforeEach(func() { - windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, ctx, "windows", + windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, "windows", config.ResourceNameIPAddress) instanceID = manager.GetNodeInstanceID(&windowsNodeList.Items[0]) nodeName = windowsNodeList.Items[0].Name @@ -455,7 +455,7 @@ var _ = Describe("Windows Integration Test", func() { bufferForCoolDown = time.Second * 30 - windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, ctx, "windows", + windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, "windows", config.ResourceNameIPAddress) instanceID = manager.GetNodeInstanceID(&windowsNodeList.Items[0]) nodeName = 
windowsNodeList.Items[0].Name
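
For reference, the updated GetNodeAndWaitTillCapacityPresent and the new VerifyCNINode and WaitTillNodeSizeUpdated helpers all rely on wait.PollUntilContextTimeout rather than the deprecated wait.Poll. Below is a minimal, self-contained sketch of that polling pattern; the readiness condition is a stand-in assumption, not code from this patch.
```go
package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	start := time.Now()
	// Poll every 2s for up to 30s, checking the condition immediately on entry.
	// Returning (false, nil) means "not ready yet, keep polling"; a non-nil error
	// aborts the poll early; (true, nil) ends it successfully.
	err := wait.PollUntilContextTimeout(context.Background(), 2*time.Second, 30*time.Second, true,
		func(ctx context.Context) (bool, error) {
			// Stand-in readiness check; the real tests compare node and CNINode
			// counts or look for an allocatable resource on the nodes here.
			return time.Since(start) > 6*time.Second, nil
		})
	fmt.Println("poll finished, err =", err)
}
```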