diff --git a/PROJECT b/PROJECT index b9b09ae0..22571eb8 100644 --- a/PROJECT +++ b/PROJECT @@ -19,6 +19,7 @@ resources: version: v1beta1 - api: crdVersion: v1 + controller: true domain: k8s.aws group: vpcresources kind: CNINode diff --git a/apis/vpcresources/v1alpha1/cninode_types.go b/apis/vpcresources/v1alpha1/cninode_types.go index 8555f14a..7954673a 100644 --- a/apis/vpcresources/v1alpha1/cninode_types.go +++ b/apis/vpcresources/v1alpha1/cninode_types.go @@ -35,6 +35,8 @@ type Feature struct { // CNINodeSpec defines the desired state of CNINode type CNINodeSpec struct { Features []Feature `json:"features,omitempty"` + // Additional tag key/value added to all network interfaces provisioned by the vpc-resource-controller and VPC-CNI + Tags map[string]string `json:"tags,omitempty"` } // CNINodeStatus defines the managed VPC resources. diff --git a/apis/vpcresources/v1alpha1/zz_generated.deepcopy.go b/apis/vpcresources/v1alpha1/zz_generated.deepcopy.go index 3fbda7f0..30d7a9ed 100644 --- a/apis/vpcresources/v1alpha1/zz_generated.deepcopy.go +++ b/apis/vpcresources/v1alpha1/zz_generated.deepcopy.go @@ -89,6 +89,13 @@ func (in *CNINodeSpec) DeepCopyInto(out *CNINodeSpec) { *out = make([]Feature, len(*in)) copy(*out, *in) } + if in.Tags != nil { + in, out := &in.Tags, &out.Tags + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNINodeSpec. diff --git a/config/crd/bases/vpcresources.k8s.aws_cninodes.yaml b/config/crd/bases/vpcresources.k8s.aws_cninodes.yaml index 393a50ab..0b4566da 100644 --- a/config/crd/bases/vpcresources.k8s.aws_cninodes.yaml +++ b/config/crd/bases/vpcresources.k8s.aws_cninodes.yaml @@ -56,6 +56,12 @@ spec: type: string type: object type: array + tags: + additionalProperties: + type: string + description: Additional tag key/value added to all network interfaces + provisioned by the vpc-resource-controller and VPC-CNI + type: object type: object status: description: CNINodeStatus defines the managed VPC resources. diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index b292d57b..5a87d016 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -61,6 +61,8 @@ rules: - create - get - list + - patch + - update - watch - apiGroups: - vpcresources.k8s.aws diff --git a/controllers/core/node_controller.go b/controllers/core/node_controller.go index 8a1f8b0e..326e53cb 100644 --- a/controllers/core/node_controller.go +++ b/controllers/core/node_controller.go @@ -20,6 +20,7 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager" @@ -36,15 +37,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/healthz" ) -// MaxNodeConcurrentReconciles is the number of go routines that can invoke -// Reconcile in parallel. Since Node Reconciler, performs local operation -// on cache only a single go routine should be sufficient. Using more than -// one routines to help high rate churn and larger nodes groups restarting -// when the controller has to be restarted for various reasons. -const ( - MaxNodeConcurrentReconciles = 10 -) - // NodeReconciler reconciles a Node object type NodeReconciler struct { client.Client @@ -117,7 +109,7 @@ func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, healthzHandler *rcHe return ctrl.NewControllerManagedBy(mgr). For(&corev1.Node{}). - WithOptions(controller.Options{MaxConcurrentReconciles: MaxNodeConcurrentReconciles}). + WithOptions(controller.Options{MaxConcurrentReconciles: config.MaxNodeConcurrentReconciles}). Owns(&v1alpha1.CNINode{}). Complete(r) } diff --git a/controllers/crds/cninode_controller.go b/controllers/crds/cninode_controller.go new file mode 100644 index 00000000..109aa359 --- /dev/null +++ b/controllers/crds/cninode_controller.go @@ -0,0 +1,252 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package crds + +import ( + "context" + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" + ec2API "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +var ( + prometheusRegistered = false + recreateCNINodeCallCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "recreate_cniNode_call_count", + Help: "The number of requests made by controller to recreate CNINode when node exists", + }, + ) + recreateCNINodeErrCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "recreate_cniNode_err_count", + Help: "The number of requests that failed when controller tried to recreate the CNINode", + }, + ) +) + +func prometheusRegister() { + prometheusRegistered = true + + metrics.Registry.MustRegister( + recreateCNINodeCallCount, + recreateCNINodeErrCount) + + prometheusRegistered = true +} + +// CNINodeReconciler reconciles a CNINode object +type CNINodeReconciler struct { + client.Client + Scheme *runtime.Scheme + Context context.Context + Log logr.Logger + EC2Wrapper ec2API.EC2Wrapper + K8sAPI k8s.K8sWrapper + ClusterName string + VPCID string + FinalizerManager k8s.FinalizerManager +} + +//+kubebuilder:rbac:groups=vpcresources.k8s.aws,resources=cninodes,verbs=get;list;watch;create;update;patch; + +// Reconcile handles CNINode create/update/delete events +// Reconciler will add the finalizer and cluster name tag if it does not exist and finalize on CNINode on deletion to clean up leaked resource on node +func (r *CNINodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + cniNode := &v1alpha1.CNINode{} + if err := r.Client.Get(ctx, req.NamespacedName, cniNode); err != nil { + if errors.IsNotFound(err) { + r.Log.Info("CNINode is deleted", "CNINode", req.NamespacedName) + } + // Ignore not found error + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + nodeFound := true + node := &v1.Node{} + if err := r.Client.Get(ctx, req.NamespacedName, node); err != nil { + if errors.IsNotFound(err) { + nodeFound = false + } else { + r.Log.Error(err, "failed to get the node object in CNINode reconciliation, will retry") + // Requeue request so it can be retried + return ctrl.Result{}, err + } + } + + if cniNode.GetDeletionTimestamp().IsZero() { + shouldPatch := false + cniNodeCopy := cniNode.DeepCopy() + // Add cluster name tag if it does not exist + val, ok := cniNode.Spec.Tags[config.CNINodeClusterNameKey] + if !ok || val != r.ClusterName { + if len(cniNodeCopy.Spec.Tags) != 0 { + cniNodeCopy.Spec.Tags[config.CNINodeClusterNameKey] = r.ClusterName + } else { + cniNodeCopy.Spec.Tags = map[string]string{ + config.CNINodeClusterNameKey: r.ClusterName, + } + } + shouldPatch = true + } + // if node exists, get & add OS label if it does not exist on CNINode + if nodeFound { + nodeLabelOS := node.ObjectMeta.Labels[config.NodeLabelOS] + val, ok = cniNode.ObjectMeta.Labels[config.NodeLabelOS] + if !ok || val != nodeLabelOS { + if len(cniNodeCopy.ObjectMeta.Labels) != 0 { + cniNodeCopy.ObjectMeta.Labels[config.NodeLabelOS] = nodeLabelOS + } else { + cniNodeCopy.ObjectMeta.Labels = map[string]string{ + config.NodeLabelOS: nodeLabelOS, + } + } + shouldPatch = true + } + } + + if shouldPatch { + r.Log.Info("patching CNINode to add required fields Tags and Labels", "cninode", cniNode.Name) + return ctrl.Result{}, r.Client.Patch(ctx, cniNodeCopy, client.MergeFromWithOptions(cniNode, client.MergeFromWithOptimisticLock{})) + } + + // Add finalizer if it does not exist + if err := r.FinalizerManager.AddFinalizers(ctx, cniNode, config.NodeTerminationFinalizer); err != nil { + r.Log.Error(err, "failed to add finalizer on CNINode, will retry", "cniNode", cniNode.Name, "finalizer", config.NodeTerminationFinalizer) + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + + } else { // CNINode is marked for deletion + if !nodeFound { + // node is also deleted, proceed with running the cleanup routine and remove the finalizer + + // run cleanup for Linux nodes only + if val, ok := cniNode.ObjectMeta.Labels[config.NodeLabelOS]; ok && val == config.OSLinux { + r.Log.Info("running the finalizer routine on cniNode", "cniNode", cniNode.Name) + cleaner := &cleanup.NodeTerminationCleaner{ + NodeName: cniNode.Name, + } + cleaner.ENICleaner = &cleanup.ENICleaner{ + EC2Wrapper: r.EC2Wrapper, + Manager: cleaner, + VPCID: r.VPCID, + Log: ctrl.Log.WithName("eniCleaner").WithName("node"), + } + + if err := cleaner.DeleteLeakedResources(); err != nil { + r.Log.Error(err, "failed to cleanup resources during node termination, request will be requeued") + // Return err if failed to delete leaked ENIs on node so it can be retried + return ctrl.Result{}, err + } + } + + if err := r.FinalizerManager.RemoveFinalizers(ctx, cniNode, config.NodeTerminationFinalizer); err != nil { + r.Log.Error(err, "failed to remove finalizer on CNINode, will retry", "cniNode", cniNode.Name, "finalizer", config.NodeTerminationFinalizer) + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } else { + // node exists, do not run the cleanup routine(periodic cleanup routine will anyway delete leaked ENIs), remove the finalizer + // to proceed with object deletion, and recreate similar object + + // Create a copy without deletion timestamp for creation + newCNINode := &v1alpha1.CNINode{ + ObjectMeta: metav1.ObjectMeta{ + Name: cniNode.Name, + Namespace: "", + OwnerReferences: cniNode.OwnerReferences, + // TODO: should we include finalizers at object creation or let controller patch it on Create/Update event? + Finalizers: cniNode.Finalizers, + }, + Spec: cniNode.Spec, + } + + if err := r.FinalizerManager.RemoveFinalizers(ctx, cniNode, config.NodeTerminationFinalizer); err != nil { + r.Log.Error(err, "failed to remove finalizer on CNINode on node deletion, will retry") + return ctrl.Result{}, err + } + // wait till CNINode is deleted before recreation as the new object will be created with same name to avoid "object already exists" error + if err := r.waitTillCNINodeDeleted(k8s.NamespacedName(newCNINode)); err != nil { + // raise event if CNINode could not be deleted after removing the finalizer + r.K8sAPI.BroadcastEvent(cniNode, utils.CNINodeDeleteFailed, "CNINode deletion failed and object could not be recreated by the vpc-resource-controller, will retry", + v1.EventTypeWarning) + // requeue here to check if CNINode deletion is successful and retry CNINode deletion if node exists + return ctrl.Result{}, err + } + + r.Log.Info("creating CNINode after it has been deleted as node still exists", "cniNode", newCNINode.Name) + recreateCNINodeCallCount.Inc() + if err := r.createCNINodeFromObj(ctx, newCNINode); err != nil { + recreateCNINodeErrCount.Inc() + // raise event on node publish warning that CNINode is deleted and could not be recreated by controller + utils.SendNodeEventWithNodeName(r.K8sAPI, node.Name, utils.CNINodeCreateFailed, + "CNINode was deleted and failed to be recreated by the vpc-resource-controller", v1.EventTypeWarning, r.Log) + // return nil as deleted and we cannot recreate the object now + return ctrl.Result{}, nil + } + r.Log.Info("successfully recreated CNINode", "cniNode", newCNINode.Name) + } + } + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *CNINodeReconciler) SetupWithManager(mgr ctrl.Manager) error { + if !prometheusRegistered { + prometheusRegister() + } + return ctrl.NewControllerManagedBy(mgr). + For(&v1alpha1.CNINode{}). + WithOptions(controller.Options{MaxConcurrentReconciles: config.MaxNodeConcurrentReconciles}). + Complete(r) +} + +// waitTillCNINodeDeleted waits for CNINode to be deleted with timeout and returns error +func (r *CNINodeReconciler) waitTillCNINodeDeleted(nameSpacedCNINode types.NamespacedName) error { + oldCNINode := &v1alpha1.CNINode{} + + return wait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, time.Second*3, true, func(ctx context.Context) (bool, error) { + if err := r.Client.Get(ctx, nameSpacedCNINode, oldCNINode); err != nil && errors.IsNotFound(err) { + return true, nil + } + return false, nil + }) +} + +// createCNINodeFromObj will create CNINode with backoff and returns error if CNINode is not recreated +func (r *CNINodeReconciler) createCNINodeFromObj(ctx context.Context, newCNINode client.Object) error { + return retry.OnError(retry.DefaultBackoff, func(error) bool { return true }, + func() error { + return r.Client.Create(ctx, newCNINode) + }) +} diff --git a/controllers/crds/suite_test.go b/controllers/crds/suite_test.go new file mode 100644 index 00000000..d0d49241 --- /dev/null +++ b/controllers/crds/suite_test.go @@ -0,0 +1,79 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package crds + +import ( + "path/filepath" + "testing" + + "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + //+kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestAPIs(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +var _ = BeforeSuite(func() { + done := make(chan interface{}) + go func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: false, + } + + var err error + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = v1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + //+kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + }() + Eventually(done, 60).Should(BeClosed()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/go.mod b/go.mod index 1c748137..ff7849cf 100644 --- a/go.mod +++ b/go.mod @@ -46,7 +46,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect - github.com/google/go-cmp v0.6.0 // indirect + github.com/google/go-cmp v0.6.0 github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20230323073829-e72429f035bd // indirect github.com/imdario/mergo v0.3.13 // indirect diff --git a/main.go b/main.go index 5e15688c..83043cb8 100644 --- a/main.go +++ b/main.go @@ -26,8 +26,10 @@ import ( vpcresourcesv1beta1 "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/apps" corecontroller "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/core" + crdcontroller "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/crds" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/api" ec2API "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api" + eniCleaner "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" @@ -317,7 +319,7 @@ func main() { nodeManagerWorkers := asyncWorkers.NewDefaultWorkerPool("node async workers", 10, 1, ctrl.Log.WithName("node async workers"), ctx) nodeManager, err := manager.NewNodeManager(ctrl.Log.WithName("node manager"), resourceManager, - apiWrapper, nodeManagerWorkers, controllerConditions, version.GitVersion, healthzHandler) + apiWrapper, nodeManagerWorkers, controllerConditions, clusterName, version.GitVersion, healthzHandler) if err != nil { ctrl.Log.Error(err, "failed to init node manager") @@ -338,12 +340,17 @@ func main() { os.Exit(1) } - if err := (&ec2API.ENICleaner{ - EC2Wrapper: ec2Wrapper, + cleaner := &eniCleaner.ClusterENICleaner{ ClusterName: clusterName, - Log: ctrl.Log.WithName("eni cleaner"), - VPCID: vpcID, - }).SetupWithManager(ctx, mgr, healthzHandler); err != nil { + } + cleaner.ENICleaner = &eniCleaner.ENICleaner{ + EC2Wrapper: ec2Wrapper, + Manager: cleaner, + VPCID: vpcID, + Log: ctrl.Log.WithName("eniCleaner").WithName("cluster"), + } + + if err := cleaner.SetupWithManager(ctx, mgr, healthzHandler); err != nil { setupLog.Error(err, "unable to start eni cleaner") os.Exit(1) } @@ -393,6 +400,21 @@ func main() { os.Exit(1) } + finalizerManager := k8s.NewDefaultFinalizerManager(mgr.GetClient(), ctrl.Log.WithName("finalizer manager")) + if err = (&crdcontroller.CNINodeReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Context: ctx, + Log: ctrl.Log.WithName("controllers").WithName("CNINode"), + EC2Wrapper: ec2Wrapper, + K8sAPI: k8sApi, + ClusterName: clusterName, + VPCID: vpcID, + FinalizerManager: finalizerManager, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "CNINode") + os.Exit(1) + } // +kubebuilder:scaffold:builder setupLog.Info("setting up webhook server") webhookServer := mgr.GetWebhookServer() diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go index 19f7e104..6bd94eb3 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go @@ -195,6 +195,20 @@ func (mr *MockEC2APIHelperMockRecorder) DetachNetworkInterfaceFromInstance(arg0 return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DetachNetworkInterfaceFromInstance", reflect.TypeOf((*MockEC2APIHelper)(nil).DetachNetworkInterfaceFromInstance), arg0) } +// DisassociateTrunkInterface mocks base method. +func (m *MockEC2APIHelper) DisassociateTrunkInterface(arg0 *string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DisassociateTrunkInterface", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// DisassociateTrunkInterface indicates an expected call of DisassociateTrunkInterface. +func (mr *MockEC2APIHelperMockRecorder) DisassociateTrunkInterface(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DisassociateTrunkInterface", reflect.TypeOf((*MockEC2APIHelper)(nil).DisassociateTrunkInterface), arg0) +} + // GetBranchNetworkInterface mocks base method. func (m *MockEC2APIHelper) GetBranchNetworkInterface(arg0, arg1 *string) ([]*ec2.NetworkInterface, error) { m.ctrl.T.Helper() diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go index f40d94c6..53515c5d 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go @@ -182,6 +182,21 @@ func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfaces(arg0 interface{} return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfaces", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfaces), arg0) } +// DescribeNetworkInterfacesPages mocks base method. +func (m *MockEC2Wrapper) DescribeNetworkInterfacesPages(arg0 *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DescribeNetworkInterfacesPages", arg0) + ret0, _ := ret[0].([]*ec2.NetworkInterface) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DescribeNetworkInterfacesPages indicates an expected call of DescribeNetworkInterfacesPages. +func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfacesPages(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfacesPages", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfacesPages), arg0) +} + // DescribeSubnets mocks base method. func (m *MockEC2Wrapper) DescribeSubnets(arg0 *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) { m.ctrl.T.Helper() @@ -227,6 +242,20 @@ func (mr *MockEC2WrapperMockRecorder) DetachNetworkInterface(arg0 interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DetachNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DetachNetworkInterface), arg0) } +// DisassociateTrunkInterface mocks base method. +func (m *MockEC2Wrapper) DisassociateTrunkInterface(arg0 *ec2.DisassociateTrunkInterfaceInput) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DisassociateTrunkInterface", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// DisassociateTrunkInterface indicates an expected call of DisassociateTrunkInterface. +func (mr *MockEC2WrapperMockRecorder) DisassociateTrunkInterface(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DisassociateTrunkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DisassociateTrunkInterface), arg0) +} + // ModifyNetworkInterfaceAttribute mocks base method. func (m *MockEC2Wrapper) ModifyNetworkInterfaceAttribute(arg0 *ec2.ModifyNetworkInterfaceAttributeInput) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) { m.ctrl.T.Helper() diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_k8swrapper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_k8swrapper.go index 9b376ede..93e151d5 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_k8swrapper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_k8swrapper.go @@ -96,17 +96,17 @@ func (mr *MockK8sWrapperMockRecorder) BroadcastEvent(arg0, arg1, arg2, arg3 inte } // CreateCNINode mocks base method. -func (m *MockK8sWrapper) CreateCNINode(arg0 *v10.Node) error { +func (m *MockK8sWrapper) CreateCNINode(arg0 *v10.Node, arg1 string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateCNINode", arg0) + ret := m.ctrl.Call(m, "CreateCNINode", arg0, arg1) ret0, _ := ret[0].(error) return ret0 } // CreateCNINode indicates an expected call of CreateCNINode. -func (mr *MockK8sWrapperMockRecorder) CreateCNINode(arg0 interface{}) *gomock.Call { +func (mr *MockK8sWrapperMockRecorder) CreateCNINode(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateCNINode", reflect.TypeOf((*MockK8sWrapper)(nil).CreateCNINode), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateCNINode", reflect.TypeOf((*MockK8sWrapper)(nil).CreateCNINode), arg0, arg1) } // GetCNINode mocks base method. diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go index ac4b1c73..59aae85c 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go @@ -64,18 +64,6 @@ func (mr *MockTrunkENIMockRecorder) CreateAndAssociateBranchENIs(arg0, arg1, arg return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateAndAssociateBranchENIs", reflect.TypeOf((*MockTrunkENI)(nil).CreateAndAssociateBranchENIs), arg0, arg1, arg2) } -// DeleteAllBranchENIs mocks base method. -func (m *MockTrunkENI) DeleteAllBranchENIs() { - m.ctrl.T.Helper() - m.ctrl.Call(m, "DeleteAllBranchENIs") -} - -// DeleteAllBranchENIs indicates an expected call of DeleteAllBranchENIs. -func (mr *MockTrunkENIMockRecorder) DeleteAllBranchENIs() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteAllBranchENIs", reflect.TypeOf((*MockTrunkENI)(nil).DeleteAllBranchENIs)) -} - // DeleteCooledDownENIs mocks base method. func (m *MockTrunkENI) DeleteCooledDownENIs() { m.ctrl.T.Helper() diff --git a/pkg/aws/ec2/api/cleanup/eni_cleanup.go b/pkg/aws/ec2/api/cleanup/eni_cleanup.go new file mode 100644 index 00000000..edc58558 --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/eni_cleanup.go @@ -0,0 +1,221 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + + ec2Errors "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/errors" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/go-logr/logr" + kerrors "k8s.io/apimachinery/pkg/util/errors" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" +) + +// NetworkInterfaceManager interface allows to define the ENI filters and checks if ENI should be deleted for different callers like in the periodic cleanup routine or +// during node termination +type NetworkInterfaceManager interface { + GetENITagFilters() []*ec2.Filter + ShouldDeleteENI(eniID *string) bool + UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) + UpdateCleanupMetrics(vpcrcAvailableCount int, vpccniAvailableCount int, leakedENICount int) +} + +type ENICleaner struct { + EC2Wrapper api.EC2Wrapper + Manager NetworkInterfaceManager + VPCID string + Log logr.Logger +} + +// common filters for describing network interfaces +var CommonNetworkInterfaceFilters = []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, +} + +// ClusterENICleaner periodically deletes leaked network interfaces(provisioned by the controller or VPC-CNI) in the cluster +type ClusterENICleaner struct { + ClusterName string + shutdown bool + ctx context.Context + availableENIs map[string]struct{} + *ENICleaner +} + +func (e *ClusterENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error { + e.ctx = ctx + e.availableENIs = make(map[string]struct{}) + healthzHandler.AddControllersHealthCheckers( + map[string]healthz.Checker{ + "health-interface-cleaner": rcHealthz.SimplePing("interface cleanup", e.Log), + }, + ) + + return mgr.Add(e) +} + +// StartENICleaner starts the ENI Cleaner routine that cleans up dangling ENIs created by the controller +func (e *ClusterENICleaner) Start(ctx context.Context) error { + e.Log.Info("starting eni clean up routine") + + // Start routine to listen for shut down signal, on receiving the signal it set shutdown to true + go func() { + <-ctx.Done() + e.shutdown = true + }() + // Perform ENI cleanup after fixed time intervals till shut down variable is set to true on receiving the shutdown + // signal + for !e.shutdown { + e.DeleteLeakedResources() + time.Sleep(config.ENICleanUpInterval) + } + + return nil +} + +// DeleteLeakedResources describes all the network interfaces in available status that are created by the controller or VPC-CNI +// This is called by periodically by ClusterENICleaner which deletes available ENIs cluster-wide, and by the NodeTermination cleaner on node termination +// The available ENIs are deleted if ShouldDeleteENI is true, defined in the respective cleaners +// The function also updates metrics for the periodic cleanup routine and the node termination cleanup +func (e *ENICleaner) DeleteLeakedResources() error { + var errors []error + availableENIs := make(map[string]struct{}) + vpcrcAvailableCount := 0 + vpccniAvailableCount := 0 + leakedENICount := 0 + + filters := CommonNetworkInterfaceFilters + // Append the VPC-ID deep filter for the paginated call + filters = append(filters, []*ec2.Filter{ + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(e.VPCID)}, + }, + }...) + // get cleaner specific filters + filters = append(filters, e.Manager.GetENITagFilters()...) + describeNetworkInterfaceIp := &ec2.DescribeNetworkInterfacesInput{ + Filters: filters, + } + for { + describeNetworkInterfaceOp, err := e.EC2Wrapper.DescribeNetworkInterfaces(describeNetworkInterfaceIp) + if err != nil { + e.Log.Error(err, "failed to describe network interfaces, cleanup will be retried in next cycle") + return err + } + for _, nwInterface := range describeNetworkInterfaceOp.NetworkInterfaces { + if e.Manager.ShouldDeleteENI(nwInterface.NetworkInterfaceId) { + tagMap := utils.GetTagKeyValueMap(nwInterface.TagSet) + if val, ok := tagMap[config.NetworkInterfaceOwnerTagKey]; ok { + // Increment promethues metrics for number of leaked ENIs cleaned up + switch val { + case config.NetworkInterfaceOwnerTagValue: + vpcrcAvailableCount += 1 + case config.NetworkInterfaceOwnerVPCCNITagValue: + vpccniAvailableCount += 1 + default: + // We should not hit this case as we only filter for relevant tag values, log error and continue if unexpected ENIs found + e.Log.Error(fmt.Errorf("found available ENI not created by VPC-CNI/VPC-RC"), "eniID", *nwInterface.NetworkInterfaceId) + continue + } + } + _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ + NetworkInterfaceId: nwInterface.NetworkInterfaceId, + }) + if err != nil { + if !strings.Contains(err.Error(), ec2Errors.NotFoundInterfaceID) { // ignore InvalidNetworkInterfaceID.NotFound error + // append err and continue, we will retry deletion in the next period/reconcile + leakedENICount += 1 + errors = append(errors, fmt.Errorf("failed to delete leaked network interface %v:%v", *nwInterface.NetworkInterfaceId, err)) + e.Log.Error(err, "failed to delete the leaked network interface", + "id", *nwInterface.NetworkInterfaceId) + } + continue + } + e.Log.Info("deleted leaked ENI successfully", "eni id", nwInterface.NetworkInterfaceId) + } else { + // Seeing the ENI for the first time, add it to the new list of available network interfaces + availableENIs[*nwInterface.NetworkInterfaceId] = struct{}{} + e.Log.Info("adding eni to to the map of available ENIs, will be removed if present in "+ + "next run too", "id", *nwInterface.NetworkInterfaceId) + } + } + + if describeNetworkInterfaceOp.NextToken == nil { + break + } + describeNetworkInterfaceIp.NextToken = describeNetworkInterfaceOp.NextToken + } + e.Manager.UpdateCleanupMetrics(vpcrcAvailableCount, vpccniAvailableCount, leakedENICount) + e.Manager.UpdateAvailableENIsIfNeeded(&availableENIs) + return kerrors.NewAggregate(errors) +} + +func (e *ClusterENICleaner) GetENITagFilters() []*ec2.Filter { + clusterNameTagKey := fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName) + return []*ec2.Filter{ + { + Name: aws.String("tag:" + clusterNameTagKey), + Values: []*string{aws.String(config.ClusterNameTagValue)}, + }, + } +} + +// ShouldDeleteENI returns true if the ENI should be deleted. +func (e *ClusterENICleaner) ShouldDeleteENI(eniID *string) bool { + if _, exists := e.availableENIs[*eniID]; exists { + return true + } + return false +} + +// Set the available ENIs to the list of ENIs seen in the current cycle +// This adds ENIs that should not be deleted in the current cleanup cycle to the internal cache so it can be deleted in next cycle +// This prevents the clean up routine to remove ENIs that are created by another routines and are yet not attached to +// an instance or associated with a trunk interface in the periodic cleanup routine + +// Example +// 1st cycle, Describe Available NetworkInterface Result - Interface 1, Interface 2, Interface 3 +// 2nd cycle, Describe Available NetworkInterface Result - Interface 2, Interface 3 +// In the second cycle we can conclude that Interface 2 and 3 are leaked because they have been sitting for the time +// interval between cycle 1 and 2 and hence can be safely deleted. And we can also conclude that Interface 1 was +// created but not attached at the the time when 1st cycle ran and hence it should not be deleted. +func (e *ClusterENICleaner) UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) { + e.availableENIs = *eniMap +} + +// Update cluster cleanup metrics for the current cleanup cycle +func (e *ClusterENICleaner) UpdateCleanupMetrics(vpcrcAvailableCount int, vpccniAvailableCount int, leakedENICount int) { + api.VpcRcAvailableClusterENICnt.Set(float64(vpcrcAvailableCount)) + api.VpcCniAvailableClusterENICnt.Set(float64(vpccniAvailableCount)) + api.LeakedENIClusterCleanupCnt.Set(float64(leakedENICount)) +} diff --git a/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go b/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go new file mode 100644 index 00000000..5f382b92 --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go @@ -0,0 +1,246 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "context" + "fmt" + "reflect" + "testing" + + mock_api "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" +) + +var ( + mockClusterName = "cluster-name" + mockNodeName = "node-name" + mockClusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, mockClusterName) + + mockNetworkInterfaceId1 = "eni-000000000000000" + mockNetworkInterfaceId2 = "eni-000000000000001" + mockNetworkInterfaceId3 = "eni-000000000000002" + + mockVPCID = "vpc-0000000000000000" + + mockClusterTagInput = &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(mockVPCID)}, + }, + { + Name: aws.String("tag:" + mockClusterNameTagKey), + Values: []*string{aws.String(config.ClusterNameTagValue)}, + }, + }, + } + + mockNodenameTagInput = &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(mockVPCID)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceNodenameKey), + Values: []*string{aws.String(mockNodeName)}, + }, + }, + } + + mockDescribeInterfaceOpWith1And2 = &ec2.DescribeNetworkInterfacesOutput{ + NetworkInterfaces: []*ec2.NetworkInterface{ + {NetworkInterfaceId: &mockNetworkInterfaceId1}, + {NetworkInterfaceId: &mockNetworkInterfaceId2}, + }, + } + + mockDescribeInterfaceOpWith1And3 = &ec2.DescribeNetworkInterfacesOutput{ + NetworkInterfaces: []*ec2.NetworkInterface{ + {NetworkInterfaceId: &mockNetworkInterfaceId1}, + {NetworkInterfaceId: &mockNetworkInterfaceId3}, + }, + } +) + +func getMockClusterENICleaner(ctrl *gomock.Controller) (*ClusterENICleaner, *mock_api.MockEC2Wrapper) { + mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) + mockclusterENICleaner := ClusterENICleaner{ + availableENIs: map[string]struct{}{}, + ctx: context.Background(), + ClusterName: mockClusterName, + } + mockclusterENICleaner.ENICleaner = &ENICleaner{ + EC2Wrapper: mockEC2Wrapper, + Manager: &mockclusterENICleaner, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + VPCID: mockVPCID, + } + return &mockclusterENICleaner, mockEC2Wrapper +} + +func TestENICleaner_DeleteLeakedResources(t *testing.T) { + type fields struct { + mockEC2Wrapper *mock_api.MockEC2Wrapper + clusterENICleaner *ClusterENICleaner + } + testENICleaner_DeleteLeakedResources := []struct { + name string + getENICleaner func() (*ENICleaner, *ClusterENICleaner) + prepare func(f *fields) + assertFirstCall func(f *fields) + assertSecondCall func(f *fields) + }{ + { + name: "ClusterENICleaner, verifies leaked ENIs are deleted in the periodic cleanup routine and availableENI is updated", + getENICleaner: func() (*ENICleaner, *ClusterENICleaner) { + mockClusterENICleaner := &ClusterENICleaner{ + ClusterName: mockClusterName, + ctx: context.Background(), + availableENIs: map[string]struct{}{}, + } + mockClusterENICleaner.ENICleaner = &ENICleaner{ + Manager: mockClusterENICleaner, + VPCID: mockVPCID, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + } + return mockClusterENICleaner.ENICleaner, mockClusterENICleaner + }, + prepare: func(f *fields) { + gomock.InOrder( + // Return network interface 1 and 2 in first cycle + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfaces(mockClusterTagInput). + Return(mockDescribeInterfaceOpWith1And2, nil), + // Return network interface 1 and 3 in the second cycle + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfaces(mockClusterTagInput). + Return(mockDescribeInterfaceOpWith1And3, nil), + // Expect to delete the network interface 1 + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + ) + + }, + assertFirstCall: func(f *fields) { + // After first call, network interface 1 and 2 should be added to the map of available ENIs + assert.True(t, reflect.DeepEqual( + map[string]struct{}{mockNetworkInterfaceId1: {}, mockNetworkInterfaceId2: {}}, f.clusterENICleaner.availableENIs)) + + }, + assertSecondCall: func(f *fields) { + // After second call, network interface 1 should be deleted and network interface 3 added to list + assert.True(t, reflect.DeepEqual( + map[string]struct{}{mockNetworkInterfaceId3: {}}, f.clusterENICleaner.availableENIs)) + }, + }, + { + name: "NodeTerminationENICleaner, verifies ENIs are deleted on node termination", + getENICleaner: func() (*ENICleaner, *ClusterENICleaner) { + mocknodeCleaner := &NodeTerminationCleaner{ + NodeName: mockNodeName, + } + mocknodeCleaner.ENICleaner = &ENICleaner{ + Manager: mocknodeCleaner, + VPCID: mockVPCID, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + } + return mocknodeCleaner.ENICleaner, nil + }, + prepare: func(f *fields) { + gomock.InOrder( + + // Return network interface 1 and 2 in first cycle, expect to call delete on both + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfaces(mockNodenameTagInput). + Return(mockDescribeInterfaceOpWith1And2, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId2}).Return(nil, nil), + // Return network interface 1 and 3 in the second cycle, again expect to call delete on both + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfaces(mockNodenameTagInput). + Return(mockDescribeInterfaceOpWith1And3, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId3}).Return(nil, nil), + ) + }, + }, + } + + for _, tt := range testENICleaner_DeleteLeakedResources { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) + var mockENICleaner *ENICleaner + var mockClusterENICleaner *ClusterENICleaner + if tt.getENICleaner != nil { + mockENICleaner, mockClusterENICleaner = tt.getENICleaner() + } + mockENICleaner.EC2Wrapper = mockEC2Wrapper + f := fields{ + mockEC2Wrapper: mockEC2Wrapper, + clusterENICleaner: mockClusterENICleaner, + } + if tt.prepare != nil { + tt.prepare(&f) + } + + err := mockENICleaner.DeleteLeakedResources() + assert.NoError(t, err) + if tt.assertFirstCall != nil { + tt.assertFirstCall(&f) + } + + err = mockENICleaner.DeleteLeakedResources() + assert.NoError(t, err) + if tt.assertSecondCall != nil { + tt.assertSecondCall(&f) + } + } +} + +// TestENICleaner_StartENICleaner_Shutdown tests that ENICleaner would not start if shutdown is set to true. +func TestENICleaner_StartENICleaner_Shutdown(t *testing.T) { + ctrl := gomock.NewController(t) + eniCleaner, _ := getMockClusterENICleaner(ctrl) + + eniCleaner.shutdown = true + + eniCleaner.Start(context.TODO()) +} diff --git a/pkg/aws/ec2/api/cleanup/node_cleanup.go b/pkg/aws/ec2/api/cleanup/node_cleanup.go new file mode 100644 index 00000000..12c7c0ac --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/node_cleanup.go @@ -0,0 +1,50 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" +) + +// NodeTerminationCleanerto handle resource cleanup at node termination +type NodeTerminationCleaner struct { + NodeName string + *ENICleaner +} + +func (n *NodeTerminationCleaner) GetENITagFilters() []*ec2.Filter { + return []*ec2.Filter{ + { + Name: aws.String("tag:" + config.NetworkInterfaceNodenameKey), + Values: []*string{aws.String(n.NodeName)}, + }, + } +} + +// Return true. As the node is terminating all available ENIs need to be deleted +func (n *NodeTerminationCleaner) ShouldDeleteENI(eniID *string) bool { + return true +} + +func (n *NodeTerminationCleaner) UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) { + // Nothing to do for the node termination cleaner + return +} + +// Updating node termination metrics does not make much sense as it will be updated on each node deletion and does not give us much info +func (n *NodeTerminationCleaner) UpdateCleanupMetrics(vpcrcAvailableCount int, vpccniAvailableCount int, leakedENICount int) { + return +} diff --git a/pkg/aws/ec2/api/cleanup/resource_cleaner.go b/pkg/aws/ec2/api/cleanup/resource_cleaner.go new file mode 100644 index 00000000..d5c7e6d1 --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/resource_cleaner.go @@ -0,0 +1,19 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +// ResourceCleaner interface should be implemented by components that need to delete leaked AWS resources +type ResourceCleaner interface { + DeleteLeakedResources() error +} diff --git a/pkg/aws/ec2/api/eni_cleanup.go b/pkg/aws/ec2/api/eni_cleanup.go deleted file mode 100644 index 583529a8..00000000 --- a/pkg/aws/ec2/api/eni_cleanup.go +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"). You may -// not use this file except in compliance with the License. A copy of the -// License is located at -// -// http://aws.amazon.com/apache2.0/ -// -// or in the "license" file accompanying this file. This file is distributed -// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language governing -// permissions and limitations under the License. - -package api - -import ( - "context" - "fmt" - "strings" - "time" - - "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" - rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" - "github.com/prometheus/client_golang/prometheus" - "golang.org/x/exp/slices" - - ec2Errors "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/errors" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/ec2" - "github.com/go-logr/logr" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/healthz" -) - -type ENICleaner struct { - EC2Wrapper EC2Wrapper - ClusterName string - Log logr.Logger - VPCID string - - availableENIs map[string]struct{} - shutdown bool - clusterNameTagKey string - ctx context.Context -} - -var ( - vpccniAvailableENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "vpc_cni_created_available_eni_count", - Help: "The number of available ENIs created by VPC-CNI that controller will try to delete in each cleanup cycle", - }, - ) - vpcrcAvailableENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "vpc_rc_created_available_eni_count", - Help: "The number of available ENIs created by VPC-RC that controller will try to delete in each cleanup cycle", - }, - ) - leakedENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "leaked_eni_count", - Help: "The number of available ENIs that failed to be deleted by the controller in each cleanup cycle", - }, - ) -) - -func (e *ENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error { - e.clusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName) - e.availableENIs = make(map[string]struct{}) - e.ctx = ctx - - healthzHandler.AddControllersHealthCheckers( - map[string]healthz.Checker{ - "health-interface-cleaner": rcHealthz.SimplePing("interface cleanup", e.Log), - }, - ) - - return mgr.Add(e) -} - -// StartENICleaner starts the ENI Cleaner routine that cleans up dangling ENIs created by the controller -func (e *ENICleaner) Start(ctx context.Context) error { - e.Log.Info("starting eni clean up routine") - // Start routine to listen for shut down signal, on receiving the signal it set shutdown to true - go func() { - <-ctx.Done() - e.shutdown = true - }() - // Perform ENI cleanup after fixed time intervals till shut down variable is set to true on receiving the shutdown - // signal - for !e.shutdown { - e.cleanUpAvailableENIs() - time.Sleep(config.ENICleanUpInterval) - } - - return nil -} - -// cleanUpAvailableENIs describes all the network interfaces in available status that are created by the controller, -// on seeing the a network interface for the first time, it is added to the map of available network interfaces, on -// seeing the network interface for the second time the network interface is deleted. This ensures that we are deleting -// the network interfaces that have been in available for upto the time interval between running the clean up routine. -// This prevents the clean up routine to remove ENIs that are created by another routines and are yet not attached to -// an instance or associated with a trunk interface -// Example, -// 1st cycle, Describe Available NetworkInterface Result - Interface 1, Interface 2, Interface 3 -// 2nd cycle, Describe Available NetworkInterface Result - Interface 2, Interface 3 -// In the second cycle we can conclude that Interface 2 and 3 are leaked because they have been sitting for the time -// interval between cycle 1 and 2 and hence can be safely deleted. And we can also conclude that Interface 1 was -// created but not attached at the the time when 1st cycle ran and hence it should not be deleted. -func (e *ENICleaner) cleanUpAvailableENIs() { - vpcrcAvailableCount := 0 - vpccniAvailableCount := 0 - leakedENICount := 0 - - describeNetworkInterfaceIp := &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("status"), - Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, - }, - { - Name: aws.String("tag:" + e.clusterNameTagKey), - Values: []*string{aws.String(config.ClusterNameTagValue)}, - }, - { - Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), - Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, - config.NetworkInterfaceOwnerVPCCNITagValue}), - }, - { - Name: aws.String("vpc-id"), - Values: []*string{aws.String(e.VPCID)}, - }, - }, - } - - availableENIs := make(map[string]struct{}) - - for { - describeNetworkInterfaceOp, err := e.EC2Wrapper.DescribeNetworkInterfaces(describeNetworkInterfaceIp) - if err != nil { - e.Log.Error(err, "failed to describe network interfaces, will retry") - return - } - - for _, networkInterface := range describeNetworkInterfaceOp.NetworkInterfaces { - if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists { - // Increment promethues metrics for number of leaked ENIs cleaned up - if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool { - return *tag.Key == config.NetworkInterfaceOwnerTagKey - }); tagIdx != -1 { - switch *networkInterface.TagSet[tagIdx].Value { - case config.NetworkInterfaceOwnerTagValue: - vpcrcAvailableCount += 1 - case config.NetworkInterfaceOwnerVPCCNITagValue: - vpccniAvailableCount += 1 - default: - // We should not hit this case as we only filter for relevant tag values, log error and continue if unexpected ENIs found - e.Log.Error(fmt.Errorf("found available ENI not created by VPC-CNI/VPC-RC"), "eniID", *networkInterface.NetworkInterfaceId) - continue - } - } - - // The ENI in available state has been sitting for at least the eni clean up interval and it should - // be removed - _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ - NetworkInterfaceId: networkInterface.NetworkInterfaceId, - }) - if err != nil { - if !strings.Contains(err.Error(), ec2Errors.NotFoundInterfaceID) { // ignore InvalidNetworkInterfaceID.NotFound error - // append err and continue, we will retry deletion in the next period/reconcile - leakedENICount += 1 - - e.Log.Error(err, "failed to delete the dangling network interface", - "id", *networkInterface.NetworkInterfaceId) - } - continue - } - e.Log.Info("deleted dangling ENI successfully", - "eni id", networkInterface.NetworkInterfaceId) - } else { - // Seeing the ENI for the first time, add it to the new list of available network interfaces - availableENIs[*networkInterface.NetworkInterfaceId] = struct{}{} - e.Log.V(1).Info("adding eni to to the map of available ENIs, will be removed if present in "+ - "next run too", "id", *networkInterface.NetworkInterfaceId) - } - } - - if describeNetworkInterfaceOp.NextToken == nil { - break - } - - describeNetworkInterfaceIp.NextToken = describeNetworkInterfaceOp.NextToken - } - - // Update leaked ENI metrics - vpcrcAvailableENICnt.Set(float64(vpcrcAvailableCount)) - vpccniAvailableENICnt.Set(float64(vpccniAvailableCount)) - leakedENICnt.Set(float64(leakedENICount)) - // Set the available ENIs to the list of ENIs seen in the current cycle - e.availableENIs = availableENIs -} diff --git a/pkg/aws/ec2/api/eni_cleanup_test.go b/pkg/aws/ec2/api/eni_cleanup_test.go deleted file mode 100644 index e00127c0..00000000 --- a/pkg/aws/ec2/api/eni_cleanup_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"). You may -// not use this file except in compliance with the License. A copy of the -// License is located at -// -// http://aws.amazon.com/apache2.0/ -// -// or in the "license" file accompanying this file. This file is distributed -// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language governing -// permissions and limitations under the License. - -package api - -import ( - "context" - "fmt" - "reflect" - "testing" - - mock_api "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api" - "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/ec2" - "github.com/golang/mock/gomock" - "github.com/stretchr/testify/assert" - "sigs.k8s.io/controller-runtime/pkg/log/zap" -) - -var ( - mockClusterName = "cluster-name" - mockClusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, mockClusterName) - - mockNetworkInterfaceId1 = "eni-000000000000000" - mockNetworkInterfaceId2 = "eni-000000000000001" - mockNetworkInterfaceId3 = "eni-000000000000002" - - mockVPCID = "vpc-0000000000000000" - - mockDescribeNetworkInterfaceIp = &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("status"), - Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, - }, - { - Name: aws.String("tag:" + mockClusterNameTagKey), - Values: []*string{aws.String(config.ClusterNameTagValue)}, - }, - { - Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), - Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, - config.NetworkInterfaceOwnerVPCCNITagValue}), - }, - { - Name: aws.String("vpc-id"), - Values: []*string{aws.String(mockVPCID)}, - }, - }, - } - mockDescribeInterfaceOpWith1And2 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{ - {NetworkInterfaceId: &mockNetworkInterfaceId1}, - {NetworkInterfaceId: &mockNetworkInterfaceId2}, - }, - } - mockDescribeInterfaceOpWith1And3 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{ - {NetworkInterfaceId: &mockNetworkInterfaceId1}, - {NetworkInterfaceId: &mockNetworkInterfaceId3}, - }, - } -) - -func getMockENICleaner(ctrl *gomock.Controller) (*ENICleaner, *mock_api.MockEC2Wrapper) { - mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) - return &ENICleaner{ - EC2Wrapper: mockEC2Wrapper, - availableENIs: map[string]struct{}{}, - Log: zap.New(zap.UseDevMode(true)), - VPCID: mockVPCID, - clusterNameTagKey: mockClusterNameTagKey, - ctx: context.Background(), - }, mockEC2Wrapper -} - -func TestENICleaner_cleanUpAvailableENIs(t *testing.T) { - ctrl := gomock.NewController(t) - eniCleaner, mockWrapper := getMockENICleaner(ctrl) - - gomock.InOrder( - // Return network interface 1 and 2 in first cycle - mockWrapper.EXPECT().DescribeNetworkInterfaces(mockDescribeNetworkInterfaceIp). - Return(mockDescribeInterfaceOpWith1And2, nil), - // Return network interface 1 and 3 in the second cycle - mockWrapper.EXPECT().DescribeNetworkInterfaces(mockDescribeNetworkInterfaceIp). - Return(mockDescribeInterfaceOpWith1And3, nil), - // Expect to delete the network interface 1 - mockWrapper.EXPECT().DeleteNetworkInterface( - &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), - ) - - // Run 1st cycle, network interface 1 and 2 should be added to the map of available ENIs - eniCleaner.cleanUpAvailableENIs() - assert.True(t, reflect.DeepEqual( - map[string]struct{}{mockNetworkInterfaceId1: {}, mockNetworkInterfaceId2: {}}, eniCleaner.availableENIs)) - - // Run the second cycle, this time network interface 1 should be deleted and network interface 3 added to list - eniCleaner.cleanUpAvailableENIs() - assert.True(t, reflect.DeepEqual( - map[string]struct{}{mockNetworkInterfaceId3: {}}, eniCleaner.availableENIs)) -} - -// TestENICleaner_StartENICleaner_Shutdown tests that ENICleaner would not start if shutdown is set to true. -func TestENICleaner_StartENICleaner_Shutdown(t *testing.T) { - ctrl := gomock.NewController(t) - eniCleaner, _ := getMockENICleaner(ctrl) - - eniCleaner.shutdown = true - - eniCleaner.Start(context.TODO()) -} diff --git a/pkg/aws/ec2/api/helper.go b/pkg/aws/ec2/api/helper.go index 14a7864f..17ff32eb 100644 --- a/pkg/aws/ec2/api/helper.go +++ b/pkg/aws/ec2/api/helper.go @@ -93,6 +93,7 @@ type EC2APIHelper interface { GetInstanceDetails(instanceId *string) (*ec2.Instance, error) AssignIPv4ResourcesAndWaitTillReady(eniID string, resourceType config.ResourceType, count int) ([]string, error) UnassignIPv4Resources(eniID string, resourceType config.ResourceType, resources []string) error + DisassociateTrunkInterface(associationID *string) error } // CreateNetworkInterface creates a new network interface @@ -603,7 +604,6 @@ func (h *ec2APIHelper) GetBranchNetworkInterface(trunkID, subnetID *string) ([]* describeNetworkInterfacesInput.NextToken = describeNetworkInterfaceOutput.NextToken } - return nwInterfaces, nil } @@ -623,3 +623,10 @@ func (h *ec2APIHelper) DetachAndDeleteNetworkInterface(attachmentID *string, nwI } return nil } + +func (h *ec2APIHelper) DisassociateTrunkInterface(associationID *string) error { + input := &ec2.DisassociateTrunkInterfaceInput{ + AssociationId: associationID, + } + return h.ec2Wrapper.DisassociateTrunkInterface(input) +} diff --git a/pkg/aws/ec2/api/helper_test.go b/pkg/aws/ec2/api/helper_test.go index 6981c99a..b2dd7644 100644 --- a/pkg/aws/ec2/api/helper_test.go +++ b/pkg/aws/ec2/api/helper_test.go @@ -179,7 +179,7 @@ var ( tokenID = "token" - describeTrunkInterfaceInput1 = &ec2.DescribeNetworkInterfacesInput{ + describeTrunkInterfaceInput = &ec2.DescribeNetworkInterfacesInput{ Filters: []*ec2.Filter{ { Name: aws.String("tag:" + config.TrunkENIIDTag), @@ -191,26 +191,9 @@ var ( }, }, } - describeTrunkInterfaceInput2 = &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("tag:" + config.TrunkENIIDTag), - Values: []*string{&trunkInterfaceId}, - }, - { - Name: aws.String("subnet-id"), - Values: aws.StringSlice([]string{subnetId}), - }, - }, - NextToken: &tokenID, - } - describeTrunkInterfaceOutput1 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface1}, - NextToken: &tokenID, - } - describeTrunkInterfaceOutput2 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface2}, + describeTrunkInterfaceOutput = &ec2.DescribeNetworkInterfacesOutput{ + NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface1, &networkInterface2}, } describeTrunkInterfaceAssociationsInput = &ec2.DescribeTrunkInterfaceAssociationsInput{ @@ -1190,14 +1173,13 @@ func TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Prefix_Describ } // TestEc2APIHelper_GetBranchNetworkInterface_PaginatedResults returns the branch interface when paginated results is returned -func TestEc2APIHelper_GetBranchNetworkInterface_PaginatedResults(t *testing.T) { +func TestEc2APIHelper_GetBranchNetworkInterface(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput1).Return(describeTrunkInterfaceOutput1, nil) - mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput2).Return(describeTrunkInterfaceOutput2, nil) + mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput).Return(describeTrunkInterfaceOutput, nil) branchInterfaces, err := ec2ApiHelper.GetBranchNetworkInterface(&trunkInterfaceId, &subnetId) assert.NoError(t, err) diff --git a/pkg/aws/ec2/api/wrapper.go b/pkg/aws/ec2/api/wrapper.go index 81ca97a3..97a20f3b 100644 --- a/pkg/aws/ec2/api/wrapper.go +++ b/pkg/aws/ec2/api/wrapper.go @@ -21,6 +21,7 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + "k8s.io/apimachinery/pkg/util/wait" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" @@ -52,12 +53,14 @@ type EC2Wrapper interface { AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) UnassignPrivateIPAddresses(input *ec2.UnassignPrivateIpAddressesInput) (*ec2.UnassignPrivateIpAddressesOutput, error) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) + DescribeNetworkInterfacesPages(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) DescribeSubnets(input *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) AssociateTrunkInterface(input *ec2.AssociateTrunkInterfaceInput) (*ec2.AssociateTrunkInterfaceOutput, error) DescribeTrunkInterfaceAssociations(input *ec2.DescribeTrunkInterfaceAssociationsInput) (*ec2.DescribeTrunkInterfaceAssociationsOutput, error) ModifyNetworkInterfaceAttribute(input *ec2.ModifyNetworkInterfaceAttributeInput) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) CreateNetworkInterfacePermission(input *ec2.CreateNetworkInterfacePermissionInput) (*ec2.CreateNetworkInterfacePermissionOutput, error) + DisassociateTrunkInterface(input *ec2.DisassociateTrunkInterfaceInput) error } var ( @@ -253,7 +256,7 @@ var ( ec2AssociateTrunkInterfaceAPIErrCnt = prometheus.NewCounter( prometheus.CounterOpts{ Name: "ec2_associate_trunk_interface_api_err_count", - Help: "The number of errors encountered while disassociating Trunk with Branch ENI", + Help: "The number of errors encountered while associating Trunk with Branch ENI", }, ) @@ -307,6 +310,54 @@ var ( }, ) + ec2DisassociateTrunkInterfaceCallCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_disassociate_trunk_interface_api_req_count", + Help: "The number of calls made to EC2 to remove association between a branch and trunk network interface", + }, + ) + + ec2DisassociateTrunkInterfaceErrCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_disassociate_trunk_interface_api_err_count", + Help: "The number of errors encountered while removing association between a branch and trunk network interface", + }, + ) + + VpcCniAvailableClusterENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "vpc_cni_created_available_eni_count", + Help: "The number of available ENIs created by VPC-CNI that will tried to be deleted by the controller", + }, + ) + + VpcRcAvailableClusterENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "vpc_rc_created_available_eni_count", + Help: "The number of available ENIs created by VPC-RC that will tried to be deleted by the controller", + }, + ) + + LeakedENIClusterCleanupCnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "leaked_eni_count", + Help: "The number of available ENIs that failed to be deleted by the controller", + }, + ) + + ec2DescribeNetworkInterfacesPagesAPICallCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_describe_network_interfaces_pages_api_call_count", + Help: "The number of calls made to describe network interfaces (paginated)", + }, + ) + ec2DescribeNetworkInterfacesPagesAPIErrCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_describe_network_interfaces_pages_api_err_count", + Help: "The number of errors encountered while making call to describe network interfaces (paginated)", + }, + ) + prometheusRegistered = false ) @@ -345,9 +396,13 @@ func prometheusRegister() { ec2modifyNetworkInterfaceAttributeAPICallCnt, ec2modifyNetworkInterfaceAttributeAPIErrCnt, ec2APICallLatencies, - vpccniAvailableENICnt, - vpcrcAvailableENICnt, - leakedENICnt, + ec2DisassociateTrunkInterfaceCallCnt, + ec2DisassociateTrunkInterfaceErrCnt, + VpcRcAvailableClusterENICnt, + VpcCniAvailableClusterENICnt, + LeakedENIClusterCleanupCnt, + ec2DescribeNetworkInterfacesPagesAPICallCnt, + ec2DescribeNetworkInterfacesPagesAPIErrCnt, ) prometheusRegistered = true @@ -376,7 +431,7 @@ func NewEC2Wrapper(roleARN, clusterName, region string, log logr.Logger) (EC2Wra // Role ARN is passed, assume the role ARN to make EC2 API Calls if roleARN != "" { - // Create the instance service client with low QPS, it will be only used fro associate branch to trunk calls + // Create the instance service client with low QPS, it will be only used for associate branch to trunk calls log.Info("Creating INSTANCE service client with configured QPS", "QPS", config.InstanceServiceClientQPS, "Burst", config.InstanceServiceClientBurst) instanceServiceClient, err := ec2Wrapper.getInstanceServiceClient(config.InstanceServiceClientQPS, config.InstanceServiceClientBurst, instanceSession) @@ -640,6 +695,38 @@ func (e *ec2Wrapper) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfa return describeNetworkInterfacesOutput, err } +// DescribeNetworkInterfacesPages returns network interfaces that match the filters specified in the input with MaxResult set to 1000(max value) +// This API is used during periodic ENI cleanup routine and trunk initialization to list all network interfaces that match the given filters (vpc-id or subnet-id, and tag) +// Only required fields, network interface ID and tag set, is populated to avoid consuming extra memory +func (e *ec2Wrapper) DescribeNetworkInterfacesPages(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) { + var networkInterfaces []*ec2.NetworkInterface + input.MaxResults = aws.Int64(config.DescribeNetworkInterfacesMaxResults) + + start := time.Now() + if err := e.userServiceClient.DescribeNetworkInterfacesPages(input, func(output *ec2.DescribeNetworkInterfacesOutput, _ bool) bool { + ec2APICallCnt.Inc() + ec2DescribeNetworkInterfacesPagesAPICallCnt.Inc() + //Currently only network interface ID and the tag set is require, only add required details to avoid consuming extra memory + for _, nwInterface := range output.NetworkInterfaces { + networkInterfaces = append(networkInterfaces, &ec2.NetworkInterface{ + NetworkInterfaceId: nwInterface.NetworkInterfaceId, + TagSet: nwInterface.TagSet, + }) + } + // Add jitter to avoid EC2 API throttling in the account + time.Sleep(wait.Jitter(500*time.Millisecond, 0.5)) + return true + + }); err != nil { + ec2APIErrCnt.Inc() + ec2DescribeNetworkInterfacesPagesAPIErrCnt.Inc() + return nil, err + } + ec2APICallLatencies.WithLabelValues("describe_network_interfaces_pages").Observe(timeSinceMs(start)) + + return networkInterfaces, nil +} + func (e *ec2Wrapper) AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) { start := time.Now() assignPrivateIPAddressesOutput, err := e.userServiceClient.AssignPrivateIpAddresses(input) @@ -789,3 +876,19 @@ func (e *ec2Wrapper) CreateNetworkInterfacePermission(input *ec2.CreateNetworkIn return output, err } + +func (e *ec2Wrapper) DisassociateTrunkInterface(input *ec2.DisassociateTrunkInterfaceInput) error { + start := time.Now() + // Using the instance role + _, err := e.instanceServiceClient.DisassociateTrunkInterface(input) + ec2APICallLatencies.WithLabelValues("disassociate_branch_from_trunk").Observe(timeSinceMs(start)) + + ec2APICallCnt.Inc() + ec2DisassociateTrunkInterfaceCallCnt.Inc() + + if err != nil { + ec2APIErrCnt.Inc() + ec2DisassociateTrunkInterfaceErrCnt.Inc() + } + return err +} diff --git a/pkg/config/type.go b/pkg/config/type.go index d7673640..894b57b8 100644 --- a/pkg/config/type.go +++ b/pkg/config/type.go @@ -51,6 +51,8 @@ const ( OSWindows = "windows" // OSLinux is the the linux Operating System OSLinux = "linux" + // Node termination finalizer on CNINode CRD + NodeTerminationFinalizer = "networking.k8s.aws/resource-cleanup" ) // EC2 Tags @@ -65,6 +67,8 @@ const ( NetworkInterfaceOwnerTagKey = "eks:eni:owner" NetworkInterfaceOwnerTagValue = "eks-vpc-resource-controller" NetworkInterfaceOwnerVPCCNITagValue = "amazon-vpc-cni" + NetworkInterfaceNodenameKey = "node.k8s.amazonaws.com/nodename" + CNINodeClusterNameKey = "cluster.k8s.amazonaws.com/name" ) const ( @@ -86,6 +90,17 @@ const ( VpcCNIDaemonSetName = "aws-node" OldVPCControllerDeploymentName = "vpc-resource-controller" BranchENICooldownPeriodKey = "branch-eni-cooldown" + // DescribeNetworkInterfacesMaxResults defines the max number of requests to return for DescribeNetworkInterfaces API call + DescribeNetworkInterfacesMaxResults = int64(1000) +) + +// MaxNodeConcurrentReconciles is the number of go routines that can invoke +// Reconcile in parallel. Since Node Reconciler, performs local operation +// on cache only a single go routine should be sufficient. Using more than +// one routines to help high rate churn and larger nodes groups restarting +// when the controller has to be restarted for various reasons. +const ( + MaxNodeConcurrentReconciles = 10 ) type ResourceType string diff --git a/pkg/k8s/finalizer.go b/pkg/k8s/finalizer.go new file mode 100644 index 00000000..dd612ec8 --- /dev/null +++ b/pkg/k8s/finalizer.go @@ -0,0 +1,84 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package k8s + +import ( + "context" + + "github.com/go-logr/logr" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +type FinalizerManager interface { + AddFinalizers(ctx context.Context, object client.Object, finalizers ...string) error + RemoveFinalizers(ctx context.Context, object client.Object, finalizers ...string) error +} + +func NewDefaultFinalizerManager(k8sClient client.Client, log logr.Logger) FinalizerManager { + return &defaultFinalizerManager{ + k8sClient: k8sClient, + log: log, + } +} + +type defaultFinalizerManager struct { + k8sClient client.Client + log logr.Logger +} + +func (m *defaultFinalizerManager) AddFinalizers(ctx context.Context, obj client.Object, finalizers ...string) error { + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + if err := m.k8sClient.Get(ctx, NamespacedName(obj), obj); err != nil { + return err + } + + oldObj := obj.DeepCopyObject().(client.Object) + needsUpdate := false + for _, finalizer := range finalizers { + if !controllerutil.ContainsFinalizer(obj, finalizer) { + m.log.Info("adding finalizer", "object", obj.GetObjectKind().GroupVersionKind().Kind, "name", obj.GetName(), "finalizer", finalizer) + controllerutil.AddFinalizer(obj, finalizer) + needsUpdate = true + } + } + if !needsUpdate { + return nil + } + return m.k8sClient.Patch(ctx, obj, client.MergeFromWithOptions(oldObj, client.MergeFromWithOptimisticLock{})) + }) +} + +func (m *defaultFinalizerManager) RemoveFinalizers(ctx context.Context, obj client.Object, finalizers ...string) error { + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + if err := m.k8sClient.Get(ctx, NamespacedName(obj), obj); err != nil { + return err + } + + oldObj := obj.DeepCopyObject().(client.Object) + needsUpdate := false + for _, finalizer := range finalizers { + if controllerutil.ContainsFinalizer(obj, finalizer) { + m.log.Info("removing finalizer", "object", obj.GetObjectKind().GroupVersionKind().Kind, "name", obj.GetName(), "finalizer", finalizer) + controllerutil.RemoveFinalizer(obj, finalizer) + needsUpdate = true + } + } + if !needsUpdate { + return nil + } + return m.k8sClient.Patch(ctx, obj, client.MergeFromWithOptions(oldObj, client.MergeFromWithOptimisticLock{})) + }) +} diff --git a/pkg/k8s/utils.go b/pkg/k8s/utils.go new file mode 100644 index 00000000..bdf47cc7 --- /dev/null +++ b/pkg/k8s/utils.go @@ -0,0 +1,23 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package k8s + +import ( + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// NamespacedName returns the namespaced name for k8s objects +func NamespacedName(obj client.Object) client.ObjectKey { + return client.ObjectKeyFromObject(obj) +} diff --git a/pkg/k8s/wrapper.go b/pkg/k8s/wrapper.go index 681274f0..73d9b9c8 100644 --- a/pkg/k8s/wrapper.go +++ b/pkg/k8s/wrapper.go @@ -15,7 +15,9 @@ package k8s import ( "context" + "fmt" "strconv" + "time" "github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1" rcv1alpha1 "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" @@ -31,6 +33,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes/scheme" corev1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/tools/record" @@ -80,7 +83,7 @@ type K8sWrapper interface { AddLabelToManageNode(node *v1.Node, labelKey string, labelValue string) (bool, error) ListEvents(ops []client.ListOption) (*eventsv1.EventList, error) GetCNINode(namespacedName types.NamespacedName) (*rcv1alpha1.CNINode, error) - CreateCNINode(node *v1.Node) error + CreateCNINode(node *v1.Node, clusterName string) error } // k8sWrapper is the wrapper object with the client @@ -227,13 +230,21 @@ func (k *k8sWrapper) ListEvents(ops []client.ListOption) (*eventsv1.EventList, e func (k *k8sWrapper) GetCNINode(namespacedName types.NamespacedName) (*rcv1alpha1.CNINode, error) { cninode := &rcv1alpha1.CNINode{} - if err := k.cacheClient.Get(k.context, namespacedName, cninode); err != nil { - return cninode, err - } - return cninode, nil + // There could be cases when CNINode is deleted and re-created shortly after by CNINode controller so retry to get CNINode with backoff + err := retry.OnError(wait.Backoff{ + Duration: time.Millisecond * 100, + Factor: 3.0, + Jitter: 0.1, + Steps: 5, + }, func(error) bool { return true }, + func() error { + return k.cacheClient.Get(k.context, namespacedName, cninode) + }) + + return cninode, err } -func (k *k8sWrapper) CreateCNINode(node *v1.Node) error { +func (k *k8sWrapper) CreateCNINode(node *v1.Node, clusterName string) error { cniNode := &rcv1alpha1.CNINode{ ObjectMeta: metav1.ObjectMeta{ Name: node.Name, @@ -248,6 +259,16 @@ func (k *k8sWrapper) CreateCNINode(node *v1.Node) error { Controller: lo.ToPtr(true), }, }, + Labels: map[string]string{ + // OS is a standard label & is set by Kubernetes, so we can skip checking if it is set + config.NodeLabelOS: node.ObjectMeta.Labels[config.NodeLabelOS], + }, + Finalizers: []string{config.NodeTerminationFinalizer}, // finalizer to clean up leaked ENIs at node termination + }, + Spec: rcv1alpha1.CNINodeSpec{ + Tags: map[string]string{ + fmt.Sprintf(config.CNINodeClusterNameKey): clusterName, + }, }, } diff --git a/pkg/k8s/wrapper_test.go b/pkg/k8s/wrapper_test.go index 572a26e9..00c63fc2 100644 --- a/pkg/k8s/wrapper_test.go +++ b/pkg/k8s/wrapper_test.go @@ -37,6 +37,7 @@ import ( var ( nodeName = "node-name" + mockClusterName = "cluster-name" mockResourceName = config.ResourceNamePodENI existingResource = "extended-resource" @@ -45,6 +46,9 @@ var ( TypeMeta: metav1.TypeMeta{}, ObjectMeta: metav1.ObjectMeta{ Name: nodeName, + Labels: map[string]string{ + config.NodeLabelOS: config.OSLinux, + }, }, Spec: v1.NodeSpec{}, Status: v1.NodeStatus{ @@ -196,12 +200,12 @@ func TestK8sWrapper_CreateCNINodeWithExistedObject_NoError(t *testing.T) { ctrl := gomock.NewController(t) wrapper, _, _ := getMockK8sWrapperWithClient(ctrl, []runtime.Object{mockCNINode}) - err := wrapper.CreateCNINode(mockNode) + err := wrapper.CreateCNINode(mockNode, mockClusterName) assert.NoError(t, err) cniNode, err := wrapper.GetCNINode(types.NamespacedName{Name: mockNode.Name}) assert.NoError(t, err) assert.Equal(t, mockNode.Name, cniNode.Name) - err = wrapper.CreateCNINode(mockNode) + err = wrapper.CreateCNINode(mockNode, mockClusterName) assert.NoError(t, err) } @@ -209,7 +213,7 @@ func TestK8sWrapper_CreateCNINode_NoError(t *testing.T) { ctrl := gomock.NewController(t) wrapper, _, _ := getMockK8sWrapperWithClient(ctrl, []runtime.Object{mockCNINode}) - err := wrapper.CreateCNINode(mockNode) + err := wrapper.CreateCNINode(mockNode, mockClusterName) assert.NoError(t, err) cniNode, err := wrapper.GetCNINode(types.NamespacedName{Name: mockNode.Name}) assert.NoError(t, err) diff --git a/pkg/node/manager/manager.go b/pkg/node/manager/manager.go index c9b6f9c2..be48c959 100644 --- a/pkg/node/manager/manager.go +++ b/pkg/node/manager/manager.go @@ -57,6 +57,7 @@ type manager struct { worker asyncWorker.Worker conditions condition.Conditions controllerVersion string + clusterName string } // Manager to perform operation on list of managed/un-managed node @@ -98,7 +99,7 @@ type AsyncOperationJob struct { // NewNodeManager returns a new node manager func NewNodeManager(logger logr.Logger, resourceManager resource.ResourceManager, - wrapper api.Wrapper, worker asyncWorker.Worker, conditions condition.Conditions, controllerVersion string, healthzHandler *rcHealthz.HealthzHandler) (Manager, error) { + wrapper api.Wrapper, worker asyncWorker.Worker, conditions condition.Conditions, clusterName string, controllerVersion string, healthzHandler *rcHealthz.HealthzHandler) (Manager, error) { manager := &manager{ resourceManager: resourceManager, @@ -108,6 +109,7 @@ func NewNodeManager(logger logr.Logger, resourceManager resource.ResourceManager worker: worker, conditions: conditions, controllerVersion: controllerVersion, + clusterName: clusterName, } // add health check on subpath for node manager @@ -224,7 +226,7 @@ func (m *manager) CreateCNINodeIfNotExisting(node *v1.Node) error { ); err != nil { if apierrors.IsNotFound(err) { m.Log.Info("Will create a new CNINode", "CNINodeName", node.Name) - return m.wrapper.K8sAPI.CreateCNINode(node) + return m.wrapper.K8sAPI.CreateCNINode(node, m.clusterName) } return err } else { @@ -451,7 +453,7 @@ func (m *manager) performAsyncOperation(job interface{}) (ctrl.Result, error) { log.V(1).Info("successfully performed node operation") return ctrl.Result{}, nil } - log.Error(err, "failed to performed node operation") + log.Error(err, "failed to perform node operation") return ctrl.Result{}, nil } diff --git a/pkg/node/manager/manager_test.go b/pkg/node/manager/manager_test.go index c8450927..ecb3f48b 100644 --- a/pkg/node/manager/manager_test.go +++ b/pkg/node/manager/manager_test.go @@ -50,6 +50,7 @@ var ( subnetID = "subnet-id" nodeName = "ip-192-168-55-73.us-west-2.compute.internal" securityGroupId = "sg-1" + mockClusterName = "cluster-name" eniConfig = &v1alpha1.ENIConfig{ ObjectMeta: metav1.ObjectMeta{ @@ -147,6 +148,7 @@ func NewMock(ctrl *gomock.Controller, existingDataStore map[string]node.Node) Mo worker: mockAsyncWorker, resourceManager: mockResourceManager, conditions: mockConditions, + clusterName: mockClusterName, }, MockK8sAPI: mockK8sWrapper, MockEC2API: mockEC2APIHelper, @@ -165,7 +167,7 @@ func Test_GetNewManager(t *testing.T) { mock := NewMock(ctrl, map[string]node.Node{}) mock.MockWorker.EXPECT().StartWorkerPool(gomock.Any()).Return(nil) - manager, err := NewNodeManager(zap.New(), nil, api.Wrapper{}, mock.MockWorker, mock.MockConditions, "v1.3.1", healthzHandler) + manager, err := NewNodeManager(zap.New(), nil, api.Wrapper{}, mock.MockWorker, mock.MockConditions, mockClusterName, "v1.3.1", healthzHandler) assert.NotNil(t, manager) assert.NoError(t, err) @@ -179,7 +181,7 @@ func Test_GetNewManager_Error(t *testing.T) { mock := NewMock(ctrl, map[string]node.Node{}) mock.MockWorker.EXPECT().StartWorkerPool(gomock.Any()).Return(mockError) - manager, err := NewNodeManager(zap.New(), nil, api.Wrapper{}, mock.MockWorker, mock.MockConditions, "v1.3.1", healthzHandler) + manager, err := NewNodeManager(zap.New(), nil, api.Wrapper{}, mock.MockWorker, mock.MockConditions, mockClusterName, "v1.3.1", healthzHandler) assert.NotNil(t, manager) assert.Error(t, err, mockError) @@ -201,7 +203,7 @@ func Test_AddNode_CNINode_Existing(t *testing.T) { mock.MockK8sAPI.EXPECT().GetNode(nodeName).Return(v1Node, nil).Times(1) mock.MockWorker.EXPECT().SubmitJob(gomock.All(NewAsyncOperationMatcher(expectedJob))) - mock.MockK8sAPI.EXPECT().CreateCNINode(v1Node).Return(nil).Times(0) + mock.MockK8sAPI.EXPECT().CreateCNINode(v1Node, mockClusterName).Return(nil).Times(0) mock.MockK8sAPI.EXPECT().GetCNINode(types.NamespacedName{Name: v1Node.Name}).Return(&rcV1alpha1.CNINode{}, nil).Times(2) err := mock.Manager.AddNode(nodeName) @@ -224,7 +226,7 @@ func Test_AddNode_CNINode_Not_Existing(t *testing.T) { mock.MockK8sAPI.EXPECT().GetNode(nodeName).Return(v1Node, nil).Times(1) mock.MockWorker.EXPECT().SubmitJob(gomock.All(NewAsyncOperationMatcher(expectedJob))) - mock.MockK8sAPI.EXPECT().CreateCNINode(v1Node).Return(nil).Times(1) + mock.MockK8sAPI.EXPECT().CreateCNINode(v1Node, mock.Manager.clusterName).Return(nil).Times(1) mock.MockK8sAPI.EXPECT().GetCNINode(types.NamespacedName{Name: v1Node.Name}).Return( &rcV1alpha1.CNINode{}, apierrors.NewNotFound(schema.GroupResource{Group: "vpcresources.k8s.aws", Resource: "1"}, "test")). Times(2) @@ -245,7 +247,7 @@ func Test_AddNode_UnManaged(t *testing.T) { nodeWithoutLabel.Labels = map[string]string{} mock.MockK8sAPI.EXPECT().GetNode(nodeName).Return(nodeWithoutLabel, nil).Times(1) - mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithoutLabel).Return(nil).Times(1) + mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithoutLabel, mock.Manager.clusterName).Return(nil).Times(1) mock.MockK8sAPI.EXPECT().GetCNINode(types.NamespacedName{Name: nodeWithoutLabel.Name}).Return( &rcV1alpha1.CNINode{}, apierrors.NewNotFound(schema.GroupResource{Group: "vpcresources.k8s.aws", Resource: "1"}, "test")). Times(1) // unmanaged node won't check custom networking subnets and call GetCNINode only once @@ -287,7 +289,7 @@ func Test_AddNode_CustomNetworking_CNINode(t *testing.T) { mock.MockK8sAPI.EXPECT().GetNode(nodeName).Return(nodeWithENIConfig, nil) mock.MockK8sAPI.EXPECT().GetENIConfig(eniConfigName).Return(eniConfig, nil).Times(1) mock.MockWorker.EXPECT().SubmitJob(gomock.All(NewAsyncOperationMatcher(job))) - mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig).Return(nil).Times(1) + mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig, mock.Manager.clusterName).Return(nil).Times(1) mock.MockK8sAPI.EXPECT().GetCNINode(types.NamespacedName{Name: nodeWithENIConfig.Name}).Return(&rcV1alpha1.CNINode{ Spec: rcV1alpha1.CNINodeSpec{ Features: []rcV1alpha1.Feature{{Name: rcV1alpha1.CustomNetworking, Value: eniConfigName}}, @@ -327,7 +329,7 @@ func Test_AddNode_CustomNetworking_CNINode_No_EniConfigName(t *testing.T) { mock.MockK8sAPI.EXPECT().BroadcastEvent(nodeWithENIConfig, utils.EniConfigNameNotFoundReason, msg, v1.EventTypeWarning).Times(1) mock.MockK8sAPI.EXPECT().GetENIConfig(eniConfigName).Return(eniConfig, nil).Times(0) mock.MockWorker.EXPECT().SubmitJob(gomock.All(NewAsyncOperationMatcher(job))).Times(0) - mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig).Return(nil).Times(1) + mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig, mock.Manager.clusterName).Return(nil).Times(1) mock.MockK8sAPI.EXPECT().GetCNINode(types.NamespacedName{Name: nodeWithENIConfig.Name}).Return(&rcV1alpha1.CNINode{ Spec: rcV1alpha1.CNINodeSpec{ Features: []rcV1alpha1.Feature{{Name: rcV1alpha1.CustomNetworking}}, @@ -364,7 +366,7 @@ func Test_AddNode_CustomNetworking_NodeLabel(t *testing.T) { mock.MockK8sAPI.EXPECT().GetNode(nodeName).Return(nodeWithENIConfig, nil) mock.MockK8sAPI.EXPECT().GetENIConfig(eniConfigName).Return(eniConfig, nil).Times(1) mock.MockWorker.EXPECT().SubmitJob(gomock.All(NewAsyncOperationMatcher(job))) - mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig).Return(nil).Times(1) + mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig, mock.Manager.clusterName).Return(nil).Times(1) mock.MockK8sAPI.EXPECT().GetCNINode(types.NamespacedName{Name: nodeWithENIConfig.Name}).Return(&rcV1alpha1.CNINode{ Spec: rcV1alpha1.CNINodeSpec{ Features: []rcV1alpha1.Feature{{Name: rcV1alpha1.CustomNetworking}}, @@ -397,7 +399,7 @@ func Test_AddNode_CustomNetworking_Incorrect_ENIConfig(t *testing.T) { mock.MockK8sAPI.EXPECT().GetNode(nodeName).Return(nodeWithENIConfig, nil) mock.MockK8sAPI.EXPECT().GetENIConfig(eniConfigName).Return(eniConfig_empty_sg, nil) mock.MockWorker.EXPECT().SubmitJob(gomock.All(NewAsyncOperationMatcher(job))) - mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig).Return(nil).Times(1) + mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig, mockClusterName).Return(nil).Times(1) mock.MockK8sAPI.EXPECT().GetCNINode(types.NamespacedName{Name: nodeWithENIConfig.Name}).Return(&rcV1alpha1.CNINode{ Spec: rcV1alpha1.CNINodeSpec{ Features: []rcV1alpha1.Feature{{Name: rcV1alpha1.CustomNetworking}}, @@ -421,7 +423,7 @@ func Test_AddNode_CustomNetworking_NoENIConfig(t *testing.T) { nodeWithENIConfig.Labels[config.CustomNetworkingLabel] = eniConfigName mock.MockK8sAPI.EXPECT().GetNode(nodeName).Return(nodeWithENIConfig, nil) - mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig).Return(nil).Times(1) + mock.MockK8sAPI.EXPECT().CreateCNINode(nodeWithENIConfig, mock.Manager.clusterName).Return(nil).Times(1) mock.MockK8sAPI.EXPECT().GetENIConfig(eniConfigName).Return(nil, mockError) mock.MockK8sAPI.EXPECT().GetCNINode(types.NamespacedName{Name: nodeWithENIConfig.Name}).Return(&rcV1alpha1.CNINode{}, apierrors.NewNotFound(schema.GroupResource{Group: "vpcresources.k8s.aws", Resource: "1"}, "test")) diff --git a/pkg/provider/branch/provider.go b/pkg/provider/branch/provider.go index 0242c55d..f9eb2409 100644 --- a/pkg/provider/branch/provider.go +++ b/pkg/provider/branch/provider.go @@ -76,7 +76,7 @@ var ( // NodeDeleteRequeueRequestDelay represents the time after which the resources belonging to a node will be cleaned // up after receiving the actual node delete event. - NodeDeleteRequeueRequestDelay = time.Minute * 5 + NodeDeleteRequeueRequestDelay = time.Minute * 1 prometheusRegistered = false ) @@ -175,8 +175,8 @@ func (b *branchENIProvider) InitResource(instance ec2.EC2Instance) error { } branchProviderOperationLatency.WithLabelValues(operationInitTrunk, "1").Observe(timeSinceMs(start)) - // Add the Trunk ENI to cache - if err := b.addTrunkToCache(nodeName, trunkENI); err != nil { + // Add the Trunk ENI to cache if it does not already exist + if err := b.addTrunkToCache(nodeName, trunkENI); err != nil && err != ErrTrunkExistInCache { branchProviderOperationsErrCount.WithLabelValues("add_trunk_to_cache").Inc() return err } @@ -234,12 +234,14 @@ func (b *branchENIProvider) ProcessAsyncJob(job interface{}) (ctrl.Result, error // DeleteNode deletes all the cached branch ENIs associated with the trunk and removes the trunk from the cache. func (b *branchENIProvider) DeleteNode(nodeName string) (ctrl.Result, error) { - trunkENI, isPresent := b.getTrunkFromCache(nodeName) + _, isPresent := b.getTrunkFromCache(nodeName) if !isPresent { return ctrl.Result{}, fmt.Errorf("failed to find node %s", nodeName) } - trunkENI.DeleteAllBranchENIs() + // At this point, the finalizer routine should have deleted all available branch ENIs + // Any leaked ENIs will be deleted by the periodic cleanup routine if cluster is active + // remove trunk from cache and de-initializer the resource provider b.removeTrunkFromCache(nodeName) b.log.Info("de-initialized resource provider successfully", "nodeName", nodeName) diff --git a/pkg/provider/branch/trunk/trunk.go b/pkg/provider/branch/trunk/trunk.go index 1a5e1dd3..5ff3aea6 100644 --- a/pkg/provider/branch/trunk/trunk.go +++ b/pkg/provider/branch/trunk/trunk.go @@ -93,8 +93,6 @@ type TrunkENI interface { Reconcile(pods []v1.Pod) bool // PushENIsToFrontOfDeleteQueue pushes the eni network interfaces to the front of the delete queue PushENIsToFrontOfDeleteQueue(*v1.Pod, []*ENIDetails) - // DeleteAllBranchENIs deletes all the branch ENI associated with the trunk and also clears the cool down queue - DeleteAllBranchENIs() // Introspect returns the state of the Trunk ENI Introspect() IntrospectResponse } @@ -117,6 +115,8 @@ type trunkENI struct { uidToBranchENIMap map[string][]*ENIDetails // deleteQueue is the queue of ENIs that are being cooled down before being deleted deleteQueue []*ENIDetails + // nodeName tag is the tag added to trunk and branch ENIs created on the node + nodeNameTag []*awsEC2.Tag } // PodENI is a json convertible structure that stores the Branch ENI details that can be @@ -138,6 +138,8 @@ type ENIDetails struct { deletionTimeStamp time.Time // deleteRetryCount is the deleteRetryCount int + // ID of association between branch and trunk ENI + AssociationID string `json:"associationID"` } type IntrospectResponse struct { @@ -167,6 +169,12 @@ func NewTrunkENI(logger logr.Logger, instance ec2.EC2Instance, helper api.EC2API ec2ApiHelper: helper, instance: instance, uidToBranchENIMap: make(map[string][]*ENIDetails), + nodeNameTag: []*awsEC2.Tag{ + { + Key: aws.String(config.NetworkInterfaceNodenameKey), + Value: aws.String(instance.Name()), + }, + }, } } @@ -219,7 +227,7 @@ func (t *trunkENI) InitTrunk(instance ec2.EC2Instance, podList []v1.Pod) error { } trunk, err := t.ec2ApiHelper.CreateAndAttachNetworkInterface(&instanceID, aws.String(t.instance.SubnetID()), - t.instance.CurrentInstanceSecurityGroups(), nil, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil) + t.instance.CurrentInstanceSecurityGroups(), t.nodeNameTag, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil) if err != nil { trunkENIOperationsErrCount.WithLabelValues("create_trunk_eni").Inc() return err @@ -373,6 +381,8 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st Value: &t.trunkENIId, }, } + // append the nodeName tag to add to branch ENIs + tags = append(tags, t.nodeNameTag...) // Create Branch ENI nwInterface, err = t.ec2ApiHelper.CreateNetworkInterface(&BranchEniDescription, aws.String(t.instance.SubnetID()), securityGroups, tags, nil, nil) @@ -399,12 +409,14 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st newENIs = append(newENIs, newENI) // Associate Branch to trunk - _, err = t.ec2ApiHelper.AssociateBranchToTrunk(&t.trunkENIId, nwInterface.NetworkInterfaceId, vlanID) + var associationOutput *awsEC2.AssociateTrunkInterfaceOutput + associationOutput, err = t.ec2ApiHelper.AssociateBranchToTrunk(&t.trunkENIId, nwInterface.NetworkInterfaceId, vlanID) if err != nil { err = fmt.Errorf("associating branch to trunk, %w", err) trunkENIOperationsErrCount.WithLabelValues("associate_branch").Inc() break } + newENI.AssociationID = *associationOutput.InterfaceAssociation.AssociationId } if err != nil { @@ -422,31 +434,6 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st return newENIs, nil } -// DeleteAllBranchENIs deletes all the branch ENIs associated with the trunk and all the ENIs present in the cool down -// queue, this is the last API call to the the Trunk ENI before it is removed from cache -func (t *trunkENI) DeleteAllBranchENIs() { - // Delete all the branch used by the pod on this trunk ENI - // Since after this call, the trunk will be removed from cache. No need to clean up its branch map - for _, podENIs := range t.uidToBranchENIMap { - for _, eni := range podENIs { - err := t.deleteENI(eni) - if err != nil { - // Just log, if the ENI still exists it can be removed by the dangling ENI cleaner routine - t.log.Error(err, "failed to delete eni", "eni id", eni.ID) - } - } - } - - // Delete all the branch ENI present in the cool down queue - for _, eni := range t.deleteQueue { - err := t.deleteENI(eni) - if err != nil { - // Just log, if the ENI still exists it can be removed by the dangling ENI cleaner routine - t.log.Error(err, "failed to delete eni", "eni id", eni.ID) - } - } -} - // DeleteBranchNetworkInterface deletes the branch network interface and returns an error in case of failure to delete func (t *trunkENI) PushBranchENIsToCoolDownQueue(UID string) { // Lock is required as Reconciler is also performing operation concurrently @@ -500,7 +487,19 @@ func (t *trunkENI) DeleteCooledDownENIs() { // deleteENIs deletes the provided ENIs and frees up the Vlan assigned to then func (t *trunkENI) deleteENI(eniDetail *ENIDetails) (err error) { - // Delete Branch network interface first + // Disassociate branch ENI from trunk if association ID exists and delete branch network interface + if eniDetail.AssociationID != "" { + err = t.ec2ApiHelper.DisassociateTrunkInterface(&eniDetail.AssociationID) + if err != nil { + trunkENIOperationsErrCount.WithLabelValues("disassociate_trunk_error").Inc() + if !strings.Contains(err.Error(), ec2Errors.NotFoundAssociationID) { + t.log.Error(err, "failed to disassociate branch ENI from trunk, will try to delete the branch ENI") + // Not returning error here, fallback to force branch ENI deletion + } else { + t.log.Info("AssociationID not found when disassociating branch from trunk ENI, it is already disassociated so delete the branch ENI") + } + } + } err = t.ec2ApiHelper.DeleteNetworkInterface(&eniDetail.ID) if err != nil { branchENIOperationsFailureCount.WithLabelValues("delete_branch_error").Inc() diff --git a/pkg/provider/branch/trunk/trunk_test.go b/pkg/provider/branch/trunk/trunk_test.go index 2f0eed90..05346311 100644 --- a/pkg/provider/branch/trunk/trunk_test.go +++ b/pkg/provider/branch/trunk/trunk_test.go @@ -62,8 +62,9 @@ var ( Name: MockPodName1, Namespace: MockPodNamespace1, Annotations: map[string]string{config.ResourceNamePodENI: "[{\"eniId\":\"eni-00000000000000000\",\"ifAddress\":\"FF:FF:FF:FF:FF:FF\",\"privateIp\":\"192.168.0.15\"," + - "\"ipv6Addr\":\"2600::\",\"vlanId\":1,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"},{\"eniId\":\"eni-00000000000000001\",\"ifAddress\":\"" + - "FF:FF:FF:FF:FF:F9\",\"privateIp\":\"192.168.0.16\",\"ipv6Addr\":\"2600::1\",\"vlanId\":2,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"}]"}}, + "\"ipv6Addr\":\"2600::\",\"vlanId\":1,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\",\"AssociationId\":\"trunk-assoc-0000000000000000\"},{\"eniId\":\"eni-00000000000000001\"" + + ",\"ifAddress\":\"FF:FF:FF:FF:FF:F9\",\"privateIp\":\"192.168.0.16\",\"ipv6Addr\":\"2600::1\",\"vlanId\":2,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"," + + "\"AssociationId\":\"trunk-assoc-0000000000000001\"}]"}}, Spec: v1.PodSpec{NodeName: NodeName}, Status: v1.PodStatus{}, } @@ -93,20 +94,23 @@ var ( SecurityGroups = []string{SecurityGroup1, SecurityGroup2} // Branch Interface 1 - Branch1Id = "eni-00000000000000000" - MacAddr1 = "FF:FF:FF:FF:FF:FF" - BranchIp1 = "192.168.0.15" - BranchV6Ip1 = "2600::" - VlanId1 = 1 + Branch1Id = "eni-00000000000000000" + MacAddr1 = "FF:FF:FF:FF:FF:FF" + BranchIp1 = "192.168.0.15" + BranchV6Ip1 = "2600::" + VlanId1 = 1 + MockAssociationID1 = "trunk-assoc-0000000000000000" + MockAssociationID2 = "trunk-assoc-0000000000000001" EniDetails1 = &ENIDetails{ - ID: Branch1Id, - MACAdd: MacAddr1, - IPV4Addr: BranchIp1, - IPV6Addr: BranchV6Ip1, - VlanID: VlanId1, - SubnetCIDR: SubnetCidrBlock, - SubnetV6CIDR: SubnetV6CidrBlock, + ID: Branch1Id, + MACAdd: MacAddr1, + IPV4Addr: BranchIp1, + IPV6Addr: BranchV6Ip1, + VlanID: VlanId1, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + AssociationID: MockAssociationID1, } branchENIs1 = []*ENIDetails{EniDetails1} @@ -126,13 +130,14 @@ var ( VlanId2 = 2 EniDetails2 = &ENIDetails{ - ID: Branch2Id, - MACAdd: MacAddr2, - IPV4Addr: BranchIp2, - IPV6Addr: BranchV6Ip2, - VlanID: VlanId2, - SubnetCIDR: SubnetCidrBlock, - SubnetV6CIDR: SubnetV6CidrBlock, + ID: Branch2Id, + MACAdd: MacAddr2, + IPV4Addr: BranchIp2, + IPV6Addr: BranchV6Ip2, + VlanID: VlanId2, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + AssociationID: MockAssociationID2, } BranchInterface2 = &awsEc2.NetworkInterface{ @@ -142,8 +147,6 @@ var ( Ipv6Address: &BranchV6Ip2, } - branchENIs2 = []*ENIDetails{EniDetails2} - // Trunk Interface trunkId = "eni-00000000000000002" trunkInterface = &awsEc2.NetworkInterface{ @@ -189,17 +192,27 @@ var ( }, } - trunkAssociationsBranch1And2 = []*awsEc2.TrunkInterfaceAssociation{ - { - BranchInterfaceId: &EniDetails1.ID, - VlanId: aws.Int64(int64(EniDetails1.VlanID)), + mockAssociationOutput1 = &awsEc2.AssociateTrunkInterfaceOutput{ + InterfaceAssociation: &awsEc2.TrunkInterfaceAssociation{ + AssociationId: &MockAssociationID1, }, - { - BranchInterfaceId: &EniDetails2.ID, - VlanId: aws.Int64(int64(EniDetails2.VlanID)), + } + mockAssociationOutput2 = &awsEc2.AssociateTrunkInterfaceOutput{ + InterfaceAssociation: &awsEc2.TrunkInterfaceAssociation{ + AssociationId: &MockAssociationID2, }, } + ENIDetailsMissingAssociationID = &ENIDetails{ + ID: Branch2Id, + MACAdd: MacAddr2, + IPV4Addr: BranchIp2, + IPV6Addr: BranchV6Ip2, + VlanID: VlanId2, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + } + MockError = fmt.Errorf("mock error") ) @@ -229,11 +242,17 @@ func getMockTrunk() trunkENI { log: log, usedVlanIds: make([]bool, MaxAllocatableVlanIds), uidToBranchENIMap: map[string][]*ENIDetails{}, + nodeNameTag: []*awsEc2.Tag{ + { + Key: aws.String(config.NetworkInterfaceNodenameKey), + Value: aws.String(FakeInstance.Name()), + }, + }, } } func TestNewTrunkENI(t *testing.T) { - trunkENI := NewTrunkENI(zap.New(), nil, nil) + trunkENI := NewTrunkENI(zap.New(), FakeInstance, nil) assert.NotNil(t, trunkENI) } @@ -400,34 +419,106 @@ func TestTrunkENI_getBranchInterfaceMap_EmptyList(t *testing.T) { assert.Zero(t, len(branchENIsMap)) } -// TestTrunkENI_deleteENI tests the trunk is deleted and vlan ID freed in case of no errors +// TestTrunkENI_deleteENI tests deleting branch ENI func TestTrunkENI_deleteENI(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.markVlanAssigned(VlanId1) - - ec2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) - - err := trunkENI.deleteENI(EniDetails1) - assert.NoError(t, err) - assert.False(t, trunkENI.usedVlanIds[VlanId1]) -} - -// TestTrunkENI_deleteENI_Fail tests if the ENI deletion fails then the vlan ID is not freed -func TestTrunkENI_deleteENI_Fail(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() + type args struct { + eniDetail *ENIDetails + VlanID int + } + type fields struct { + mockEC2APIHelper *mock_api.MockEC2APIHelper + trunkENI *trunkENI + } + testTrunkENI_deleteENI := []struct { + name string + prepare func(f *fields) + args args + wantErr bool + asserts func(f *fields) + }{ + { + name: "Vland_Freed, verifies VLANID is freed when branch ENI is deleted", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "Vland_NotFreed, verifies VLANID is not freed when branch ENI delete fails", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(MockError) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: true, + asserts: func(f *fields) { + assert.True(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "DisassociateTrunkInterface_Fails, verifies branch ENI is deleted when disassociation fails for backward compatibility", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(MockError) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "MissingAssociationID, verifies DisassociateTrunkInterface is skipped when association ID is missing and branch ENI is deleted for backward compatibility", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch2Id).Return(nil) + }, + args: args{ + eniDetail: ENIDetailsMissingAssociationID, + VlanID: VlanId2, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId2]) + }, + }, + } - trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.markVlanAssigned(VlanId1) + for _, tt := range testTrunkENI_deleteENI { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() - ec2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(MockError) + trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) + trunkENI.markVlanAssigned(tt.args.VlanID) - err := trunkENI.deleteENI(EniDetails1) - assert.Error(t, MockError, err) - assert.True(t, trunkENI.usedVlanIds[VlanId1]) + f := fields{ + mockEC2APIHelper: ec2APIHelper, + trunkENI: trunkENI, + } + if tt.prepare != nil { + tt.prepare(&f) + } + err := f.trunkENI.deleteENI(tt.args.eniDetail) + assert.Equal(t, err != nil, tt.wantErr) + if tt.asserts != nil { + tt.asserts(&f) + } + }) + } } // TestTrunkENI_DeleteCooledDownENIs_NotCooledDown tests that ENIs that have not cooled down are not deleted @@ -463,7 +554,9 @@ func TestTrunkENI_DeleteCooledDownENIs_NoDeletionTimeStamp(t *testing.T) { trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(nil) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID2).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) @@ -487,6 +580,7 @@ func TestTrunkENI_DeleteCooledDownENIs_CooledDownResource(t *testing.T) { trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(nil) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) @@ -512,16 +606,17 @@ func TestTrunkENI_DeleteCooledDownENIs_DeleteFailed(t *testing.T) { trunkENI.usedVlanIds[VlanId2] = true trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) - gomock.InOrder( - coolDown.EXPECT().GetCoolDownPeriod().Return(time.Second*60).AnyTimes(), - ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(MockError).Times(MaxDeleteRetries), - ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil), - ) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) mockK8sAPI.EXPECT().GetConfigMap(config.VpcCniConfigMapName, config.KubeSystemNamespace).Return(createCoolDownMockCM("60"), nil) cooldown.InitCoolDownPeriod(mockK8sAPI, zap.New(zap.UseDevMode(true)).WithName("cooldown")) + coolDown.EXPECT().GetCoolDownPeriod().Return(time.Second * 60).AnyTimes() + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil).Times(MaxDeleteRetries) + ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(MockError).Times(MaxDeleteRetries) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID2).Return(nil) + ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil) + trunkENI.DeleteCooledDownENIs() assert.Zero(t, len(trunkENI.deleteQueue)) } @@ -598,7 +693,7 @@ func TestTrunkENI_InitTrunk(t *testing.T) { f.mockEC2APIHelper.EXPECT().GetInstanceNetworkInterface(&InstanceId).Return([]*awsEc2.InstanceNetworkInterface{}, nil) f.mockInstance.EXPECT().GetHighestUnusedDeviceIndex().Return(freeIndex, nil) f.mockInstance.EXPECT().SubnetID().Return(SubnetId) - f.mockEC2APIHelper.EXPECT().CreateAndAttachNetworkInterface(&InstanceId, &SubnetId, SecurityGroups, nil, + f.mockEC2APIHelper.EXPECT().CreateAndAttachNetworkInterface(&InstanceId, &SubnetId, SecurityGroups, f.trunkENI.nodeNameTag, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil).Return(trunkInterface, nil) }, // Pass nil to set the instance to fields.mockInstance in the function later @@ -729,23 +824,6 @@ func TestTrunkENI_InitTrunk(t *testing.T) { } } -// TestTrunkENI_DeleteAllBranchENIs tests all branch ENI associated with the trunk are deleted -func TestTrunkENI_DeleteAllBranchENIs(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - trunkENI, mockEC2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.uidToBranchENIMap[PodUID] = branchENIs1 - trunkENI.uidToBranchENIMap[PodUID2] = branchENIs2 - trunkENI.deleteQueue = append(trunkENI.deleteQueue, branchENIs1[0]) - - // Since we added the same branch ENIs in the cool down queue and in the pod to eni map - mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil).Times(2) - mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch2Id).Return(nil) - - trunkENI.DeleteAllBranchENIs() -} - // TestTrunkENI_CreateAndAssociateBranchENIs test branch is created and associated with the trunk and valid eni details // are returned func TestTrunkENI_CreateAndAssociateBranchENIs(t *testing.T) { @@ -761,11 +839,11 @@ func TestTrunkENI_CreateAndAssociateBranchENIs(t *testing.T) { mockInstance.EXPECT().SubnetV6CidrBlock().Return(SubnetV6CidrBlock).Times(2) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan1Tag, nil, nil).Return(BranchInterface1, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil) - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan2Tag, + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil) + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(mockAssociationOutput2, nil) eniDetails, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, SecurityGroups, 2) expectedENIDetails := []*ENIDetails{EniDetails1, EniDetails2} @@ -795,11 +873,11 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_InstanceSecurityGroup(t *testing. mockInstance.EXPECT().CurrentInstanceSecurityGroups().Return(InstanceSecurityGroup) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, InstanceSecurityGroup, - vlan1Tag, nil, nil).Return(BranchInterface1, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil) + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, InstanceSecurityGroup, - vlan2Tag, nil, nil).Return(BranchInterface2, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, nil) + append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(mockAssociationOutput2, nil) eniDetails, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, []string{}, 2) expectedENIDetails := []*ENIDetails{EniDetails1, EniDetails2} @@ -829,16 +907,16 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_ErrorAssociate(t *testing.T) { gomock.InOrder( mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan1Tag, nil, nil).Return(BranchInterface1, nil), - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil), + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil), + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil), mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan2Tag, nil, nil).Return(BranchInterface2, nil), + append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil), mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, MockError), ) _, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, SecurityGroups, 2) assert.Error(t, MockError, err) - assert.Equal(t, []*ENIDetails{EniDetails1, EniDetails2}, trunkENI.deleteQueue) + assert.Equal(t, []*ENIDetails{EniDetails1, ENIDetailsMissingAssociationID}, trunkENI.deleteQueue) } // TestTrunkENI_CreateAndAssociateBranchENIs_ErrorCreate tests if error is returned on associate then the created interfaces @@ -856,10 +934,10 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_ErrorCreate(t *testing.T) { mockInstance.EXPECT().SubnetV6CidrBlock().Return(SubnetV6CidrBlock).Times(1) gomock.InOrder( - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan1Tag, + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil), - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil), - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan2Tag, + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil), + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(nil, MockError), ) diff --git a/pkg/utils/events.go b/pkg/utils/events.go index 6afef7ad..4ac5d03c 100644 --- a/pkg/utils/events.go +++ b/pkg/utils/events.go @@ -23,12 +23,13 @@ import ( const ( UnsupportedInstanceTypeReason = "Unsupported" InsufficientCidrBlocksReason = "InsufficientCidrBlocks" - CNINodeCreatedReason = "CNINodeCreation" NodeTrunkInitiatedReason = "NodeTrunkInitiated" NodeTrunkFailedInitializationReason = "NodeTrunkFailedInit" EniConfigNameNotFoundReason = "EniConfigNameNotFound" VersionNotice = "ControllerVersionNotice" BranchENICoolDownUpdateReason = "BranchENICoolDownPeriodUpdated" + CNINodeDeleteFailed = "CNINodeDeletionFailed" + CNINodeCreateFailed = "CNINodeCreationFailed" ) func SendNodeEventWithNodeName(client k8s.K8sWrapper, nodeName, reason, msg, eventType string, logger logr.Logger) { diff --git a/pkg/utils/set.go b/pkg/utils/set.go index ab7a037e..326ffa6d 100644 --- a/pkg/utils/set.go +++ b/pkg/utils/set.go @@ -13,6 +13,10 @@ package utils +import ( + "github.com/aws/aws-sdk-go/service/ec2" +) + // Difference returns a-b, elements present in a and not in b func Difference[T comparable](a, b []T) (diff []T) { m := make(map[T]struct{}) @@ -35,3 +39,11 @@ func GetKeyValSlice(m map[string]string) (key []string, val []string) { } return } + +func GetTagKeyValueMap(tagSet []*ec2.Tag) map[string]string { + m := make(map[string]string) + for _, tag := range tagSet { + m[*tag.Key] = *tag.Value + } + return m +} diff --git a/test/framework/framework.go b/test/framework/framework.go index f2f65be2..af650606 100644 --- a/test/framework/framework.go +++ b/test/framework/framework.go @@ -17,6 +17,7 @@ import ( eniConfig "github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1" cninode "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" sgp "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/aws/autoscaling" ec2Manager "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/aws/ec2" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/configmap" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/controller" @@ -42,21 +43,22 @@ import ( ) type Framework struct { - Options Options - K8sClient client.Client - ec2Client *ec2.EC2 - DeploymentManager deployment.Manager - PodManager pod.Manager - EC2Manager *ec2Manager.Manager - SAManager serviceaccount.Manager - NSManager namespace.Manager - SGPManager *sgpManager.Manager - SVCManager service.Manager - JobManager jobs.Manager - NodeManager node.Manager - ControllerManager controller.Manager - RBACManager rbac.Manager - ConfigMapManager configmap.Manager + Options Options + K8sClient client.Client + ec2Client *ec2.EC2 + DeploymentManager deployment.Manager + PodManager pod.Manager + EC2Manager *ec2Manager.Manager + SAManager serviceaccount.Manager + NSManager namespace.Manager + SGPManager *sgpManager.Manager + SVCManager service.Manager + JobManager jobs.Manager + NodeManager node.Manager + ControllerManager controller.Manager + RBACManager rbac.Manager + ConfigMapManager configmap.Manager + AutoScalingManager autoscaling.Manager } func New(options Options) *Framework { @@ -91,20 +93,21 @@ func New(options Options) *Framework { ec2 := ec2.New(sess, &aws.Config{Region: aws.String(options.AWSRegion)}) return &Framework{ - K8sClient: k8sClient, - ec2Client: ec2, - PodManager: pod.NewManager(k8sClient, k8sSchema, config), - DeploymentManager: deployment.NewManager(k8sClient), - EC2Manager: ec2Manager.NewManager(ec2, options.AWSVPCID), - SAManager: serviceaccount.NewManager(k8sClient, config), - NSManager: namespace.NewManager(k8sClient), - SGPManager: sgpManager.NewManager(k8sClient), - SVCManager: service.NewManager(k8sClient), - JobManager: jobs.NewManager(k8sClient), - NodeManager: node.NewManager(k8sClient), - ControllerManager: controller.NewManager(k8sClient), - RBACManager: rbac.NewManager(k8sClient), - ConfigMapManager: configmap.NewManager(k8sClient), - Options: options, + K8sClient: k8sClient, + ec2Client: ec2, + PodManager: pod.NewManager(k8sClient, k8sSchema, config), + DeploymentManager: deployment.NewManager(k8sClient), + EC2Manager: ec2Manager.NewManager(ec2, options.AWSVPCID), + SAManager: serviceaccount.NewManager(k8sClient, config), + NSManager: namespace.NewManager(k8sClient), + SGPManager: sgpManager.NewManager(k8sClient), + SVCManager: service.NewManager(k8sClient), + JobManager: jobs.NewManager(k8sClient), + NodeManager: node.NewManager(k8sClient), + ControllerManager: controller.NewManager(k8sClient), + RBACManager: rbac.NewManager(k8sClient), + ConfigMapManager: configmap.NewManager(k8sClient), + AutoScalingManager: autoscaling.NewManager(sess), + Options: options, } } diff --git a/test/framework/options.go b/test/framework/options.go index c4bdbe94..1b7b82c4 100644 --- a/test/framework/options.go +++ b/test/framework/options.go @@ -15,6 +15,8 @@ package framework import ( "flag" + "os" + "strings" "github.com/pkg/errors" "k8s.io/client-go/tools/clientcmd" @@ -32,6 +34,7 @@ type Options struct { AWSRegion string AWSVPCID string ReleasedImageVersion string + ClusterRoleArn string } func (options *Options) BindFlags() { @@ -40,6 +43,7 @@ func (options *Options) BindFlags() { flag.StringVar(&options.AWSRegion, "aws-region", "", `AWS Region for the kubernetes cluster`) flag.StringVar(&options.AWSVPCID, "aws-vpc-id", "", `AWS VPC ID for the kubernetes cluster`) flag.StringVar(&options.ReleasedImageVersion, "latest-released-rc-image-tag", "v1.1.3", `VPC RC latest released image`) + flag.StringVar(&options.ClusterRoleArn, "cluster-role-arn", "", "EKS Cluster role ARN") } func (options *Options) Validate() error { @@ -58,5 +62,10 @@ func (options *Options) Validate() error { if len(options.ReleasedImageVersion) == 0 { return errors.Errorf("%s must be set!", "latest-released-rc-image-tag") } + dir, err := os.Executable() + if err == nil && len(options.ClusterRoleArn) == 0 && strings.Contains(dir, "ec2api") { + return errors.Errorf("%s must be set when running ec2api tests", "cluster-role-arn") + } + return nil } diff --git a/test/framework/resource/aws/autoscaling/manager.go b/test/framework/resource/aws/autoscaling/manager.go new file mode 100644 index 00000000..ea8b67c2 --- /dev/null +++ b/test/framework/resource/aws/autoscaling/manager.go @@ -0,0 +1,64 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package autoscaling + +import ( + "fmt" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/autoscaling" + "github.com/aws/aws-sdk-go/service/autoscaling/autoscalingiface" +) + +type Manager interface { + DescribeAutoScalingGroup(autoScalingGroupName string) ([]*autoscaling.Group, error) + UpdateAutoScalingGroup(asgName string, minSize, maxSize int64) error +} + +type defaultManager struct { + autoscalingiface.AutoScalingAPI +} + +func NewManager(session *session.Session) Manager { + return &defaultManager{ + AutoScalingAPI: autoscaling.New(session), + } +} + +func (d defaultManager) DescribeAutoScalingGroup(autoScalingGroupName string) ([]*autoscaling.Group, error) { + describeAutoScalingGroupIp := &autoscaling.DescribeAutoScalingGroupsInput{ + AutoScalingGroupNames: aws.StringSlice([]string{autoScalingGroupName}), + } + asg, err := d.AutoScalingAPI.DescribeAutoScalingGroups(describeAutoScalingGroupIp) + if err != nil { + return nil, err + } + if len(asg.AutoScalingGroups) == 0 { + return nil, fmt.Errorf("failed to find asg %s", autoScalingGroupName) + } + + return asg.AutoScalingGroups, nil +} + +func (d defaultManager) UpdateAutoScalingGroup(asgName string, minSize, maxSize int64) error { + updateASGInput := &autoscaling.UpdateAutoScalingGroupInput{ + AutoScalingGroupName: aws.String(asgName), + DesiredCapacity: aws.Int64(minSize), // Set DesiredCapacity to minSize + MaxSize: aws.Int64(maxSize), + MinSize: aws.Int64(minSize), + } + _, err := d.AutoScalingAPI.UpdateAutoScalingGroup(updateASGInput) + return err +} diff --git a/test/framework/resource/aws/ec2/manager.go b/test/framework/resource/aws/ec2/manager.go index bbfa69d4..5c086a0c 100644 --- a/test/framework/resource/aws/ec2/manager.go +++ b/test/framework/resource/aws/ec2/manager.go @@ -19,6 +19,7 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/vpc" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" @@ -240,3 +241,42 @@ func (d *Manager) GetPrivateIPv4AddressAndPrefix(instanceID string) ([]string, [ return secondaryIPAddresses, ipV4Prefixes, err } + +func (d *Manager) CreateAndAttachNetworkInterface(subnetID, instanceID, instanceType string) (string, error) { + createENIOp, err := d.ec2Client.CreateNetworkInterface(&ec2.CreateNetworkInterfaceInput{ + SubnetId: aws.String(subnetID), + Description: aws.String("VPC-Resource-Controller integration test ENI"), + }) + if err != nil { + return "", err + } + nwInterfaceID := *createENIOp.NetworkInterface.NetworkInterfaceId + // for test just use the max index - 2 (as trunk maybe attached to max index) + indexID := vpc.Limits[instanceType].NetworkCards[0].MaximumNetworkInterfaces - 2 + _, err = d.ec2Client.AttachNetworkInterface(&ec2.AttachNetworkInterfaceInput{ + InstanceId: aws.String(instanceID), + NetworkInterfaceId: aws.String(nwInterfaceID), + DeviceIndex: aws.Int64(indexID), + }) + return nwInterfaceID, err +} + +func (d *Manager) TerminateInstances(instanceID string) error { + _, err := d.ec2Client.TerminateInstances(&ec2.TerminateInstancesInput{ + InstanceIds: []*string{&instanceID}, + }) + return err +} + +func (d *Manager) DescribeNetworkInterface(nwInterfaceID string) error { + _, err := d.ec2Client.DescribeNetworkInterfaces(&ec2.DescribeNetworkInterfacesInput{ + NetworkInterfaceIds: []*string{&nwInterfaceID}, + }) + return err +} +func (d *Manager) DeleteNetworkInterface(nwInterfaceID string) error { + _, err := d.ec2Client.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ + NetworkInterfaceId: aws.String(nwInterfaceID), + }) + return err +} diff --git a/test/framework/resource/k8s/node/manager.go b/test/framework/resource/k8s/node/manager.go index 6b3f42c9..a7e9e28b 100644 --- a/test/framework/resource/k8s/node/manager.go +++ b/test/framework/resource/k8s/node/manager.go @@ -15,6 +15,7 @@ package node import ( "context" + "strings" cninode "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" @@ -32,6 +33,9 @@ type Manager interface { GetNodeList() (*v1.NodeList, error) GetCNINode(node *v1.Node) (*cninode.CNINode, error) GetCNINodeList() (*cninode.CNINodeList, error) + GetInstanceID(node *v1.Node) string + DeleteCNINode(cniNode *cninode.CNINode) error + UpdateCNINode(oldCNINode, newCNINode *cninode.CNINode) error } type defaultManager struct { @@ -117,3 +121,21 @@ func (d *defaultManager) GetNodeList() (*v1.NodeList, error) { err := d.k8sClient.List(context.TODO(), list) return list, err } + +func (d *defaultManager) GetInstanceID(node *v1.Node) string { + if node.Spec.ProviderID != "" { + id := strings.Split(node.Spec.ProviderID, "/") + return id[len(id)-1] + } + return "" +} + +func (d *defaultManager) DeleteCNINode(cniNode *cninode.CNINode) error { + err := d.k8sClient.Delete(context.Background(), cniNode) + return err +} + +func (d *defaultManager) UpdateCNINode(oldCNINode, newCNINode *cninode.CNINode) error { + err := d.k8sClient.Patch(context.Background(), newCNINode, client.MergeFromWithOptions(oldCNINode, client.MergeFromWithOptimisticLock{})) + return err +} diff --git a/test/framework/resource/k8s/node/wrapper.go b/test/framework/resource/k8s/node/wrapper.go index e9cff281..e20ec448 100644 --- a/test/framework/resource/k8s/node/wrapper.go +++ b/test/framework/resource/k8s/node/wrapper.go @@ -15,30 +15,61 @@ package node import ( "context" + "fmt" + cninode "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/samber/lo" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/wait" ) -func GetNodeAndWaitTillCapacityPresent(manager Manager, ctx context.Context, os string, expectedResource string) *v1.NodeList { - +func GetNodeAndWaitTillCapacityPresent(manager Manager, os string, expectedResource string) *v1.NodeList { observedNodeList := &v1.NodeList{} var err error - err = wait.Poll(utils.PollIntervalShort, utils.ResourceCreationTimeout, func() (bool, error) { - By("checking nodes have capacity present") - observedNodeList, err = manager.GetNodesWithOS(os) - Expect(err).ToNot(HaveOccurred()) - for _, node := range observedNodeList.Items { - _, found := node.Status.Allocatable[v1.ResourceName(expectedResource)] - if !found { - return false, nil + err = wait.PollUntilContextTimeout(context.Background(), utils.PollIntervalShort, utils.ResourceCreationTimeout, true, + func(ctx context.Context) (bool, error) { + By("checking nodes have capacity present") + observedNodeList, err = manager.GetNodesWithOS(os) + Expect(err).ToNot(HaveOccurred()) + for _, node := range observedNodeList.Items { + _, found := node.Status.Allocatable[v1.ResourceName(expectedResource)] + if !found { + return false, nil + } } - } - return true, nil - }) + return true, nil + }) Expect(err).ToNot(HaveOccurred()) return observedNodeList } + +// VerifyCNINodeCount checks if the number of CNINodes is equal to number of nodes in the cluster, and verifies 1:1 mapping between CNINode and Node objects +// Returns nil if count and 1:1 mapping exists, else returns error +func VerifyCNINodeCount(manager Manager) error { + cniNodes, err := manager.GetCNINodeList() + Expect(err).NotTo(HaveOccurred()) + nodes, err := manager.GetNodeList() + Expect(err).NotTo(HaveOccurred()) + By("checking number of CNINodes match number of nodes in the cluster") + isEqual := len(nodes.Items) == len(cniNodes.Items) + if !isEqual { + return fmt.Errorf("number of CNINodes does not match number of nodes in the cluster") + } + + By("checking CNINode list matches node list") + nameMatched := true + for _, node := range nodes.Items { + if !lo.ContainsBy(cniNodes.Items, func(cniNode cninode.CNINode) bool { + return cniNode.Name == node.Name + }) { + nameMatched = false + } + } + if !nameMatched { + return fmt.Errorf("CNINode list does not match node list") + } + return nil +} diff --git a/test/integration/cninode/cninode_suite_test.go b/test/integration/cninode/cninode_suite_test.go new file mode 100644 index 00000000..67b27287 --- /dev/null +++ b/test/integration/cninode/cninode_suite_test.go @@ -0,0 +1,45 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cninode_test + +import ( + "testing" + + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/node" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestCNINode(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "CNINode Test Suite") +} + +var frameWork *framework.Framework +var _ = BeforeSuite(func() { + By("creating a framework") + frameWork = framework.New(framework.GlobalOptions) + + By("verify CNINode count") + err := node.VerifyCNINodeCount(frameWork.NodeManager) + Expect(err).ToNot(HaveOccurred()) +}) + +// Verify CNINode count before and after test remains same +var _ = AfterSuite(func() { + By("verify CNINode count") + err := node.VerifyCNINodeCount(frameWork.NodeManager) + Expect(err).ToNot(HaveOccurred()) +}) diff --git a/test/integration/cninode/cninode_test.go b/test/integration/cninode/cninode_test.go new file mode 100644 index 00000000..140cd0e6 --- /dev/null +++ b/test/integration/cninode/cninode_test.go @@ -0,0 +1,183 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cninode_test + +import ( + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/node" + testUtils "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +var _ = Describe("CNINode test", func() { + Describe("CNINode count verification on adding or removing node", func() { + var oldMinSize int64 + var oldMaxSize int64 + var newMinSize int64 + var newMaxSize int64 + var asgName string + BeforeEach(func() { + By("getting autoscaling group name") + asgName = ListNodesAndGetAutoScalingGroupName() + asg, err := frameWork.AutoScalingManager.DescribeAutoScalingGroup(asgName) + Expect(err).ToNot(HaveOccurred()) + oldMinSize = *asg[0].MinSize + oldMaxSize = *asg[0].MaxSize + }) + AfterEach(func() { + By("restoring ASG minSize & maxSize after test") + err := frameWork.AutoScalingManager.UpdateAutoScalingGroup(asgName, oldMinSize, oldMaxSize) + Expect(err).ToNot(HaveOccurred()) + // sleep to allow ASG to be updated + time.Sleep(testUtils.ResourceCreationTimeout) + }) + + Context("when new node is added", func() { + It("it should create new CNINode", func() { + newMinSize = oldMinSize + 1 + newMaxSize = oldMaxSize + 1 + // Update ASG to set new minSize and new maxSize + By("adding new node") + err := frameWork.AutoScalingManager.UpdateAutoScalingGroup(asgName, newMinSize, newMaxSize) + Expect(err).ToNot(HaveOccurred()) + // sleep to allow ASG to be updated + time.Sleep(testUtils.ResourceCreationTimeout) + + err = node.VerifyCNINodeCount(frameWork.NodeManager) + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("when existing node is removed", func() { + It("it should delete CNINode", func() { + newMinSize = oldMinSize - 1 + newMaxSize = oldMaxSize - 1 + // Update ASG to set new minSize and new maxSize + By("removing existing node") + err := frameWork.AutoScalingManager.UpdateAutoScalingGroup(asgName, newMinSize, newMaxSize) + Expect(err).ToNot(HaveOccurred()) + // sleep to allow ASG to be updated + time.Sleep(testUtils.ResourceCreationTimeout) + err = node.VerifyCNINodeCount(frameWork.NodeManager) + Expect(err).ToNot(HaveOccurred()) + + }) + }) + }) + + Describe("CNINode is re-created when node exists", func() { + Context("when CNINode is deleted but node exists", func() { + It("it should re-create CNINode", func() { + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + cniNode, err := frameWork.NodeManager.GetCNINode(&nodeList.Items[0]) + Expect(err).ToNot(HaveOccurred()) + err = frameWork.NodeManager.DeleteCNINode(cniNode) + Expect(err).ToNot(HaveOccurred()) + time.Sleep(testUtils.PollIntervalShort) // allow time to re-create CNINode + _, err = frameWork.NodeManager.GetCNINode(&nodeList.Items[0]) + Expect(err).ToNot(HaveOccurred()) + VerifyCNINodeFields(cniNode) + }) + }) + + }) + + Describe("CNINode update tests", func() { + var cniNode *v1alpha1.CNINode + var node *v1.Node + BeforeEach(func() { + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + node = &nodeList.Items[0] + cniNode, err = frameWork.NodeManager.GetCNINode(node) + Expect(err).ToNot(HaveOccurred()) + VerifyCNINodeFields(cniNode) + }) + AfterEach(func() { + time.Sleep(testUtils.PollIntervalShort) + newCNINode, err := frameWork.NodeManager.GetCNINode(node) + Expect(err).ToNot(HaveOccurred()) + // Verify CNINode after update matches CNINode before update + Expect(newCNINode).To(BeComparableTo(cniNode, cmp.Options{ + cmpopts.IgnoreTypes(metav1.TypeMeta{}), + cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion", "Generation", "ManagedFields"), + })) + }) + + Context("when finalizer is removed", func() { + It("it should add the finalizer", func() { + cniNodeCopy := cniNode.DeepCopy() + controllerutil.RemoveFinalizer(cniNodeCopy, config.NodeTerminationFinalizer) + err := frameWork.NodeManager.UpdateCNINode(cniNode, cniNodeCopy) + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("when Tags is removed", func() { + It("it should add the Tags", func() { + cniNodeCopy := cniNode.DeepCopy() + cniNodeCopy.Spec.Tags = map[string]string{} + err := frameWork.NodeManager.UpdateCNINode(cniNode, cniNodeCopy) + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("when Label is removed", func() { + It("it should add the label", func() { + cniNodeCopy := cniNode.DeepCopy() + cniNodeCopy.ObjectMeta.Labels = map[string]string{} + err := frameWork.NodeManager.UpdateCNINode(cniNode, cniNodeCopy) + Expect(err).ToNot(HaveOccurred()) + }) + }) + }) + +}) + +func ListNodesAndGetAutoScalingGroupName() string { + By("getting instance details") + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeList.Items).ToNot(BeEmpty()) + instanceID := frameWork.NodeManager.GetInstanceID(&nodeList.Items[0]) + Expect(instanceID).ToNot(BeEmpty()) + instance, err := frameWork.EC2Manager.GetInstanceDetails(instanceID) + Expect(err).ToNot(HaveOccurred()) + tags := utils.GetTagKeyValueMap(instance.Tags) + val, ok := tags["aws:autoscaling:groupName"] + Expect(ok).To(BeTrue()) + return val +} + +// Verify finalizer, tag, and label is set on new CNINode +func VerifyCNINodeFields(cniNode *v1alpha1.CNINode) { + By("verifying finalizer is set") + Expect(cniNode.ObjectMeta.Finalizers).To(ContainElement(config.NodeTerminationFinalizer)) + // For maps, ContainElement searches through the map's values. + By("verifying cluster name tag is set") + Expect(cniNode.Spec.Tags).To(ContainElement(frameWork.Options.ClusterName)) + Expect(config.CNINodeClusterNameKey).To(BeKeyOf(cniNode.Spec.Tags)) + + By("verifying node OS label is set") + Expect(cniNode.ObjectMeta.Labels).To(ContainElement(config.OSLinux)) + Expect(config.NodeLabelOS).To(BeKeyOf(cniNode.ObjectMeta.Labels)) +} diff --git a/test/integration/ec2api/ec2api_suite_test.go b/test/integration/ec2api/ec2api_suite_test.go new file mode 100644 index 00000000..1183cc7d --- /dev/null +++ b/test/integration/ec2api/ec2api_suite_test.go @@ -0,0 +1,46 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. +package ec2api_test + +import ( + "testing" + + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestEc2api(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "EC2API Suite") +} + +var frameWork *framework.Framework +var nodeListLen int +var _ = BeforeSuite(func() { + By("creating a framework") + frameWork = framework.New(framework.GlobalOptions) + By("verify node count before test") + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + nodeListLen = len(nodeList.Items) + Expect(nodeListLen).To(BeNumerically(">", 1)) +}) + +var _ = AfterSuite(func() { + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + By("verifying node count after test is unchanged") + Expect(len(nodeList.Items)).To(Equal(nodeListLen)) +}) diff --git a/test/integration/ec2api/ec2api_test.go b/test/integration/ec2api/ec2api_test.go new file mode 100644 index 00000000..4926b450 --- /dev/null +++ b/test/integration/ec2api/ec2api_test.go @@ -0,0 +1,128 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. +package ec2api_test + +import ( + "strings" + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials/stscreds" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// requires AmazonEKSVPCResourceController policy to be attached to the EKS cluster role +var _ = Describe("[LOCAL] Test IAM permissions for EC2 API calls", func() { + var instanceID string + var subnetID string + var instanceType string + var nwInterfaceID string + var err error + BeforeEach(func() { + By("getting instance details") + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeList.Items).ToNot(BeEmpty()) + instanceID = frameWork.NodeManager.GetInstanceID(&nodeList.Items[0]) + ec2Instance, err := frameWork.EC2Manager.GetInstanceDetails(instanceID) + Expect(err).ToNot(HaveOccurred()) + subnetID = *ec2Instance.SubnetId + instanceType = *ec2Instance.InstanceType + + }) + AfterEach(func() { + By("deleting test interface") + err = frameWork.EC2Manager.DeleteNetworkInterface(nwInterfaceID) + Expect(err).ToNot(HaveOccurred()) + }) + Describe("Test DeleteNetworkInterface permission", func() { + Context("when instance is terminated", func() { + It("it should only delete ENIs provisioned by the controller or vpc-cni", func() { + By("creating test ENI without eks:eni:owner tag and attach to EC2 instance") + nwInterfaceID, err = frameWork.EC2Manager.CreateAndAttachNetworkInterface(subnetID, instanceID, instanceType) + Expect(err).ToNot(HaveOccurred()) + By("terminating the instance and sleeping") + err = frameWork.EC2Manager.TerminateInstances(instanceID) + Expect(err).ToNot(HaveOccurred()) + // allow time for instance to be deleted and ENI to be available, new node to be ready + time.Sleep(utils.ResourceCreationTimeout) + By("verifying ENI is not deleted by controller") + err = frameWork.EC2Manager.DescribeNetworkInterface(nwInterfaceID) + Expect(err).ToNot(HaveOccurred()) + }) + }) + }) + Describe("Test CreateNetworkInterfacePermission permission", func() { + var ec2Client *ec2.EC2 + var accountID string + var wantErr bool + JustBeforeEach(func() { + arnSplit := strings.Split(frameWork.Options.ClusterRoleArn, ":") + accountID = arnSplit[len(arnSplit)-2] + By("assuming EKS cluster role") + sess := session.Must(session.NewSession()) + creds := stscreds.NewCredentials(sess, frameWork.Options.ClusterRoleArn) + ec2Client = ec2.New(sess, &aws.Config{Credentials: creds}) + }) + JustAfterEach(func() { + By("creating network interface permission") + _, err = ec2Client.CreateNetworkInterfacePermission(&ec2.CreateNetworkInterfacePermissionInput{ + AwsAccountId: aws.String(accountID), + NetworkInterfaceId: aws.String(nwInterfaceID), + Permission: aws.String(ec2.InterfacePermissionTypeInstanceAttach), + }) + By("validating error is nil or as expected") + Expect(err != nil).To(Equal(wantErr)) + }) + Context("CreateNetworkInterfacePermission on ENI WITH required tag eks:eni:owner=eks-vpc-resource-controller", func() { + It("it should not grant CreateNetworkInterfacePermission", func() { + By("creating network interface") + nwInterfaceOp, err := ec2Client.CreateNetworkInterface(&ec2.CreateNetworkInterfaceInput{ + SubnetId: aws.String(subnetID), + TagSpecifications: []*ec2.TagSpecification{ + { + ResourceType: aws.String(ec2.ResourceTypeNetworkInterface), + Tags: []*ec2.Tag{ + { + Key: aws.String(config.NetworkInterfaceOwnerTagKey), + Value: aws.String((config.NetworkInterfaceOwnerTagValue)), + }, + }, + }, + }, + Description: aws.String("VPC-Resource-Controller integration test ENI"), + }) + Expect(err).ToNot(HaveOccurred()) + nwInterfaceID = *nwInterfaceOp.NetworkInterface.NetworkInterfaceId + wantErr = false + }) + }) + Context("CreateNetworkInterfacePermission on ENI WITHOUT required tag eks:eni:owner=eks-vpc-resource-controller", func() { + It("it should not grant CreateNetworkInterfacePermission", func() { + By("creating network interface") + nwInterfaceOp, err := ec2Client.CreateNetworkInterface(&ec2.CreateNetworkInterfaceInput{ + SubnetId: aws.String(subnetID), + Description: aws.String("VPC-Resource-Controller integration test ENI"), + }) + Expect(err).ToNot(HaveOccurred()) + nwInterfaceID = *nwInterfaceOp.NetworkInterface.NetworkInterfaceId + wantErr = true + }) + }) + }) +}) diff --git a/test/integration/perpodsg/perpodsg_suite_test.go b/test/integration/perpodsg/perpodsg_suite_test.go index c31c6003..ad745c2e 100644 --- a/test/integration/perpodsg/perpodsg_suite_test.go +++ b/test/integration/perpodsg/perpodsg_suite_test.go @@ -50,8 +50,10 @@ var _ = BeforeSuite(func() { securityGroupID1 = reCreateSGIfAlreadyExists(utils.ResourceNamePrefix + "sg-1") securityGroupID2 = reCreateSGIfAlreadyExists(utils.ResourceNamePrefix + "sg-2") - nodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, ctx, "linux", + nodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, "linux", config.ResourceNamePodENI) + err = node.VerifyCNINodeCount(frameWork.NodeManager) + Expect(err).ToNot(HaveOccurred()) }) var _ = AfterSuite(func() { diff --git a/test/integration/perpodsg/perpodsg_test.go b/test/integration/perpodsg/perpodsg_test.go index 4c34ddf8..ef8f51e4 100644 --- a/test/integration/perpodsg/perpodsg_test.go +++ b/test/integration/perpodsg/perpodsg_test.go @@ -37,29 +37,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -var _ = Describe("CNINode Veification", func() { - Describe("verify CNINode mapping to nodes", func() { - Context("when nodes are ready", func() { - It("should have same number of CNINode no matter which mode", func() { - cniNodes, err := frameWork.NodeManager.GetCNINodeList() - Expect(err).NotTo(HaveOccurred()) - nodes, err := frameWork.NodeManager.GetNodeList() - Expect(err).NotTo(HaveOccurred()) - Expect(len(nodes.Items)).To(Equal(len(cniNodes.Items))) - nameMatched := true - for _, node := range nodes.Items { - if !lo.ContainsBy(cniNodes.Items, func(cniNode cninode.CNINode) bool { - return cniNode.Name == node.Name - }) { - nameMatched = false - } - } - Expect(nameMatched).To(BeTrue()) - }) - }) - }) -}) - var _ = Describe("Branch ENI Pods", func() { var ( securityGroupPolicy *v1beta1.SecurityGroupPolicy diff --git a/test/integration/windows/windows_suite_test.go b/test/integration/windows/windows_suite_test.go index 60b147d1..bebd99eb 100644 --- a/test/integration/windows/windows_suite_test.go +++ b/test/integration/windows/windows_suite_test.go @@ -67,7 +67,7 @@ var _ = BeforeSuite(func() { } By("getting the list of Windows node") - windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, ctx, "windows", + windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, "windows", config.ResourceNameIPAddress) By("getting the instance ID for the first node") diff --git a/test/integration/windows/windows_test.go b/test/integration/windows/windows_test.go index 24962b4a..14a83489 100644 --- a/test/integration/windows/windows_test.go +++ b/test/integration/windows/windows_test.go @@ -219,7 +219,7 @@ var _ = Describe("Windows Integration Test", func() { bufferForCoolDown = time.Second * 30 - windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, ctx, "windows", + windowsNodeList = node.GetNodeAndWaitTillCapacityPresent(frameWork.NodeManager, "windows", config.ResourceNameIPAddress) instanceID = manager.GetNodeInstanceID(&windowsNodeList.Items[0]) nodeName = windowsNodeList.Items[0].Name