From 20d88f4a1dc3c7a7addd763069722473d4d97299 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Tue, 27 Feb 2024 14:49:19 +0100 Subject: [PATCH] e2e: Ensure operator restarts quickly Add CoordinationV1 to `test/util/client/clients.go` to make assertions on `coordination.k8s.io/Lease` objects. Add `OPERATOR_LEADER_ELECTION_ENABLE` environment variable to `deploy/operator.yaml` to let users enable leader election on the operator. Signed-off-by: Andrea Panattoni --- deploy/operator.yaml | 2 + deploy/role.yaml | 6 +++ .../templates/role.yaml | 6 +++ hack/env.sh | 1 + hack/run-e2e-conformance-virtual-ocp.sh | 1 + main.go | 2 +- pkg/consts/constants.go | 1 + test/conformance/tests/test_sriov_operator.go | 46 +++++++++++++++++-- test/util/client/clients.go | 3 ++ 9 files changed, 64 insertions(+), 4 deletions(-) diff --git a/deploy/operator.yaml b/deploy/operator.yaml index 8d9f88cf2..576dd3f34 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -41,6 +41,8 @@ spec: image: $SRIOV_NETWORK_OPERATOR_IMAGE command: - sriov-network-operator + args: + - --leader-elect=$OPERATOR_LEADER_ELECTION_ENABLE resources: requests: cpu: 100m diff --git a/deploy/role.yaml b/deploy/role.yaml index 409286ab5..a24f13729 100644 --- a/deploy/role.yaml +++ b/deploy/role.yaml @@ -56,6 +56,12 @@ rules: - get - list - watch +- apiGroups: + - 'coordination.k8s.io' + resources: + - 'leases' + verbs: + - '*' --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role diff --git a/deployment/sriov-network-operator/templates/role.yaml b/deployment/sriov-network-operator/templates/role.yaml index 6058a86e1..29cf80cce 100644 --- a/deployment/sriov-network-operator/templates/role.yaml +++ b/deployment/sriov-network-operator/templates/role.yaml @@ -59,6 +59,12 @@ rules: - get - list - watch + - apiGroups: + - 'coordination.k8s.io' + resources: + - 'leases' + verbs: + - '*' --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role diff --git a/hack/env.sh b/hack/env.sh index 9dc547eff..8c12bc1e8 100755 --- 
a/hack/env.sh +++ b/hack/env.sh @@ -46,3 +46,4 @@ export ADMISSION_CONTROLLERS_CERTIFICATES_CERT_MANAGER_ENABLED=${ADMISSION_CONTR export ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT=${ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT:-""} export ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT=${ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT:-""} export DEV_MODE=${DEV_MODE:-"FALSE"} +export OPERATOR_LEADER_ELECTION_ENABLE=${OPERATOR_LEADER_ELECTION_ENABLE:-"false"} diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index 03d788e0d..20859c669 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh @@ -189,6 +189,7 @@ export OPERATOR_EXEC=kubectl export CLUSTER_TYPE=openshift export DEV_MODE=TRUE export CLUSTER_HAS_EMULATED_PF=TRUE +export OPERATOR_LEADER_ELECTION_ENABLE=true export SRIOV_NETWORK_OPERATOR_IMAGE="$registry/$NAMESPACE/sriov-network-operator:latest" export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$registry/$NAMESPACE/sriov-network-config-daemon:latest" diff --git a/main.go b/main.go index 3b6418e23..0684fb7b8 100644 --- a/main.go +++ b/main.go @@ -114,7 +114,7 @@ func main() { LeaderElectionReleaseOnCancel: true, RenewDeadline: &le.RenewDeadline, RetryPeriod: &le.RetryPeriod, - LeaderElectionID: "a56def2a.openshift.io", + LeaderElectionID: consts.LeaderElectionID, }) if err != nil { setupLog.Error(err, "unable to start leader election manager") diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index 8dc071f97..95adda3de 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -35,6 +35,7 @@ const ( ServiceAccount = "ServiceAccount" DPConfigFileName = "config.json" OVSHWOLMachineConfigNameSuffix = "ovs-hw-offload" + LeaderElectionID = "a56def2a.openshift.io" LinkTypeEthernet = "ether" LinkTypeInfiniband = "infiniband" diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index 
fa2a83cc6..bd7fd13ad 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -28,6 +28,7 @@ import ( runtimeclient "sigs.k8s.io/controller-runtime/pkg/client" sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/clean" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery" @@ -276,6 +277,35 @@ var _ = Describe("[sriov] operator", func() { }, 3*time.Minute, 5*time.Second).Should(Succeed()) }) }) + + It("should gracefully restart quickly", func() { + // This test case ensures the leader election process runs smoothly when the operator's pod is restarted. + oldLease, err := clients.CoordinationV1Interface.Leases(operatorNamespace).Get(context.Background(), consts.LeaderElectionID, metav1.GetOptions{}) + if k8serrors.IsNotFound(err) { + Skip("Leader Election is not enabled on the cluster. 
Skipping") + } + Expect(err).ToNot(HaveOccurred()) + + oldOperatorPod := getOperatorPod() + + By("Delete the operator's pod") + deleteOperatorPod() + + By("Wait the new operator's pod to start") + Eventually(func(g Gomega) { + newOperatorPod := getOperatorPod() + g.Expect(newOperatorPod.Name).ToNot(Equal(oldOperatorPod.Name)) + g.Expect(newOperatorPod.Status.Phase).To(Equal(corev1.PodRunning)) + }, 45*time.Second, 5*time.Second).Should(Succeed()) + + By("Assert the new operator's pod acquire the lease before 30 seconds") + Eventually(func(g Gomega) { + newLease, err := clients.CoordinationV1Interface.Leases(operatorNamespace).Get(context.Background(), consts.LeaderElectionID, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(newLease.Spec.HolderIdentity).ToNot(Equal(oldLease.Spec.HolderIdentity)) + }, 30*time.Second, 5*time.Second).Should(Succeed()) + }) }) Describe("Generic SriovNetworkNodePolicy", func() { @@ -2743,14 +2773,17 @@ func getOperatorConfigLogLevel() int { return cfg.Spec.LogLevel } -func getOperatorLogs(since time.Time) []string { +func getOperatorPod() corev1.Pod { podList, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{ LabelSelector: "name=sriov-network-operator", }) ExpectWithOffset(1, err).ToNot(HaveOccurred()) - ExpectWithOffset(1, podList.Items).To(HaveLen(1), "One operator pod expected") + ExpectWithOffset(1, podList.Items).ToNot(HaveLen(0), "At least one operator pod expected") + return podList.Items[0] +} - pod := podList.Items[0] +func getOperatorLogs(since time.Time) []string { + pod := getOperatorPod() logStart := metav1.NewTime(since) rawLogs, err := clients.Pods(pod.Namespace). 
GetLogs(pod.Name, &corev1.PodLogOptions{ @@ -2763,6 +2796,13 @@ func getOperatorLogs(since time.Time) []string { return strings.Split(string(rawLogs), "\n") } +func deleteOperatorPod() { + pod := getOperatorPod() + + err := clients.Pods(operatorNamespace).Delete(context.Background(), pod.Name, metav1.DeleteOptions{}) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) +} + func assertObjectIsNotFound(name string, obj runtimeclient.Object) { Eventually(func() bool { err := clients.Get(context.Background(), runtimeclient.ObjectKey{Name: name, Namespace: operatorNamespace}, obj) diff --git a/test/util/client/clients.go b/test/util/client/clients.go index a96634c19..21eb12053 100644 --- a/test/util/client/clients.go +++ b/test/util/client/clients.go @@ -11,6 +11,7 @@ import ( discovery "k8s.io/client-go/discovery" clientgoscheme "k8s.io/client-go/kubernetes/scheme" appsv1client "k8s.io/client-go/kubernetes/typed/apps/v1" + coordinationv1 "k8s.io/client-go/kubernetes/typed/coordination/v1" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -37,6 +38,7 @@ type ClientSet struct { clientsriovv1.SriovnetworkV1Interface Config *rest.Config runtimeclient.Client + coordinationv1.CoordinationV1Interface } // New returns a *ClientBuilder with the given kubeconfig. @@ -67,6 +69,7 @@ func New(kubeconfig string) *ClientSet { clientSet.AppsV1Interface = appsv1client.NewForConfigOrDie(config) clientSet.DiscoveryInterface = discovery.NewDiscoveryClientForConfigOrDie(config) clientSet.SriovnetworkV1Interface = clientsriovv1.NewForConfigOrDie(config) + clientSet.CoordinationV1Interface = coordinationv1.NewForConfigOrDie(config) clientSet.Config = config crScheme := runtime.NewScheme()