From 9c212ebede58bc681ea6ab3558fc9d92dcfa9033 Mon Sep 17 00:00:00 2001 From: Federico Paolinelli Date: Thu, 13 May 2021 14:07:34 +0200 Subject: [PATCH] Add a sanity check test suite This adds a "validation" test suite that performs very basic tests. It checks that all the CRDs are deployed, and that the pods are running correctly. This serves two purposes: - having a toggle to understand when a deployment finished (implemented by the make deploy-wait rule) - having a set of tests able to prevent DOA releases, verifying that the operator deploys correctly (even without needing a baremetal cluster) Signed-off-by: Federico Paolinelli --- Makefile | 8 +- hack/deploy-wait.sh | 25 ++++ hack/run-e2e-conformance.sh | 4 +- test/conformance/tests/sriov_operator.go | 18 +-- test/util/client/clients.go | 2 + test/validation/test_suite_test.go | 55 +++++++++ test/validation/tests/validation.go | 143 +++++++++++++++++++++++ 7 files changed, 243 insertions(+), 12 deletions(-) create mode 100755 hack/deploy-wait.sh create mode 100644 test/validation/test_suite_test.go create mode 100644 test/validation/tests/validation.go diff --git a/Makefile b/Makefile index 201afb06d..99fc04a0b 100644 --- a/Makefile +++ b/Makefile @@ -200,7 +200,10 @@ deploy-setup-k8s: export CLUSTER_TYPE=kubernetes deploy-setup-k8s: deploy-setup test-e2e-conformance: - ./hack/run-e2e-conformance.sh + SUITE=./test/conformance ./hack/run-e2e-conformance.sh + +test-e2e-validation-only: + SUITE=./test/validation ./hack/run-e2e-conformance.sh test-e2e: generate vet manifests skopeo mkdir -p ${ENVTEST_ASSETS_DIR} @@ -220,6 +223,9 @@ test-%: generate vet manifests # deploy-setup-k8s: export CNI_BIN_PATH=/opt/cni/bin # test-e2e-k8s: test-e2e +deploy-wait: + hack/deploy-wait.sh + undeploy: uninstall @hack/undeploy.sh $(NAMESPACE) diff --git a/hack/deploy-wait.sh b/hack/deploy-wait.sh new file mode 100755 index 000000000..d4e66281e --- /dev/null +++ b/hack/deploy-wait.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +ATTEMPTS=0 
+MAX_ATTEMPTS=72 +ready=false +sleep_time=10 + +until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ] +do + echo "running tests" + if SUITE=./test/validation ./hack/run-e2e-conformance.sh; then + echo "succeeded" + ready=true + else + echo "failed, retrying" + sleep $sleep_time + fi + (( ATTEMPTS++ )) +done + +if ! $ready; then + echo "Timed out waiting for features to be ready" + oc get nodes + exit 1 +fi diff --git a/hack/run-e2e-conformance.sh b/hack/run-e2e-conformance.sh index 10250c38b..13c4e3248 100755 --- a/hack/run-e2e-conformance.sh +++ b/hack/run-e2e-conformance.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -x which ginkgo if [ $? -ne 0 ]; then # we are moving to a temp folder as in go.mod we have a dependency that is not @@ -18,4 +18,4 @@ GOPATH="${GOPATH:-~/go}" JUNIT_OUTPUT="${JUNIT_OUTPUT:-/tmp/artifacts/unit_report.xml}" export PATH=$PATH:$GOPATH/bin -GOFLAGS=-mod=vendor ginkgo ./test/conformance -- -junit $JUNIT_OUTPUT +GOFLAGS=-mod=vendor ginkgo "${SUITE:-./test/conformance}" -- -junit "$JUNIT_OUTPUT" diff --git a/test/conformance/tests/sriov_operator.go b/test/conformance/tests/sriov_operator.go index 197c9034a..c18f3e9f6 100644 --- a/test/conformance/tests/sriov_operator.go +++ b/test/conformance/tests/sriov_operator.go @@ -90,7 +90,7 @@ var _ = Describe("[sriov] operator", func() { Expect(err).ToNot(HaveOccurred()) err = namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) Expect(err).ToNot(HaveOccurred()) - waitForSRIOVStable() + WaitForSRIOVStable() sriovInfos, err = cluster.DiscoverSriov(clients, operatorNamespace) Expect(err).ToNot(HaveOccurred()) initPassed = true @@ -194,7 +194,7 @@ var _ = Describe("[sriov] operator", func() { } else { node = sriovInfos.Nodes[0] createVanillaNetworkPolicy(node, sriovInfos, numVfs, resourceName) - waitForSRIOVStable() + WaitForSRIOVStable() sriovDevice, err = sriovInfos.FindOneSriovDevice(node) Expect(err).ToNot(HaveOccurred()) @@ -854,7 +854,7 @@ var _ = Describe("[sriov] operator", func() { BeforeEach(func() { 
err := namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) Expect(err).ToNot(HaveOccurred()) - waitForSRIOVStable() + WaitForSRIOVStable() }) Describe("Configuration", func() { @@ -888,7 +888,7 @@ var _ = Describe("[sriov] operator", func() { }))) By("waiting the sriov to be stable on the node") - waitForSRIOVStable() + WaitForSRIOVStable() By("waiting for the resources to be available") Eventually(func() int64 { @@ -935,7 +935,7 @@ var _ = Describe("[sriov] operator", func() { })), }))) - waitForSRIOVStable() + WaitForSRIOVStable() Eventually(func() int64 { testedNode, err := clients.Nodes().Get(context.Background(), vfioNode, metav1.GetOptions{}) @@ -1223,7 +1223,7 @@ var _ = Describe("[sriov] operator", func() { err = clients.Create(context.Background(), mtuPolicy) Expect(err).ToNot(HaveOccurred()) - waitForSRIOVStable() + WaitForSRIOVStable() By("waiting for the resources to be available") Eventually(func() int64 { testedNode, err := clients.Nodes().Get(context.Background(), node, metav1.GetOptions{}) @@ -1373,7 +1373,7 @@ var _ = Describe("[sriov] operator", func() { }))) By("waiting the sriov to be stable on the node") - waitForSRIOVStable() + WaitForSRIOVStable() By("waiting for the resources to be available") Eventually(func() int64 { @@ -1767,7 +1767,7 @@ func daemonsScheduledOnNodes(selector string) bool { func createSriovPolicy(sriovDevice string, testNode string, numVfs int, resourceName string) { _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, sriovDevice, testNode, numVfs, resourceName, "netdevice") Expect(err).ToNot(HaveOccurred()) - waitForSRIOVStable() + WaitForSRIOVStable() Eventually(func() int64 { testedNode, err := clients.Nodes().Get(context.Background(), testNode, metav1.GetOptions{}) @@ -1860,7 +1860,7 @@ func pingPod(ip string, nodeSelector string, sriovNetworkAttachment string) { }, 3*time.Minute, 1*time.Second).Should(Equal(k8sv1.PodSucceeded)) } -func waitForSRIOVStable() { 
+func WaitForSRIOVStable() { // This used to be to check for sriov not to be stable first, // then stable. The issue is that if no configuration is applied, then // the status won't never go to not stable and the test will fail. diff --git a/test/util/client/clients.go b/test/util/client/clients.go index a063e6130..029f9c131 100644 --- a/test/util/client/clients.go +++ b/test/util/client/clients.go @@ -10,6 +10,7 @@ import ( clientsriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/typed/sriovnetwork/v1" clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" clientmachineconfigv1 "github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned/typed/machineconfiguration.openshift.io/v1" + apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/runtime" discovery "k8s.io/client-go/discovery" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -68,6 +69,7 @@ func New(kubeconfig string) *ClientSet { clientgoscheme.AddToScheme(crScheme) netattdefv1.SchemeBuilder.AddToScheme(crScheme) sriovv1.AddToScheme(crScheme) + apiext.AddToScheme(crScheme) clientSet.Client, err = runtimeclient.New(config, client.Options{ Scheme: crScheme, diff --git a/test/validation/test_suite_test.go b/test/validation/test_suite_test.go new file mode 100644 index 000000000..c7e85bab2 --- /dev/null +++ b/test/validation/test_suite_test.go @@ -0,0 +1,55 @@ +package validation + +import ( + "flag" + "fmt" + "os" + "testing" + + . "github.com/onsi/ginkgo" + "github.com/onsi/ginkgo/reporters" + . 
"github.com/onsi/gomega" + + testclient "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/client" + + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/k8sreporter" + _ "github.com/k8snetworkplumbingwg/sriov-network-operator/test/validation/tests" +) + +var ( + junitPath *string + dumpOutput *bool +) + +func init() { + junitPath = flag.String("junit", "junit.xml", "the path for the junit format report") + dumpOutput = flag.Bool("dump", false, "dump informations for failed tests") +} + +func TestTest(t *testing.T) { + RegisterFailHandler(Fail) + + rr := []Reporter{} + if junitPath != nil { + rr = append(rr, reporters.NewJUnitReporter(*junitPath)) + } + + reporterFile := os.Getenv("REPORTER_OUTPUT") + + clients := testclient.New("") + + if reporterFile != "" { + f, err := os.OpenFile(reporterFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to open the file: %v\n", err) + t.FailNow() + } + defer f.Close() + rr = append(rr, k8sreporter.New(clients, f)) + + } else if *dumpOutput { + rr = append(rr, k8sreporter.New(clients, os.Stdout)) + } + + RunSpecsWithDefaultAndCustomReporters(t, "SRIOV Operator validation tests", rr) +} diff --git a/test/validation/tests/validation.go b/test/validation/tests/validation.go new file mode 100644 index 000000000..999cb32fe --- /dev/null +++ b/test/validation/tests/validation.go @@ -0,0 +1,143 @@ +package tests + +import ( + "context" + "fmt" + "os" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + goclient "sigs.k8s.io/controller-runtime/pkg/client" + + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + testclient "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/client" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" +) + +var ( + machineConfigPoolNodeSelector string +) + +var ( + clients *testclient.ClientSet + operatorNamespace string +) + +func init() { + operatorNamespace = os.Getenv("OPERATOR_NAMESPACE") + if operatorNamespace == "" { + operatorNamespace = "openshift-sriov-network-operator" + } + + clients = testclient.New("") +} + +const ( + sriovOperatorDeploymentName = "sriov-network-operator" + // sriovNetworkNodePolicies contains the name of the sriov network node policies CRD + sriovNetworkNodePolicies = "sriovnetworknodepolicies.sriovnetwork.openshift.io" + // sriovNetworkNodeStates contains the name of the sriov network node state CRD + sriovNetworkNodeStates = "sriovnetworknodestates.sriovnetwork.openshift.io" + // sriovNetworks contains the name of the sriov network CRD + sriovNetworks = "sriovnetworks.sriovnetwork.openshift.io" + // sriovOperatorConfigs contains the name of the sriov Operator config CRD + sriovOperatorConfigs = "sriovoperatorconfigs.sriovnetwork.openshift.io" +) + +func init() { + roleWorkerCNF := os.Getenv("ROLE_WORKER_CNF") + if roleWorkerCNF == "" { + roleWorkerCNF = "worker-cnf" + } + + machineConfigPoolNodeSelector = fmt.Sprintf("node-role.kubernetes.io/%s", roleWorkerCNF) +} + +var _ = Describe("validation", func() { + + Context("sriov", func() { + It("should have the sriov namespace", func() { + _, err := clients.Namespaces().Get(context.Background(), operatorNamespace, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + }) + + 
It("should have the sriov operator deployment in running state", func() { + deploy, err := clients.Deployments(operatorNamespace).Get(context.Background(), sriovOperatorDeploymentName, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(deploy.Status.Replicas).To(Equal(deploy.Status.ReadyReplicas)) + + pods, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: fmt.Sprintf("name=%s", sriovOperatorDeploymentName)}) + Expect(err).ToNot(HaveOccurred()) + + Expect(pods.Items).To(HaveLen(1)) + Expect(pods.Items[0].Status.Phase).To(Equal(corev1.PodRunning)) + }) + + It("Should have the sriov CRDs available in the cluster", func() { + crd := &apiext.CustomResourceDefinition{} + err := clients.Client.Get(context.TODO(), goclient.ObjectKey{Name: sriovNetworkNodePolicies}, crd) + Expect(err).ToNot(HaveOccurred()) + + err = clients.Client.Get(context.TODO(), goclient.ObjectKey{Name: sriovNetworkNodeStates}, crd) + Expect(err).ToNot(HaveOccurred()) + + err = clients.Client.Get(context.TODO(), goclient.ObjectKey{Name: sriovNetworks}, crd) + Expect(err).ToNot(HaveOccurred()) + + err = clients.Client.Get(context.TODO(), goclient.ObjectKey{Name: sriovOperatorConfigs}, crd) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should deploy the injector pod if requested", func() { + operatorConfig := &sriovv1.SriovOperatorConfig{} + err := clients.Client.Get(context.TODO(), goclient.ObjectKey{Name: "default", Namespace: operatorNamespace}, operatorConfig) + Expect(err).ToNot(HaveOccurred()) + Expect(operatorConfig.Spec.EnableInjector).ToNot(BeNil(), "EnableInjector is not set in the default SriovOperatorConfig") + if *operatorConfig.Spec.EnableInjector { + daemonset, err := clients.DaemonSets(operatorNamespace).Get(context.Background(), "network-resources-injector", metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(daemonset.Status.DesiredNumberScheduled).To(Equal(daemonset.Status.NumberReady)) + } else { + _, err := clients.DaemonSets(operatorNamespace).Get(context.Background(), "network-resources-injector", 
metav1.GetOptions{}) + Expect(err).To(HaveOccurred()) + Expect(errors.IsNotFound(err)).To(BeTrue()) + } + }) + + It("should deploy the operator webhook if requested", func() { + operatorConfig := &sriovv1.SriovOperatorConfig{} + err := clients.Get(context.TODO(), goclient.ObjectKey{Name: "default", Namespace: operatorNamespace}, operatorConfig) + Expect(err).ToNot(HaveOccurred()) + Expect(operatorConfig.Spec.EnableOperatorWebhook).ToNot(BeNil(), "EnableOperatorWebhook is not set in the default SriovOperatorConfig") + if *operatorConfig.Spec.EnableOperatorWebhook { + daemonset, err := clients.DaemonSets(operatorNamespace).Get(context.Background(), "operator-webhook", metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(daemonset.Status.DesiredNumberScheduled).To(Equal(daemonset.Status.NumberReady)) + } else { + _, err := clients.DaemonSets(operatorNamespace).Get(context.Background(), "operator-webhook", metav1.GetOptions{}) + Expect(err).To(HaveOccurred()) + Expect(errors.IsNotFound(err)).To(BeTrue()) + } + }) + + It("should have SR-IOV node statuses not in progress", func() { + CheckStable() + }) + }) +}) + +func CheckStable() { + res, err := cluster.SriovStable(operatorNamespace, clients) + Expect(err).ToNot(HaveOccurred()) + Expect(res).To(BeTrue(), "SR-IOV status is not stable") + + isClusterReady, err := cluster.IsClusterStable(clients) + Expect(err).ToNot(HaveOccurred()) + Expect(isClusterReady).To(BeTrue(), "Cluster is not stable") +}