From 0a0b700e42415d2914a4300edcdad8a6f5d41942 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 28 Aug 2020 17:28:21 +0200 Subject: [PATCH] test: add version skew testing This has two facets: - switching the entire driver deployment from one version to another while there is persistent state like active volumes - combining components from different releases in a deployment, which can happen during a rolling update Upgrade and downgrade testing is done in two directions: from 0.6 to master and from master to 0.6. In both cases and for all deployment methods and most volume types, some sanity checks are run: - an unused volume must be deletable - an unused volume must be usable for a pod - a volume used by a pod can be removed Different volume types are covered via test patterns, i.e. each volume type goes through driver downgrade/upgrade separately. Persistent filesystem types are covered in some varieties, including cache volumes. Block and CSI inline volumes only get tested once. This is a compromise between keeping tests small and overall runtime, because reinstalling the driver is slow. Therefore these tests also don't run in our pre-submit testing. Skew testing is done by switching to an old release and replacing the controller image. --- go.mod | 4 +- go.sum | 13 +- test/e2e/deploy/deploy.go | 207 +++++++++++++------- test/e2e/driver/driver.go | 45 +++++ test/e2e/driver/pattern.go | 68 +++++++ test/e2e/e2e_test.go | 1 + test/e2e/storage/csi_volumes.go | 2 + test/e2e/storage/dax/dax.go | 106 +++++++--- test/e2e/versionskew/versionskew.go | 293 ++++++++++++++++++++++++++++ 9 files changed, 634 insertions(+), 105 deletions(-) create mode 100644 test/e2e/driver/pattern.go create mode 100644 test/e2e/versionskew/versionskew.go diff --git a/go.mod b/go.mod index bba90a31fc..02662926a3 100644 --- a/go.mod +++ b/go.mod @@ -65,10 +65,12 @@ replace ( k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.19.0-rc.4 k8s.io/kubectl => k8s.io/kubectl v0.19.0-rc.4 k8s.io/kubelet => k8s.io/kubelet v0.19.0-rc.4 - k8s.io/kubernetes => k8s.io/kubernetes v1.19.0-rc.4 k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.19.0-rc.4 k8s.io/metrics => k8s.io/metrics v0.19.0-rc.4 k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.19.0-rc.4 k8s.io/sample-cli-plugin => k8s.io/sample-cli-plugin v0.19.0-rc.4 k8s.io/sample-controller => k8s.io/sample-controller v0.19.0-rc.4 ) + +// We need the fix from https://github.com/kubernetes/kubernetes/pull/94283 +replace k8s.io/kubernetes => github.com/jingxu97/kubernetes v1.3.0-alpha.3.0.20200827160453-c99083e14e00 diff --git a/go.sum b/go.sum index ae6a1f1841..ab22e5852c 100644 --- a/go.sum +++ b/go.sum @@ -54,6 +54,7 @@ github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:l github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= github.com/auth0/go-jwt-middleware v0.0.0-20170425171159-5493cabe49f7/go.mod h1:LWMyo4iOLWXHGdBki7NIht1kHru/0wM179h+d3g8ATM= github.com/aws/aws-sdk-go v1.6.10/go.mod h1:ZRmQr0FajVIyZ4ZzBYKG5P3ZqPz9IHG41ZoMu1ADI3k= +github.com/aws/aws-sdk-go v1.28.2 h1:j5IXG9CdyLfcVfICqo1PXVv+rua+QQHbkXuvuU/JF+8= github.com/aws/aws-sdk-go v1.28.2/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod 
h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= @@ -162,6 +163,8 @@ github.com/euank/go-kmsg-parser v2.0.0+incompatible/go.mod h1:MhmAMZ8V4CYH4ybgdR github.com/evanphx/json-patch v0.0.0-20190815234213-e83c0a1c26c8/go.mod h1:pmLOTb3x90VhIKxsA9yeQG5yfOkkKnkk1h+Ql8NDYDw= github.com/evanphx/json-patch v4.5.0+incompatible h1:ouOWdg56aJriqS0huScTkVXPC5IcNrDCXZ6OoTAWu7M= github.com/evanphx/json-patch v4.5.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch v4.9.0+incompatible h1:kLcOMZeuLAJvL2BPWLMIj5oaZQobrkAqrL+WFZwQses= +github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4= github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -333,7 +336,10 @@ github.com/imdario/mergo v0.3.9/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/ishidawataru/sctp v0.0.0-20190723014705-7c296d48a2b5/go.mod h1:DM4VvS+hD/kDi1U1QsX2fnZowwBhqD0Dk3bRPKF/Oc8= github.com/jimstudt/http-authentication v0.0.0-20140401203705-3eca13d6893a/go.mod h1:wK6yTYYcgjHE1Z1QtXACPDjcFJyBskHEdagmnq3vsP8= +github.com/jingxu97/kubernetes v1.3.0-alpha.3.0.20200827160453-c99083e14e00 h1:C7PUZC8k1ow7E3Sd9SpKkb+pen7Bd+TahiBm7v5Ta1M= +github.com/jingxu97/kubernetes v1.3.0-alpha.3.0.20200827160453-c99083e14e00/go.mod h1:yhT1/ltQajQsha3tnYc9QPFYSumGM45nlZdjf7WqE1A= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= +github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -565,6 +571,8 @@ go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= go.etcd.io/etcd v0.5.0-alpha.5.0.20200716221620-18dfb9cca345 h1:2gOG36vt1BhUqpzxwZLZJxUim2dHB05vw+RAn4Q6YOU= go.etcd.io/etcd v0.5.0-alpha.5.0.20200716221620-18dfb9cca345/go.mod h1:skWido08r9w6Lq/w70DO5XYIKMu4QFu1+4VsqLQuJy8= +go.etcd.io/etcd v0.5.0-alpha.5.0.20200819165624-17cef6e3e9d5 h1:Gqga3zA9tdAcfqobUGjSoCob5L3f8Dt5EuOp3ihNZko= +go.etcd.io/etcd v0.5.0-alpha.5.0.20200819165624-17cef6e3e9d5/go.mod h1:skWido08r9w6Lq/w70DO5XYIKMu4QFu1+4VsqLQuJy8= go.mongodb.org/mongo-driver v1.0.3/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.mongodb.org/mongo-driver v1.1.1/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.mongodb.org/mongo-driver v1.1.2/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= @@ -879,14 +887,14 @@ k8s.io/kube-aggregator v0.19.0-rc.4/go.mod h1:WOkoTISv7iVFk7fTTbQUoRpj+4dA/jZLmv k8s.io/kube-controller-manager v0.19.0-rc.4/go.mod h1:WDg85vnU/Kbfuo6wOTPFNb/R9Y25sMM2dh/iAD3UUtI= k8s.io/kube-openapi v0.0.0-20200427153329-656914f816f9 
h1:5NC2ITmvg8RoxoH0wgmL4zn4VZqXGsKbxrikjaQx6s4= k8s.io/kube-openapi v0.0.0-20200427153329-656914f816f9/go.mod h1:bfCVj+qXcEaE5SCvzBaqpOySr6tuCcpPKqF6HD8nyCw= +k8s.io/kube-openapi v0.0.0-20200805222855-6aeccd4b50c6 h1:+WnxoVtG8TMiudHBSEtrVL1egv36TkkJm+bA8AxicmQ= +k8s.io/kube-openapi v0.0.0-20200805222855-6aeccd4b50c6/go.mod h1:UuqjUnNftUyPE5H64/qeyjQoUZhGpeFDVdxjTeEVN2o= k8s.io/kube-proxy v0.19.0-rc.4/go.mod h1:2friA88LA6cS23RiBSUa1qMsGcN/IkigqETnIMfZ5YA= k8s.io/kube-scheduler v0.19.0-rc.4 h1:Iv/wtf8xjQl1cNVjPhmuCbPKF4Ei82sSPKYL6E7cGuU= k8s.io/kube-scheduler v0.19.0-rc.4/go.mod h1:v8ypc520PG0qK4DnzDTnHpFdCEY/WZxqd4CIyLI3m98= k8s.io/kubectl v0.19.0-rc.4 h1:x8d3ZWkubQZhM0WZjE8BBDBwWjbzmmznfa6a1NuFIfg= k8s.io/kubectl v0.19.0-rc.4/go.mod h1:Mcv3axnSyOwmupb6XZ06uAwaA+pRKJ525w3dHpH7xLk= k8s.io/kubelet v0.19.0-rc.4/go.mod h1:Shb5kTRpyOO5sY15p/8knPcVky1HHZGM/yPYckKgllI= -k8s.io/kubernetes v1.19.0-rc.4 h1:E3pgtybJgSaKE9T1yurW3a6y/yHbh6C69Ro9GnDqtng= -k8s.io/kubernetes v1.19.0-rc.4/go.mod h1:+wgXVVu9ZLggkhtx861Vq254Y/ZrjuCU9jMzVZ+e/kM= k8s.io/legacy-cloud-providers v0.19.0-rc.4/go.mod h1:KBs0kOkl0NDrAlCE8BIS23PoToVdz3Rd18yQlpc2GGI= k8s.io/metrics v0.19.0-rc.4/go.mod h1:nTrsL5F9u8RRz4Bf3AtfwNoPVahjNWD3Utxwp1oKwoY= k8s.io/sample-apiserver v0.19.0-rc.4/go.mod h1:x/uam2CKHv7dQuDjgoBvaoYxuUEkvFfsDwIFZ81Y6tc= @@ -911,6 +919,7 @@ sigs.k8s.io/kustomize v2.0.3+incompatible/go.mod h1:MkjgH3RdOWrievjo6c9T245dYlB5 sigs.k8s.io/structured-merge-diff/v3 v3.0.0-20200116222232-67a7b8c61874/go.mod h1:PlARxl6Hbt/+BC80dRLi1qAmnMqwqDg62YvvVkZjemw= sigs.k8s.io/structured-merge-diff/v3 v3.0.1-0.20200706213357-43c19bbb7fba h1:AAbnc5KQuTWKuh2QSnyghKIOTFzB0Jayv7/OFDn3Cy4= sigs.k8s.io/structured-merge-diff/v3 v3.0.1-0.20200706213357-43c19bbb7fba/go.mod h1:V06abazjHneE37ZdSY/UUwPVgcJMKI/jU5XGUjgIKoc= +sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/test/e2e/deploy/deploy.go b/test/e2e/deploy/deploy.go index 100dcedecd..0318856534 100644 --- a/test/e2e/deploy/deploy.go +++ b/test/e2e/deploy/deploy.go @@ -188,7 +188,7 @@ func WaitForPMEMDriver(c *Cluster, name, namespace string) (metricsURL string) { func CheckPMEMDriver(c *Cluster, deployment *Deployment) { pods, err := c.cs.CoreV1().Pods(deployment.Namespace).List(context.Background(), metav1.ListOptions{ - LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, deployment.Name), + LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, deployment.DeploymentLabel()), }, ) framework.ExpectNoError(err, "list PMEM-CSI pods") @@ -210,19 +210,19 @@ func CheckPMEMDriver(c *Cluster, deployment *Deployment) { // RemoveObjects deletes everything that might have been created for a // PMEM-CSI driver or operator installation (pods, daemonsets, // statefulsets, driver info, storage classes, etc.). -func RemoveObjects(c *Cluster, deploymentName string) error { +func RemoveObjects(c *Cluster, deployment *Deployment) error { // Try repeatedly, in case that communication with the API server fails temporarily. 
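+	// (Concretely: an overall deadline of three minutes, polling once per
+	// second, as set up directly below.)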
deadline, cancel := context.WithTimeout(context.Background(), 3*time.Minute) defer cancel() ticker := time.NewTicker(time.Second) - framework.Logf("deleting the %s PMEM-CSI deployment", deploymentName) + framework.Logf("deleting the %s PMEM-CSI deployment", deployment.Name) for _, h := range uninstallHooks { - h(deploymentName) + h(deployment.Name) } filter := metav1.ListOptions{ - LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, deploymentName), + LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, deployment.DeploymentLabel()), } infoDelay := 5 * time.Second infoTimestamp := time.Now().Add(infoDelay) @@ -424,7 +424,7 @@ func RemoveObjects(c *Cluster, deploymentName string) error { // check again whether all objects have been deleted. select { case <-deadline.Done(): - return fmt.Errorf("timed out while trying to delete the %s PMEM-CSI deployment", deploymentName) + return fmt.Errorf("timed out while trying to delete the %s PMEM-CSI deployment", deployment.Name) case <-ticker.C: } } @@ -454,6 +454,10 @@ type Deployment struct { // Testing is true when socat pods are available. Testing bool + + // A version of the format X.Y when installing an older + // release from the release-X.Y branch. + Version string } func (d Deployment) DeploymentMode() string { @@ -463,6 +467,18 @@ func (d Deployment) DeploymentMode() string { return "production" } +// DeploymentLabel returns the label used for objects belonging to the deployment. +// It's the same as the name minus the version. The reason for not including +// the version in the label value is that previous releases did not +// have that either. We have to stay consistent with that for up- and downgrade +// testing. +func (d Deployment) DeploymentLabel() string { + if d.Version != "" { + return d.Name[:len(d.Name)-1-len(d.Version)] + } + return d.Name +} + // FindDeployment checks whether there is a PMEM-CSI driver and/or // operator deployment in the cluster. A deployment is found via its // deployment resp. statefulset object, which must have a @@ -488,6 +504,8 @@ func FindDeployment(c *Cluster) (*Deployment, error) { return nil, nil } +var imageVersion = regexp.MustCompile(`pmem-csi-driver(?:-test)?:v(\d+\.\d+)`) + func findDriver(c *Cluster) (*Deployment, error) { list, err := c.cs.AppsV1().StatefulSets("").List(context.Background(), metav1.ListOptions{LabelSelector: deploymentLabel}) if err != nil { @@ -504,6 +522,16 @@ func findDriver(c *Cluster) (*Deployment, error) { } deployment.Namespace = list.Items[0].Namespace + // Derive the version from the image tag. The annotation doesn't include it. + for _, container := range list.Items[0].Spec.Template.Spec.Containers { + m := imageVersion.FindStringSubmatch(container.Image) + if m != nil { + deployment.Version = m[1] + deployment.Name = deployment.Name + "-" + deployment.Version + break + } + } + // Currently we don't support parallel installations, so all // objects must belong to each other. for _, item := range list.Items { @@ -551,7 +579,7 @@ var allDeployments = []string{ "operator-lvm-production", "operator-direct-production", // Uses kube-system, to ensure that deployment in a namespace also works. } -var deploymentRE = regexp.MustCompile(`^(operator)?-?(\w*)?-?(testing|production)?$`) +var deploymentRE = regexp.MustCompile(`^(operator)?-?(\w*)?-?(testing|production)?-?([0-9\.]*)$`) // Parse the deployment name and sets fields accordingly. 
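+// For illustration only: with the version suffix added to deploymentRE above
+// for skew testing, a name like "lvm-testing-0.7" is expected to yield the
+// "lvm" device mode, the "testing" flavor and Version "0.7", while a plain
+// "lvm-testing" leaves Version empty.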
func Parse(deploymentName string) (*Deployment, error) { @@ -577,6 +605,7 @@ func Parse(deploymentName string) (*Deployment, error) { return nil, fmt.Errorf("deployment name %s: %v", deploymentName, err) } } + deployment.Version = matches[4] return deployment, nil } @@ -603,71 +632,11 @@ func EnsureDeployment(deploymentName string) *Deployment { ginkgo.CurrentGinkgoTestDescription().FullTestText, deployment.Namespace, )) - c, err := NewCluster(f.ClientSet, f.DynamicClient) // Remember list of volumes before test, using out-of-band host commands (i.e. not CSI API). prevVol = GetHostVolumes(deployment) - framework.ExpectNoError(err, "get cluster information") - running, err := FindDeployment(c) - framework.ExpectNoError(err, "check for PMEM-CSI components") - if running != nil { - if reflect.DeepEqual(deployment, running) { - framework.Logf("reusing existing %s PMEM-CSI components", deployment.Name) - // Do some sanity checks on the running deployment before the test. - if deployment.HasDriver { - WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace) - CheckPMEMDriver(c, deployment) - } - if deployment.HasOperator { - WaitForOperator(c, deployment.Namespace) - } - return - } - framework.Logf("have %s PMEM-CSI deployment, want %s -> delete existing deployment", running.Name, deployment.Name) - err := RemoveObjects(c, running.Name) - framework.ExpectNoError(err, "remove PMEM-CSI deployment") - } - - if deployment.HasOperator { - // At the moment, the only supported deployment method is via test/start-operator.sh. - cmd := exec.Command("test/start-operator.sh") - cmd.Dir = os.Getenv("REPO_ROOT") - cmd.Env = append(os.Environ(), - "TEST_OPERATOR_NAMESPACE="+deployment.Namespace, - "TEST_OPERATOR_DEPLOYMENT="+deployment.Name) - cmd.Stdout = ginkgo.GinkgoWriter - cmd.Stderr = ginkgo.GinkgoWriter - err = cmd.Run() - framework.ExpectNoError(err, "create operator deployment: %q", deployment.Name) - - WaitForOperator(c, deployment.Namespace) - } - if deployment.HasDriver { - if deployment.HasOperator { - // Deploy driver through operator. - dep := deployment.GetDriverDeployment() - EnsureDeploymentCR(f, dep) - } else { - // Deploy with script. - cmd := exec.Command("test/setup-deployment.sh") - cmd.Dir = os.Getenv("REPO_ROOT") - cmd.Env = append(os.Environ(), - "TEST_DEPLOYMENT_QUIET=quiet", - "TEST_DEPLOYMENTMODE="+deployment.DeploymentMode(), - "TEST_DEVICEMODE="+string(deployment.Mode)) - cmd.Stdout = ginkgo.GinkgoWriter - cmd.Stderr = ginkgo.GinkgoWriter - err = cmd.Run() - framework.ExpectNoError(err, "create %s PMEM-CSI deployment", deployment.Name) - } - - // We check for a running driver the same way at the moment, by directly - // looking at the driver state. Long-term we want the operator to do that - // checking itself. - WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace) - CheckPMEMDriver(c, deployment) - } + EnsureDeploymentNow(f, deployment) for _, h := range installHooks { h(deployment) @@ -695,6 +664,102 @@ func EnsureDeployment(deploymentName string) *Deployment { return deployment } +// EnsureDeploymentNow checks the currently running driver and replaces it if necessary. 
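+//
+// A minimal sketch of the intended call sequence, assuming that a release-0.7
+// branch exists; the "lvm-testing-0.7" name is purely an example:
+//
+//	deployment, err := Parse("lvm-testing-0.7")
+//	framework.ExpectNoError(err, "parse deployment name")
+//	EnsureDeploymentNow(f, deployment)
+//
+// This mirrors how the version skew tests switch between releases.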
+func EnsureDeploymentNow(f *framework.Framework, deployment *Deployment) { + c, err := NewCluster(f.ClientSet, f.DynamicClient) + framework.ExpectNoError(err, "get cluster information") + running, err := FindDeployment(c) + framework.ExpectNoError(err, "check for PMEM-CSI components") + if running != nil { + if reflect.DeepEqual(deployment, running) { + framework.Logf("reusing existing %s PMEM-CSI components", deployment.Name) + // Do some sanity checks on the running deployment before the test. + if deployment.HasDriver { + WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace) + CheckPMEMDriver(c, deployment) + } + if deployment.HasOperator { + WaitForOperator(c, deployment.Namespace) + } + return + } + framework.Logf("have %s PMEM-CSI deployment, want %s -> delete existing deployment", running.Name, deployment.Name) + err := RemoveObjects(c, running) + framework.ExpectNoError(err, "remove PMEM-CSI deployment") + } + + if deployment.HasOperator { + if deployment.Version != "" { + framework.Failf("installing PMEM-CSI %s via the operator is not supported", deployment.Version) + } + + // At the moment, the only supported deployment method is via test/start-operator.sh. + cmd := exec.Command("test/start-operator.sh") + cmd.Dir = os.Getenv("REPO_ROOT") + cmd.Env = append(os.Environ(), + "TEST_OPERATOR_NAMESPACE="+deployment.Namespace, + "TEST_OPERATOR_DEPLOYMENT="+deployment.Name) + cmd.Stdout = ginkgo.GinkgoWriter + cmd.Stderr = ginkgo.GinkgoWriter + err = cmd.Run() + framework.ExpectNoError(err, "create operator deployment: %q", deployment.Name) + + WaitForOperator(c, deployment.Namespace) + } + if deployment.HasDriver { + if deployment.HasOperator { + // Deploy driver through operator. + dep := deployment.GetDriverDeployment() + EnsureDeploymentCR(f, dep) + } else { + // Deploy with script. + root := os.Getenv("REPO_ROOT") + env := os.Environ() + if deployment.Version != "" { + // Clean check out in _work/pmem-csi-release-. + // Pulling from remote must be done before running the test. + workRoot := root + "/_work/pmem-csi-release-" + deployment.Version + err := os.RemoveAll(workRoot) + framework.ExpectNoError(err, "remove PMEM-CSI source code") + cmd := exec.Command("git", "clone", "--shared", "--branch=release-"+deployment.Version, root, workRoot) + cmd.Stdout = ginkgo.GinkgoWriter + cmd.Stderr = ginkgo.GinkgoWriter + err = cmd.Run() + framework.ExpectNoError(err, "check out release-%s of PMEM-CSI", deployment.Version) + root = workRoot + + // The release branch does not pull from Docker Hub by default, + // we have to select that explicitly. + env = append(env, "TEST_PMEM_REGISTRY=intel") + + // The setup script expects to have + // the same _work as in the normal + // root. + err = os.Symlink("../../_work", workRoot+"/_work") + framework.ExpectNoError(err, "symlink the _work directory") + } + cmd := exec.Command("test/setup-deployment.sh") + cmd.Dir = root + env = append(env, + "REPO_ROOT="+root, + "TEST_DEPLOYMENT_QUIET=quiet", + "TEST_DEPLOYMENTMODE="+deployment.DeploymentMode(), + "TEST_DEVICEMODE="+string(deployment.Mode)) + cmd.Env = env + cmd.Stdout = ginkgo.GinkgoWriter + cmd.Stderr = ginkgo.GinkgoWriter + err = cmd.Run() + framework.ExpectNoError(err, "create %s PMEM-CSI deployment", deployment.Name) + } + + // We check for a running driver the same way at the moment, by directly + // looking at the driver state. Long-term we want the operator to do that + // checking itself. 
+ WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace) + CheckPMEMDriver(c, deployment) + } +} + // GetDriverDeployment returns the spec for the driver deployment that is used // for deployments like operator-lvm-production. func (d *Deployment) GetDriverDeployment() api.Deployment { @@ -708,12 +773,12 @@ func (d *Deployment) GetDriverDeployment() api.Deployment { ObjectMeta: metav1.ObjectMeta{ Name: "pmem-csi", Labels: map[string]string{ - deploymentLabel: d.Name, + deploymentLabel: d.DeploymentLabel(), }, }, Spec: api.DeploymentSpec{ Labels: map[string]string{ - deploymentLabel: d.Name, + deploymentLabel: d.DeploymentLabel(), }, // TODO: replace pmemcsidriver.DeviceMode with api.DeviceMode everywhere // and remove this cast here. @@ -729,7 +794,7 @@ func (d *Deployment) GetDriverDeployment() api.Deployment { // DeleteAllPods deletes all currently running pods that belong to the deployment. func (d Deployment) DeleteAllPods(c *Cluster) error { listOptions := metav1.ListOptions{ - LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, d.Name), + LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, d.DeploymentLabel()), } pods, err := c.cs.CoreV1().Pods(d.Namespace).List(context.Background(), listOptions) if err != nil { diff --git a/test/e2e/driver/driver.go b/test/e2e/driver/driver.go index b6f8f8d1be..ea3d14f519 100644 --- a/test/e2e/driver/driver.go +++ b/test/e2e/driver/driver.go @@ -35,6 +35,21 @@ import ( . "github.com/onsi/gomega" ) +// DynamicDriver has the ability to return a modified copy of itself with additional options set. +type DynamicDriver interface { + testsuites.TestDriver + + // WithStorageClassNameSuffix sets a suffix which gets added + // to the name of all future storage classes that + // GetDynamicProvisionStorageClass creates. Can be used to + // create more than one class per test. + WithStorageClassNameSuffix(suffix string) DynamicDriver + + // WithParameters sets parameters that are used in future + // storage classes and CSI inline volumes. + WithParameters(parameters map[string]string) DynamicDriver +} + func New(name, csiDriverName string, fsTypes []string, scManifests map[string]string) testsuites.TestDriver { if fsTypes == nil { fsTypes = []string{"", "ext4", "xfs"} @@ -79,11 +94,14 @@ type manifestDriver struct { manifests []string scManifest map[string]string cleanup func() + scSuffix string + parameters map[string]string } var _ testsuites.TestDriver = &manifestDriver{} var _ testsuites.DynamicPVTestDriver = &manifestDriver{} var _ testsuites.EphemeralTestDriver = &manifestDriver{} +var _ DynamicDriver = &manifestDriver{} func (m *manifestDriver) GetDriverInfo() *testsuites.DriverInfo { return &m.driverInfo @@ -112,6 +130,17 @@ func (m *manifestDriver) GetDynamicProvisionStorageClass(config *testsuites.PerT sc, ok := items[0].(*storagev1.StorageClass) Expect(ok).To(BeTrue(), "storage class from %s", scManifest) sc.Provisioner = m.csiDriverName + sc.Name = config.Prefix + "-" + sc.Name + + // Add additional parameters, if any. + for name, value := range m.parameters { + if sc.Parameters == nil { + sc.Parameters = map[string]string{} + } + sc.Parameters[name] = value + } + sc.Name += m.scSuffix + return sc } @@ -164,9 +193,13 @@ func (m *manifestDriver) GetVolume(config *testsuites.PerTestConfig, volumeNumbe attributes := map[string]string{"size": m.driverInfo.SupportedSizeRange.Min} shared := false readOnly := false + // TODO (?): this trick with the driver name might no longer be necessary. 
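+	// The "-kata" suffix convention is an assumption of the current test
+	// setup: driver instances meant for Kata Containers are expected to
+	// carry that suffix in their name.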
if strings.HasSuffix(m.driverInfo.Name, "-kata") { attributes["kataContainers"] = "true" } + for name, value := range m.parameters { + attributes[name] = value + } return attributes, shared, readOnly } @@ -176,3 +209,15 @@ func (m *manifestDriver) GetCSIDriverName(config *testsuites.PerTestConfig) stri // We can't use m.driverInfo.Name as its not necessarily the real driver name return m.csiDriverName } + +func (m *manifestDriver) WithParameters(parameters map[string]string) DynamicDriver { + m2 := *m + m2.parameters = parameters + return &m2 +} + +func (m *manifestDriver) WithStorageClassNameSuffix(suffix string) DynamicDriver { + m2 := *m + m2.scSuffix = suffix + return &m2 +} diff --git a/test/e2e/driver/pattern.go b/test/e2e/driver/pattern.go new file mode 100644 index 0000000000..53d81d0d7a --- /dev/null +++ b/test/e2e/driver/pattern.go @@ -0,0 +1,68 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package driver + +import ( + "encoding/json" + "fmt" + "strings" + + v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/test/e2e/storage/testpatterns" +) + +// StorageClassParameters can be used in combination with DynamicDriver to implement test patterns +// that encode additional parameters in the test pattern name. This is a workaround for the +// fixed content of the original test pattern struct. 
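+//
+// As a rough example of the encoding, a dynamically provisioned ext4 volume
+// without extra parameters turns into a pattern name along the lines of
+//
+//	DynamicPV Filesystem {"FSType":"ext4","Parameters":null}
+//
+// where the trailing part is whatever json.Marshal produces for
+// StorageClassParameters; DecodeTestPatternName reverses this.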
+type StorageClassParameters struct { + FSType string + Parameters map[string]string +} + +func (scp *StorageClassParameters) Encode() (string, error) { + data, err := json.Marshal(scp) + return string(data), err +} + +func (scp *StorageClassParameters) MustEncode() string { + data, err := scp.Encode() + if err != nil { + panic(err) + } + return data +} + +func (scp *StorageClassParameters) Decode(parameters string) error { + return json.Unmarshal([]byte(parameters), scp) +} + +func EncodeTestPatternName(volType testpatterns.TestVolType, volMode v1.PersistentVolumeMode, scp StorageClassParameters) string { + return fmt.Sprintf("%s %s %s", volType, volMode, scp.MustEncode()) +} + +func DecodeTestPatternName(name string) (volType testpatterns.TestVolType, volMode v1.PersistentVolumeMode, scp *StorageClassParameters, err error) { + parts := strings.SplitN(name, " ", 3) + if len(parts) != 3 { + err = fmt.Errorf("not of format ' {}': %s", name) + return + } + scp = &StorageClassParameters{} + volType = testpatterns.TestVolType(parts[0]) + volMode = v1.PersistentVolumeMode(parts[1]) + err = scp.Decode(parts[2]) + return +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 18d5188f38..bc55ea3dde 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -36,6 +36,7 @@ import ( _ "github.com/intel/pmem-csi/test/e2e/operator" _ "github.com/intel/pmem-csi/test/e2e/storage" _ "github.com/intel/pmem-csi/test/e2e/tls" + _ "github.com/intel/pmem-csi/test/e2e/versionskew" "github.com/intel/pmem-csi/test/e2e/deploy" ) diff --git a/test/e2e/storage/csi_volumes.go b/test/e2e/storage/csi_volumes.go index 6f3ccc77b7..695234f748 100644 --- a/test/e2e/storage/csi_volumes.go +++ b/test/e2e/storage/csi_volumes.go @@ -28,6 +28,7 @@ import ( "github.com/intel/pmem-csi/test/e2e/ephemeral" "github.com/intel/pmem-csi/test/e2e/storage/dax" "github.com/intel/pmem-csi/test/e2e/storage/scheduler" + "github.com/intel/pmem-csi/test/e2e/versionskew" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -61,6 +62,7 @@ var _ = deploy.DescribeForAll("E2E", func(d *deploy.Deployment) { testsuites.InitVolumesTestSuite, dax.InitDaxTestSuite, scheduler.InitSchedulerTestSuite, + versionskew.InitSkewTestSuite, } if ephemeral.Supported { diff --git a/test/e2e/storage/dax/dax.go b/test/e2e/storage/dax/dax.go index 4ee1aa3cb8..48100199f5 100644 --- a/test/e2e/storage/dax/dax.go +++ b/test/e2e/storage/dax/dax.go @@ -77,11 +77,14 @@ type local struct { config *testsuites.PerTestConfig testCleanup func() - resource *testsuites.VolumeResource - root string - daxCheckBinary string + resource *testsuites.VolumeResource + root string } +const ( + daxCheckBinary = "_work/pmem-dax-check" +) + func (p *daxTestSuite) DefineTests(driver testsuites.TestDriver, pattern testpatterns.TestPattern) { var l local @@ -92,8 +95,7 @@ func (p *daxTestSuite) DefineTests(driver testsuites.TestDriver, pattern testpat // Build pmem-dax-check helper binary. 
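+		// Roughly equivalent to running, from the repository root:
+		//	go build -o _work/pmem-dax-check ./test/cmd/pmem-dax-check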
l.root = os.Getenv("REPO_ROOT") - l.daxCheckBinary = "_work/pmem-dax-check" - build := exec.Command("/bin/sh", "-c", os.Getenv("GO")+" build -o "+l.daxCheckBinary+" ./test/cmd/pmem-dax-check") + build := exec.Command("/bin/sh", "-c", os.Getenv("GO")+" build -o "+daxCheckBinary+" ./test/cmd/pmem-dax-check") build.Stdout = GinkgoWriter build.Stderr = GinkgoWriter build.Dir = l.root @@ -124,18 +126,36 @@ func (p *daxTestSuite) DefineTests(driver testsuites.TestDriver, pattern testpat init() defer cleanup() - l.testDaxInPod(f, l.resource.Pattern.VolMode, l.resource.VolSource, l.config, withKataContainers) + testDaxInPod(f, l.root, l.resource.Pattern.VolMode, l.resource.VolSource, l.config, withKataContainers) }) } -func (l local) testDaxInPod( +func testDaxInPod( f *framework.Framework, + root string, volumeMode v1.PersistentVolumeMode, source *v1.VolumeSource, config *testsuites.PerTestConfig, withKataContainers bool, ) { + pod := CreatePod(f, volumeMode, source, config, withKataContainers) + defer func() { + DeletePod(f, pod) + }() + checkWithNormalRuntime := testDax(f, pod, root, volumeMode, source, withKataContainers) + DeletePod(f, pod) + if checkWithNormalRuntime { + testDaxOutside(f, pod, root) + } +} +func CreatePod( + f *framework.Framework, + volumeMode v1.PersistentVolumeMode, + source *v1.VolumeSource, + config *testsuites.PerTestConfig, + withKataContainers bool, +) *v1.Pod { const ( volPath = "/vol1" volName = "vol1" @@ -252,23 +272,33 @@ func (l local) testDaxInPod( ns := f.Namespace.Name podClient := f.PodClientNS(ns) createdPod := podClient.Create(pod) - defer func() { - By("delete the pod") - podClient.DeleteSync(createdPod.Name, metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout) - }() podErr := e2epod.WaitForPodRunningInNamespace(f.ClientSet, createdPod) framework.ExpectNoError(podErr, "running pod") + + return createdPod +} + +func testDax( + f *framework.Framework, + pod *v1.Pod, + root string, + volumeMode v1.PersistentVolumeMode, + source *v1.VolumeSource, + withKataContainers bool, +) bool { + ns := f.Namespace.Name + containerName := pod.Spec.Containers[0].Name if volumeMode == v1.PersistentVolumeBlock { By("mounting raw block device") // TODO: remove the workaround above and script invocation here. - pmempod.RunInPod(f, l.root, nil, "/data/create-dax-dev.sh && mkfs.ext4 -b 4096 /dax-dev && mkdir -p /mnt && mount -odax /dax-dev /mnt", ns, pod.Name, containerName) + pmempod.RunInPod(f, root, nil, "/data/create-dax-dev.sh && mkfs.ext4 -b 4096 /dax-dev && mkdir -p /mnt && mount -odax /dax-dev /mnt", ns, pod.Name, containerName) } By("checking that missing DAX support is detected") - pmempod.RunInPod(f, l.root, []string{l.daxCheckBinary}, l.daxCheckBinary+" /no-dax; if [ $? -ne 1 ]; then echo should have reported missing DAX >&2; exit 1; fi", ns, pod.Name, containerName) + pmempod.RunInPod(f, root, []string{daxCheckBinary}, daxCheckBinary+" /no-dax; if [ $? 
-ne 1 ]; then echo should have reported missing DAX >&2; exit 1; fi", ns, pod.Name, containerName) By("checking volume for DAX support") - pmempod.RunInPod(f, l.root, []string{l.daxCheckBinary}, "lsblk; mount | grep /mnt; "+l.daxCheckBinary+" /mnt/daxtest", ns, pod.Name, containerName) + pmempod.RunInPod(f, root, []string{daxCheckBinary}, "lsblk; mount | grep /mnt; "+daxCheckBinary+" /mnt/daxtest", ns, pod.Name, containerName) // Data written in a container running under Kata Containers // should be visible also in a normal container, unless the @@ -277,27 +307,41 @@ func (l local) testDaxInPod( checkWithNormalRuntime := withKataContainers && source.CSI == nil if checkWithNormalRuntime { By("creating file for usage under normal pod") - pmempod.RunInPod(f, l.root, nil, "touch /mnt/hello-world", ns, pod.Name, containerName) + pmempod.RunInPod(f, root, nil, "touch /mnt/hello-world", ns, pod.Name, containerName) } + return checkWithNormalRuntime +} + +func DeletePod( + f *framework.Framework, + pod *v1.Pod, +) { By(fmt.Sprintf("Deleting pod %s", pod.Name)) err := e2epod.DeletePodWithWait(f.ClientSet, pod) framework.ExpectNoError(err, "while deleting pod") +} - if checkWithNormalRuntime { - // Check for data written earlier. - pod.Spec.RuntimeClassName = nil - pod.Name = "data-volume-test" - - By(fmt.Sprintf("Creating pod %s", pod.Name)) - createdPod = podClient.Create(pod) - podErr := e2epod.WaitForPodRunningInNamespace(f.ClientSet, createdPod) - framework.ExpectNoError(podErr, "running second pod") - By("checking for previously created file under normal pod") - pmempod.RunInPod(f, l.root, nil, "ls -l /mnt/hello-world", ns, pod.Name, containerName) - - By(fmt.Sprintf("Deleting pod %s", pod.Name)) - err := e2epod.DeletePodWithWait(f.ClientSet, pod) - framework.ExpectNoError(err, "while deleting pod") - } +func testDaxOutside( + f *framework.Framework, + pod *v1.Pod, + root string, +) { + // Check for data written earlier. + pod.Spec.RuntimeClassName = nil + pod.Name = "data-volume-test" + + By(fmt.Sprintf("Creating pod %s", pod.Name)) + ns := f.Namespace.Name + podClient := f.PodClientNS(ns) + pod = podClient.Create(pod) + podErr := e2epod.WaitForPodRunningInNamespace(f.ClientSet, pod) + framework.ExpectNoError(podErr, "running second pod") + By("checking for previously created file under normal pod") + containerName := pod.Spec.Containers[0].Name + pmempod.RunInPod(f, root, nil, "ls -l /mnt/hello-world", ns, pod.Name, containerName) + + By(fmt.Sprintf("Deleting pod %s", pod.Name)) + err := e2epod.DeletePodWithWait(f.ClientSet, pod) + framework.ExpectNoError(err, "while deleting pod") } diff --git a/test/e2e/versionskew/versionskew.go b/test/e2e/versionskew/versionskew.go new file mode 100644 index 0000000000..c02f94b7d9 --- /dev/null +++ b/test/e2e/versionskew/versionskew.go @@ -0,0 +1,293 @@ +/* +Copyright 2020 Intel Corporation. + +SPDX-License-Identifier: Apache-2.0 +*/ + +/* Version skew testing ensures that APIs and state is compatible +across up- and downgrades. The driver for older releases is installed +by checking out the deployment YAML files from an older release. + +The operator is not covered yet. 
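+
+In rough outline, each up- or downgrade test (see testVersionChange below)
+creates a few volumes, starts a pod that uses one of them, switches the
+driver deployment to the other release via deploy.EnsureDeploymentNow,
+starts a pod that uses another volume, and finally removes all pods and
+volumes again.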
+*/ +package versionskew + +import ( + "context" + "fmt" + + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/e2e/storage/testpatterns" + "k8s.io/kubernetes/test/e2e/storage/testsuites" + + "github.com/intel/pmem-csi/pkg/pmem-csi-driver/parameters" + "github.com/intel/pmem-csi/test/e2e/deploy" + "github.com/intel/pmem-csi/test/e2e/driver" + "github.com/intel/pmem-csi/test/e2e/storage/dax" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + e2estatefulset "k8s.io/kubernetes/test/e2e/framework/statefulset" + e2evolume "k8s.io/kubernetes/test/e2e/framework/volume" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +const ( + base = "0.7" +) + +type skewTestSuite struct { + tsInfo testsuites.TestSuiteInfo +} + +var _ testsuites.TestSuite = &skewTestSuite{} + +var ( + // The version skew tests run with combinations of the + // following volume parameters. + fsTypes = []string{"", "ext4"} + volTypes = []testpatterns.TestVolType{testpatterns.CSIInlineVolume, testpatterns.DynamicPV} + volParameters = []map[string]string{ + nil, + { + string(parameters.CacheSize): "2", + string(parameters.PersistencyModel): string(parameters.PersistencyCache), + }, + } + volModes = []v1.PersistentVolumeMode{ + v1.PersistentVolumeFilesystem, + v1.PersistentVolumeBlock, + } +) + +// InitSkewTestSuite dynamically generates testcases for version skew testing. +// Each test case represents a certain kind of volume supported by PMEM-CSI. +func InitSkewTestSuite() testsuites.TestSuite { + suite := &skewTestSuite{ + tsInfo: testsuites.TestSuiteInfo{ + Name: "skew", + }, + } + + haveCSIInline := false + haveBlock := false + for _, volType := range volTypes { + for _, fs := range fsTypes { + for _, parameters := range volParameters { + scp := driver.StorageClassParameters{ + FSType: fs, + Parameters: parameters, + } + for _, volMode := range volModes { + pattern := testpatterns.TestPattern{ + Name: driver.EncodeTestPatternName(volType, volMode, scp), + VolType: volType, + VolMode: volMode, + FsType: fs, + } + if volType == testpatterns.CSIInlineVolume { + if haveCSIInline { + // Only generate a single test pattern for inline volumes + // because we don't want the number of testcases to explode. + continue + } + haveCSIInline = true + } + if volMode == v1.PersistentVolumeBlock { + if haveBlock { + // Same for raw block. + continue + } + haveBlock = true + } + suite.tsInfo.TestPatterns = append(suite.tsInfo.TestPatterns, pattern) + } + } + } + } + + return suite +} + +func (p *skewTestSuite) GetTestSuiteInfo() testsuites.TestSuiteInfo { + return p.tsInfo +} + +func (p *skewTestSuite) SkipRedundantSuite(driver testsuites.TestDriver, pattern testpatterns.TestPattern) { +} + +type local struct { + config *testsuites.PerTestConfig + testCleanup func() + + unused, usedBefore, usedAfter *testsuites.VolumeResource +} + +func (p *skewTestSuite) DefineTests(driver testsuites.TestDriver, pattern testpatterns.TestPattern) { + var l local + + f := framework.NewDefaultFramework("skew") + + init := func(all bool) { + l = local{} + l.config, l.testCleanup = driver.PrepareTest(f) + + // Now do the more expensive test initialization. We potentially create more than one + // storage class, so each resource needs a different prefix. 
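+		// The suffixes end up in the storage class names (via
+		// WithStorageClassNameSuffix), so the "-unused", "-before" and
+		// "-after" volumes can coexist; the exact class names also depend
+		// on the per-test prefix.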
+ l.unused = createVolumeResource(driver, l.config, "-unused", pattern) + if all { + l.usedBefore = createVolumeResource(driver, l.config, "-before", pattern) + l.usedAfter = createVolumeResource(driver, l.config, "-after", pattern) + } + } + + cleanup := func() { + if l.unused != nil { + l.unused.CleanupResource() + l.unused = nil + } + + if l.usedBefore != nil { + l.usedBefore.CleanupResource() + l.usedBefore = nil + } + + if l.usedAfter != nil { + l.usedAfter.CleanupResource() + l.usedAfter = nil + } + + if l.testCleanup != nil { + l.testCleanup() + l.testCleanup = nil + } + } + + testVersionChange := func(otherDeploymentName string) { + withKataContainers := false + + // Create volumes. + init(true) + defer cleanup() + + // Use some volume before the up- or downgrade + podBefore := dax.CreatePod(f, l.usedBefore.Pattern.VolMode, l.usedBefore.VolSource, l.config, withKataContainers) + + // Change driver releases. + deployment, err := deploy.Parse(otherDeploymentName) + if err != nil { + framework.Failf("internal error while parsing %s: %v", otherDeploymentName, err) + } + deploy.EnsureDeploymentNow(f, deployment) + + // Use some other volume. + podAfter := dax.CreatePod(f, l.usedAfter.Pattern.VolMode, l.usedAfter.VolSource, l.config, withKataContainers) + + // Remove everything. + dax.DeletePod(f, podBefore) + dax.DeletePod(f, podAfter) + cleanup() + } + + // This changes controller and node versions at the same time. + It("everthing [Slow]", func() { + // First try the downgrade direction. We rely here on + // the driver being named after a deployment (see + // csi_volumes.go). + currentDeploymentName := driver.GetDriverInfo().Name + oldDeploymentName := currentDeploymentName + "-" + base + testVersionChange(oldDeploymentName) + + // Now that older driver is running, do the same for + // an upgrade. When the test is done, the cluster is + // back in the same state as before. + testVersionChange(currentDeploymentName) + }) + + // This test combines controller and node from different releases + // and checks that they can work together. This can happen when + // the operator mutates the deployment objects and the change isn't + // applied everywhere at once. + // + // We change the controller because that side is easier to modify + // (scale down, change spec, scale up) and test only one direction + // (old nodes, new controller) because that direction is more likely + // and if there compatibility issues, then hopefully the direction + // of the skew won't matter. + It("controller", func() { + withKataContainers := false + c, err := deploy.NewCluster(f.ClientSet, f.DynamicClient) + + // Get the current controller image. + // + // The test has to make some assumptions about our deployments, + // like "controller is in a statefulset" and what its name is. + // The test also relies on command line parameters staying + // compatible. If we ever change that, we need to add some extra + // logic here. + controllerSet, err := f.ClientSet.AppsV1().StatefulSets("default").Get(context.Background(), "pmem-csi-controller", metav1.GetOptions{}) + framework.ExpectNoError(err, "get controller") + currentImage := controllerSet.Spec.Template.Spec.Containers[0].Image + Expect(currentImage).To(ContainSubstring("pmem-csi")) + + // Now downgrade. 
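+		// With base = "0.7" and a driver that is named after the current
+		// deployment (for example "lvm-testing"), this selects the
+		// "lvm-testing-0.7" deployment and installs it from the release-0.7
+		// branch; the name here is only an illustration.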
+ currentDeploymentName := driver.GetDriverInfo().Name + otherDeploymentName := currentDeploymentName + "-" + base + deployment, err := deploy.Parse(otherDeploymentName) + if err != nil { + framework.Failf("internal error while parsing %s: %v", otherDeploymentName, err) + } + deploy.EnsureDeploymentNow(f, deployment) + deployment, err = deploy.FindDeployment(c) + framework.ExpectNoError(err, "find downgraded deployment") + Expect(deployment.Version).NotTo(BeEmpty(), "should be running an old release") + + // Update the controller image. + setImage := func(newImage string) string { + By(fmt.Sprintf("changing controller image to %s", newImage)) + controllerSet, err := f.ClientSet.AppsV1().StatefulSets("default").Get(context.Background(), "pmem-csi-controller", metav1.GetOptions{}) + framework.ExpectNoError(err, "get controller") + oldImage := controllerSet.Spec.Template.Spec.Containers[0].Image + controllerSet.Spec.Template.Spec.Containers[0].Image = newImage + controllerSet, err = f.ClientSet.AppsV1().StatefulSets("default").Update(context.Background(), controllerSet, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "update controller") + + // Ensure that the stateful set runs the modified image. + e2estatefulset.Restart(f.ClientSet, controllerSet) + + return oldImage + } + oldImage := setImage(currentImage) + // Strictly speaking, we could also leave a broken deployment behind because the next + // test will want to start with a deployment of the current release and thus will + // reinstall anyway, but it is cleaner this way. + defer setImage(oldImage) + + // check that PMEM-CSI is up again. + framework.ExpectNoError(err, "get cluster information") + deploy.WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace) + + // This relies on FindDeployment getting the version number from the image. + deployment, err = deploy.FindDeployment(c) + framework.ExpectNoError(err, "find modified deployment") + Expect(deployment.Version).To(BeEmpty(), "should be running a current release") // TODO: what about testing 0.8? + + // Now that we are in a version skewed state, try some simple interaction between + // controller and node by creating a volume and using it. This makes sense + // even for CSI inline volumes because those may invoke the scheduler extensions. + init(false) + defer cleanup() + pod := dax.CreatePod(f, l.unused.Pattern.VolMode, l.unused.VolSource, l.config, withKataContainers) + dax.DeletePod(f, pod) + }) +} + +// createVolumeResource takes one of the test patterns prepared by InitSkewTestSuite and +// creates a volume for it. +func createVolumeResource(pmemDriver testsuites.TestDriver, config *testsuites.PerTestConfig, suffix string, pattern testpatterns.TestPattern) *testsuites.VolumeResource { + _, _, scp, err := driver.DecodeTestPatternName(pattern.Name) + Expect(err).NotTo(HaveOccurred(), "decode test pattern name") + pmemDriver = pmemDriver.(driver.DynamicDriver).WithStorageClassNameSuffix(suffix).WithParameters(scp.Parameters) + return testsuites.CreateVolumeResource(pmemDriver, config, pattern, e2evolume.SizeRange{}) +}
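+
+// A minimal sketch of how the pieces above fit together, with illustrative
+// names ("lvm-testing") and no error handling:
+//
+//	scp := driver.StorageClassParameters{FSType: "ext4"}
+//	pattern := testpatterns.TestPattern{
+//		Name:    driver.EncodeTestPatternName(testpatterns.DynamicPV, v1.PersistentVolumeFilesystem, scp),
+//		VolType: testpatterns.DynamicPV,
+//		VolMode: v1.PersistentVolumeFilesystem,
+//		FsType:  "ext4",
+//	}
+//	vol := createVolumeResource(pmemDriver, config, "-unused", pattern)
+//	old, _ := deploy.Parse("lvm-testing-" + base)
+//	deploy.EnsureDeploymentNow(f, old)
+//	// ... use vol with the other release, then clean up.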