diff --git a/deploy/kustomize/operator/operator.yaml b/deploy/kustomize/operator/operator.yaml index 323fb13e88..435fce085b 100644 --- a/deploy/kustomize/operator/operator.yaml +++ b/deploy/kustomize/operator/operator.yaml @@ -148,7 +148,8 @@ spec: fieldRef: fieldPath: metadata.name - name: OPERATOR_NAME - # This should match to the operator container name + # This must match to the operator container name. + # The operator uses it to find its image. value: "pmem-csi-operator" volumeMounts: - name: tmp diff --git a/pkg/pmem-csi-operator/controller/deployment/deployment_controller_test.go b/pkg/pmem-csi-operator/controller/deployment/deployment_controller_test.go index 702f09e4c0..f87b5cc8f8 100644 --- a/pkg/pmem-csi-operator/controller/deployment/deployment_controller_test.go +++ b/pkg/pmem-csi-operator/controller/deployment/deployment_controller_test.go @@ -185,11 +185,12 @@ func TestDeploymentController(t *testing.T) { testIt := func(t *testing.T, testK8sVersion version.Version) { type testContext struct { - c client.Client - cs kubernetes.Interface - rc reconcile.Reconciler - evWatcher watch.Interface - events []*corev1.Event + c client.Client + cs kubernetes.Interface + rc reconcile.Reconciler + evWatcher watch.Interface + events []*corev1.Event + resourceVersions map[string]string } const ( testNamespace = "test-namespace" @@ -209,9 +210,11 @@ func TestDeploymentController(t *testing.T) { } setup := func(t *testing.T) *testContext { - tc := &testContext{} - tc.c = fake.NewFakeClient() - tc.cs = cgfake.NewSimpleClientset() + tc := &testContext{ + c: fake.NewFakeClient(), + cs: cgfake.NewSimpleClientset(), + resourceVersions: map[string]string{}, + } tc.rc = newReconcileDeployment(tc.c, tc.cs) tc.evWatcher = tc.rc.(*deployment.ReconcileDeployment).EventBroadcaster().StartEventWatcher(func(ev *corev1.Event) { // Discard consecutive duplicate events, mimicking the EventAggregator behavior @@ -245,14 +248,20 @@ func TestDeploymentController(t *testing.T) { require.ElementsMatch(t, events, expectedEvents, "events must match") } - validateDriver := func(t *testing.T, tc *testContext, dep *api.Deployment, expectedEvents []string) { + validateDriver := func(t *testing.T, tc *testContext, dep *api.Deployment, expectedEvents []string, wasUpdated bool) { // We may have to fill in some defaults, so make a copy first. dep = dep.DeepCopyObject().(*api.Deployment) if dep.Spec.Image == "" { dep.Spec.Image = testDriverImage } - require.NoError(t, validate.DriverDeployment(tc.c, testK8sVersion, testNamespace, *dep), "validate deployment") + // If the CR was not updated, then objects should still be the same as they were initially. + rv := tc.resourceVersions + if wasUpdated { + rv = nil + } + _, err := validate.DriverDeployment(tc.c, testK8sVersion, testNamespace, *dep, rv) + require.NoError(t, err, "validate deployment") validateEvents(t, tc, dep, expectedEvents) } @@ -271,7 +280,7 @@ func TestDeploymentController(t *testing.T) { require.NoError(t, err, "failed to create deployment") testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) }) t.Run("deployment with explicit values", func(t *testing.T) { @@ -297,7 +306,7 @@ func TestDeploymentController(t *testing.T) { // Reconcile now should change Phase to running testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) }) t.Run("multiple deployments", func(t *testing.T) { @@ -320,9 +329,9 @@ func TestDeploymentController(t *testing.T) { require.NoError(t, err, "failed to create deployment2") testReconcilePhase(t, tc.rc, tc.c, d1.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep1, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep1, []string{api.EventReasonNew, api.EventReasonRunning}, false) testReconcilePhase(t, tc.rc, tc.c, d2.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep2, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep2, []string{api.EventReasonNew, api.EventReasonRunning}, false) }) t.Run("invalid device mode", func(t *testing.T) { @@ -354,7 +363,7 @@ func TestDeploymentController(t *testing.T) { err := tc.c.Create(context.TODO(), dep) require.NoError(t, err, "failed to create deployment") testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) }) t.Run("direct mode", func(t *testing.T) { @@ -369,7 +378,7 @@ func TestDeploymentController(t *testing.T) { err := tc.c.Create(context.TODO(), dep) require.NoError(t, err, "failed to create deployment") testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) }) t.Run("provided private keys", func(t *testing.T) { @@ -391,7 +400,7 @@ func TestDeploymentController(t *testing.T) { // First deployment expected to be successful testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) }) t.Run("provided private keys and certificates", func(t *testing.T) { @@ -424,7 +433,7 @@ func TestDeploymentController(t *testing.T) { // First deployment expected to be successful testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) }) t.Run("invalid private keys and certificates", func(t *testing.T) { @@ -517,13 +526,13 @@ func TestDeploymentController(t *testing.T) { tc.rc.(*deployment.ReconcileDeployment).AddHook(&hook) testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) tc.rc.(*deployment.ReconcileDeployment).RemoveHook(&hook) // Next reconcile phase should catch the deployment changes testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, updatedDep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, updatedDep, []string{api.EventReasonNew, api.EventReasonRunning}, true) }) t.Run("updating", func(t *testing.T) { @@ -543,10 +552,11 @@ func TestDeploymentController(t *testing.T) { require.NoError(t, err, "create deployment") testReconcilePhase(t, tc.rc, tc.c, dep.Name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) // Reconcile now should keep phase as running. testReconcilePhase(t, tc.rc, tc.c, dep.Name, false, false, api.DeploymentPhaseRunning) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) validateEvents(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) // Retrieve existing object before updating it. @@ -567,7 +577,7 @@ func TestDeploymentController(t *testing.T) { testReconcilePhase(t, tc.rc, tc.c, dep.Name, false, false, api.DeploymentPhaseRunning) // Recheck the container resources are updated - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, true) } t.Run("while running", func(t *testing.T) { @@ -593,7 +603,7 @@ func TestDeploymentController(t *testing.T) { err := tc.c.Create(context.TODO(), dep) require.NoError(t, err, "failed to create deployment") testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) err = tc.c.Get(context.TODO(), client.ObjectKey{Name: d.name}, dep) require.NoError(t, err, "get deployment") @@ -629,17 +639,6 @@ func TestDeploymentController(t *testing.T) { err = tc.c.Create(context.TODO(), cm2) require.NoError(t, err, "create configmap: %s", cm2.Name) - defer func() { - err := tc.c.Delete(context.TODO(), cm1) - if err != nil && !errors.IsNotFound(err) { - require.NoErrorf(t, err, "delete configmap: %s", cm1.Name) - } - err = tc.c.Delete(context.TODO(), cm2) - if err != nil && !errors.IsNotFound(err) { - require.NoErrorf(t, err, "delete configmap: %s", cm2.Name) - } - }() - // Use a fresh reconciler to mimic operator restart tc.rc = newReconcileDeployment(tc.c, tc.cs) @@ -647,7 +646,9 @@ func TestDeploymentController(t *testing.T) { testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) err = tc.c.Get(context.TODO(), client.ObjectKey{Name: d.name}, dep) require.NoError(t, err, "get deployment") - validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}) + // It is debatable whether the operator should update all objects after + // a restart. Currently it does. + validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, true) cm := &corev1.ConfigMap{} err = tc.c.Get(context.TODO(), client.ObjectKey{Name: cm1.Name, Namespace: testNamespace}, cm) diff --git a/test/e2e/operator/deployment_api.go b/test/e2e/operator/deployment_api.go index 8c0d55fd92..a916055d55 100644 --- a/test/e2e/operator/deployment_api.go +++ b/test/e2e/operator/deployment_api.go @@ -95,7 +95,15 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { if what == nil { what = []interface{}{"validate driver"} } - framework.ExpectNoErrorWithOffset(1, validate.DriverDeploymentEventually(ctx, client, k8sver, d.Namespace, deployment), what...) + + // We cannot check for unexpected object modifications + // by the operator during E2E testing because the app + // controllers themselves will also modify the same + // objects with status changes. We can only test + // that during unit testing. + initialCreation := false + + framework.ExpectNoErrorWithOffset(1, validate.DriverDeploymentEventually(ctx, client, k8sver, d.Namespace, deployment, initialCreation), what...) framework.Logf("got expected driver deployment %s", deployment.Name) } @@ -197,7 +205,7 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { deployment2 = deploy.CreateDeploymentCR(f, deployment2) defer deploy.DeleteDeploymentCR(f, deployment2.Name) - validateDriver(deployment1 /* TODO 2 */, "validate driver #2") + validateDriver(deployment1, true /* TODO 2 */, "validate driver #2") }) It("shall support dots in the name", func() { @@ -214,7 +222,7 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { deployment = deploy.CreateDeploymentCR(f, deployment) defer deploy.DeleteDeploymentCR(f, deployment.Name) - validateDriver(deployment) + validateDriver(deployment, true) }) It("driver deployment shall be running even after operator exit", func() { @@ -223,7 +231,7 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { deployment = deploy.CreateDeploymentCR(f, deployment) defer deploy.DeleteDeploymentCR(f, deployment.Name) - validateDriver(deployment) + validateDriver(deployment, true) // Stop the operator stopOperator(c, d) @@ -231,8 +239,13 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { defer startOperator(c, d) // Ensure that the driver is running consistently + resourceVersions := map[string]string{} Consistently(func() error { - return validate.DriverDeployment(client, k8sver, d.Namespace, deployment) + final, err := validate.DriverDeployment(client, k8sver, d.Namespace, deployment, resourceVersions) + if final { + framework.Failf("final error during driver validation after restarting: %v", err) + } + return err }, "1m", "20s").ShouldNot(HaveOccurred(), "driver validation failure after restarting") }) @@ -283,7 +296,7 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { // Deleting the existing secret should make the deployment succeed. deleteSecret(sec.Name) - validateDriver(deployment) + validateDriver(deployment, true) }) }) @@ -378,7 +391,7 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { deployment = deploy.CreateDeploymentCR(f, deployment) defer deploy.DeleteDeploymentCR(f, deployment.Name) deploy.WaitForPMEMDriver(c, deployment.Name, corev1.NamespaceDefault, false /* testing */) - validateDriver(deployment) + validateDriver(deployment, true) // NOTE(avalluri): As the current operator does not support deploying // the driver in 'testing' mode, we cannot directely access CSI @@ -434,8 +447,10 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { validateDriver(deployment, "validate driver before update") // We have to get a fresh copy before updating it because the - // operator should have modified the status. - deployment = deploy.GetDeploymentCR(f, deployment.Name) + // operator should have modified the status, and only the status. + modifiedDeployment := deploy.GetDeploymentCR(f, deployment.Name) + Expect(modifiedDeployment.Spec).To(Equal(deployment.Spec), "spec unmodified") + Expect(modifiedDeployment.Status.Phase).To(Equal(api.DeploymentPhaseRunning), "deployment phase") restored := false if restart { @@ -447,15 +462,15 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { }() } - testcase.Mutate(&deployment) - deployment = deploy.UpdateDeploymentCR(f, deployment) + testcase.Mutate(&modifiedDeployment) + deployment = deploy.UpdateDeploymentCR(f, modifiedDeployment) if restart { startOperator(c, d) restored = true } - validateDriver(deployment, "validate driver after update and restart") + validateDriver(modifiedDeployment, "validate driver after update and restart") } It("while running", func() { diff --git a/test/e2e/operator/driver.go b/test/e2e/operator/driver.go index 1d6077c9cb..76399bfc4e 100644 --- a/test/e2e/operator/driver.go +++ b/test/e2e/operator/driver.go @@ -49,7 +49,7 @@ var _ = deploy.DescribeForSome("driver", func(d *deploy.Deployment) bool { deployment := d.GetDriverDeployment() deployment = deploy.GetDeploymentCR(f, deployment.Name) - framework.ExpectNoError(validate.DriverDeploymentEventually(ctx, client, *k8sver, d.Namespace, deployment), "validate driver") + framework.ExpectNoError(validate.DriverDeploymentEventually(ctx, client, *k8sver, d.Namespace, deployment, true), "validate driver") }) // Just very minimal testing at the moment. diff --git a/test/e2e/operator/validate/validate.go b/test/e2e/operator/validate/validate.go index 4442fce6c2..9b929fe575 100644 --- a/test/e2e/operator/validate/validate.go +++ b/test/e2e/operator/validate/validate.go @@ -36,7 +36,7 @@ import ( // objects for a certain deployment spec. deploymentSpec should only have those fields // set which are not the defaults. This call will wait for the expected objects until // the context times out. -func DriverDeploymentEventually(ctx context.Context, client client.Client, k8sver version.Version, namespace string, deployment api.Deployment) error { +func DriverDeploymentEventually(ctx context.Context, client client.Client, k8sver version.Version, namespace string, deployment api.Deployment, initialCreation bool) error { ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() @@ -44,37 +44,71 @@ func DriverDeploymentEventually(ctx context.Context, client client.Client, k8sve return errors.New("deployment not an object that was stored in the API server, no UID") } + // Track resource versions to detect modifications? + var resourceVersions map[string]string + if initialCreation { + resourceVersions = map[string]string{} + } + // TODO: once there is a better way to detect that the // operator has finished updating the deployment, check for // that here instead of repeatedly checking the objects. // As it stands now, permanent differences will only be // reported when the test times out. - ready := func() (err error) { - return DriverDeployment(client, k8sver, namespace, deployment) + ready := func() (final bool, err error) { + return DriverDeployment(client, k8sver, namespace, deployment, resourceVersions) } - if err := ready(); err != nil { + if final, err := ready(); err != nil { + if final { + return err + } + loop: for { select { case <-ticker.C: - err = ready() + final, err = ready() if err == nil { - return nil + break loop + } + if final { + return err } case <-ctx.Done(): return fmt.Errorf("timed out waiting for deployment, last error: %v", err) } } } - return nil + + // Now wait a bit longer to see whether the objects change again - they shouldn't. + // The longer we wait, the more certainty we have that we have reached a stable state. + deadline, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + for { + select { + case <-ticker.C: + if _, err := ready(); err != nil { + return fmt.Errorf("objects were changed after reaching expected state: %v", err) + } + case <-deadline.Done(): + return nil + } + } } // DriverDeployment compares all objects as deployed by the operator against the expected // objects for a certain deployment spec. deploymentSpec should only have those fields // set which are not the defaults. The caller must ensure that the operator is done // with creating objects. -func DriverDeployment(client client.Client, k8sver version.Version, namespace string, deployment api.Deployment) error { +// +// resourceVersions is used to track which resource versions were encountered +// for generated objects. If not nil, the version must not change (i.e. the operator +// must not update the objects after creating them). +// +// A final error is returned when observing a problem that is not going to go away, +// like an unexpected update of an object. +func DriverDeployment(client client.Client, k8sver version.Version, namespace string, deployment api.Deployment, resourceVersions map[string]string) (final bool, finalErr error) { if deployment.GetUID() == "" { - return errors.New("deployment not an object that was stored in the API server, no UID") + return true, errors.New("deployment not an object that was stored in the API server, no UID") } // The operator currently always uses the production image. We @@ -85,16 +119,32 @@ func DriverDeployment(client client.Client, k8sver version.Version, namespace st // Validate sub-objects. A sub-object is anything that has the deployment object as owner. objects, err := listAllDeployedObjects(client, deployment) if err != nil { - return err + return false, err } expectedObjects, err := deployments.LoadAndCustomizeObjects(k8sver, deployment.Spec.DeviceMode, namespace, deployment) if err != nil { - return fmt.Errorf("customize expected objects: %v", err) + return true, fmt.Errorf("customize expected objects: %v", err) } var diffs []string for _, actual := range objects { + // When the CR just got created, the operator should + // immediately create objects with the right content and then + // not update them again. + if resourceVersions != nil { + uid := string(actual.GetUID()) + currentResourceVersion := actual.GetResourceVersion() + oldResourceVersion, ok := resourceVersions[uid] + if ok { + if oldResourceVersion != currentResourceVersion { + diffs = append(diffs, fmt.Sprintf("object was modified unnecessarily: %s", prettyPrintObjectID(actual))) + final = true + } + } else { + resourceVersions[uid] = currentResourceVersion + } + } expected := findObject(expectedObjects, actual) if expected == nil { if actual.GetKind() == "Secret" { @@ -164,10 +214,9 @@ func DriverDeployment(client client.Client, k8sver version.Version, namespace st } } if diffs != nil { - return fmt.Errorf("deployed driver different from expected deployment:\n%s", strings.Join(diffs, "\n")) + finalErr = fmt.Errorf("deployed driver different from expected deployment:\n%s", strings.Join(diffs, "\n")) } - - return nil + return } func createObject(gvk schema.GroupVersionKind, name, namespace string) unstructured.Unstructured {