Skip to content

Commit

Permalink
pmem-csi-operator: delete obsolete objects of a deployment
Browse files Browse the repository at this point in the history
Different operator releases might result in obsolete sub-resources that
were created for a deployment. Operator shall detect these obsolete
objects if any and delete them in the reconcile loop.

There is no direct API to detect all the objects owned by an object.
So, we keep hard-coded list of all types that operator could deal and we
use this list to query and fetch the pre-deployed objects and check if
that object is owned by the deployment.

For tesing this pperator creates an additional dummy ConfigMap object
for pre-known test version. The test ensures that the ConfigMap gets
deleted upon change in the operator version.

FIXES intel#595
avalluri committed Sep 11, 2020

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 55476f3 commit a654839
Showing 11 changed files with 652 additions and 44 deletions.
6 changes: 6 additions & 0 deletions deploy/kustomize/operator/operator.yaml
Original file line number Diff line number Diff line change
@@ -4,6 +4,12 @@ metadata:
name: pmem-csi-operator
namespace: default
---
#
# These RBAC rules must be kept in sync with the
# AllObjectTypes list defined in
# pkg/pmem-csi-operator/controller/deployment/controller_driver.go.
# So that operator could list/get/delete the resources
# that were obsolete.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
267 changes: 267 additions & 0 deletions deploy/olm-catalog/0.7.0/pmem-csi-operator.clusterserviceversion.yaml

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: v1
kind: ServiceAccount
metadata:
creationTimestamp: null
name: pmem-csi-operator
namespace: default
154 changes: 154 additions & 0 deletions deploy/olm-catalog/0.7.0/pmem-csi.intel.com_deployments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.3.0
creationTimestamp: null
name: deployments.pmem-csi.intel.com
spec:
group: pmem-csi.intel.com
names:
kind: Deployment
listKind: DeploymentList
plural: deployments
singular: deployment
scope: Cluster
subresources:
status: {}
validation:
openAPIV3Schema:
description: Deployment is the Schema for the deployments API
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: DeploymentSpec defines the desired state of Deployment
properties:
caCert:
description: CACert encoded root certificate of the CA by which the registry and node controller certificates are signed If not provided operator uses a self-signed CA certificate
format: byte
type: string
controllerResources:
description: ControllerResources Compute resources required by Controller driver
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
type: object
deviceMode:
description: DeviceMode to use to manage PMEM devices. One of lvm, direct
type: string
image:
description: PMEM-CSI driver container image
type: string
imagePullPolicy:
description: PullPolicy image pull policy one of Always, Never, IfNotPresent
type: string
kubeletDir:
description: KubeletDir kubelet's root directory path
type: string
labels:
additionalProperties:
type: string
description: Labels contains additional labels for all objects created by the operator.
type: object
logLevel:
description: LogLevel number for the log verbosity kubebuilder:default=3
type: integer
nodeControllerCert:
description: NodeControllerCert encoded certificate signed by a CA for node controller server authentication If not provided, provisioned one by the operator using self-signed CA
format: byte
type: string
nodeControllerKey:
description: NodeControllerPrivateKey encoded private key used for node controller server certificate If not provided, provisioned one by the operator
format: byte
type: string
nodeRegistrarImage:
description: NodeRegistrarImage CSI node driver registrar sidecar image
type: string
nodeResources:
description: NodeResources Compute resources required by Node driver
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
type: object
nodeSelector:
additionalProperties:
type: string
description: NodeSelector node labels to use for selection of driver node
type: object
pmemPercentage:
description: PMEMPercentage represents the percentage of space to be used by the driver in each PMEM region on every node. This is only valid for driver in LVM mode. -kubebuilder:validation:Minimum=1 -kubebuilder:validation:Maximum=100 -kubebuilder:default=100
type: integer
provisionerImage:
description: ProvisionerImage CSI provisioner sidecar image
type: string
registryCert:
description: RegistryCert encoded certificate signed by a CA for registry server authentication If not provided, provisioned one by the operator using self-signed CA
format: byte
type: string
registryKey:
description: RegistryPrivateKey encoded private key used for registry server certificate If not provided, provisioned one by the operator
format: byte
type: string
type: object
status:
description: DeploymentStatus defines the observed state of Deployment
properties:
lastUpdated:
description: LastUpdated time of the deployment status
format: date-time
type: string
phase:
description: Phase indicates the state of the deployment
type: string
type: object
type: object
version: v1alpha1
versions:
- name: v1alpha1
served: true
storage: true
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []
5 changes: 5 additions & 0 deletions deploy/olm-catalog/pmem-csi-operator.package.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- currentCSV: pmem-csi-operator.v0.7.0
name: alpha
defaultChannel: alpha
packageName: pmem-csi-operator
11 changes: 11 additions & 0 deletions deploy/scorecard.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
scorecard:
output: json
olmdeployed: true
plugins:
- basic:
cr-manifest:
- "deploy/common/pmem-csi.intel.com_v1alpha1_deployment_cr.yaml"
- olm:
cr-manifest:
- "deploy/common/pmem-csi.intel.com_v1alpha1_deployment_cr.yaml"
csv-path: "deploy/olm-catalog/0.7.0/pmem-csi.operator.clusterserviceversion.yaml"
15 changes: 15 additions & 0 deletions pkg/apis/pmemcsi/v1alpha1/deployment_types.go
Original file line number Diff line number Diff line change
@@ -391,6 +391,21 @@ func (d *Deployment) GetHyphenedName() string {
return strings.ReplaceAll(d.GetName(), ".", "-")
}

// GetOwnerReference returns self owner reference could be used by other object
// to add this deployment to it's owner reference list.
func (d *Deployment) GetOwnerReference() metav1.OwnerReference {
blockOwnerDeletion := true
isController := true
return metav1.OwnerReference{
APIVersion: d.APIVersion,
Kind: d.Kind,
Name: d.GetName(),
UID: d.GetUID(),
BlockOwnerDeletion: &blockOwnerDeletion,
Controller: &isController,
}
}

func GetDeploymentCRDSchema() *apiextensions.JSONSchemaProps {
One := float64(1)
Hundred := float64(100)
116 changes: 96 additions & 20 deletions pkg/pmem-csi-operator/controller/deployment/controller_driver.go
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ SPDX-License-Identifier: Apache-2.0
package deployment

import (
"context"
"crypto/rsa"
"crypto/tls"
"fmt"
@@ -25,10 +26,15 @@ import (
rbacv1 "k8s.io/api/rbac/v1"
storagev1beta1 "k8s.io/api/storage/v1beta1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
apiruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/klog"
"k8s.io/kubectl/pkg/scheme"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
@@ -37,6 +43,25 @@ const (
nodeControllerPort = 10001
)

// A list of all object types potentially created by the operator,
// in this or any previous release. In other words, this list may grow,
// but never shrink, because a newer release needs to delete objects
// created by an older release.
// This list also must be kept in sync with the operator RBAC rules.
var AllObjectTypes = []schema.GroupVersionKind{
rbacv1.SchemeGroupVersion.WithKind("RoleList"),
rbacv1.SchemeGroupVersion.WithKind("ClusterRoleList"),
rbacv1.SchemeGroupVersion.WithKind("RoleBindingList"),
rbacv1.SchemeGroupVersion.WithKind("ClusterRoleBindingList"),
corev1.SchemeGroupVersion.WithKind("ServiceAccountList"),
corev1.SchemeGroupVersion.WithKind("SecretList"),
corev1.SchemeGroupVersion.WithKind("ServiceList"),
corev1.SchemeGroupVersion.WithKind("ConfigMapList"),
appsv1.SchemeGroupVersion.WithKind("DaemonSetList"),
appsv1.SchemeGroupVersion.WithKind("StatefulSetList"),
storagev1beta1.SchemeGroupVersion.WithKind("CSIDriverList"),
}

type PmemCSIDriver struct {
*api.Deployment
// operators namespace used for creating sub-resources
@@ -137,6 +162,15 @@ func (d *PmemCSIDriver) reconcileDeploymentChanges(r *ReconcileDeployment, chang
return true, err
}
objects = append(objects, objs...)

// If not found cache might be result of operator restart
// check if this deployment has any objects to be deleted
if !foundInCache {
if err := d.deleteObsoleteObjects(r, objs); err != nil {
klog.Infof("Failed to delete obsolete objects: %v", err)
return true, err
}
}
} else {
if updateSecrets {
objs, err := d.getSecrets()
@@ -203,15 +237,70 @@ func (d *PmemCSIDriver) reconcileDeploymentChanges(r *ReconcileDeployment, chang
return false, nil
}

func (d *PmemCSIDriver) deployObjects(r *ReconcileDeployment) error {
objects, err := d.getDeploymentObjects()
if err != nil {
return err
func objectIsObsolete(objList []apiruntime.Object, toFind unstructured.Unstructured) (bool, error) {
for i := range objList {
metaObj, err := meta.Accessor(objList[i])
if err != nil {
return false, err
}
if metaObj.GetName() == toFind.GetName() &&
objList[i].GetObjectKind().GroupVersionKind() == toFind.GetObjectKind().GroupVersionKind() {
return false, nil
}
}
for _, obj := range objects {
if err := r.Create(obj); err != nil {

return true, nil
}

func (d *PmemCSIDriver) isOwnerOf(obj unstructured.Unstructured) bool {
for _, owner := range obj.GetOwnerReferences() {
if owner.UID == d.GetUID() {
return true
}
}

return false
}

func (d *PmemCSIDriver) deleteObsoleteObjects(r *ReconcileDeployment, newObjects []apiruntime.Object) error {
for _, gvk := range AllObjectTypes {
list := &unstructured.UnstructuredList{}
list.SetGroupVersionKind(gvk)
opts := &client.ListOptions{
Namespace: d.namespace,
}

klog.Infof("Fetching '%s' list with options: %v", gvk, opts.Namespace)
if err := r.client.List(context.TODO(), list, opts); err != nil {
return err
}

for _, obj := range list.Items {
if !d.isOwnerOf(obj) {
continue
}
obsolete, err := objectIsObsolete(newObjects, obj)
if err != nil {
return err
}
if !obsolete {
continue
}
o, err := scheme.Scheme.New(obj.GetObjectKind().GroupVersionKind())
if err != nil {
return err
}
metaObj, err := meta.Accessor(o)
if err != nil {
return err
}
metaObj.SetName(obj.GetName())
metaObj.SetNamespace(obj.GetNamespace())

if err := r.Delete(o); err != nil && !errors.IsNotFound(err) {
return err
}
}
}
return nil
}
@@ -354,19 +443,6 @@ func validateCertificates(caCert, regKey, regCert, ncKey, ncCert []byte) error {
return nil
}

func (d *PmemCSIDriver) getOwnerReference() metav1.OwnerReference {
blockOwnerDeletion := true
isController := true
return metav1.OwnerReference{
APIVersion: d.APIVersion,
Kind: d.Kind,
Name: d.GetName(),
UID: d.GetUID(),
BlockOwnerDeletion: &blockOwnerDeletion,
Controller: &isController,
}
}

func (d *PmemCSIDriver) getCSIDriver() *storagev1beta1.CSIDriver {
attachRequired := false
podInfoOnMount := true
@@ -1038,7 +1114,7 @@ func (d *PmemCSIDriver) getObjectMeta(name string, isClusterResource bool) metav
meta := metav1.ObjectMeta{
Name: name,
OwnerReferences: []metav1.OwnerReference{
d.getOwnerReference(),
d.GetOwnerReference(),
},
Labels: d.Spec.Labels,
}
Original file line number Diff line number Diff line change
@@ -159,16 +159,21 @@ func (r *ReconcileDeployment) Reconcile(request reconcile.Request) (reconcile.Re
return reconcile.Result{Requeue: false}, nil
}

patch := client.MergeFrom(deployment.DeepCopy())
d := &PmemCSIDriver{deployment, r.namespace, r.k8sVersion}

// update status
// update status and deployment
defer func() {
klog.Infof("Updating deployment status....")
d.Deployment.Status.LastUpdated = metav1.Now()
if statusErr := r.client.Status().Update(context.TODO(), d.Deployment); statusErr != nil {
klog.Warningf("failed to update status %q for deployment %q: %v",
d.Deployment.Status.Phase, d.Name, statusErr)
}

if err := r.client.Patch(context.TODO(), d.Deployment, patch); err != nil {
klog.Warningf("Failed update deployment %q: %s", d.GetName(), err)
}
}()

requeue, err = d.Reconcile(r)
Original file line number Diff line number Diff line change
@@ -26,6 +26,7 @@ import (

corev1 "k8s.io/api/core/v1"
storagev1beta1 "k8s.io/api/storage/v1beta1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -547,6 +548,84 @@ func TestDeploymentController(t *testing.T) {
})
}
})

t.Run("delete obsolete objects", func(t *testing.T) {
tc := setup(t)
defer teardown(t, tc)
d := &pmemDeployment{
name: "test-driver-upgrades",
}

dep := getDeployment(d)

err := tc.c.Create(context.TODO(), dep)
require.NoError(t, err, "failed to create deployment")
testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning)
validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning})

err = tc.c.Get(context.TODO(), client.ObjectKey{Name: d.name}, dep)
require.NoError(t, err, "get deployment")

cm1 := &corev1.ConfigMap{
TypeMeta: metav1.TypeMeta{
Kind: "ConfigMap",
APIVersion: "v1",
},
ObjectMeta: metav1.ObjectMeta{
Name: dep.GetHyphenedName(),
OwnerReferences: []metav1.OwnerReference{
dep.GetOwnerReference(),
},
Namespace: testNamespace,
},
Data: map[string]string{},
}
err = tc.c.Create(context.TODO(), cm1)
require.NoError(t, err, "create configmap owned by deployment")

cm2 := &corev1.ConfigMap{
TypeMeta: metav1.TypeMeta{
Kind: "ConfigMap",
APIVersion: "v1",
},
ObjectMeta: metav1.ObjectMeta{
Name: "unrelated-cm",
Namespace: testNamespace,
},
Data: map[string]string{},
}
err = tc.c.Create(context.TODO(), cm2)
require.NoError(t, err, "create configmap: %s", cm2.Name)

defer func() {
err := tc.c.Delete(context.TODO(), cm1)
if err != nil && !errors.IsNotFound(err) {
require.NoErrorf(t, err, "delete configmap: %s", cm1.Name)
}
err = tc.c.Delete(context.TODO(), cm2)
if err != nil && !errors.IsNotFound(err) {
require.NoErrorf(t, err, "delete configmap: %s", cm2.Name)
}
}()

// Use a fresh reconciler to mimic operator restart
tc.rc = newReconcileDeployment(tc.c, tc.cs)

// A fresh reconcile should delete the newly created above ConfigMap
testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning)
err = tc.c.Get(context.TODO(), client.ObjectKey{Name: d.name}, dep)
require.NoError(t, err, "get deployment")
validateDriver(t, tc, dep, []string{api.EventReasonNew, api.EventReasonRunning})

cm := &corev1.ConfigMap{}
err = tc.c.Get(context.TODO(), client.ObjectKey{Name: cm1.Name, Namespace: testNamespace}, cm)
require.Errorf(t, err, "get '%s' config map after reconcile", cm1.Name)
require.True(t, errors.IsNotFound(err), "config map not found after reconcile")

// operator should not delete the objects unrelated to any deployment
err = tc.c.Get(context.TODO(), client.ObjectKey{Name: cm2.Name, Namespace: testNamespace}, cm)
require.NoErrorf(t, err, "get '%s' config map after reconcile", cm2.Name)
})
}

t.Parallel()
30 changes: 7 additions & 23 deletions test/e2e/operator/validate/validate.go
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@ import (

api "github.com/intel/pmem-csi/pkg/apis/pmemcsi/v1alpha1"
"github.com/intel/pmem-csi/pkg/deployments"
operatordeployment "github.com/intel/pmem-csi/pkg/pmem-csi-operator/controller/deployment"
"github.com/intel/pmem-csi/pkg/version"

"gopkg.in/yaml.v2"
@@ -422,35 +423,18 @@ func prettyPrintObjectID(object unstructured.Unstructured) string {
object.GetNamespace())
}

// A list of all object types potentially created by the
// operator. It's okay and desirable to list more than actually used
// at the moment, to catch new objects.
var allObjectTypes = []schema.GroupVersionKind{
schema.GroupVersionKind{"", "v1", "SecretList"},
schema.GroupVersionKind{"", "v1", "ServiceList"},
schema.GroupVersionKind{"", "v1", "ServiceAccountList"},
schema.GroupVersionKind{"admissionregistration.k8s.io", "v1beta1", "MutatingWebhookConfigurationList"},
schema.GroupVersionKind{"apps", "v1", "DaemonSetList"},
schema.GroupVersionKind{"apps", "v1", "DeploymentList"},
schema.GroupVersionKind{"apps", "v1", "ReplicaSetList"},
schema.GroupVersionKind{"apps", "v1", "StatefulSetList"},
schema.GroupVersionKind{"rbac.authorization.k8s.io", "v1", "ClusterRoleList"},
schema.GroupVersionKind{"rbac.authorization.k8s.io", "v1", "ClusterRoleBindingList"},
schema.GroupVersionKind{"rbac.authorization.k8s.io", "v1", "RoleList"},
schema.GroupVersionKind{"rbac.authorization.k8s.io", "v1", "RoleBindingList"},
schema.GroupVersionKind{"storage.k8s.io", "v1beta1", "CSIDriverList"},
}

func listAllDeployedObjects(client client.Client, deployment api.Deployment) ([]unstructured.Unstructured, error) {
func listAllDeployedObjects(c client.Client, deployment api.Deployment) ([]unstructured.Unstructured, error) {
objects := []unstructured.Unstructured{}

for _, gvk := range allObjectTypes {
for _, gvk := range operatordeployment.AllObjectTypes {
list := &unstructured.UnstructuredList{}
list.SetGroupVersionKind(gvk)
opts := &client.ListOptions{
Namespace: deployment.Namespace,
}
// Filtering by owner doesn't work, so we have to use brute-force and look at all
// objects.
// TODO (?): filter at least by namespace, where applicable.
if err := client.List(context.Background(), list); err != nil {
if err := c.List(context.Background(), list, opts); err != nil {
return objects, fmt.Errorf("list %s: %v", gvk, err)
}
outer:

0 comments on commit a654839

Please sign in to comment.