From f5469940dff5bb56b5370e918843320f6e3375d7 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Fri, 13 Dec 2019 11:19:07 +0530 Subject: [PATCH] CSI: cephfs and rbd daemonset upgrade strategy CSI nodeplugins, specifically when using cephfs FUSE or rbd-nbd as the mounters, when upgraded, will cause existing mounts to become stale/not-rechable (usually connection timeout errors). This is due to losing the mount processes running within the CSI nodeplugin pods. This PR add updated the Daemonset update strategy based on the ENV variable to take care of above issue with some manual steps Moreinfo: https://github.com/ceph/ceph-csi/issues/703 Resolves: https://github.com/rook/rook/issues/4248 Signed-off-by: Madhu Rajanna --- Documentation/ceph-block.md | 5 + Documentation/ceph-filesystem.md | 6 ++ Documentation/ceph-upgrade.md | 14 +++ Documentation/helm-operator.md | 100 +++++++++--------- .../rook-ceph/templates/deployment.yaml | 8 ++ cluster/charts/rook-ceph/values.yaml | 7 ++ .../csi/template/cephfs/csi-cephfsplugin.yaml | 2 + .../ceph/csi/template/rbd/csi-rbdplugin.yaml | 2 + .../kubernetes/ceph/operator-openshift.yaml | 8 ++ .../examples/kubernetes/ceph/operator.yaml | 8 ++ pkg/operator/ceph/csi/spec.go | 45 +++++--- 11 files changed, 142 insertions(+), 63 deletions(-) diff --git a/Documentation/ceph-block.md b/Documentation/ceph-block.md index 5eeae4666270a..5c5f49194956b 100644 --- a/Documentation/ceph-block.md +++ b/Documentation/ceph-block.md @@ -27,6 +27,11 @@ Before Rook can provision storage, a [`StorageClass`](https://kubernetes.io/docs Each OSD must be located on a different node, because the [`failureDomain`](ceph-pool-crd.md#spec) is set to `host` and the `replicated.size` is set to `3`. +> **IMPORTANT**: If you are using rbd-nbd as a mounter in storageclass. 
During upgrade you will hit a ceph-csi +> [bug](https://github.com/ceph/ceph-csi/issues/703), so you need to follow the +> [upgrade steps](ceph-upgrade.md#1.-Update-the-Rook-Operator), which require +> node draining. + > **NOTE**: This example uses the CSI driver, which is the preferred driver going forward for K8s 1.13 and newer. Examples are found in the [CSI RBD](https://github.com/rook/rook/tree/{{ branchName }}/cluster/examples/kubernetes/ceph/csi/rbd) directory. For an example of a storage class using the flex driver (required for K8s 1.12 or earlier), see the [Flex Driver](#flex-driver) section below, which has examples in the [flex](https://github.com/rook/rook/tree/{{ branchName }}/cluster/examples/kubernetes/ceph/flex) directory. Save this `StorageClass` definition as `storageclass.yaml`: diff --git a/Documentation/ceph-filesystem.md b/Documentation/ceph-filesystem.md index d47f47216e617..0ea3a81424054 100644 --- a/Documentation/ceph-filesystem.md +++ b/Documentation/ceph-filesystem.md @@ -79,6 +79,12 @@ $ ceph status Before Rook can start provisioning storage, a StorageClass needs to be created based on the filesystem. This is needed for Kubernetes to interoperate with the CSI driver to create persistent volumes. +> **IMPORTANT**: Do not use the CephFS CSI driver if the kernel does not +support ceph quotas (kernel version <4.17); in that case the +ceph-fuse client will be used as the default mounter. During upgrade you will hit a ceph-csi +[bug](https://github.com/ceph/ceph-csi/issues/703). You need to follow the +[upgrade steps](ceph-upgrade.md#1.-Update-the-Rook-Operator), which require node draining. + > **NOTE**: This example uses the CSI driver, which is the preferred driver going forward for K8s 1.13 and newer. Examples are found in the [CSI CephFS](https://github.com/rook/rook/tree/{{ branchName }}/cluster/examples/kubernetes/ceph/csi/cephfs) directory. 
For an example of a volume using the flex driver (required for K8s 1.12 and earlier), see the [Flex Driver](#flex-driver) section below. Save this storage class definition as `storageclass.yaml`: diff --git a/Documentation/ceph-upgrade.md b/Documentation/ceph-upgrade.md index d042c91656300..c748cd52c5ca6 100644 --- a/Documentation/ceph-upgrade.md +++ b/Documentation/ceph-upgrade.md @@ -239,6 +239,20 @@ kubectl apply -f upgrade-from-v1.1-apply.yaml The largest portion of the upgrade is triggered when the operator's image is updated to `v1.2.x`. When the operator is updated, it will proceed to update all of the Ceph daemons. +If you are using the ceph-fuse or rbd-nbd mounter, upgrading will cause +existing mounts to become stale/not reachable. Please add the following to the operator +`env` variables by editing the operator deployment. + +```yaml + env: + # Add CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY env if you are using the ceph-fuse mounter in the storageclass or the kernel does not support ceph quotas (<4.17) + - name: CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY + value: "OnDelete" + # Add CSI_RBD_PLUGIN_UPDATE_STRATEGY env if you are using the rbd-nbd mounter + - name: CSI_RBD_PLUGIN_UPDATE_STRATEGY + value: "OnDelete" +``` + ```sh kubectl -n $ROOK_SYSTEM_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.2.0 ``` diff --git a/Documentation/helm-operator.md b/Documentation/helm-operator.md index 8f7b2faa6d721..0cad1f57439e0 100644 --- a/Documentation/helm-operator.md +++ b/Documentation/helm-operator.md @@ -95,55 +95,57 @@ The command removes all the Kubernetes components associated with the chart and The following tables lists the configurable parameters of the rook-operator chart and their default values. 
-| Parameter | Description | Default | -| ------------------------------- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------ | -| `image.repository` | Image | `rook/ceph` | -| `image.tag` | Image tag | `master` | -| `image.pullPolicy` | Image pull policy | `IfNotPresent` | -| `rbacEnable` | If true, create & use RBAC resources | `true` | -| `pspEnable` | If true, create & use PSP resources | `true` | -| `resources` | Pod resource requests & limits | `{}` | -| `annotations` | Pod annotations | `{}` | -| `logLevel` | Global log level | `INFO` | -| `nodeSelector` | Kubernetes `nodeSelector` to add to the Deployment. | | -| `tolerations` | List of Kubernetes `tolerations` to add to the Deployment. | `[]` | -| `currentNamespaceOnly` | Whether the operator should watch cluster CRD in its own namespace or not | `false` | -| `hostpathRequiresPrivileged` | Runs Ceph Pods as privileged to be able to write to `hostPath`s in OpenShift with SELinux restrictions. | `false` | -| `mon.healthCheckInterval` | The frequency for the operator to check the mon health | `45s` | -| `mon.monOutTimeout` | The time to wait before failing over an unhealthy mon | `600s` | -| `discover.priorityClassName` | The priority class name to add to the discover pods | | -| `discover.toleration` | Toleration for the discover pods | | -| `discover.tolerationKey` | The specific key of the taint to tolerate | | -| `discover.tolerations` | Array of tolerations in YAML format which will be added to discover deployment | | -| `discover.nodeAffinity` | The node labels for affinity of `discover-agent` (***) | | -| `csi.enableRbdDriver` | Enable Ceph CSI RBD driver. | `true` | -| `csi.enableCephfsDriver` | Enable Ceph CSI CephFS driver. | `true` | -| `csi.enableGrpcMetrics` | Enable Ceph CSI GRPC Metrics. 
| `true` | -| `csi.provisionerTolerations` | Array of tolerations in YAML format which will be added to CSI provisioner deployment. | | -| `csi.provisionerNodeAffinity` | The node labels for affinity of the CSI provisioner deployment (***) | | -| `csi.pluginTolerations` | Array of tolerations in YAML format which will be added to Ceph CSI plugin DaemonSet | | -| `csi.pluginNodeAffinity` | The node labels for affinity of the Ceph CSI plugin DaemonSet (***) | | -| `csi.cephfsGrpcMetricsPort` | CSI CephFS driver GRPC metrics port. | `9091` | -| `csi.cephfsLivenessMetricsPort` | CSI CephFS driver metrics port. | `9081` | -| `csi.rbdGrpcMetricsPort` | Ceph CSI RBD driver GRPC metrics port. | `9090` | -| `csi.rbdLivenessMetricsPort` | Ceph CSI RBD driver metrics port. | `8080` | -| `csi.enableSnapshotter` | Enable deployment of snapshotter container in ceph-csi provisioner. | `true` | -| `csi.forceCephFSKernelClient` | Enable Ceph Kernel clients on kernel < 4.17 which support quotas for Cephfs. | `true` | -| `csi.kubeletDirPath` | Kubelet root directory path (if the Kubelet uses a different path for the `--root-dir` flag) | `/var/lib/kubelet` | -| `csi.cephcsi.image` | Ceph CSI image. | `quay.io/cephcsi/cephcsi:v1.2.2` | -| `csi.registrar.image` | Kubernetes CSI registrar image. | `quay.io/k8scsi/csi-node-driver-registrar:v1.1.0` | -| `csi.provisioner.image` | Kubernetes CSI provisioner image. | `quay.io/k8scsi/csi-provisioner:v1.4.0` | -| `csi.snapshotter.image` | Kubernetes CSI snapshotter image. | `quay.io/k8scsi/csi-snapshotter:v1.2.2` | -| `csi.attacher.image` | Kubernetes CSI Attacher image. 
| `quay.io/k8scsi/csi-attacher:v1.2.0` | -| `agent.flexVolumeDirPath` | Path where the Rook agent discovers the flex volume plugins (*) | `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` | -| `agent.libModulesDirPath` | Path where the Rook agent should look for kernel modules (*) | `/lib/modules` | -| `agent.mounts` | Additional paths to be mounted in the agent container (**) | | -| `agent.mountSecurityMode` | Mount Security Mode for the agent. | `Any` | -| `agent.priorityClassName` | The priority class name to add to the agent pods | | -| `agent.toleration` | Toleration for the agent pods | | -| `agent.tolerationKey` | The specific key of the taint to tolerate | | -| `agent.tolerations` | Array of tolerations in YAML format which will be added to agent deployment | | -| `agent.nodeAffinity` | The node labels for affinity of `rook-agent` (***) | | +| Parameter | Description | Default | +| -------------------------------- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------ | +| `image.repository` | Image | `rook/ceph` | +| `image.tag` | Image tag | `master` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `rbacEnable` | If true, create & use RBAC resources | `true` | +| `pspEnable` | If true, create & use PSP resources | `true` | +| `resources` | Pod resource requests & limits | `{}` | +| `annotations` | Pod annotations | `{}` | +| `logLevel` | Global log level | `INFO` | +| `nodeSelector` | Kubernetes `nodeSelector` to add to the Deployment. | | +| `tolerations` | List of Kubernetes `tolerations` to add to the Deployment. | `[]` | +| `currentNamespaceOnly` | Whether the operator should watch cluster CRD in its own namespace or not | `false` | +| `hostpathRequiresPrivileged` | Runs Ceph Pods as privileged to be able to write to `hostPath`s in OpenShift with SELinux restrictions. 
| `false` | +| `mon.healthCheckInterval` | The frequency for the operator to check the mon health | `45s` | +| `mon.monOutTimeout` | The time to wait before failing over an unhealthy mon | `600s` | +| `discover.priorityClassName` | The priority class name to add to the discover pods | | +| `discover.toleration` | Toleration for the discover pods | | +| `discover.tolerationKey` | The specific key of the taint to tolerate | | +| `discover.tolerations` | Array of tolerations in YAML format which will be added to discover deployment | | +| `discover.nodeAffinity` | The node labels for affinity of `discover-agent` (***) | | +| `csi.enableRbdDriver` | Enable Ceph CSI RBD driver. | `true` | +| `csi.enableCephfsDriver` | Enable Ceph CSI CephFS driver. | `true` | +| `csi.enableGrpcMetrics` | Enable Ceph CSI GRPC Metrics. | `true` | +| `csi.provisionerTolerations` | Array of tolerations in YAML format which will be added to CSI provisioner deployment. | | +| `csi.provisionerNodeAffinity` | The node labels for affinity of the CSI provisioner deployment (***) | | +| `csi.pluginTolerations` | Array of tolerations in YAML format which will be added to Ceph CSI plugin DaemonSet | | +| `csi.pluginNodeAffinity` | The node labels for affinity of the Ceph CSI plugin DaemonSet (***) | | +| `csi.cephfsGrpcMetricsPort` | CSI CephFS driver GRPC metrics port. | `9091` | +| `csi.cephfsLivenessMetricsPort` | CSI CephFS driver metrics port. | `9081` | +| `csi.rbdGrpcMetricsPort` | Ceph CSI RBD driver GRPC metrics port. | `9090` | +| `csi.rbdLivenessMetricsPort` | Ceph CSI RBD driver metrics port. | `8080` | +| `csi.enableSnapshotter` | Enable deployment of snapshotter container in ceph-csi provisioner. | `true` | +| `csi.forceCephFSKernelClient` | Enable Ceph Kernel clients on kernel < 4.17 which support quotas for Cephfs. 
| `true` | +| `csi.kubeletDirPath` | Kubelet root directory path (if the Kubelet uses a different path for the `--root-dir` flag) | `/var/lib/kubelet` | +| `csi.cephcsi.image` | Ceph CSI image. | `quay.io/cephcsi/cephcsi:v1.2.2` | +| `csi.rbdPluginUpdateStrategy` | CSI Rbd plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. | `RollingUpdate` | +| `csi.cephFSPluginUpdateStrategy` | CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. | `RollingUpdate` | +| `csi.registrar.image` | Kubernetes CSI registrar image. | `quay.io/k8scsi/csi-node-driver-registrar:v1.1.0` | +| `csi.provisioner.image` | Kubernetes CSI provisioner image. | `quay.io/k8scsi/csi-provisioner:v1.4.0` | +| `csi.snapshotter.image` | Kubernetes CSI snapshotter image. | `quay.io/k8scsi/csi-snapshotter:v1.2.2` | +| `csi.attacher.image` | Kubernetes CSI Attacher image. | `quay.io/k8scsi/csi-attacher:v1.2.0` | +| `agent.flexVolumeDirPath` | Path where the Rook agent discovers the flex volume plugins (*) | `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` | +| `agent.libModulesDirPath` | Path where the Rook agent should look for kernel modules (*) | `/lib/modules` | +| `agent.mounts` | Additional paths to be mounted in the agent container (**) | | +| `agent.mountSecurityMode` | Mount Security Mode for the agent. 
| `Any` | +| `agent.priorityClassName` | The priority class name to add to the agent pods | | +| `agent.toleration` | Toleration for the agent pods | | +| `agent.tolerationKey` | The specific key of the taint to tolerate | | +| `agent.tolerations` | Array of tolerations in YAML format which will be added to agent deployment | | +| `agent.nodeAffinity` | The node labels for affinity of `rook-agent` (***) | | * For information on what to set `agent.flexVolumeDirPath` to, please refer to the [Rook flexvolume documentation](flexvolume.md) diff --git a/cluster/charts/rook-ceph/templates/deployment.yaml b/cluster/charts/rook-ceph/templates/deployment.yaml index 4b508ab0f6cc5..6c62fc7cf0136 100644 --- a/cluster/charts/rook-ceph/templates/deployment.yaml +++ b/cluster/charts/rook-ceph/templates/deployment.yaml @@ -108,6 +108,14 @@ spec: value: {{ .Values.csi.enableCephfsDriver | quote }} - name: CSI_ENABLE_SNAPSHOTTER value: {{ .Values.csi.enableSnapshotter | quote }} +{{- if .Values.csi.cephFSPluginUpdateStrategy }} + - name: CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY + value: {{ .Values.csi.cephFSPluginUpdateStrategy | quote }} +{{- end }} +{{- if .Values.csi.rbdPluginUpdateStrategy }} + - name: CSI_RBD_PLUGIN_UPDATE_STRATEGY + value: {{ .Values.csi.rbdPluginUpdateStrategy | quote }} +{{- end }} {{- if .Values.csi.kubeletDirPath }} - name: ROOK_CSI_KUBELET_DIR_PATH value: {{ .Values.csi.kubeletDirPath | quote }} diff --git a/cluster/charts/rook-ceph/values.yaml b/cluster/charts/rook-ceph/values.yaml index 2ee2d9bdc5197..38ebbd3086b0e 100644 --- a/cluster/charts/rook-ceph/values.yaml +++ b/cluster/charts/rook-ceph/values.yaml @@ -58,6 +58,13 @@ csi: enableCephfsDriver: true enableGrpcMetrics: true enableSnapshotter: true + # CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. + # Default value is RollingUpdate. 
+ #cephFSPluginUpdateStrategy: OnDelete + # CSI Rbd plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. + # Default value is RollingUpdate. + #rbdPluginUpdateStrategy: OnDelete + # Set provisonerTolerations and provisionerNodeAffinity for provisioner pod. # The CSI provisioner would be best to start on the same nodes as other ceph daemons. # provisionerTolerations: diff --git a/cluster/examples/kubernetes/ceph/csi/template/cephfs/csi-cephfsplugin.yaml b/cluster/examples/kubernetes/ceph/csi/template/cephfs/csi-cephfsplugin.yaml index 476f69d940ddf..0959ed3ea9882 100644 --- a/cluster/examples/kubernetes/ceph/csi/template/cephfs/csi-cephfsplugin.yaml +++ b/cluster/examples/kubernetes/ceph/csi/template/cephfs/csi-cephfsplugin.yaml @@ -7,6 +7,8 @@ spec: selector: matchLabels: app: csi-cephfsplugin + updateStrategy: + type: {{ .CephFSPluginUpdateStrategy }} template: metadata: labels: diff --git a/cluster/examples/kubernetes/ceph/csi/template/rbd/csi-rbdplugin.yaml b/cluster/examples/kubernetes/ceph/csi/template/rbd/csi-rbdplugin.yaml index 8f7b7aed3ae12..1a16b5901b7c8 100644 --- a/cluster/examples/kubernetes/ceph/csi/template/rbd/csi-rbdplugin.yaml +++ b/cluster/examples/kubernetes/ceph/csi/template/rbd/csi-rbdplugin.yaml @@ -7,6 +7,8 @@ spec: selector: matchLabels: app: csi-rbdplugin + updateStrategy: + type: {{ .RBDPluginUpdateStrategy }} template: metadata: labels: diff --git a/cluster/examples/kubernetes/ceph/operator-openshift.yaml b/cluster/examples/kubernetes/ceph/operator-openshift.yaml index 91d8aa02a5ca2..6b15534303fed 100644 --- a/cluster/examples/kubernetes/ceph/operator-openshift.yaml +++ b/cluster/examples/kubernetes/ceph/operator-openshift.yaml @@ -240,6 +240,14 @@ spec: # See the upgrade guide: https://rook.io/docs/rook/v1.2/ceph-upgrade.html - name: CSI_FORCE_CEPHFS_KERNEL_CLIENT value: "true" + # CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. 
+ # Default value is RollingUpdate. + #- name: CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY + # value: "OnDelete" + # CSI Rbd plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. + # Default value is RollingUpdate. + #- name: CSI_RBD_PLUGIN_UPDATE_STRATEGY + # value: "OnDelete" # kubelet directory path, if kubelet configured to use other than /var/lib/kubelet path. #- name: ROOK_CSI_KUBELET_DIR_PATH # value: "/var/lib/kubelet" diff --git a/cluster/examples/kubernetes/ceph/operator.yaml b/cluster/examples/kubernetes/ceph/operator.yaml index 16f61e7fb88c2..6d881b8a5a151 100644 --- a/cluster/examples/kubernetes/ceph/operator.yaml +++ b/cluster/examples/kubernetes/ceph/operator.yaml @@ -190,6 +190,14 @@ spec: # See the upgrade guide: https://rook.io/docs/rook/v1.2/ceph-upgrade.html - name: CSI_FORCE_CEPHFS_KERNEL_CLIENT value: "true" + # CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. + # Default value is RollingUpdate. + #- name: CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY + # value: "OnDelete" + # CSI Rbd plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. + # Default value is RollingUpdate. + #- name: CSI_RBD_PLUGIN_UPDATE_STRATEGY + # value: "OnDelete" # The default version of CSI supported by Rook will be started. To change the version # of the CSI driver to something other than what is officially supported, change # these images to the desired release of the CSI driver. 
diff --git a/pkg/operator/ceph/csi/spec.go b/pkg/operator/ceph/csi/spec.go index 3c2e2f9423dae..0e4eef12e6724 100644 --- a/pkg/operator/ceph/csi/spec.go +++ b/pkg/operator/ceph/csi/spec.go @@ -35,20 +35,22 @@ import ( ) type Param struct { - CSIPluginImage string - RegistrarImage string - ProvisionerImage string - AttacherImage string - SnapshotterImage string - DriverNamePrefix string - EnableSnapshotter string - EnableCSIGRPCMetrics string - KubeletDirPath string - ForceCephFSKernelClient string - CephFSGRPCMetricsPort uint16 - CephFSLivenessMetricsPort uint16 - RBDGRPCMetricsPort uint16 - RBDLivenessMetricsPort uint16 + CSIPluginImage string + RegistrarImage string + ProvisionerImage string + AttacherImage string + SnapshotterImage string + DriverNamePrefix string + EnableSnapshotter string + EnableCSIGRPCMetrics string + KubeletDirPath string + ForceCephFSKernelClient string + CephFSPluginUpdateStrategy string + RBDPluginUpdateStrategy string + CephFSGRPCMetricsPort uint16 + CephFSLivenessMetricsPort uint16 + RBDGRPCMetricsPort uint16 + RBDLivenessMetricsPort uint16 } type templateParam struct { @@ -222,6 +224,21 @@ func StartCSIDrivers(namespace string, clientset kubernetes.Interface, ver *vers if !strings.EqualFold(enableSnap, "false") { tp.EnableSnapshotter = "true" } + + updateStrategy := os.Getenv("CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY") + if strings.EqualFold(updateStrategy, "ondelete") { + tp.CephFSPluginUpdateStrategy = "OnDelete" + } else { + tp.CephFSPluginUpdateStrategy = "RollingUpdate" + } + + updateStrategy = os.Getenv("CSI_RBD_PLUGIN_UPDATE_STRATEGY") + if strings.EqualFold(updateStrategy, "ondelete") { + tp.RBDPluginUpdateStrategy = "OnDelete" + } else { + tp.RBDPluginUpdateStrategy = "RollingUpdate" + } + if ver.Major > KubeMinMajor || (ver.Major == KubeMinMajor && ver.Minor < provDeploymentSuppVersion) { deployProvSTS = true }