From 5e5163cd1e5f15749e7dc708512d41d41ea7db27 Mon Sep 17 00:00:00 2001 From: Kevin Lefevre Date: Fri, 5 Mar 2021 19:42:44 +0100 Subject: [PATCH] Squashed 'aws-ebs-csi-driver/' changes from 4790c45471..ee3b46c279 ee3b46c279 Merge pull request #776 from AndyXiangLi/not-tolerate-all-taint daa2555262 Merge pull request #778 from tsunny/tsunny/remove-prestophook a724307db1 add toleration time to NoExecute effect 1dbc8acfcd Merge pull request #777 from vdhanan/lockfile 6e59160eea Removing prestop hook for node-driver-registrar 7c1de6f013 go mod tidy f013880f63 Merge pull request #774 from AndyXiangLi/add-self-as-approver bc33f06696 add self as reviewer c07e65470a Merge pull request #751 from vdhanan/master 1e644f81d6 Merge remote-tracking branch 'upstream/master' 25c0e590e5 add readiness probe and bump helm chart version a3a0bc328e Merge pull request #768 from people-ai/master 75038e8d1b Merge branch 'master' into master 5241bed348 Merge pull request #772 from ayberk/patch-4 208bcf0172 Update chart version ca5d05e54b Merge pull request #770 from arcivanov/chart_tscs c159142a35 Merge pull request #765 from rubroboletus/master 1ca1f8bf25 Merge pull request #762 from nvnmandadhi/master 69000c93c6 Merge pull request #771 from AndyXiangLi/delete-volume-on-leak fc70a3846f Merge pull request #756 from mowangdk/upgrade_livenessprobe_version 5b2faf99c7 delete leaked volume if driver don't know the volume status 0bdb9a6ed3 Bumped chart version 5b31c9334b Merge pull request #767 from jsafrane/fix-iops-param-error 25193d74c3 Add ability to specify topologySpreadConstraints dd696f1f23 Merge pull request #769 from josselin-c/master 1ef44be8c4 Aws client config: increase MaxRetries ea61adeffa Fix error message when IOPSPerGB is missing in io1 volumes ffed9c62b7 Allow setting http proxy and no proxy environment values 341eb8b77f removed harcoded NAMESPACE 4d783ddaae Update livenessprobe image version from 2.1.0 to 2.2.0 69535ad954 Merge pull request #755 from mtougeron/image-pull-secrets 7cf4d1b3d3 add a document separator for storageclass template file ae12a26bf9 Merge pull request #759 from ayberk/patch-4 7d5601786a Update test k8s version to 1.18.16 40ab0bd52f Use the imagePullSecrets if set 6b8257af1d Merge pull request #752 from mtougeron/helm-set-default-enableVolumeScheduling-to-true 061a9c5f2d bump the chart version 786dbfa5e5 Merge branch 'master' into helm-set-default-enableVolumeScheduling-to-true 2c7a0d127a Merge pull request #702 from AndyXiangLi/node-concurrent-issue 5828d42515 Merge pull request #734 from nicholasmhughes/add-storage-class-annotations 8171f836db Merge branch 'master' into add-storage-class-annotations bebc32bfa5 closes kubernetes-sigs/aws-ebs-csi-driver#733; add storage class annotation and label handling to chart 8155fe8ed4 Refactor inFlight key to add lock per volumeId for node service 1a15fcc8ff Merge pull request #744 from chrishenzie/create-volume-idempotency 7556d0baeb Set enableVolumeScheduling to true by default in the helm chart 8cf86b7c42 Make CreateVolume idempotent c547161652 Merge pull request #711 from ig0rsky/master 78d23ca81c add snapshot controller variables 85699e170f Merge pull request #745 from ayberk/ecr_overlay e2de85d3a4 Update ECR overlay 21fe2a87d3 Merge pull request #742 from AndyXiangLi/update-gcr-image-kustomization 41d8f2e262 correct kustomization gcr image repo f5172422f6 Merge pull request #740 from AndyXiangLi/update-gcr-image-kustomization 288c177395 patch stable release to use gcr image 62ebcbfa83 Merge pull request #735 from PhilThurston/patch-1 d22d0db501 Merge pull request #732 from dntosas/allow-resources-override-for-node 5b1bb6a972 Updated installation to use latest 0.9 release 5a03305031 [chart] Ability to add priorityClassName f3a57d3042 [chart] Allow resources override for node DaemonSet git-subtree-dir: aws-ebs-csi-driver git-subtree-split: ee3b46c279b57ee778b604a686168caa6764432b --- OWNERS | 2 + charts/aws-ebs-csi-driver/Chart.yaml | 2 +- charts/aws-ebs-csi-driver/templates/NOTES.txt | 2 +- .../clusterrolebinding-attacher.yaml | 2 +- .../clusterrolebinding-provisioner.yaml | 2 +- .../templates/clusterrolebinding-resizer.yaml | 2 +- ...lusterrolebinding-snapshot-controller.yaml | 2 +- .../clusterrolebinding-snapshotter.yaml | 2 +- .../templates/controller.yaml | 70 +++++++- charts/aws-ebs-csi-driver/templates/node.yaml | 53 +++++- ...le-snapshot-controller-leaderelection.yaml | 1 - ...ng-snapshot-controller-leaderelection.yaml | 3 +- .../serviceaccount-csi-controller.yaml | 1 - .../templates/serviceaccount-csi-node.yaml | 1 - .../serviceaccount-snapshot-controller.yaml | 1 - .../templates/statefulset.yaml | 20 ++- .../templates/storageclass.yaml | 9 +- charts/aws-ebs-csi-driver/values.yaml | 37 +++- deploy/kubernetes/base/controller.yaml | 20 ++- deploy/kubernetes/base/node.yaml | 14 +- .../overlays/stable/ecr/kustomization.yaml | 18 +- .../overlays/stable/kustomization.yaml | 10 +- docs/README.md | 4 +- go.sum | 6 - hack/e2e/run.sh | 2 +- pkg/cloud/cloud.go | 9 + pkg/cloud/cloud_test.go | 35 ++-- pkg/driver/controller.go | 16 ++ pkg/driver/controller_test.go | 153 ++++++++++++++++- pkg/driver/internal/inflight.go | 18 +- pkg/driver/internal/inflight_test.go | 158 +++--------------- pkg/driver/node.go | 36 +++- pkg/driver/node_test.go | 119 +++++++++++++ pkg/driver/sanity_test.go | 1 + 34 files changed, 602 insertions(+), 229 deletions(-) diff --git a/OWNERS b/OWNERS index 042c0595..e9b5be35 100644 --- a/OWNERS +++ b/OWNERS @@ -7,6 +7,7 @@ approvers: - leakingtapan - wongma7 - ayberk +- AndyXiangLi reviewers: - bertinatto - jsafrane @@ -16,3 +17,4 @@ reviewers: - leakingtapan - wongma7 - ayberk +- AndyXiangLi diff --git a/charts/aws-ebs-csi-driver/Chart.yaml b/charts/aws-ebs-csi-driver/Chart.yaml index 13a922b4..e2a26cd9 100644 --- a/charts/aws-ebs-csi-driver/Chart.yaml +++ b/charts/aws-ebs-csi-driver/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v1 appVersion: "0.9.0" name: aws-ebs-csi-driver description: A Helm chart for AWS EBS CSI Driver -version: 0.9.4 +version: 0.9.14 kubeVersion: ">=1.17.0-0" home: https://github.com/kubernetes-sigs/aws-ebs-csi-driver sources: diff --git a/charts/aws-ebs-csi-driver/templates/NOTES.txt b/charts/aws-ebs-csi-driver/templates/NOTES.txt index 34db916b..3717647d 100644 --- a/charts/aws-ebs-csi-driver/templates/NOTES.txt +++ b/charts/aws-ebs-csi-driver/templates/NOTES.txt @@ -1,3 +1,3 @@ To verify that aws-ebs-csi-driver has started, run: - kubectl get pod -n kube-system -l "app.kubernetes.io/name={{ include "aws-ebs-csi-driver.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" + kubectl get pod -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "aws-ebs-csi-driver.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" diff --git a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-attacher.yaml b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-attacher.yaml index 92a8b40f..c75cb9b1 100644 --- a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-attacher.yaml +++ b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-attacher.yaml @@ -8,7 +8,7 @@ metadata: subjects: - kind: ServiceAccount name: {{ .Values.serviceAccount.controller.name }} - namespace: kube-system + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: ebs-external-attacher-role diff --git a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-provisioner.yaml b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-provisioner.yaml index e2478b93..4a9174b7 100644 --- a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-provisioner.yaml +++ b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-provisioner.yaml @@ -8,7 +8,7 @@ metadata: subjects: - kind: ServiceAccount name: {{ .Values.serviceAccount.controller.name }} - namespace: kube-system + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: ebs-external-provisioner-role diff --git a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-resizer.yaml b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-resizer.yaml index 997dc28e..6fe42d12 100644 --- a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-resizer.yaml +++ b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-resizer.yaml @@ -9,7 +9,7 @@ metadata: subjects: - kind: ServiceAccount name: {{ .Values.serviceAccount.controller.name }} - namespace: kube-system + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: ebs-external-resizer-role diff --git a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-snapshot-controller.yaml b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-snapshot-controller.yaml index cb467309..b74484f9 100644 --- a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-snapshot-controller.yaml +++ b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-snapshot-controller.yaml @@ -9,7 +9,7 @@ metadata: subjects: - kind: ServiceAccount name: {{ .Values.serviceAccount.snapshot.name }} - namespace: kube-system + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: ebs-snapshot-controller-role diff --git a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-snapshotter.yaml b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-snapshotter.yaml index f55c38e4..cbc1169e 100644 --- a/charts/aws-ebs-csi-driver/templates/clusterrolebinding-snapshotter.yaml +++ b/charts/aws-ebs-csi-driver/templates/clusterrolebinding-snapshotter.yaml @@ -9,7 +9,7 @@ metadata: subjects: - kind: ServiceAccount name: {{ .Values.serviceAccount.controller.name }} - namespace: kube-system + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: ebs-external-snapshotter-role diff --git a/charts/aws-ebs-csi-driver/templates/controller.yaml b/charts/aws-ebs-csi-driver/templates/controller.yaml index 43b5b082..835a5eea 100644 --- a/charts/aws-ebs-csi-driver/templates/controller.yaml +++ b/charts/aws-ebs-csi-driver/templates/controller.yaml @@ -3,7 +3,6 @@ kind: Deployment apiVersion: apps/v1 metadata: name: ebs-csi-controller - namespace: kube-system labels: {{- include "aws-ebs-csi-driver.labels" . | nindent 4 }} spec: @@ -27,17 +26,30 @@ spec: {{ toYaml . | indent 8 }} {{- end }} serviceAccountName: {{ .Values.serviceAccount.controller.name }} - priorityClassName: system-cluster-critical + priorityClassName: {{ .Values.priorityClassName | default "system-cluster-critical" }} {{- with .Values.affinity }} affinity: {{ toYaml . | nindent 8 }} {{- end }} tolerations: {{- if .Values.tolerateAllTaints }} - operator: Exists + {{- else }} + - key: CriticalAddonsOnly + operator: Exists + - operator: Exists + effect: NoExecute + tolerationSeconds: 300 {{- end }} {{- with .Values.tolerations }} {{ toYaml . | indent 8 }} {{- end }} +{{- if .Values.topologySpreadConstraints }} +{{- $tscLabelSelector := dict "labelSelector" ( dict "matchLabels" ( dict "app" "ebs-csi-controller" ) ) }} + topologySpreadConstraints: + {{- range .Values.topologySpreadConstraints }} + - {{ mergeOverwrite . $tscLabelSelector | toJson }} + {{- end }} +{{- end }} containers: - name: ebs-plugin image: {{ .Values.image.repository }}:{{ .Values.image.tag }} @@ -76,6 +88,14 @@ spec: - name: AWS_REGION value: {{ .Values.region }} {{- end }} +{{- if .Values.proxy.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: HTTPS_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: NO_PROXY + value: {{ .Values.proxy.no_proxy | quote }} +{{- end }} volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ @@ -91,6 +111,14 @@ spec: timeoutSeconds: 3 periodSeconds: 10 failureThreshold: 5 + readinessProbe: + httpGet: + path: /healthz + port: healthz + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 10 + failureThreshold: 5 {{- with .Values.resources }} resources: {{ toYaml . | nindent 12 }} {{- end }} @@ -110,6 +138,14 @@ spec: env: - name: ADDRESS value: /var/lib/csi/sockets/pluginproxy/csi.sock +{{- if .Values.proxy.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: HTTPS_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: NO_PROXY + value: {{ .Values.proxy.no_proxy | quote }} +{{- end }} volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ @@ -125,6 +161,14 @@ spec: env: - name: ADDRESS value: /var/lib/csi/sockets/pluginproxy/csi.sock +{{- if .Values.proxy.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: HTTPS_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: NO_PROXY + value: {{ .Values.proxy.no_proxy | quote }} +{{- end }} volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ @@ -140,6 +184,14 @@ spec: env: - name: ADDRESS value: /var/lib/csi/sockets/pluginproxy/csi.sock +{{- if .Values.proxy.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: HTTPS_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: NO_PROXY + value: {{ .Values.proxy.no_proxy | quote }} +{{- end }} volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ @@ -157,6 +209,14 @@ spec: env: - name: ADDRESS value: /var/lib/csi/sockets/pluginproxy/csi.sock +{{- if .Values.proxy.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: HTTPS_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: NO_PROXY + value: {{ .Values.proxy.no_proxy | quote }} +{{- end }} volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ @@ -174,6 +234,12 @@ spec: {{- with .Values.resources }} resources: {{ toYaml . | nindent 12 }} {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: + {{- range .Values.imagePullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} volumes: - name: socket-dir emptyDir: {} diff --git a/charts/aws-ebs-csi-driver/templates/node.yaml b/charts/aws-ebs-csi-driver/templates/node.yaml index b32958d1..fcb4e8af 100644 --- a/charts/aws-ebs-csi-driver/templates/node.yaml +++ b/charts/aws-ebs-csi-driver/templates/node.yaml @@ -3,7 +3,6 @@ kind: DaemonSet apiVersion: apps/v1 metadata: name: ebs-csi-node - namespace: kube-system labels: {{- include "aws-ebs-csi-driver.labels" . | nindent 4 }} spec: @@ -36,10 +35,16 @@ spec: {{- end }} hostNetwork: true serviceAccountName: {{ .Values.serviceAccount.node.name }} - priorityClassName: system-node-critical + priorityClassName: {{ .Values.node.priorityClassName | default "system-cluster-critical" }} tolerations: {{- if .Values.node.tolerateAllTaints }} - operator: Exists + {{- else }} + - key: CriticalAddonsOnly + operator: Exists + - operator: Exists + effect: NoExecute + tolerationSeconds: 300 {{- end }} {{- with .Values.node.tolerations }} {{ toYaml . | indent 8 }} @@ -60,6 +65,14 @@ spec: env: - name: CSI_ENDPOINT value: unix:/csi/csi.sock +{{- if .Values.proxy.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: HTTPS_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: NO_PROXY + value: {{ .Values.proxy.no_proxy | quote }} +{{- end }} volumeMounts: - name: kubelet-dir mountPath: /var/lib/kubelet @@ -80,32 +93,48 @@ spec: timeoutSeconds: 3 periodSeconds: 10 failureThreshold: 5 + {{- if .Values.node.resources }} + {{- with .Values.node.resources }} + resources: {{ toYaml . | nindent 12 }} + {{- end }} + {{- else }} {{- with .Values.resources }} resources: {{ toYaml . | nindent 12 }} {{- end }} + {{- end }} - name: node-driver-registrar image: {{ printf "%s:%s" .Values.sidecars.nodeDriverRegistrarImage.repository .Values.sidecars.nodeDriverRegistrarImage.tag }} args: - --csi-address=$(ADDRESS) - --kubelet-registration-path=$(DRIVER_REG_SOCK_PATH) - --v=5 - lifecycle: - preStop: - exec: - command: ["/bin/sh", "-c", "rm -rf /registration/ebs.csi.aws.com-reg.sock /csi/csi.sock"] env: - name: ADDRESS value: /csi/csi.sock - name: DRIVER_REG_SOCK_PATH value: /var/lib/kubelet/plugins/ebs.csi.aws.com/csi.sock +{{- if .Values.proxy.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: HTTPS_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: NO_PROXY + value: {{ .Values.proxy.no_proxy | quote }} +{{- end }} volumeMounts: - name: plugin-dir mountPath: /csi - name: registration-dir mountPath: /registration + {{- if .Values.node.resources }} + {{- with .Values.node.resources }} + resources: {{ toYaml . | nindent 12 }} + {{- end }} + {{- else }} {{- with .Values.resources }} resources: {{ toYaml . | nindent 12 }} {{- end }} + {{- end }} - name: liveness-probe image: {{ printf "%s:%s" .Values.sidecars.livenessProbeImage.repository .Values.sidecars.livenessProbeImage.tag }} args: @@ -113,9 +142,21 @@ spec: volumeMounts: - name: plugin-dir mountPath: /csi + {{- if .Values.node.resources }} + {{- with .Values.node.resources }} + resources: {{ toYaml . | nindent 12 }} + {{- end }} + {{- else }} {{- with .Values.resources }} resources: {{ toYaml . | nindent 12 }} {{- end }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: + {{- range .Values.imagePullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} volumes: - name: kubelet-dir hostPath: diff --git a/charts/aws-ebs-csi-driver/templates/role-snapshot-controller-leaderelection.yaml b/charts/aws-ebs-csi-driver/templates/role-snapshot-controller-leaderelection.yaml index 947d241e..4d09e4ca 100644 --- a/charts/aws-ebs-csi-driver/templates/role-snapshot-controller-leaderelection.yaml +++ b/charts/aws-ebs-csi-driver/templates/role-snapshot-controller-leaderelection.yaml @@ -4,7 +4,6 @@ kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: name: ebs-snapshot-controller-leaderelection - namespace: kube-system labels: {{- include "aws-ebs-csi-driver.labels" . | nindent 4 }} rules: diff --git a/charts/aws-ebs-csi-driver/templates/rolebinding-snapshot-controller-leaderelection.yaml b/charts/aws-ebs-csi-driver/templates/rolebinding-snapshot-controller-leaderelection.yaml index 0670c705..e8248bd8 100644 --- a/charts/aws-ebs-csi-driver/templates/rolebinding-snapshot-controller-leaderelection.yaml +++ b/charts/aws-ebs-csi-driver/templates/rolebinding-snapshot-controller-leaderelection.yaml @@ -4,13 +4,12 @@ kind: RoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: name: ebs-snapshot-controller-leaderelection - namespace: kube-system labels: {{- include "aws-ebs-csi-driver.labels" . | nindent 4 }} subjects: - kind: ServiceAccount name: {{ .Values.serviceAccount.snapshot.name }} - namespace: kube-system + namespace: {{ .Release.Namespace }} roleRef: kind: Role name: ebs-snapshot-controller-leaderelection diff --git a/charts/aws-ebs-csi-driver/templates/serviceaccount-csi-controller.yaml b/charts/aws-ebs-csi-driver/templates/serviceaccount-csi-controller.yaml index 8ec4c4e0..0490c327 100644 --- a/charts/aws-ebs-csi-driver/templates/serviceaccount-csi-controller.yaml +++ b/charts/aws-ebs-csi-driver/templates/serviceaccount-csi-controller.yaml @@ -3,7 +3,6 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ .Values.serviceAccount.controller.name }} - namespace: kube-system labels: {{- include "aws-ebs-csi-driver.labels" . | nindent 4 }} {{- with .Values.serviceAccount.controller.annotations }} diff --git a/charts/aws-ebs-csi-driver/templates/serviceaccount-csi-node.yaml b/charts/aws-ebs-csi-driver/templates/serviceaccount-csi-node.yaml index afe02185..2e93f727 100644 --- a/charts/aws-ebs-csi-driver/templates/serviceaccount-csi-node.yaml +++ b/charts/aws-ebs-csi-driver/templates/serviceaccount-csi-node.yaml @@ -3,7 +3,6 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ .Values.serviceAccount.node.name }} - namespace: kube-system labels: {{- include "aws-ebs-csi-driver.labels" . | nindent 4 }} {{- with .Values.serviceAccount.node.annotations }} diff --git a/charts/aws-ebs-csi-driver/templates/serviceaccount-snapshot-controller.yaml b/charts/aws-ebs-csi-driver/templates/serviceaccount-snapshot-controller.yaml index 3b5ef2bc..19d27cb8 100644 --- a/charts/aws-ebs-csi-driver/templates/serviceaccount-snapshot-controller.yaml +++ b/charts/aws-ebs-csi-driver/templates/serviceaccount-snapshot-controller.yaml @@ -5,7 +5,6 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ .Values.serviceAccount.snapshot.name }} - namespace: kube-system labels: {{- include "aws-ebs-csi-driver.labels" . | nindent 4 }} {{- with .Values.serviceAccount.snapshot.annotations }} diff --git a/charts/aws-ebs-csi-driver/templates/statefulset.yaml b/charts/aws-ebs-csi-driver/templates/statefulset.yaml index 38fcb9f6..7c594c3b 100644 --- a/charts/aws-ebs-csi-driver/templates/statefulset.yaml +++ b/charts/aws-ebs-csi-driver/templates/statefulset.yaml @@ -4,7 +4,6 @@ kind: StatefulSet apiVersion: apps/v1 metadata: name: ebs-snapshot-controller - namespace: kube-system labels: {{- include "aws-ebs-csi-driver.labels" . | nindent 4 }} spec: @@ -26,7 +25,7 @@ spec: {{- with .Values.nodeSelector }} {{ toYaml . | indent 8 }} {{- end }} - priorityClassName: system-cluster-critical + priorityClassName: {{ .Values.priorityClassName | default "system-cluster-critical" }} {{- with .Values.affinity }} affinity: {{ toYaml . | nindent 8 }} {{- end }} @@ -39,8 +38,23 @@ spec: {{- end }} containers: - name: snapshot-controller - image: k8s.gcr.io/sig-storage/snapshot-controller:v3.0.3 + image: {{ printf "%s:%s" .Values.snapshotController.repository .Values.snapshotController.tag }} + env: +{{- if .Values.proxy.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: HTTPS_PROXY + value: {{ .Values.proxy.http_proxy | quote }} + - name: NO_PROXY + value: {{ .Values.proxy.no_proxy | quote }} +{{- end }} args: - --v=5 - --leader-election=false + {{- if .Values.imagePullSecrets }} + imagePullSecrets: + {{- range .Values.imagePullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} {{- end }} diff --git a/charts/aws-ebs-csi-driver/templates/storageclass.yaml b/charts/aws-ebs-csi-driver/templates/storageclass.yaml index c69af7dc..3da90e3d 100644 --- a/charts/aws-ebs-csi-driver/templates/storageclass.yaml +++ b/charts/aws-ebs-csi-driver/templates/storageclass.yaml @@ -1,8 +1,15 @@ {{- range .Values.storageClasses }} +--- kind: StorageClass apiVersion: storage.k8s.io/v1 metadata: name: {{ .name }} + {{- if .annotations }} + annotations: {{- .annotations | toYaml | trim | nindent 4 }} + {{- end }} + {{- if .labels }} + labels: {{- .labels | toYaml | trim | nindent 4 }} + {{- end }} provisioner: ebs.csi.aws.com -{{ omit (dict "volumeBindingMode" "WaitForFirstConsumer" | merge .) "name" | toYaml }} +{{ omit (dict "volumeBindingMode" "WaitForFirstConsumer" | merge .) "name" "annotations" "labels" | toYaml }} {{- end }} diff --git a/charts/aws-ebs-csi-driver/values.yaml b/charts/aws-ebs-csi-driver/values.yaml index bb4a609b..062e1477 100644 --- a/charts/aws-ebs-csi-driver/values.yaml +++ b/charts/aws-ebs-csi-driver/values.yaml @@ -21,7 +21,7 @@ sidecars: tag: "v3.0.3" livenessProbeImage: repository: k8s.gcr.io/sig-storage/livenessprobe - tag: "v2.1.0" + tag: "v2.2.0" resizerImage: repository: k8s.gcr.io/sig-storage/csi-resizer tag: "v1.0.0" @@ -29,6 +29,14 @@ sidecars: repository: k8s.gcr.io/sig-storage/csi-node-driver-registrar tag: "v2.0.1" +snapshotController: + repository: k8s.gcr.io/sig-storage/snapshot-controller + tag: "v3.0.3" + +proxy: {} +# http_proxy: +# no_proxy: + imagePullSecrets: [] nameOverride: "" fullnameOverride: "" @@ -36,7 +44,7 @@ fullnameOverride: "" podAnnotations: {} # True if enable volume scheduling for dynamic volume provisioning -enableVolumeScheduling: false +enableVolumeScheduling: true # True if enable volume resizing enableVolumeResizing: false @@ -60,13 +68,26 @@ resources: # cpu: 100m # memory: 128Mi +priorityClassName: "" nodeSelector: {} - tolerateAllTaints: true tolerations: [] - affinity: {} +# TSCs without the label selector stanza +# +# Example: +# +# topologySpreadConstraints: +# - maxSkew: 1 +# topologyKey: topology.kubernetes.io/zone +# whenUnsatisfiable: ScheduleAnyway +# - maxSkew: 1 +# topologyKey: kubernetes.io/hostname +# whenUnsatisfiable: ScheduleAnyway + +topologySpreadConstraints: [] + # Extra volume tags to attach to each dynamically provisioned volume. # --- # extraVolumeTags: @@ -87,10 +108,12 @@ k8sTagClusterId: "" region: "" node: + priorityClassName: "" nodeSelector: {} podAnnotations: {} tolerateAllTaints: true tolerations: [] + resources: {} serviceAccount: controller: @@ -109,6 +132,12 @@ serviceAccount: storageClasses: [] # Add StorageClass resources like: # - name: ebs-sc +# # annotation metadata +# annotations: +# storageclass.kubernetes.io/is-default-class: "true" +# # label metadata +# labels: +# my-label-is: supercool # # defaults to WaitForFirstConsumer # volumeBindingMode: WaitForFirstConsumer # # defaults to Delete diff --git a/deploy/kubernetes/base/controller.yaml b/deploy/kubernetes/base/controller.yaml index 24ac7cad..ac911e04 100644 --- a/deploy/kubernetes/base/controller.yaml +++ b/deploy/kubernetes/base/controller.yaml @@ -25,10 +25,14 @@ spec: serviceAccountName: ebs-csi-controller-sa priorityClassName: system-cluster-critical tolerations: + - key: CriticalAddonsOnly + operator: Exists - operator: Exists + effect: NoExecute + tolerationSeconds: 300 containers: - name: ebs-plugin - image: k8s.gcr.io/provider-aws/aws-ebs-csi-driver:latest + image: k8s.gcr.io/provider-aws/aws-ebs-csi-driver:v0.9.0 imagePullPolicy: IfNotPresent args: # - {all,controller,node} # specify the driver mode @@ -65,8 +69,16 @@ spec: timeoutSeconds: 3 periodSeconds: 10 failureThreshold: 5 + readinessProbe: + httpGet: + path: /healthz + port: healthz + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 10 + failureThreshold: 5 - name: csi-provisioner - image: quay.io/k8scsi/csi-provisioner:v2.0.2 + image: k8s.gcr.io/sig-storage/csi-provisioner:v2.0.2 args: - --csi-address=$(ADDRESS) - --v=5 @@ -80,7 +92,7 @@ spec: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ - name: csi-attacher - image: quay.io/k8scsi/csi-attacher:v3.0.0 + image: k8s.gcr.io/sig-storage/csi-attacher:v3.0.0 args: - --csi-address=$(ADDRESS) - --v=5 @@ -92,7 +104,7 @@ spec: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ - name: liveness-probe - image: quay.io/k8scsi/livenessprobe:v2.1.0 + image: k8s.gcr.io/sig-storage/livenessprobe:v2.2.0 args: - --csi-address=/csi/csi.sock volumeMounts: diff --git a/deploy/kubernetes/base/node.yaml b/deploy/kubernetes/base/node.yaml index 79c332d8..78399b16 100644 --- a/deploy/kubernetes/base/node.yaml +++ b/deploy/kubernetes/base/node.yaml @@ -34,12 +34,16 @@ spec: serviceAccountName: ebs-csi-node-sa priorityClassName: system-node-critical tolerations: + - key: CriticalAddonsOnly + operator: Exists - operator: Exists + effect: NoExecute + tolerationSeconds: 300 containers: - name: ebs-plugin securityContext: privileged: true - image: k8s.gcr.io/provider-aws/aws-ebs-csi-driver:latest + image: k8s.gcr.io/provider-aws/aws-ebs-csi-driver:v0.9.0 args: - node - --endpoint=$(CSI_ENDPOINT) @@ -69,15 +73,11 @@ spec: periodSeconds: 10 failureThreshold: 5 - name: node-driver-registrar - image: quay.io/k8scsi/csi-node-driver-registrar:v2.0.1 + image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.0.1 args: - --csi-address=$(ADDRESS) - --kubelet-registration-path=$(DRIVER_REG_SOCK_PATH) - --v=5 - lifecycle: - preStop: - exec: - command: ["/bin/sh", "-c", "rm -rf /registration/ebs.csi.aws.com-reg.sock /csi/csi.sock"] env: - name: ADDRESS value: /csi/csi.sock @@ -89,7 +89,7 @@ spec: - name: registration-dir mountPath: /registration - name: liveness-probe - image: quay.io/k8scsi/livenessprobe:v2.1.0 + image: k8s.gcr.io/sig-storage/livenessprobe:v2.2.0 args: - --csi-address=/csi/csi.sock volumeMounts: diff --git a/deploy/kubernetes/overlays/stable/ecr/kustomization.yaml b/deploy/kubernetes/overlays/stable/ecr/kustomization.yaml index 87763d30..6ce8adcb 100644 --- a/deploy/kubernetes/overlays/stable/ecr/kustomization.yaml +++ b/deploy/kubernetes/overlays/stable/ecr/kustomization.yaml @@ -5,16 +5,16 @@ bases: images: - name: k8s.gcr.io/provider-aws/aws-ebs-csi-driver newName: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/aws-ebs-csi-driver - newTag: v0.7.1 + newTag: v0.9.0 - name: quay.io/k8scsi/csi-provisioner - newName: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/csi-provisioner - newTag: v1.5.0 + newName: public.ecr.aws/eks-distro/kubernetes-csi/external-provisioner + newTag: v2.0.3-eks-1-18-1 - name: quay.io/k8scsi/csi-attacher - newName: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/csi-attacher - newTag: v1.2.0 + newName: public.ecr.aws/eks-distro/kubernetes-csi/external-attacher + newTag: v3.0.1-eks-1-18-1 - name: quay.io/k8scsi/livenessprobe - newName: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/csi-liveness-probe - newTag: v1.1.0 + newName: public.ecr.aws/eks-distro/kubernetes-csi/livenessprobe + newTag: v2.1.0-eks-1-18-1 - name: quay.io/k8scsi/csi-node-driver-registrar - newName: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/csi-node-driver-registrar - newTag: v1.1.0 + newName: public.ecr.aws/eks-distro/kubernetes-csi/node-driver-registrar + newTag: v2.0.1-eks-1-18-1 diff --git a/deploy/kubernetes/overlays/stable/kustomization.yaml b/deploy/kubernetes/overlays/stable/kustomization.yaml index 3c90c258..8d159534 100644 --- a/deploy/kubernetes/overlays/stable/kustomization.yaml +++ b/deploy/kubernetes/overlays/stable/kustomization.yaml @@ -5,11 +5,11 @@ bases: images: - name: k8s.gcr.io/provider-aws/aws-ebs-csi-driver newTag: v0.9.0 - - name: quay.io/k8scsi/csi-provisioner + - name: k8s.gcr.io/sig-storage/csi-provisioner newTag: v2.0.2 - - name: quay.io/k8scsi/csi-attacher + - name: k8s.gcr.io/sig-storage/csi-attacher newTag: v3.0.0 - - name: quay.io/k8scsi/livenessprobe - newTag: v2.1.0 - - name: quay.io/k8scsi/csi-node-driver-registrar + - name: k8s.gcr.io/sig-storage/livenessprobe + newTag: v2.2.0 + - name: k8s.gcr.io/sig-storage/csi-node-driver-registrar newTag: v2.0.1 diff --git a/docs/README.md b/docs/README.md index e6f1cedc..80746f96 100644 --- a/docs/README.md +++ b/docs/README.md @@ -110,13 +110,15 @@ If your cluster is v1.14+, you can skip this step. Install the `CSINodeInfo` CRD ```sh kubectl create -f https://raw.githubusercontent.com/kubernetes/csi-api/release-1.13/pkg/crd/manifests/csinodeinfo.yaml ``` +#### Config node toleration settings +By default, driver can be deployed on any nodes, to have driver tolerate taint `CriticalAddonsOnly`, please set helm `Value.node.tolerateAllTaints` and `Value.tolerateAllTaints` to false before deployment #### Deploy driver Please see the compatibility matrix above before you deploy the driver If you want to deploy the stable driver without alpha features: ```sh -kubectl apply -k "github.com/kubernetes-sigs/aws-ebs-csi-driver/deploy/kubernetes/overlays/stable/?ref=release-0.8" +kubectl apply -k "github.com/kubernetes-sigs/aws-ebs-csi-driver/deploy/kubernetes/overlays/stable/?ref=release-0.9" ``` If you want to deploy the driver with alpha features: diff --git a/go.sum b/go.sum index 28abc539..e67c21f0 100644 --- a/go.sum +++ b/go.sum @@ -304,7 +304,6 @@ github.com/imdario/mergo v0.3.7/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU= github.com/jimstudt/http-authentication v0.0.0-20140401203705-3eca13d6893a/go.mod h1:wK6yTYYcgjHE1Z1QtXACPDjcFJyBskHEdagmnq3vsP8= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= @@ -332,7 +331,6 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -431,7 +429,6 @@ github.com/pelletier/go-toml v1.1.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/9 github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -564,7 +561,6 @@ golang.org/x/crypto v0.0.0-20190424203555-c05e17bb3b2d/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190617133340-57b3e21c3d56/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586 h1:7KByu05hhLed2MO29w7p1XfZvZ13m8mub3shuVftRs0= golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -640,7 +636,6 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191220220014-0732a990476f h1:72l8qCJ1nGxMGH26QVBVIxKd/D34cfGt0OvrPtpemyY= golang.org/x/sys v0.0.0-20191220220014-0732a990476f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -649,7 +644,6 @@ golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fq golang.org/x/text v0.0.0-20170915090833-1cbadb444a80/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= diff --git a/hack/e2e/run.sh b/hack/e2e/run.sh index 12db9c22..595d5ef5 100755 --- a/hack/e2e/run.sh +++ b/hack/e2e/run.sh @@ -40,7 +40,7 @@ AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) IMAGE_NAME=${IMAGE_NAME:-${AWS_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${DRIVER_NAME}} IMAGE_TAG=${IMAGE_TAG:-${TEST_ID}} -K8S_VERSION=${K8S_VERSION:-1.18.10} +K8S_VERSION=${K8S_VERSION:-1.18.16} KOPS_VERSION=${KOPS_VERSION:-1.18.2} KOPS_STATE_FILE=${KOPS_STATE_FILE:-s3://k8s-kops-csi-e2e} KOPS_FEATURE_GATES_FILE=${KOPS_FEATURE_GATES_FILE:-./hack/feature-gates.yaml} diff --git a/pkg/cloud/cloud.go b/pkg/cloud/cloud.go index be40d03c..29cd53e1 100644 --- a/pkg/cloud/cloud.go +++ b/pkg/cloud/cloud.go @@ -235,6 +235,8 @@ func newEC2Cloud(region string) (Cloud, error) { awsConfig := &aws.Config{ Region: aws.String(region), CredentialsChainVerboseErrors: aws.Bool(true), + // Set MaxRetries to a high value. It will be "ovewritten" if context deadline comes sooner. + MaxRetries: aws.Int(8), } endpoint := os.Getenv("AWS_EC2_ENDPOINT") @@ -341,6 +343,13 @@ func (c *cloud) CreateDisk(ctx context.Context, volumeName string, diskOptions * } if err := c.waitForVolume(ctx, volumeID); err != nil { + // To avoid leaking volume, we should delete the volume just created + // TODO: Need to figure out how to handle DeleteDisk failed scenario instead of just log the error + if _, error := c.DeleteDisk(ctx, volumeID); error != nil { + klog.Errorf("%v failed to be deleted, this may cause volume leak", volumeID) + } else { + klog.V(5).Infof("%v is deleted because it is not in desired state within retry limit", volumeID) + } return nil, fmt.Errorf("failed to get an available volume in EC2: %v", err) } diff --git a/pkg/cloud/cloud_test.go b/pkg/cloud/cloud_test.go index e13bbbdb..81d3b92d 100644 --- a/pkg/cloud/cloud_test.go +++ b/pkg/cloud/cloud_test.go @@ -41,14 +41,15 @@ const ( func TestCreateDisk(t *testing.T) { testCases := []struct { - name string - volumeName string - volState string - diskOptions *DiskOptions - expDisk *Disk - expErr error - expCreateVolumeErr error - expDescVolumeErr error + name string + volumeName string + volState string + diskOptions *DiskOptions + expDisk *Disk + cleanUpFailedVolume bool + expErr error + expCreateVolumeErr error + expDescVolumeErr error }{ { name: "success: normal", @@ -163,7 +164,7 @@ func TestCreateDisk(t *testing.T) { expErr: nil, }, { - name: "fail: CreateVolume returned CreateVolume error", + name: "fail: ec2.CreateVolume returned CreateVolume error", volumeName: "vol-test-name-error", diskOptions: &DiskOptions{ CapacityBytes: util.GiBToBytes(1), @@ -174,7 +175,7 @@ func TestCreateDisk(t *testing.T) { expCreateVolumeErr: fmt.Errorf("CreateVolume generic error"), }, { - name: "fail: CreateVolume returned a DescribeVolumes error", + name: "fail: ec2.DescribeVolumes error after volume created", volumeName: "vol-test-name-error", volState: "creating", diskOptions: &DiskOptions{ @@ -182,11 +183,12 @@ func TestCreateDisk(t *testing.T) { Tags: map[string]string{VolumeNameTagKey: "vol-test"}, AvailabilityZone: "", }, - expErr: fmt.Errorf("could not create volume in EC2: DescribeVolumes generic error"), - expCreateVolumeErr: fmt.Errorf("DescribeVolumes generic error"), + expErr: fmt.Errorf("failed to get an available volume in EC2: DescribeVolumes generic error"), + expDescVolumeErr: fmt.Errorf("DescribeVolumes generic error"), + cleanUpFailedVolume: true, }, { - name: "fail: CreateVolume returned a volume with wrong state", + name: "fail: Volume is not ready to use, volume stuck in creating status and controller timed out waiting for the condition", volumeName: "vol-test-name-error", volState: "creating", diskOptions: &DiskOptions{ @@ -194,7 +196,8 @@ func TestCreateDisk(t *testing.T) { Tags: map[string]string{VolumeNameTagKey: "vol-test"}, AvailabilityZone: "", }, - expErr: fmt.Errorf("failed to get an available volume in EC2: timed out waiting for the condition"), + cleanUpFailedVolume: true, + expErr: fmt.Errorf("failed to get an available volume in EC2: timed out waiting for the condition"), }, { name: "success: normal from snapshot", @@ -243,7 +246,9 @@ func TestCreateDisk(t *testing.T) { if len(tc.diskOptions.SnapshotID) > 0 { mockEC2.EXPECT().DescribeSnapshotsWithContext(gomock.Eq(ctx), gomock.Any()).Return(&ec2.DescribeSnapshotsOutput{Snapshots: []*ec2.Snapshot{snapshot}}, nil).AnyTimes() } - + if tc.cleanUpFailedVolume == true { + mockEC2.EXPECT().DeleteVolumeWithContext(gomock.Eq(ctx), gomock.Any()).Return(&ec2.DeleteVolumeOutput{}, nil) + } if len(tc.diskOptions.AvailabilityZone) == 0 { mockEC2.EXPECT().DescribeAvailabilityZonesWithContext(gomock.Eq(ctx), gomock.Any()).Return(&ec2.DescribeAvailabilityZonesOutput{ AvailabilityZones: []*ec2.AvailabilityZone{ diff --git a/pkg/driver/controller.go b/pkg/driver/controller.go index f0aff469..ea923dce 100644 --- a/pkg/driver/controller.go +++ b/pkg/driver/controller.go @@ -27,6 +27,7 @@ import ( csi "github.com/container-storage-interface/spec/lib/go/csi" "github.com/golang/protobuf/ptypes" "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/cloud" + "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/driver/internal" "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/util" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -56,6 +57,7 @@ var ( // controllerService represents the controller service of CSI driver type controllerService struct { cloud cloud.Cloud + inFlight *internal.InFlight driverOptions *DriverOptions } @@ -87,6 +89,7 @@ func newControllerService(driverOptions *DriverOptions) controllerService { return controllerService{ cloud: cloud, + inFlight: internal.NewInFlight(), driverOptions: driverOptions, } } @@ -180,6 +183,12 @@ func (d *controllerService) CreateVolume(ctx context.Context, req *csi.CreateVol } } + if volumeType == cloud.VolumeTypeIO1 { + if iopsPerGB == 0 { + return nil, status.Errorf(codes.InvalidArgument, "The parameter IOPSPerGB must be specified for io1 volumes") + } + } + snapshotID := "" volumeSource := req.GetVolumeContentSource() if volumeSource != nil { @@ -201,6 +210,13 @@ func (d *controllerService) CreateVolume(ctx context.Context, req *csi.CreateVol return newCreateVolumeResponse(disk), nil } + // check if a request is already in-flight because the CreateVolume API is not idempotent + if ok := d.inFlight.Insert(req.String()); !ok { + msg := fmt.Sprintf("Create volume request for %s is already in progress", volName) + return nil, status.Error(codes.Aborted, msg) + } + defer d.inFlight.Delete(req.String()) + // create a new volume zone := pickAvailabilityZone(req.GetAccessibilityRequirements()) outpostArn := getOutpostArn(req.GetAccessibilityRequirements()) diff --git a/pkg/driver/controller_test.go b/pkg/driver/controller_test.go index aa16c7cd..b4b1d7ec 100644 --- a/pkg/driver/controller_test.go +++ b/pkg/driver/controller_test.go @@ -31,6 +31,7 @@ import ( "github.com/container-storage-interface/spec/lib/go/csi" "github.com/golang/mock/gomock" "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/cloud" + "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/driver/internal" "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/driver/mocks" "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/util" "google.golang.org/grpc/codes" @@ -198,6 +199,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -268,6 +270,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -335,6 +338,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -390,6 +394,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -445,6 +450,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -472,6 +478,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -526,6 +533,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -612,6 +620,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -675,6 +684,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -736,6 +746,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -792,6 +803,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -834,6 +846,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -876,6 +889,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -917,6 +931,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -958,6 +973,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -999,6 +1015,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -1041,6 +1058,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -1062,6 +1080,7 @@ func TestCreateVolume(t *testing.T) { VolumeCapabilities: stdVolCap, Parameters: map[string]string{ VolumeTypeKey: cloud.VolumeTypeIO1, + IopsPerGBKey: "5", "unknownKey": "unknownValue", }, } @@ -1076,6 +1095,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -1116,6 +1136,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -1156,6 +1177,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -1230,6 +1252,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -1309,7 +1332,8 @@ func TestCreateVolume(t *testing.T) { mockCloud.EXPECT().CreateDisk(gomock.Eq(ctx), gomock.Eq(req.Name), gomock.Eq(diskOptions)).Return(mockDisk, nil) awsDriver := controllerService{ - cloud: mockCloud, + cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{ extraTags: map[string]string{ extraVolumeTagKey: extraVolumeTagValue, @@ -1370,7 +1394,8 @@ func TestCreateVolume(t *testing.T) { mockCloud.EXPECT().CreateDisk(gomock.Eq(ctx), gomock.Eq(req.Name), gomock.Eq(diskOptions)).Return(mockDisk, nil) awsDriver := controllerService{ - cloud: mockCloud, + cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{ kubernetesClusterID: clusterID, }, @@ -1437,6 +1462,7 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -1459,6 +1485,7 @@ func TestCreateVolume(t *testing.T) { VolumeCapabilities: invalidVolCap, Parameters: map[string]string{ VolumeTypeKey: cloud.VolumeTypeIO1, + IopsPerGBKey: "5", "unknownKey": "unknownValue", }, } @@ -1472,6 +1499,89 @@ func TestCreateVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), + driverOptions: &DriverOptions{}, + } + + _, err := awsDriver.CreateVolume(ctx, req) + if err == nil { + t.Fatalf("Expected CreateVolume to fail but got no error") + } + + srvErr, ok := status.FromError(err) + if !ok { + t.Fatalf("Could not get error status code from error: %v", srvErr) + } + if srvErr.Code() != codes.InvalidArgument { + t.Fatalf("Expect InvalidArgument but got: %s", srvErr.Code()) + } + }, + }, + { + name: "fail with in-flight request", + testFunc: func(t *testing.T) { + req := &csi.CreateVolumeRequest{ + Name: "random-vol-name", + CapacityRange: stdCapRange, + VolumeCapabilities: stdVolCap, + Parameters: nil, + } + + ctx := context.Background() + + mockCtl := gomock.NewController(t) + defer mockCtl.Finish() + + mockCloud := mocks.NewMockCloud(mockCtl) + mockCloud.EXPECT().GetDiskByName(gomock.Eq(ctx), gomock.Eq(req.Name), gomock.Eq(stdVolSize)).Return(nil, cloud.ErrNotFound) + + inFlight := internal.NewInFlight() + inFlight.Insert(req.String()) + defer inFlight.Delete(req.String()) + + awsDriver := controllerService{ + cloud: mockCloud, + inFlight: inFlight, + driverOptions: &DriverOptions{}, + } + + _, err := awsDriver.CreateVolume(ctx, req) + if err == nil { + t.Fatalf("Expected CreateVolume to fail but got no error") + } + + srvErr, ok := status.FromError(err) + if !ok { + t.Fatalf("Could not get error status code from error: %v", srvErr) + } + if srvErr.Code() != codes.Aborted { + t.Fatalf("Expected Aborted but got: %s", srvErr.Code()) + } + }, + }, + { + name: "fail with missing iopsPerGB parameter", + testFunc: func(t *testing.T) { + req := &csi.CreateVolumeRequest{ + Name: "vol-test", + CapacityRange: stdCapRange, + VolumeCapabilities: stdVolCap, + Parameters: map[string]string{ + VolumeTypeKey: cloud.VolumeTypeIO1, + }, + } + + ctx := context.Background() + + mockCtl := gomock.NewController(t) + defer mockCtl.Finish() + + mockCloud := mocks.NewMockCloud(mockCtl) + mockCloud.EXPECT().GetDiskByName(gomock.Eq(ctx), gomock.Eq(req.Name), gomock.Eq(stdVolSize)).Return(nil, cloud.ErrNotFound) + + awsDriver := controllerService{ + cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -1517,6 +1627,7 @@ func TestDeleteVolume(t *testing.T) { mockCloud.EXPECT().DeleteDisk(gomock.Eq(ctx), gomock.Eq(req.VolumeId)).Return(true, nil) awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } resp, err := awsDriver.DeleteVolume(ctx, req) @@ -1548,6 +1659,7 @@ func TestDeleteVolume(t *testing.T) { mockCloud.EXPECT().DeleteDisk(gomock.Eq(ctx), gomock.Eq(req.VolumeId)).Return(false, cloud.ErrNotFound) awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } resp, err := awsDriver.DeleteVolume(ctx, req) @@ -1578,6 +1690,7 @@ func TestDeleteVolume(t *testing.T) { mockCloud.EXPECT().DeleteDisk(gomock.Eq(ctx), gomock.Eq(req.VolumeId)).Return(false, fmt.Errorf("DeleteDisk could not delete volume")) awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } resp, err := awsDriver.DeleteVolume(ctx, req) @@ -1837,6 +1950,7 @@ func TestCreateSnapshot(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } resp, err := awsDriver.CreateSnapshot(context.Background(), req) @@ -1891,7 +2005,8 @@ func TestCreateSnapshot(t *testing.T) { mockCloud.EXPECT().GetSnapshotByName(gomock.Eq(ctx), gomock.Eq(req.GetName())).Return(nil, cloud.ErrNotFound) awsDriver := controllerService{ - cloud: mockCloud, + cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{ kubernetesClusterID: clusterID, }, @@ -1944,7 +2059,8 @@ func TestCreateSnapshot(t *testing.T) { mockCloud.EXPECT().GetSnapshotByName(gomock.Eq(ctx), gomock.Eq(req.GetName())).Return(nil, cloud.ErrNotFound) awsDriver := controllerService{ - cloud: mockCloud, + cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{ extraTags: map[string]string{ extraVolumeTagKey: extraVolumeTagValue, @@ -1976,6 +2092,7 @@ func TestCreateSnapshot(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } if _, err := awsDriver.CreateSnapshot(context.Background(), req); err != nil { @@ -2024,6 +2141,7 @@ func TestCreateSnapshot(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } resp, err := awsDriver.CreateSnapshot(context.Background(), req) @@ -2090,6 +2208,7 @@ func TestCreateSnapshot(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } resp, err := awsDriver.CreateSnapshot(context.Background(), req) @@ -2131,6 +2250,7 @@ func TestDeleteSnapshot(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2155,6 +2275,7 @@ func TestDeleteSnapshot(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2211,6 +2332,7 @@ func TestListSnapshots(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2237,6 +2359,7 @@ func TestListSnapshots(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2272,6 +2395,7 @@ func TestListSnapshots(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2301,6 +2425,7 @@ func TestListSnapshots(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2330,6 +2455,7 @@ func TestListSnapshots(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2361,6 +2487,7 @@ func TestListSnapshots(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2423,6 +2550,7 @@ func TestControllerPublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2453,7 +2581,11 @@ func TestControllerPublishVolume(t *testing.T) { mockCloud := mocks.NewMockCloud(mockCtl) mockCloud.EXPECT().DetachDisk(gomock.Eq(ctx), req.VolumeId, req.NodeId).Return(cloud.ErrNotFound) - awsDriver := controllerService{cloud: mockCloud} + awsDriver := controllerService{ + cloud: mockCloud, + inFlight: internal.NewInFlight(), + driverOptions: &DriverOptions{}, + } resp, err := awsDriver.ControllerUnpublishVolume(ctx, req) if err != nil { t.Fatalf("Unexpected error: %v", err) @@ -2478,6 +2610,7 @@ func TestControllerPublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2510,6 +2643,7 @@ func TestControllerPublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2543,6 +2677,7 @@ func TestControllerPublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2581,6 +2716,7 @@ func TestControllerPublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2616,6 +2752,7 @@ func TestControllerPublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2652,6 +2789,7 @@ func TestControllerPublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2693,6 +2831,7 @@ func TestControllerPublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2743,6 +2882,7 @@ func TestControllerUnpublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2770,6 +2910,7 @@ func TestControllerUnpublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2802,6 +2943,7 @@ func TestControllerUnpublishVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } @@ -2881,6 +3023,7 @@ func TestControllerExpandVolume(t *testing.T) { awsDriver := controllerService{ cloud: mockCloud, + inFlight: internal.NewInFlight(), driverOptions: &DriverOptions{}, } diff --git a/pkg/driver/internal/inflight.go b/pkg/driver/internal/inflight.go index f987918e..5f0d2a9a 100644 --- a/pkg/driver/internal/inflight.go +++ b/pkg/driver/internal/inflight.go @@ -17,6 +17,7 @@ limitations under the License. package internal import ( + "k8s.io/klog" "sync" ) @@ -29,7 +30,7 @@ type Idempotent interface { String() string } -// InFlight is a struct used to manage in flight requests. +// InFlight is a struct used to manage in flight requests per volumeId. type InFlight struct { mux *sync.Mutex inFlight map[string]bool @@ -43,28 +44,27 @@ func NewInFlight() *InFlight { } } -// Insert inserts the entry to the current list of inflight requests. +// Insert inserts the entry to the current list of inflight request key is volumeId for node and req hash for controller . // Returns false when the key already exists. -func (db *InFlight) Insert(entry Idempotent) bool { +func (db *InFlight) Insert(key string) bool { db.mux.Lock() defer db.mux.Unlock() - hash := entry.String() - - _, ok := db.inFlight[hash] + _, ok := db.inFlight[key] if ok { return false } - db.inFlight[hash] = true + db.inFlight[key] = true return true } // Delete removes the entry from the inFlight entries map. // It doesn't return anything, and will do nothing if the specified key doesn't exist. -func (db *InFlight) Delete(h Idempotent) { +func (db *InFlight) Delete(key string) { db.mux.Lock() defer db.mux.Unlock() - delete(db.inFlight, h.String()) + delete(db.inFlight, key) + klog.V(4).Infof("Node Service: volume=%q operation finished", key) } diff --git a/pkg/driver/internal/inflight_test.go b/pkg/driver/internal/inflight_test.go index b0d5c824..faaeb74c 100644 --- a/pkg/driver/internal/inflight_test.go +++ b/pkg/driver/internal/inflight_test.go @@ -18,37 +18,14 @@ package internal import ( "testing" - - "github.com/container-storage-interface/spec/lib/go/csi" - "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/util" ) type testRequest struct { - request *csi.CreateVolumeRequest - expResp bool - delete bool + volumeId string + expResp bool + delete bool } -var stdVolCap = []*csi.VolumeCapability{ - { - AccessType: &csi.VolumeCapability_Mount{ - Mount: &csi.VolumeCapability_MountVolume{}, - }, - AccessMode: &csi.VolumeCapability_AccessMode{ - Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER, - }, - }, -} - -var ( - stdVolSize = int64(5 * util.GiB) - stdCapRange = &csi.CapacityRange{RequiredBytes: stdVolSize} - stdParams = map[string]string{ - "key1": "value1", - "key2": "value2", - } -) - func TestInFlight(t *testing.T) { testCases := []struct { name string @@ -58,137 +35,54 @@ func TestInFlight(t *testing.T) { name: "success normal", requests: []testRequest{ { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: stdParams, - }, - expResp: true, - }, - }, - }, - { - name: "success adding request with different name", - requests: []testRequest{ - { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-foobar", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: stdParams, - }, - expResp: true, - }, - { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name-foobar", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: stdParams, - }, - expResp: true, - }, - }, - }, - { - name: "success adding request with different parameters", - requests: []testRequest{ - { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name-foobar", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: map[string]string{"foo": "bar"}, - }, - expResp: true, - }, - { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name-foobar", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - }, - expResp: true, + + volumeId: "random-vol-name", + expResp: true, }, }, }, { - name: "success adding request with different parameters", + name: "success adding request with different volumeId", requests: []testRequest{ { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name-foobar", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: map[string]string{"foo": "bar"}, - }, - expResp: true, + volumeId: "random-vol-foobar", + expResp: true, }, { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name-foobar", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: map[string]string{"foo": "baz"}, - }, - expResp: true, + volumeId: "random-vol-name-foobar", + expResp: true, }, }, }, { - name: "failure adding copy of request", + name: "failed adding request with same volumeId", requests: []testRequest{ { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: stdParams, - }, - expResp: true, + volumeId: "random-vol-name-foobar", + expResp: true, }, { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: stdParams, - }, - expResp: false, + volumeId: "random-vol-name-foobar", + expResp: false, }, }, }, + { name: "success add, delete, add copy", requests: []testRequest{ { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: stdParams, - }, - expResp: true, + volumeId: "random-vol-name", + expResp: true, }, { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: stdParams, - }, - expResp: false, - delete: true, + volumeId: "random-vol-name", + expResp: false, + delete: true, }, { - request: &csi.CreateVolumeRequest{ - Name: "random-vol-name", - CapacityRange: stdCapRange, - VolumeCapabilities: stdVolCap, - Parameters: stdParams, - }, - expResp: true, + volumeId: "random-vol-name", + expResp: true, }, }, }, @@ -200,9 +94,9 @@ func TestInFlight(t *testing.T) { for _, r := range tc.requests { var resp bool if r.delete { - db.Delete(r.request) + db.Delete(r.volumeId) } else { - resp = db.Insert(r.request) + resp = db.Insert(r.volumeId) } if r.expResp != resp { t.Fatalf("expected insert to be %+v, got %+v", r.expResp, resp) diff --git a/pkg/driver/node.go b/pkg/driver/node.go index 96f68f3c..2c59f099 100644 --- a/pkg/driver/node.go +++ b/pkg/driver/node.go @@ -57,6 +57,9 @@ const ( // defaultMaxEBSNitroVolumes is the limit of volumes for some smaller instances, like c5 and m5. defaultMaxEBSNitroVolumes = 25 + + // VolumeOperationAlreadyExists is message fmt returned to CO when there is another in-flight call on the given volumeID + VolumeOperationAlreadyExists = "An operation with the given volume=%q is already in progress" ) var ( @@ -141,14 +144,12 @@ func (d *nodeService) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol } } - if ok := d.inFlight.Insert(req); !ok { - msg := fmt.Sprintf("request to stage volume=%q is already in progress", volumeID) - return nil, status.Error(codes.Internal, msg) + if ok := d.inFlight.Insert(volumeID); !ok { + return nil, status.Errorf(codes.Aborted, VolumeOperationAlreadyExists, volumeID) } defer func() { - klog.V(4).Infof("NodeStageVolume: volume=%q operation finished", req.GetVolumeId()) - d.inFlight.Delete(req) - klog.V(4).Info("donedone") + klog.V(4).Infof("NodeStageVolume: volume=%q operation finished", volumeID) + d.inFlight.Delete(volumeID) }() devicePath, ok := req.PublishContext[DevicePathKey] @@ -218,6 +219,14 @@ func (d *nodeService) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstag return nil, status.Error(codes.InvalidArgument, "Staging target not provided") } + if ok := d.inFlight.Insert(volumeID); !ok { + return nil, status.Errorf(codes.Aborted, VolumeOperationAlreadyExists, volumeID) + } + defer func() { + klog.V(4).Infof("NodeUnStageVolume: volume=%q operation finished", volumeID) + d.inFlight.Delete(volumeID) + }() + // Check if target directory is a mount point. GetDeviceNameFromMount // given a mnt point, finds the device from /proc/mounts // returns the device name, reference count, and error code @@ -344,6 +353,14 @@ func (d *nodeService) NodePublishVolume(ctx context.Context, req *csi.NodePublis return nil, status.Error(codes.InvalidArgument, "Volume capability not supported") } + if ok := d.inFlight.Insert(volumeID); !ok { + return nil, status.Errorf(codes.Aborted, VolumeOperationAlreadyExists, volumeID) + } + defer func() { + klog.V(4).Infof("NodePublishVolume: volume=%q operation finished", volumeID) + d.inFlight.Delete(volumeID) + }() + mountOptions := []string{"bind"} if req.GetReadonly() { mountOptions = append(mountOptions, "ro") @@ -374,6 +391,13 @@ func (d *nodeService) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpu if len(target) == 0 { return nil, status.Error(codes.InvalidArgument, "Target path not provided") } + if ok := d.inFlight.Insert(volumeID); !ok { + return nil, status.Errorf(codes.Aborted, VolumeOperationAlreadyExists, volumeID) + } + defer func() { + klog.V(4).Infof("NodeUnPublishVolume: volume=%q operation finished", volumeID) + d.inFlight.Delete(volumeID) + }() klog.V(5).Infof("NodeUnpublishVolume: unmounting %s", target) err := d.mounter.Unmount(target) diff --git a/pkg/driver/node_test.go b/pkg/driver/node_test.go index 3a2572db..d864f344 100644 --- a/pkg/driver/node_test.go +++ b/pkg/driver/node_test.go @@ -477,6 +477,39 @@ func TestNodeStageVolume(t *testing.T) { } }, }, + { + name: "fail with in-flight request", + testFunc: func(t *testing.T) { + mockCtl := gomock.NewController(t) + defer mockCtl.Finish() + + mockMetadata := mocks.NewMockMetadataService(mockCtl) + mockMounter := mocks.NewMockMounter(mockCtl) + + req := &csi.NodeStageVolumeRequest{ + PublishContext: map[string]string{DevicePathKey: devicePath}, + StagingTargetPath: targetPath, + VolumeCapability: stdVolCap, + VolumeId: "vol-test", + } + + inFlight := internal.NewInFlight() + inFlight.Insert(req.VolumeId) + defer inFlight.Delete(req.VolumeId) + + awsDriver := &nodeService{ + metadata: mockMetadata, + mounter: mockMounter, + inFlight: inFlight, + } + + _, err := awsDriver.NodeStageVolume(context.TODO(), req) + if err == nil { + t.Fatalf("Expect error but got no error") + } + expectErr(t, err, codes.Aborted) + }, + }, } for _, tc := range testCases { @@ -648,6 +681,31 @@ func TestNodeUnstageVolume(t *testing.T) { expectErr(t, err, codes.Internal) }, }, + { + name: "fail another operation in-flight on given volumeId", + testFunc: func(t *testing.T) { + mockCtl := gomock.NewController(t) + defer mockCtl.Finish() + + mockMetadata := mocks.NewMockMetadataService(mockCtl) + mockMounter := mocks.NewMockMounter(mockCtl) + + awsDriver := &nodeService{ + metadata: mockMetadata, + mounter: mockMounter, + inFlight: internal.NewInFlight(), + } + + req := &csi.NodeUnstageVolumeRequest{ + StagingTargetPath: targetPath, + VolumeId: "vol-test", + } + + awsDriver.inFlight.Insert("vol-test") + _, err := awsDriver.NodeUnstageVolume(context.TODO(), req) + expectErr(t, err, codes.Aborted) + }, + }, } for _, tc := range testCases { @@ -1076,6 +1134,42 @@ func TestNodePublishVolume(t *testing.T) { }, }, + { + name: "fail another operation in-flight on given volumeId", + testFunc: func(t *testing.T) { + mockCtl := gomock.NewController(t) + defer mockCtl.Finish() + + mockMetadata := mocks.NewMockMetadataService(mockCtl) + mockMounter := mocks.NewMockMounter(mockCtl) + + awsDriver := &nodeService{ + metadata: mockMetadata, + mounter: mockMounter, + inFlight: internal.NewInFlight(), + } + + req := &csi.NodePublishVolumeRequest{ + PublishContext: map[string]string{DevicePathKey: "/dev/fake"}, + StagingTargetPath: "/test/staging/path", + TargetPath: "/test/target/path", + VolumeId: "vol-test", + VolumeCapability: &csi.VolumeCapability{ + AccessType: &csi.VolumeCapability_Block{ + Block: &csi.VolumeCapability_BlockVolume{}, + }, + AccessMode: &csi.VolumeCapability_AccessMode{ + Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER, + }, + }, + } + awsDriver.inFlight.Insert("vol-test") + + _, err := awsDriver.NodePublishVolume(context.TODO(), req) + expectErr(t, err, codes.Aborted) + + }, + }, } for _, tc := range testCases { @@ -1274,6 +1368,31 @@ func TestNodeUnpublishVolume(t *testing.T) { expectErr(t, err, codes.Internal) }, }, + { + name: "fail another operation in-flight on given volumeId", + testFunc: func(t *testing.T) { + mockCtl := gomock.NewController(t) + defer mockCtl.Finish() + + mockMetadata := mocks.NewMockMetadataService(mockCtl) + mockMounter := mocks.NewMockMounter(mockCtl) + + awsDriver := &nodeService{ + metadata: mockMetadata, + mounter: mockMounter, + inFlight: internal.NewInFlight(), + } + + req := &csi.NodeUnpublishVolumeRequest{ + TargetPath: targetPath, + VolumeId: "vol-test", + } + + awsDriver.inFlight.Insert("vol-test") + _, err := awsDriver.NodeUnpublishVolume(context.TODO(), req) + expectErr(t, err, codes.Aborted) + }, + }, } for _, tc := range testCases { diff --git a/pkg/driver/sanity_test.go b/pkg/driver/sanity_test.go index e2bf9504..203608d0 100644 --- a/pkg/driver/sanity_test.go +++ b/pkg/driver/sanity_test.go @@ -47,6 +47,7 @@ func TestSanity(t *testing.T) { options: driverOptions, controllerService: controllerService{ cloud: newFakeCloudProvider(), + inFlight: internal.NewInFlight(), driverOptions: driverOptions, }, nodeService: nodeService{