From 53688fed8aae70d1047f5851166535292d835c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20=22WanzenBug=22=20Wanzenb=C3=B6ck?= Date: Wed, 29 Jul 2020 11:15:44 +0200 Subject: [PATCH] support multiple replicas for linstor-controller Running multiple replicas requires special support from the linstor controller. The controller container will start a leader election process when it detects the presence of the K8S_AWAIT_ELECTION_* variables. The election process determines which pod is allowed to start the linstor-controller process. Only this pod will be added as an endpoint for the controller service. Should the leader crash or the node it's running on go offline, a new leader will be elected and allowed to start the controller process. Note: in case the full node goes offline, the old pod will still be marked as ready. By using ClusterIP: "" on our service, we ensure we create an actual proxy (which automatically chooses the responding pod) instead of each client having to try multiple DNS responses. 
--- CHANGELOG.md | 12 ++ README.md | 18 ++ UPGRADE.md | 11 +- ...eus.linbit.com_linstorcontrollers_crd.yaml | 9 + charts/piraeus/templates/controller-rbac.yml | 26 +++ .../templates/operator-controller.yaml | 2 + charts/piraeus/values.cn.yaml | 7 +- charts/piraeus/values.yaml | 7 +- deploy/piraeus/templates/controller-rbac.yml | 28 +++ .../templates/operator-controller.yaml | 4 +- .../templates/operator-satelliteset.yaml | 4 +- doc/helm-values.adoc | 11 +- go.mod | 6 +- go.sum | 6 + .../piraeus/v1/linstorcontroller_types.go | 9 + pkg/apis/piraeus/v1/zz_generated.deepcopy.go | 5 + .../linstorcontroller_controller.go | 163 +++++++++++------- 17 files changed, 251 insertions(+), 77 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b08688f..e38aa6d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Breaking + +* The LINSTOR controller image given in `operator.controller.controllerImage` has to have + its entrypoint set to [`k8s-await-election v0.2.0`](https://github.com/LINBIT/k8s-await-election/) + or newer. Learn more in the [upgrade guide](./UPGRADE.md#upgrade-from-v10-to-head). + ### Added +* LINSTOR controller can be started with multiple replicas. See [`operator.controller.replicas`](./doc/helm-values.adoc#operatorcontrollerreplicas). + NOTE: This requires support from the container. You need `piraeus-server:v1.8.0` or newer. * The `pv-hostpath` helper chart automatically sets up permissions for non-root etcd containers. * Disable securityContext enforcement by setting `global.setSecurityContext=false`. * Add cluster roles to work with OpenShift's SCC system. 
@@ -20,6 +28,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +* Default values: + * `operator.controller.controllerImage`: `quay.io/piraeusdatastore/piraeus-server:v1.9.0` + * `operator.satelliteSet.satelliteImage`: `quay.io/piraeusdatastore/piraeus-server:v1.9.0` + * `operator.satelliteSet.kernelModuleInjectionImage`: `quay.io/piraeusdatastore/drbd9-bionic:v9.0.25` * linstor-controller no longer starts in a privileged container. ### Removed diff --git a/README.md b/README.md index c28b0bec..b4c0f172 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,24 @@ this [example chart configuration.](./examples/resource-requirements.yaml) [resource requests and limits]: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ +### Running multiple replicas + +Running multiple replicas of pods is recommended for high availability and fast error recovery. +The following components can be started with multiple replicas: + +* Operator: Set [`operator.replicas`] to the desired number of operator pods. +* CSI: Set [`csi.controllerReplicas`] to the desired number of CSI Controller pods. +* LINSTOR Controller: Set [`operator.controller.replicas`] to the desired number of LINSTOR controller pods. +* CSI Snapshotter: Set [`csi-snapshotter.replicas`] to the desired number of CSI Snapshot Controller pods. +* Etcd: Set [`etcd.replicas`] to the desired number of Etcd pods. +* Stork: Set [`stork.replicas`] to the desired number of both Stork plugin and Stork scheduler pods. 
+ +[`operator.replicas`]: ./doc/helm-values.adoc#operatorreplicas +[`csi.controllerReplicas`]: ./doc/helm-values.adoc#csicontrollerreplicas +[`operator.controller.replicas`]: ./doc/helm-values.adoc#operatorcontrollerreplicas +[`csi-snapshotter.replicas`]: ./doc/helm-values.adoc#csi-snapshotterreplicas +[`etcd.replicas`]: ./doc/helm-values.adoc#etcdreplicas +[`stork.replicas`]: ./doc/helm-values.adoc#storkreplicas ### Terminating Helm deployment diff --git a/UPGRADE.md b/UPGRADE.md index e285bb65..dda07870 100644 --- a/UPGRADE.md +++ b/UPGRADE.md @@ -1,4 +1,13 @@ -The following document describes how to convert +# Upgrade from v1.0 to HEAD + +* The LINSTOR controller image given in `operator.controller.controllerImage` has to have + its entrypoint set to [`k8s-await-election v0.2.0`](https://github.com/LINBIT/k8s-await-election/) + or newer. All images starting with `piraeus-server:v1.8.0` meet this requirement. + + Older images will not work, as the `Service` will not automatically pick up on the active pod. + + To upgrade, first update the deployed LINSTOR image to a compatible version, then upgrade the + operator. # Upgrade to v1.0 diff --git a/charts/piraeus/crds/piraeus.linbit.com_linstorcontrollers_crd.yaml b/charts/piraeus/crds/piraeus.linbit.com_linstorcontrollers_crd.yaml index 14c1ee04..2229f1c8 100644 --- a/charts/piraeus/crds/piraeus.linbit.com_linstorcontrollers_crd.yaml +++ b/charts/piraeus/crds/piraeus.linbit.com_linstorcontrollers_crd.yaml @@ -657,6 +657,11 @@ spec: description: priorityClassName is the name of the PriorityClass for the controller pods type: string + replicas: + description: Number of replicas in the controller deployment + format: int32 + nullable: true + type: integer resources: description: Resource requirements for the LINSTOR controller pod nullable: true @@ -676,6 +681,10 @@ spec: value. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' type: object type: object + serviceAccountName: + description: Name of the service account that runs leader elections + for linstor + type: string sslSecret: description: Name of k8s secret that holds the SSL key for a node (called `keystore.jks`) and the trusted certificates (called `certificates.jks`) diff --git a/charts/piraeus/templates/controller-rbac.yml b/charts/piraeus/templates/controller-rbac.yml index a66a7b72..57130cbe 100644 --- a/charts/piraeus/templates/controller-rbac.yml +++ b/charts/piraeus/templates/controller-rbac.yml @@ -6,3 +6,29 @@ kind: ServiceAccount metadata: name: linstor-controller namespace: {{ .Release.Namespace }} +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: linstor-leader-elector + namespace: {{ .Release.Namespace }} +rules: + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "watch", "list", "delete", "update", "create"] + - apiGroups: [""] + resources: ["endpoints"] + verbs: ["create", "patch", "update"] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: linstor-leader-elector + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: linstor-leader-elector +subjects: + - kind: ServiceAccount + name: linstor-controller diff --git a/charts/piraeus/templates/operator-controller.yaml b/charts/piraeus/templates/operator-controller.yaml index 33e80030..cbe8d72c 100644 --- a/charts/piraeus/templates/operator-controller.yaml +++ b/charts/piraeus/templates/operator-controller.yaml @@ -17,3 +17,5 @@ spec: affinity: {{ .Values.operator.controller.affinity | toJson }} tolerations: {{ .Values.operator.controller.tolerations | toJson}} resources: {{ .Values.operator.controller.resources | toJson }} + replicas: {{ .Values.operator.controller.replicas }} + serviceAccountName: linstor-controller diff --git 
a/charts/piraeus/values.cn.yaml b/charts/piraeus/values.cn.yaml index 1158e8cc..276fb038 100644 --- a/charts/piraeus/values.cn.yaml +++ b/charts/piraeus/values.cn.yaml @@ -51,7 +51,7 @@ operator: image: daocloud.io/piraeus/piraeus-operator:latest resources: {} controller: - controllerImage: daocloud.io/piraeus/piraeus-server:v1.7.1 + controllerImage: daocloud.io/piraeus/piraeus-server:v1.9.0 luksSecret: "" dbCertSecret: "" dbUseClientCert: false @@ -59,14 +59,15 @@ operator: affinity: {} tolerations: [] resources: {} + replicas: 1 satelliteSet: - satelliteImage: daocloud.io/piraeus/piraeus-server:v1.7.1 + satelliteImage: daocloud.io/piraeus/piraeus-server:v1.9.0 storagePools: null sslSecret: "" automaticStorageType: None affinity: {} tolerations: [] resources: {} - kernelModuleInjectionImage: daocloud.io/piraeus/drbd9-bionic:v9.0.24 + kernelModuleInjectionImage: daocloud.io/piraeus/drbd9-bionic:v9.0.25 kernelModuleInjectionMode: Compile kernelModuleInjectionResources: {} diff --git a/charts/piraeus/values.yaml b/charts/piraeus/values.yaml index ad457473..249add74 100644 --- a/charts/piraeus/values.yaml +++ b/charts/piraeus/values.yaml @@ -51,7 +51,7 @@ operator: image: quay.io/piraeusdatastore/piraeus-operator:latest resources: {} controller: - controllerImage: quay.io/piraeusdatastore/piraeus-server:v1.7.1 + controllerImage: quay.io/piraeusdatastore/piraeus-server:v1.9.0 luksSecret: "" dbCertSecret: "" dbUseClientCert: false @@ -59,14 +59,15 @@ operator: affinity: {} tolerations: [] resources: {} + replicas: 1 satelliteSet: - satelliteImage: quay.io/piraeusdatastore/piraeus-server:v1.7.1 + satelliteImage: quay.io/piraeusdatastore/piraeus-server:v1.9.0 storagePools: null sslSecret: "" automaticStorageType: None affinity: {} tolerations: [] resources: {} - kernelModuleInjectionImage: quay.io/piraeusdatastore/drbd9-bionic:v9.0.24 + kernelModuleInjectionImage: quay.io/piraeusdatastore/drbd9-bionic:v9.0.25 kernelModuleInjectionMode: Compile 
kernelModuleInjectionResources: {} diff --git a/deploy/piraeus/templates/controller-rbac.yml b/deploy/piraeus/templates/controller-rbac.yml index 4209dafb..1eb82855 100644 --- a/deploy/piraeus/templates/controller-rbac.yml +++ b/deploy/piraeus/templates/controller-rbac.yml @@ -7,5 +7,33 @@ metadata: namespace: default --- # Source: piraeus/templates/controller-rbac.yml +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: linstor-leader-elector + namespace: default +rules: + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "watch", "list", "delete", "update", "create"] + - apiGroups: [""] + resources: ["endpoints"] + verbs: ["create", "patch", "update"] +--- +# Source: piraeus/templates/controller-rbac.yml +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: linstor-leader-elector + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: linstor-leader-elector +subjects: + - kind: ServiceAccount + name: linstor-controller +--- +# Source: piraeus/templates/controller-rbac.yml # This YAML file contains all RBAC objects that are necessary to run a # LINSTOR controller pod diff --git a/deploy/piraeus/templates/operator-controller.yaml b/deploy/piraeus/templates/operator-controller.yaml index aad6192b..9ee17c07 100644 --- a/deploy/piraeus/templates/operator-controller.yaml +++ b/deploy/piraeus/templates/operator-controller.yaml @@ -12,10 +12,12 @@ spec: dbCertSecret: dbUseClientCert: false drbdRepoCred: "" - controllerImage: quay.io/piraeusdatastore/piraeus-server:v1.7.1 + controllerImage: quay.io/piraeusdatastore/piraeus-server:v1.9.0 imagePullPolicy: "IfNotPresent" linstorHttpsControllerSecret: "" linstorHttpsClientSecret: "" affinity: {} tolerations: [] resources: {} + replicas: 1 + serviceAccountName: linstor-controller diff --git a/deploy/piraeus/templates/operator-satelliteset.yaml b/deploy/piraeus/templates/operator-satelliteset.yaml index 24b0e3c7..fe0f1c0c 
100644 --- a/deploy/piraeus/templates/operator-satelliteset.yaml +++ b/deploy/piraeus/templates/operator-satelliteset.yaml @@ -9,7 +9,7 @@ spec: sslSecret: drbdRepoCred: "" imagePullPolicy: "IfNotPresent" - satelliteImage: quay.io/piraeusdatastore/piraeus-server:v1.7.1 + satelliteImage: quay.io/piraeusdatastore/piraeus-server:v1.9.0 linstorHttpsClientSecret: "" controllerEndpoint: http://piraeus-op-cs.default.svc:3370 automaticStorageType: "None" @@ -17,5 +17,5 @@ spec: tolerations: [] resources: {} kernelModuleInjectionMode: "Compile" - kernelModuleInjectionImage: "quay.io/piraeusdatastore/drbd9-bionic:v9.0.24" + kernelModuleInjectionImage: "quay.io/piraeusdatastore/drbd9-bionic:v9.0.25" kernelModuleInjectionResources: {} diff --git a/doc/helm-values.adoc b/doc/helm-values.adoc index 3b9172e4..5651e051 100644 --- a/doc/helm-values.adoc +++ b/doc/helm-values.adoc @@ -208,7 +208,7 @@ Valid values:: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-po Description:: Affinity settings for controller pods. Can be used to restrict the pods to specific nodes. === `operator.controller.controllerImage` -Default:: `quay.io/piraeusdatastore/piraeus-server:v1.7.1` +Default:: `quay.io/piraeusdatastore/piraeus-server:v1.9.0` Valid values:: image ref Description:: Name of the image to use for the controller. @@ -229,6 +229,11 @@ Default:: `""` Valid values:: secret name Description:: Name of the secret that contains the master passphrase to use for encrypted volumes. Check link:./security.md#automatically-set-the-passphrase-for-encrypted-volumes[the security guide]. +=== `operator.controller.replicas` +Default:: `1` +Valid values:: number +Description:: Number of replicas to use for the Linstor controller. + === `operator.controller.resources` Default:: `{}` Valid values:: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/[resource requests] @@ -269,7 +274,7 @@ Description:: Automatically create storage pools of the specified type. 
Check t * `ZFS`: create a ZFS based storage pool === `operator.satelliteSet.kernelModuleInjectionImage` -Default:: `quay.io/piraeusdatastore/drbd9-bionic:v9.0.24` +Default:: `quay.io/piraeusdatastore/drbd9-bionic:v9.0.25` Valid values:: image ref Description:: Name of the image to use for loading kernel modules. This is specific to the nodes host system. Check https://quay.io/organization/piraeusdatastore[the available `drbd9` images] @@ -302,7 +307,7 @@ Description:: Resource requests and limits to apply to the satellite containers. Note: at least 750MiB memory is recommended. === `operator.satelliteSet.satelliteImage` -Default:: `quay.io/piraeusdatastore/piraeus-server:v1.7.1` +Default:: `quay.io/piraeusdatastore/piraeus-server:v1.9.0` Valid values:: image ref Description:: Name of the image to use for the satellites. diff --git a/go.mod b/go.mod index 01516d73..ebbc6b26 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,13 @@ go 1.13 require ( github.com/BurntSushi/toml v0.3.1 github.com/LINBIT/golinstor v0.26.1-0.20200520122514-71747751b6af + github.com/linbit/k8s-await-election v0.2.0 github.com/operator-framework/operator-sdk v0.16.0 - github.com/sirupsen/logrus v1.4.2 + github.com/sirupsen/logrus v1.6.0 github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.4.0 gopkg.in/ini.v1 v1.56.0 k8s.io/api v0.0.0 - k8s.io/apimachinery v0.0.0 + k8s.io/apimachinery v0.18.4 k8s.io/client-go v12.0.0+incompatible sigs.k8s.io/controller-runtime v0.4.0 ) diff --git a/go.sum b/go.sum index 4aca2de1..bcdb621b 100644 --- a/go.sum +++ b/go.sum @@ -421,6 +421,8 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 
+github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -434,6 +436,8 @@ github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc= github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= +github.com/linbit/k8s-await-election v0.2.0 h1:ICvl2wd4h6mWgsdMumJ6e2/NS0Mp11DvK2cM/a3l2jw= +github.com/linbit/k8s-await-election v0.2.0/go.mod h1:VCRtUTvVQmfNyqW7OSNyCOCh9mi29fgQ75XtUIfP5WE= github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= github.com/lithammer/shortuuid v3.0.0+incompatible h1:NcD0xWW/MZYXEHa6ITy6kaXN5nwm/V115vj2YXfhS0w= github.com/lithammer/shortuuid v3.0.0+incompatible/go.mod h1:FR74pbAuElzOUuenUHTK2Tciko1/vKuIKS9dSkDrA4w= @@ -623,6 +627,8 @@ github.com/sirupsen/logrus v1.3.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d 
h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a h1:pa8hGb/2YqsZKovtsgrwcDH1RZhVbTKCjLp47XpqCDs= diff --git a/pkg/apis/piraeus/v1/linstorcontroller_types.go b/pkg/apis/piraeus/v1/linstorcontroller_types.go index 8b1f591b..d6385898 100644 --- a/pkg/apis/piraeus/v1/linstorcontroller_types.go +++ b/pkg/apis/piraeus/v1/linstorcontroller_types.go @@ -88,6 +88,15 @@ type LinstorControllerSpec struct { // +nullable Tolerations []corev1.Toleration `json:"tolerations"` + // Number of replicas in the controller deployment + // +optional + // +nullable + Replicas *int32 `json:"replicas"` + + // Name of the service account that runs leader elections for linstor + // +optional + ServiceAccountName string `json:"serviceAccountName"` + shared.LinstorClientConfig `json:",inline"` } diff --git a/pkg/apis/piraeus/v1/zz_generated.deepcopy.go b/pkg/apis/piraeus/v1/zz_generated.deepcopy.go index bd1522d1..1be567e6 100644 --- a/pkg/apis/piraeus/v1/zz_generated.deepcopy.go +++ b/pkg/apis/piraeus/v1/zz_generated.deepcopy.go @@ -238,6 +238,11 @@ func (in *LinstorControllerSpec) DeepCopyInto(out *LinstorControllerSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Replicas != nil { + in, out := &in.Replicas, &out.Replicas + *out = new(int32) + **out = **in + } out.LinstorClientConfig = in.LinstorClientConfig return } diff --git a/pkg/controller/linstorcontroller/linstorcontroller_controller.go b/pkg/controller/linstorcontroller/linstorcontroller_controller.go index da9df9db..d1cbf55a 100644 --- a/pkg/controller/linstorcontroller/linstorcontroller_controller.go +++ b/pkg/controller/linstorcontroller/linstorcontroller_controller.go @@ -19,6 +19,7 @@ package linstorcontroller import ( "context" + "encoding/json" "fmt" "strings" "time" @@ -33,6 +34,7 @@ import ( lc 
"github.com/piraeusdatastore/piraeus-operator/pkg/linstor/client" "github.com/BurntSushi/toml" + awaitelection "github.com/linbit/k8s-await-election/pkg/consts" "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -345,14 +347,6 @@ func (r *ReconcileLinstorController) reconcileControllers(ctx context.Context, p } } - if len(ourPods.Items) > 1 { - log.WithField("#controllerPods", len(ourPods.Items)).Debug("requeue because multiple controller pods are present") - return &reconcileutil.TemporaryError{ - RequeueAfter: time.Minute, - Source: fmt.Errorf("multiple controller pods present"), - } - } - return nil } @@ -364,16 +358,12 @@ func (r *ReconcileLinstorController) reconcileStatus(ctx context.Context, pcs *p log.Info("reconcile status") log.Debug("find active controller pod") - pod, err := r.findActiveControllerPod(ctx, pcs) + + controllerName, err := r.findActiveControllerPodName(ctx) if err != nil { log.Warnf("failed to find active controller pod: %v", err) } - controllerName := "" - if pod != nil { - controllerName = pod.Name - } - pcs.Status.ControllerStatus = &shared.NodeStatus{ NodeName: controllerName, RegisteredOnController: false, @@ -387,7 +377,7 @@ func (r *ReconcileLinstorController) reconcileStatus(ctx context.Context, pcs *p } for _, node := range allNodes { - if pod != nil && node.Name == pod.Name { + if node.Name == controllerName { pcs.Status.ControllerStatus.RegisteredOnController = true } } @@ -428,33 +418,28 @@ func (r *ReconcileLinstorController) reconcileStatus(ctx context.Context, pcs *p return r.client.Status().Update(ctx, pcs) } -func (r *ReconcileLinstorController) findActiveControllerPod(ctx context.Context, pcs *piraeusv1.LinstorController) (*corev1.Pod, error) { - ourPods := &corev1.PodList{} - labelSelector := labels.SelectorFromSet(pcsLabels(pcs)) - err := r.client.List(ctx, ourPods, client.InNamespace(pcs.Namespace), client.MatchingLabelsSelector{Selector: labelSelector}) +func (r 
*ReconcileLinstorController) findActiveControllerPodName(ctx context.Context) (string, error) { + allNodes, err := r.linstorClient.Nodes.GetAll(ctx) if err != nil { - return nil, err + return "", fmt.Errorf("failed to fetch nodes from linstor: %w", err) } - // Find the single currently serving pod - var candidatePods []corev1.Pod - for _, pod := range ourPods.Items { - for _, condition := range pod.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - candidatePods = append(candidatePods, pod) - break - } + var onlineControllers []*lapi.Node + + for i := range allNodes { + node := &allNodes[i] + + registrar, ok := node.Props[kubeSpec.LinstorRegistrationProperty] + if ok && registrar == kubeSpec.Name && node.Type == lc.Controller && node.ConnectionStatus == lc.Online { + onlineControllers = append(onlineControllers, node) } } - switch len(candidatePods) { - case 1: - return &candidatePods[0], nil - case 0: - return nil, nil - default: - return nil, fmt.Errorf("expected one controller pod, got multiple: %v", candidatePods) + if len(onlineControllers) != 1 { + return "", fmt.Errorf("expected one online controller, instead got: %v", onlineControllers) } + + return onlineControllers[0].Name, nil } // finalizeControllerSet returns whether it is finished as well as potentially an error @@ -553,12 +538,75 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme pullSecrets = append(pullSecrets, corev1.LocalObjectReference{Name: pcs.Spec.DrbdRepoCred}) } + const healthzPort = 9999 + port := lc.DefaultHttpPort + if pcs.Spec.LinstorHttpsControllerSecret != "" { + port = lc.DefaultHttpsPort + } + + servicePorts := []corev1.EndpointPort{ + {Name: pcs.Name, Port: int32(port)}, + } + + servicePortsJSON, err := json.Marshal(servicePorts) + if err != nil { + panic(err) + } + env := []corev1.EnvVar{ { Name: kubeSpec.JavaOptsName, // Workaround for https://github.com/LINBIT/linstor-server/issues/123 
Value: "-Djdk.tls.acknowledgeCloseNotify=true", }, + { + Name: awaitelection.AwaitElectionEnabledKey, + Value: "1", + }, + { + Name: awaitelection.AwaitElectionNameKey, + Value: "linstor-controller", + }, + { + Name: awaitelection.AwaitElectionLockNameKey, + Value: pcs.Name, + }, + { + Name: awaitelection.AwaitElectionLockNamespaceKey, + Value: pcs.Namespace, + }, + { + Name: awaitelection.AwaitElectionIdentityKey, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + { + Name: awaitelection.AwaitElectionPodIP, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "status.podIP", + }, + }, + }, + { + Name: awaitelection.AwaitElectionServiceName, + Value: pcs.Name, + }, + { + Name: awaitelection.AwaitElectionServiceNamespace, + Value: pcs.Namespace, + }, + { + Name: awaitelection.AwaitElectionServicePortsJson, + Value: string(servicePortsJSON), + }, + { + Name: awaitelection.AwaitElectionStatusEndpointKey, + Value: fmt.Sprintf(":%d", healthzPort), + }, } volumes := []corev1.Volume{ @@ -662,6 +710,17 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme }) } + // This probe should be able to deal with "new" images which start a leader election process, + // as well as images without leader election helper + livenessProbe := corev1.Probe{ + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/", + Port: intstr.FromInt(healthzPort), + }, + }, + } + return &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: pcs.Name + "-controller", @@ -669,8 +728,8 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme Labels: labels, }, Spec: appsv1.DeploymentSpec{ - Strategy: appsv1.DeploymentStrategy{Type: appsv1.RecreateDeploymentStrategyType}, Selector: &metav1.LabelSelector{MatchLabels: labels}, + Replicas: pcs.Spec.Replicas, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Name: 
pcs.Name + "-controller", @@ -678,8 +737,8 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme Labels: labels, }, Spec: corev1.PodSpec{ + ServiceAccountName: pcs.Spec.ServiceAccountName, PriorityClassName: pcs.Spec.PriorityClassName.GetName(pcs.Namespace), - ServiceAccountName: kubeSpec.LinstorControllerServiceAccount, Containers: []corev1.Container{ { Name: "linstor-controller", @@ -688,39 +747,22 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme ImagePullPolicy: pcs.Spec.ImagePullPolicy, Ports: []corev1.ContainerPort{ { - HostPort: 3376, ContainerPort: 3376, }, { - HostPort: 3377, ContainerPort: 3377, }, { - HostPort: lc.DefaultHttpPort, ContainerPort: lc.DefaultHttpPort, }, { - HostPort: lc.DefaultHttpsPort, ContainerPort: lc.DefaultHttpsPort, }, }, - VolumeMounts: volumeMounts, - Env: env, - ReadinessProbe: &corev1.Probe{ - Handler: corev1.Handler{ - HTTPGet: &corev1.HTTPGetAction{ - Path: "/", - // Http is always enabled (it will redirect to https if configured) - Scheme: corev1.URISchemeHTTP, - Port: intstr.FromInt(lc.DefaultHttpPort), - }, - }, - TimeoutSeconds: 10, - PeriodSeconds: 20, - FailureThreshold: 10, - InitialDelaySeconds: 5, - }, - Resources: pcs.Spec.Resources, + VolumeMounts: volumeMounts, + Env: env, + LivenessProbe: &livenessProbe, + Resources: pcs.Spec.Resources, }, }, Volumes: volumes, @@ -745,7 +787,7 @@ func newServiceForPCS(pcs *piraeusv1.LinstorController) *corev1.Service { Namespace: pcs.Namespace, }, Spec: corev1.ServiceSpec{ - ClusterIP: "None", + ClusterIP: "", Ports: []corev1.ServicePort{ { Name: pcs.Name, @@ -754,8 +796,7 @@ func newServiceForPCS(pcs *piraeusv1.LinstorController) *corev1.Service { TargetPort: intstr.FromInt(port), }, }, - Selector: pcsLabels(pcs), - Type: corev1.ServiceTypeClusterIP, + Type: corev1.ServiceTypeClusterIP, }, } }