diff --git a/CHANGELOG.md b/CHANGELOG.md index 188d5b39..729a63e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,12 +29,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Components deployed by the operator can now run with multiple replicas. Components elect a leader, that will take on the actual work as long as it is active. Should one pod go down, another replica will take over. - Currently these components support multiple replicas: + Currently, these components support multiple replicas: * `etcd` => set `etcd.replicas` to the desired count * `stork` => set `stork.replicas` to the desired count for stork scheduler and controller * `snapshot-controller` => set `csi-snapshotter.replicas` to the desired count for cluster-wide CSI snapshot controller * `csi-controller` => set `csi.controllerReplicas` to the desired count for the linstor CSI controller * `operator` => set `operator.replicas` to have multiple replicas of the operator running + * `piraeus-controller` => set `operator.controller.replicas` to have multiple replicas of the linstor controller. + NOTE: This requires support from the container. You need `piraeus-server:v1.8.0` or newer to use this feature. 
[resource requirements]: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ diff --git a/charts/piraeus/crds/piraeus.linbit.com_linstorcontrollers_crd.yaml b/charts/piraeus/crds/piraeus.linbit.com_linstorcontrollers_crd.yaml index e846ed4f..706242a8 100644 --- a/charts/piraeus/crds/piraeus.linbit.com_linstorcontrollers_crd.yaml +++ b/charts/piraeus/crds/piraeus.linbit.com_linstorcontrollers_crd.yaml @@ -679,6 +679,11 @@ spec: description: priorityClassName is the name of the PriorityClass for the controller pods type: string + replicas: + description: Number of replicas in the controller deployment + format: int32 + nullable: true + type: integer resources: description: Resource requirements for the LINSTOR controller pod nullable: true @@ -698,6 +703,11 @@ spec: to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' type: object type: object + serviceAccountName: + description: Name of the service account that runs leader elections + for linstor + nullable: true + type: string sslSecret: description: Name of k8s secret that holds the SSL key for a node (called `keystore.jks`) and the trusted certificates (called `certificates.jks`) diff --git a/charts/piraeus/templates/linstor-controller-rbac.yml b/charts/piraeus/templates/linstor-controller-rbac.yml new file mode 100644 index 00000000..8cdc9688 --- /dev/null +++ b/charts/piraeus/templates/linstor-controller-rbac.yml @@ -0,0 +1,31 @@ +# This YAML file contains all RBAC objects that are necessary to run a +# linstor controller pod with leader election +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: linstor-controller + namespace: {{ .Release.Namespace }} +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: linstor-leader-elector + namespace: {{ .Release.Namespace }} +rules: + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "watch", "list", 
"delete", "update", "create"] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: linstor-leader-elector + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: linstor-leader-elector +subjects: + - kind: ServiceAccount + name: linstor-controller diff --git a/charts/piraeus/templates/operator-controller.yaml b/charts/piraeus/templates/operator-controller.yaml index 8accb3eb..67d37804 100644 --- a/charts/piraeus/templates/operator-controller.yaml +++ b/charts/piraeus/templates/operator-controller.yaml @@ -17,3 +17,5 @@ spec: affinity: {{ .Values.operator.controller.affinity | toJson }} tolerations: {{ .Values.operator.controller.tolerations | toJson}} resources: {{ .Values.operator.controller.resources | toJson }} + replicas: {{ .Values.operator.controller.replicas }} + serviceAccountName: linstor-controller diff --git a/charts/piraeus/values.cn.yaml b/charts/piraeus/values.cn.yaml index f1409197..a42d0140 100644 --- a/charts/piraeus/values.cn.yaml +++ b/charts/piraeus/values.cn.yaml @@ -53,6 +53,7 @@ operator: affinity: {} tolerations: [] resources: {} + replicas: 1 satelliteSet: satelliteImage: daocloud.io/piraeus/piraeus-server:v1.7.1 storagePools: null diff --git a/charts/piraeus/values.yaml b/charts/piraeus/values.yaml index fc9a79a5..e01590e2 100644 --- a/charts/piraeus/values.yaml +++ b/charts/piraeus/values.yaml @@ -53,6 +53,7 @@ operator: affinity: {} tolerations: [] resources: {} + replicas: 1 satelliteSet: satelliteImage: quay.io/piraeusdatastore/piraeus-server:v1.7.1 storagePools: null diff --git a/go.mod b/go.mod index 01516d73..af7ab458 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,14 @@ go 1.13 require ( github.com/BurntSushi/toml v0.3.1 github.com/LINBIT/golinstor v0.26.1-0.20200520122514-71747751b6af + github.com/linbit/k8s-await-election v0.1.0 github.com/operator-framework/operator-sdk v0.16.0 - github.com/sirupsen/logrus v1.4.2 + github.com/sirupsen/logrus v1.6.0 
github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.4.0 gopkg.in/ini.v1 v1.56.0 k8s.io/api v0.0.0 - k8s.io/apimachinery v0.0.0 + k8s.io/apimachinery v0.18.4 k8s.io/client-go v12.0.0+incompatible sigs.k8s.io/controller-runtime v0.4.0 ) diff --git a/go.sum b/go.sum index 4aca2de1..a9f4b42e 100644 --- a/go.sum +++ b/go.sum @@ -421,6 +421,8 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -434,6 +436,8 @@ github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc= github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= +github.com/linbit/k8s-await-election v0.1.0 h1:LFHQ15t7BRSnW41QfdVSLqNpVgHh5pv2V/jlUAokboo= +github.com/linbit/k8s-await-election v0.1.0/go.mod h1:VCRtUTvVQmfNyqW7OSNyCOCh9mi29fgQ75XtUIfP5WE= github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= github.com/lithammer/shortuuid v3.0.0+incompatible 
h1:NcD0xWW/MZYXEHa6ITy6kaXN5nwm/V115vj2YXfhS0w= github.com/lithammer/shortuuid v3.0.0+incompatible/go.mod h1:FR74pbAuElzOUuenUHTK2Tciko1/vKuIKS9dSkDrA4w= @@ -623,6 +627,8 @@ github.com/sirupsen/logrus v1.3.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a h1:pa8hGb/2YqsZKovtsgrwcDH1RZhVbTKCjLp47XpqCDs= diff --git a/pkg/apis/piraeus/v1/linstorcontroller_types.go b/pkg/apis/piraeus/v1/linstorcontroller_types.go index 8b1f591b..20cce23d 100644 --- a/pkg/apis/piraeus/v1/linstorcontroller_types.go +++ b/pkg/apis/piraeus/v1/linstorcontroller_types.go @@ -88,6 +88,16 @@ type LinstorControllerSpec struct { // +nullable Tolerations []corev1.Toleration `json:"tolerations"` + // Number of replicas in the controller deployment + // +optional + // +nullable + Replicas *int32 `json:"replicas"` + + // Name of the service account that runs leader elections for linstor + // +optional + // +nullable + ServiceAccountName string `json:"serviceAccountName"` + shared.LinstorClientConfig `json:",inline"` } diff --git a/pkg/apis/piraeus/v1/zz_generated.deepcopy.go b/pkg/apis/piraeus/v1/zz_generated.deepcopy.go index bd1522d1..1be567e6 100644 --- a/pkg/apis/piraeus/v1/zz_generated.deepcopy.go +++ b/pkg/apis/piraeus/v1/zz_generated.deepcopy.go @@ -238,6 
+238,11 @@ func (in *LinstorControllerSpec) DeepCopyInto(out *LinstorControllerSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Replicas != nil { + in, out := &in.Replicas, &out.Replicas + *out = new(int32) + **out = **in + } out.LinstorClientConfig = in.LinstorClientConfig return } diff --git a/pkg/controller/linstorcontroller/linstorcontroller_controller.go b/pkg/controller/linstorcontroller/linstorcontroller_controller.go index d12207dc..3212018d 100644 --- a/pkg/controller/linstorcontroller/linstorcontroller_controller.go +++ b/pkg/controller/linstorcontroller/linstorcontroller_controller.go @@ -33,6 +33,7 @@ import ( lc "github.com/piraeusdatastore/piraeus-operator/pkg/linstor/client" "github.com/BurntSushi/toml" + awaitelection "github.com/linbit/k8s-await-election/pkg/consts" "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -555,6 +556,34 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme // Workaround for https://github.com/LINBIT/linstor-server/issues/123 Value: "-Djdk.tls.acknowledgeCloseNotify=true", }, + { + Name: awaitelection.AwaitElectionEnabledKey, + Value: "1", + }, + { + Name: awaitelection.AwaitElectionNameKey, + Value: "linstor-controller", + }, + { + Name: awaitelection.AwaitElectionLockNameKey, + Value: pcs.Name, + }, + { + Name: awaitelection.AwaitElectionLockNamespaceKey, + Value: pcs.Namespace, + }, + { + Name: awaitelection.AwaitElectionIdentityKey, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + { + Name: awaitelection.AwaitElectionStatusEndpointKey, + Value: ":9999", + }, } volumes := []corev1.Volume{ @@ -658,6 +687,40 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme }) } + // This probe should be able to deal with "new" images which start a leader election process, + // as well as images without leader election helper + containerHealthCommand := 
fmt.Sprintf(` +if command -v k8s-await-election >/dev/null ; then + # query leader election endpoint + curl -f http://127.0.0.1:9999/ >/dev/null 2>&1 +else + # query linstor endpoint + curl -f http://127.0.0.1:%d/ >/dev/null 2>&1 +fi +`, lc.DefaultHttpPort) + leaderElectorProbe := corev1.Probe{ + Handler: corev1.Handler{ + Exec: &corev1.ExecAction{ + Command: []string{ + "/bin/sh", + "-ec", + containerHealthCommand, + }, + }, + }, + } + + linstorControllerProbe := corev1.Probe{ + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/", + // Http is always enabled (it will redirect to https if configured) + Scheme: corev1.URISchemeHTTP, + Port: intstr.FromInt(lc.DefaultHttpPort), + }, + }, + } + return &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: pcs.Name + "-controller", @@ -665,8 +728,8 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme Labels: labels, }, Spec: appsv1.DeploymentSpec{ - Strategy: appsv1.DeploymentStrategy{Type: appsv1.RecreateDeploymentStrategyType}, Selector: &metav1.LabelSelector{MatchLabels: labels}, + Replicas: pcs.Spec.Replicas, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Name: pcs.Name + "-controller", @@ -674,7 +737,8 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme Labels: labels, }, Spec: corev1.PodSpec{ - PriorityClassName: pcs.Spec.PriorityClassName.GetName(pcs.Namespace), + ServiceAccountName: pcs.Spec.ServiceAccountName, + PriorityClassName: pcs.Spec.PriorityClassName.GetName(pcs.Namespace), Containers: []corev1.Container{ { Name: "linstor-controller", @@ -700,23 +764,12 @@ func newDeploymentForResource(pcs *piraeusv1.LinstorController) *appsv1.Deployme ContainerPort: lc.DefaultHttpsPort, }, }, - VolumeMounts: volumeMounts, - Env: env, - ReadinessProbe: &corev1.Probe{ - Handler: corev1.Handler{ - HTTPGet: &corev1.HTTPGetAction{ - Path: "/", - // Http is always enabled (it will redirect to https if configured) - Scheme: 
corev1.URISchemeHTTP, - Port: intstr.FromInt(lc.DefaultHttpPort), - }, - }, - TimeoutSeconds: 10, - PeriodSeconds: 20, - FailureThreshold: 10, - InitialDelaySeconds: 5, - }, - Resources: pcs.Spec.Resources, + VolumeMounts: volumeMounts, + Env: env, + LivenessProbe: &leaderElectorProbe, + StartupProbe: &leaderElectorProbe, + ReadinessProbe: &linstorControllerProbe, + Resources: pcs.Spec.Resources, }, }, Volumes: volumes, @@ -745,7 +798,7 @@ func newServiceForPCS(pcs *piraeusv1.LinstorController) *corev1.Service { Namespace: pcs.Namespace, }, Spec: corev1.ServiceSpec{ - ClusterIP: "None", + ClusterIP: "", Ports: []corev1.ServicePort{ { Name: pcs.Name,