From 5863c0466fd521796c4e925f58a2d931e690d53a Mon Sep 17 00:00:00 2001 From: rigazilla Date: Tue, 4 Jun 2024 17:00:44 +0200 Subject: [PATCH] Configurable Tolerations and TopologySpreadConstraints. Fixes #1803 and #1844 --- api/v1/infinispan_types.go | 4 + api/v1/types_util.go | 8 + api/v1/zz_generated.deepcopy.go | 14 ++ .../crd/bases/infinispan.org_infinispans.yaml | 178 ++++++++++++++++++ .../handler/manage/statefulset_updates.go | 10 + .../handler/provision/statefulsets.go | 4 +- 6 files changed, 217 insertions(+), 1 deletion(-) diff --git a/api/v1/infinispan_types.go b/api/v1/infinispan_types.go index 66f755a7a..80679448b 100644 --- a/api/v1/infinispan_types.go +++ b/api/v1/infinispan_types.go @@ -470,6 +470,10 @@ type SchedulingSpec struct { Affinity *corev1.Affinity `json:"affinity,omitempty"` // +optional PriorityClassName string `json:"PriorityClassName,omitempty"` + // +optional + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + // +optional + TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"` } // InfinispanSpec defines the desired state of Infinispan diff --git a/api/v1/types_util.go b/api/v1/types_util.go index d6e73385a..17c7fc20d 100644 --- a/api/v1/types_util.go +++ b/api/v1/types_util.go @@ -964,6 +964,14 @@ func (ispn *Infinispan) PriorityClassName() string { return "" } +func (ispn *Infinispan) Tolerations() []corev1.Toleration { + return ispn.Spec.Scheduling.Tolerations +} + +func (ispn *Infinispan) TopologySpreadConstraints() []corev1.TopologySpreadConstraint { + return ispn.Spec.Scheduling.TopologySpreadConstraints +} + func (c *ContainerProbeSpec) AssignDefaults(failureThreshold, initialDelay, period, successThreshold, timeout int32) { if c.FailureThreshold == nil { c.FailureThreshold = pointer.Int32(failureThreshold) diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index ba9cbe0d9..d629ef78e 100644 --- a/api/v1/zz_generated.deepcopy.go +++ 
b/api/v1/zz_generated.deepcopy.go @@ -850,6 +850,20 @@ func (in *SchedulingSpec) DeepCopyInto(out *SchedulingSpec) { *out = new(corev1.Affinity) (*in).DeepCopyInto(*out) } + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]corev1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.TopologySpreadConstraints != nil { + in, out := &in.TopologySpreadConstraints, &out.TopologySpreadConstraints + *out = make([]corev1.TopologySpreadConstraint, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingSpec. diff --git a/config/crd/bases/infinispan.org_infinispans.yaml b/config/crd/bases/infinispan.org_infinispans.yaml index fb53ab6d0..6f18854ac 100644 --- a/config/crd/bases/infinispan.org_infinispans.yaml +++ b/config/crd/bases/infinispan.org_infinispans.yaml @@ -1905,6 +1905,184 @@ spec: type: array type: object type: object + tolerations: + items: + description: The pod this Toleration is attached to tolerates + any taint that matches the triple <key,value,effect> using + the matching operator <operator>. + properties: + effect: + description: Effect indicates the taint effect to match. + Empty means match all taint effects. When specified, allowed + values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the toleration applies + to. Empty means match all taint keys. If the key is empty, + operator must be Exists; this combination means to match + all values and all keys. + type: string + operator: + description: Operator represents a key's relationship to + the value. Valid operators are Exists and Equal. Defaults + to Equal. Exists is equivalent to wildcard for value, + so that a pod can tolerate all taints of a particular + category.
+ type: string + tolerationSeconds: + description: TolerationSeconds represents the period of + time the toleration (which must be of effect NoExecute, + otherwise this field is ignored) tolerates the taint. + By default, it is not set, which means tolerate the taint + forever (do not evict). Zero and negative values will + be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the toleration matches + to. If the operator is Exists, the value should be empty, + otherwise just a regular string. + type: string + type: object + type: array + topologySpreadConstraints: + items: + description: TopologySpreadConstraint specifies how to spread + matching pods among the given topology. + properties: + labelSelector: + description: LabelSelector is used to find matching pods. + Pods that match this label selector are counted to determine + the number of pods in their corresponding topology domain. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. 
+ items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + maxSkew: + description: 'MaxSkew describes the degree to which pods + may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, + it is the maximum permitted difference between the number + of matching pods in the target topology and the global + minimum. The global minimum is the minimum number of matching + pods in an eligible domain or zero if the number of eligible + domains is less than MinDomains. For example, in a 3-zone + cluster, MaxSkew is set to 1, and pods with the same labelSelector + spread as 2/2/1: In this case, the global minimum is 1. + | zone1 | zone2 | zone3 | | P P | P P | P | - + if MaxSkew is 1, incoming pod can only be scheduled to + zone3 to become 2/2/2; scheduling it onto zone1(zone2) + would make the ActualSkew(3-1) on zone1(zone2) violate + MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled + onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, + it is used to give higher precedence to topologies that + satisfy it. It''s a required field. Default value is 1 + and 0 is not allowed.' + format: int32 + type: integer + minDomains: + description: "MinDomains indicates a minimum number of eligible + domains. When the number of eligible domains with matching + topology keys is less than minDomains, Pod Topology Spread + treats \"global minimum\" as 0, and then the calculation + of Skew is performed. 
And when the number of eligible + domains with matching topology keys equals or greater + than minDomains, this value has no effect on scheduling. + As a result, when the number of eligible domains is less + than minDomains, scheduler won't schedule more than maxSkew + Pods to those domains. If value is nil, the constraint + behaves as if MinDomains is equal to 1. Valid values are + integers greater than 0. When value is not nil, WhenUnsatisfiable + must be DoNotSchedule. \n For example, in a 3-zone cluster, + MaxSkew is set to 2, MinDomains is set to 5 and pods with + the same labelSelector spread as 2/2/2: | zone1 | zone2 + | zone3 | | P P | P P | P P | The number of domains + is less than 5(MinDomains), so \"global minimum\" is treated + as 0. In this situation, new pod with the same labelSelector + cannot be scheduled, because computed skew will be 3(3 + - 0) if new Pod is scheduled to any of the three zones, + it will violate MaxSkew. \n This is an alpha field and + requires enabling MinDomainsInPodTopologySpread feature + gate." + format: int32 + type: integer + topologyKey: + description: TopologyKey is the key of node labels. Nodes + that have a label with this key and identical values are + considered to be in the same topology. We consider each + <key, value> as a "bucket", and try to put balanced number + of pods into each bucket. We define a domain as a particular + instance of a topology. Also, we define an eligible domain + as a domain whose nodes match the node selector. e.g. + If TopologyKey is "kubernetes.io/hostname", each Node + is a domain of that topology. And, if TopologyKey is "topology.kubernetes.io/zone", + each zone is a domain of that topology. It's a required + field. + type: string + whenUnsatisfiable: + description: 'WhenUnsatisfiable indicates how to deal with + a pod if it doesn''t satisfy the spread constraint. - + DoNotSchedule (default) tells the scheduler not to schedule + it.
- ScheduleAnyway tells the scheduler to schedule the + pod in any location, but giving higher precedence to topologies + that would help reduce the skew. A constraint is considered + "Unsatisfiable" for an incoming pod if and only if every + possible node assignment for that pod would violate "MaxSkew" + on some topology. For example, in a 3-zone cluster, MaxSkew + is set to 1, and pods with the same labelSelector spread + as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P | + If WhenUnsatisfiable is set to DoNotSchedule, incoming + pod can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2) + as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). + In other words, the cluster can still be imbalanced, but + scheduler won''t make it *more* imbalanced. It''s a required + field.' + type: string + required: + - maxSkew + - topologyKey + - whenUnsatisfiable + type: object + type: array type: object security: description: InfinispanSecurity info for the user application connection diff --git a/pkg/reconcile/pipeline/infinispan/handler/manage/statefulset_updates.go b/pkg/reconcile/pipeline/infinispan/handler/manage/statefulset_updates.go index 47c73a434..dc5c98ca1 100644 --- a/pkg/reconcile/pipeline/infinispan/handler/manage/statefulset_updates.go +++ b/pkg/reconcile/pipeline/infinispan/handler/manage/statefulset_updates.go @@ -117,6 +117,16 @@ func StatefulSetRollingUpgrade(i *ispnv1.Infinispan, ctx pipeline.Context) { updateNeeded = true } + if !reflect.DeepEqual(spec.Tolerations, i.Tolerations()) { + spec.Tolerations = i.Tolerations() + updateNeeded = true + } + + if !reflect.DeepEqual(spec.TopologySpreadConstraints, i.TopologySpreadConstraints()) { + spec.TopologySpreadConstraints = i.TopologySpreadConstraints() + updateNeeded = true + } + if spec.PriorityClassName != i.PriorityClassName() { spec.PriorityClassName = i.PriorityClassName() updateNeeded = true diff --git a/pkg/reconcile/pipeline/infinispan/handler/provision/statefulsets.go 
b/pkg/reconcile/pipeline/infinispan/handler/provision/statefulsets.go index ee2759f0e..348ca39aa 100644 --- a/pkg/reconcile/pipeline/infinispan/handler/provision/statefulsets.go +++ b/pkg/reconcile/pipeline/infinispan/handler/provision/statefulsets.go @@ -105,7 +105,9 @@ func ClusterStatefulSetSpec(statefulSetName string, i *ispnv1.Infinispan, ctx pi Annotations: annotationsForPod, }, Spec: corev1.PodSpec{ - Affinity: i.Affinity(), + Affinity: i.Affinity(), + Tolerations: i.Tolerations(), + TopologySpreadConstraints: i.TopologySpreadConstraints(), Containers: []corev1.Container{{ Image: i.ImageName(), Args: BuildServerContainerArgs(ctx.ConfigFiles()),