From db8a7c5037b5baaf703f5a3e853126d8c772d7e0 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Fri, 8 Nov 2024 19:00:01 +0000 Subject: [PATCH 01/27] initial working prototype of controller revision --- Dockerfile | 1 + .../v1/leaderworkerset_types.go | 14 + .../v1/zz_generated.deepcopy.go | 5 + .../v1/leaderworkersetstatus.go | 27 ++ ...erworkerset.x-k8s.io_leaderworkersets.yaml | 17 + config/manager/kustomization.yaml | 4 +- config/rbac/role.yaml | 3 + pkg/controllers/leaderworkerset_controller.go | 14 + pkg/controllers/pod_controller.go | 19 +- pkg/history/controller_history.go | 363 ++++++++++++++++++ pkg/utils/controller/controller_utils.go | 196 ++++++++++ 11 files changed, 658 insertions(+), 5 deletions(-) create mode 100644 pkg/history/controller_history.go diff --git a/Dockerfile b/Dockerfile index da2ff121..fc98835a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,7 @@ RUN go mod download COPY cmd/main.go cmd/main.go COPY api/ api/ COPY pkg/controllers/ pkg/controllers/ +COPY pkg/history/ pkg/history/ COPY pkg/cert/ pkg/cert/ COPY pkg/webhooks/ pkg/webhooks/ COPY pkg/utils pkg/utils diff --git a/api/leaderworkerset/v1/leaderworkerset_types.go b/api/leaderworkerset/v1/leaderworkerset_types.go index cfbae2c3..2f939cab 100644 --- a/api/leaderworkerset/v1/leaderworkerset_types.go +++ b/api/leaderworkerset/v1/leaderworkerset_types.go @@ -303,6 +303,20 @@ type LeaderWorkerSetStatus struct { // needed for HPA to know what pods belong to the LeaderWorkerSet object. Here // we only select the leader pods. HPAPodSelector string `json:"hpaPodSelector,omitempty"` + + // currentRevision, if not empty, indicates the version of the worker StatefulSet + // used to generate the worker pods in sequence [0,currentReplicas) + CurrentRevision string `json:"currentRevision,omitempty"` + + // updateRevision, if not empty, indicates the version of the worker StatefulSet + // used to generate the worker pods in sequence [replicas-updatedReplicas,replicas) + UpdateRevision string `json:"updateRevision,omitempty"` + + // collisionCount is the count of hash collisions for the StatefulSet. The StatefulSet controller + // uses this field as a collision avoidance mechanism when it needs to create the name for the + // newest ControllerRevision. + // +optional + CollisionCount *int32 `json:"collisionCount,omitempty"` } type LeaderWorkerSetConditionType string diff --git a/api/leaderworkerset/v1/zz_generated.deepcopy.go b/api/leaderworkerset/v1/zz_generated.deepcopy.go index 1baaab4f..6014f924 100644 --- a/api/leaderworkerset/v1/zz_generated.deepcopy.go +++ b/api/leaderworkerset/v1/zz_generated.deepcopy.go @@ -122,6 +122,11 @@ func (in *LeaderWorkerSetStatus) DeepCopyInto(out *LeaderWorkerSetStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.CollisionCount != nil { + in, out := &in.CollisionCount, &out.CollisionCount + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LeaderWorkerSetStatus. 
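Editorial aside (not part of the patch series): the three status fields added above mirror StatefulSet's revision bookkeeping. Below is a minimal sketch of how a client might consume them to detect an in-progress rolling update. The helper names (updateInProgress, printRolloutState) are hypothetical, and the controller-runtime client wiring is an assumption about typical usage, not code from this PR.

package main

import (
	"context"
	"fmt"

	"sigs.k8s.io/controller-runtime/pkg/client"

	leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1"
)

// updateInProgress is a hypothetical helper: a rollout is still in flight
// while the controller reports different current and update revisions.
func updateInProgress(lws *leaderworkerset.LeaderWorkerSet) bool {
	return lws.Status.UpdateRevision != "" &&
		lws.Status.CurrentRevision != lws.Status.UpdateRevision
}

// printRolloutState fetches a LeaderWorkerSet and reports its revision state.
func printRolloutState(ctx context.Context, c client.Client, key client.ObjectKey) error {
	var lws leaderworkerset.LeaderWorkerSet
	if err := c.Get(ctx, key, &lws); err != nil {
		return err
	}
	// CollisionCount is a *int32 in the new status; dereference defensively.
	collisions := int32(0)
	if lws.Status.CollisionCount != nil {
		collisions = *lws.Status.CollisionCount
	}
	fmt.Printf("current=%q update=%q collisions=%d inProgress=%v\n",
		lws.Status.CurrentRevision, lws.Status.UpdateRevision,
		collisions, updateInProgress(&lws))
	return nil
}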
diff --git a/client-go/applyconfiguration/leaderworkerset/v1/leaderworkersetstatus.go b/client-go/applyconfiguration/leaderworkerset/v1/leaderworkersetstatus.go index 42f68aed..ce81a4c0 100644 --- a/client-go/applyconfiguration/leaderworkerset/v1/leaderworkersetstatus.go +++ b/client-go/applyconfiguration/leaderworkerset/v1/leaderworkersetstatus.go @@ -29,6 +29,9 @@ type LeaderWorkerSetStatusApplyConfiguration struct { UpdatedReplicas *int32 `json:"updatedReplicas,omitempty"` Replicas *int32 `json:"replicas,omitempty"` HPAPodSelector *string `json:"hpaPodSelector,omitempty"` + CurrentRevision *string `json:"currentRevision,omitempty"` + UpdateRevision *string `json:"updateRevision,omitempty"` + CollisionCount *int32 `json:"collisionCount,omitempty"` } // LeaderWorkerSetStatusApplyConfiguration constructs a declarative configuration of the LeaderWorkerSetStatus type for use with @@ -81,3 +84,27 @@ func (b *LeaderWorkerSetStatusApplyConfiguration) WithHPAPodSelector(value strin b.HPAPodSelector = &value return b } + +// WithCurrentRevision sets the CurrentRevision field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the CurrentRevision field is set to the value of the last call. +func (b *LeaderWorkerSetStatusApplyConfiguration) WithCurrentRevision(value string) *LeaderWorkerSetStatusApplyConfiguration { + b.CurrentRevision = &value + return b +} + +// WithUpdateRevision sets the UpdateRevision field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the UpdateRevision field is set to the value of the last call. +func (b *LeaderWorkerSetStatusApplyConfiguration) WithUpdateRevision(value string) *LeaderWorkerSetStatusApplyConfiguration { + b.UpdateRevision = &value + return b +} + +// WithCollisionCount sets the CollisionCount field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the CollisionCount field is set to the value of the last call. +func (b *LeaderWorkerSetStatusApplyConfiguration) WithCollisionCount(value int32) *LeaderWorkerSetStatusApplyConfiguration { + b.CollisionCount = &value + return b +} diff --git a/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml b/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml index 1feb3171..c313fe46 100644 --- a/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml +++ b/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml @@ -16204,6 +16204,13 @@ spec: status: description: LeaderWorkerSetStatus defines the observed state of LeaderWorkerSet properties: + collisionCount: + description: |- + collisionCount is the count of hash collisions for the StatefulSet. The StatefulSet controller + uses this field as a collision avoidance mechanism when it needs to create the name for the + newest ControllerRevision. + format: int32 + type: integer conditions: description: Conditions track the condition of the leaderworkerset. 
items: @@ -16261,6 +16268,11 @@ spec: - type type: object type: array + currentRevision: + description: |- + currentRevision, if not empty, indicates the version of the worker StatefulSet + used to generate the worker pods in sequence [0,currentReplicas) + type: string hpaPodSelector: description: |- HPAPodSelector for pods that belong to the LeaderWorkerSet object, this is @@ -16277,6 +16289,11 @@ spec: created (updated or not, ready or not) format: int32 type: integer + updateRevision: + description: |- + updateRevision, if not empty, indicates the version of the worker StatefulSet + used to generate the worker pods in sequence [replicas-updatedReplicas,replicas) + type: string updatedReplicas: description: UpdatedReplicas track the number of groups that have been updated (ready or not). diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index e6ccc001..5eb6a5a3 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -4,5 +4,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: gcr.io/k8s-staging-lws/lws - newTag: main + newName: us-central1-docker.pkg.dev/edwinhernandez-gke-dev/lws/lws/lws + newTag: latest diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 03b44b66..5167252d 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -66,6 +66,7 @@ rules: - apiGroups: - apps resources: + - controllerrevisions - statefulsets verbs: - create @@ -78,12 +79,14 @@ rules: - apiGroups: - apps resources: + - controllerrevisions/finalizers - statefulsets/finalizers verbs: - update - apiGroups: - apps resources: + - controllerrevisions/status - statefulsets/status verbs: - get diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index ab7320e2..1bf6b5ec 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -86,6 +86,9 @@ func NewLeaderWorkerSetReconciler(client client.Client, scheme *runtime.Scheme, //+kubebuilder:rbac:groups=apps,resources=statefulsets/finalizers,verbs=update //+kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch +// +kubebuilder:rbac:groups=apps,resources=controllerrevisions,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=apps,resources=controllerrevisions/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=apps,resources=controllerrevisions/finalizers,verbs=update func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { // Get leaderworkerset object @@ -96,6 +99,16 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) + currentRevision, updateRevision, collisionCount, err := controllerutils.GetStatefulSetRevisions(ctx, r.Client, lws) + if err != nil { + log.Error(err, "Getting StatefulSet revisions") + return ctrl.Result{}, err + } + lws.Status.CurrentRevision = currentRevision.Name + lws.Status.UpdateRevision = updateRevision.Name + lws.Status.CollisionCount = new(int32) + lws.Status.CollisionCount = &collisionCount + partition, replicas, err := r.rollingUpdateParameters(ctx, lws) if err != nil { log.Error(err, "Rolling partition error") @@ -547,6 +560,7 @@ func 
constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWor leaderworkerset.WorkerIndexLabelKey: "0", leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.TemplateRevisionHashKey: templateHash, + "leaderworkerset.controllerRevision": "lws-123", }) podAnnotations := make(map[string]string) podAnnotations[leaderworkerset.SizeAnnotationKey] = strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)) diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go index 1ec64ede..7b0a1663 100644 --- a/pkg/controllers/pod_controller.go +++ b/pkg/controllers/pod_controller.go @@ -38,6 +38,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" + "sigs.k8s.io/lws/pkg/utils" acceleratorutils "sigs.k8s.io/lws/pkg/utils/accelerators" controllerutils "sigs.k8s.io/lws/pkg/utils/controller" podutils "sigs.k8s.io/lws/pkg/utils/pod" @@ -118,8 +119,12 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R log.V(2).Info("defer the creation of the worker statefulset because leader pod is not ready.") return ctrl.Result{}, nil } - - statefulSet, err := constructWorkerStatefulSetApplyConfiguration(pod, leaderWorkerSet) + currentRevision, _, _, err := controllerutils.GetStatefulSetRevisions(ctx, r.Client, &leaderWorkerSet) + if err != nil { + log.Error(err, "Getting StatefulSet revisions") + return ctrl.Result{}, err + } + statefulSet, err := constructWorkerStatefulSetApplyConfiguration(pod, leaderWorkerSet, currentRevision) if err != nil { return ctrl.Result{}, err } @@ -259,8 +264,16 @@ func setControllerReferenceWithStatefulSet(owner metav1.Object, sts *appsapplyv1 } // constructWorkerStatefulSetApplyConfiguration constructs the applied configuration for the leader StatefulSet -func constructWorkerStatefulSetApplyConfiguration(leaderPod corev1.Pod, lws leaderworkerset.LeaderWorkerSet) (*appsapplyv1.StatefulSetApplyConfiguration, error) { +func constructWorkerStatefulSetApplyConfiguration(leaderPod corev1.Pod, lws leaderworkerset.LeaderWorkerSet, currentRevision *appsv1.ControllerRevision) (*appsapplyv1.StatefulSetApplyConfiguration, error) { + updatedTemplateHash := utils.LeaderWorkerTemplateHash(&lws) podTemplateSpec := *lws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy() + if updatedTemplateHash != leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] { + originalLws, err := controllerutils.ApplyRevision(&lws, currentRevision) + if err != nil { + return nil, err + } + podTemplateSpec = *originalLws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy() + } // construct pod template spec configuration obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&podTemplateSpec) if err != nil { diff --git a/pkg/history/controller_history.go b/pkg/history/controller_history.go new file mode 100644 index 00000000..f4b207eb --- /dev/null +++ b/pkg/history/controller_history.go @@ -0,0 +1,363 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package history + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "hash" + "hash/fnv" + "sort" + "strconv" + + "github.com/davecgh/go-spew/spew" + appsv1 "k8s.io/api/apps/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + apiequality "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/client-go/util/retry" +) + +// ControllerRevisionHashLabel is the label used to indicate the hash value of a ControllerRevision's Data. +const ControllerRevisionHashLabel = "controller.kubernetes.io/hash" + +// ControllerRevisionName returns the Name for a ControllerRevision in the form prefix-hash. If the length +// of prefix is greater than 223 bytes, it is truncated to allow for a name that is no larger than 253 bytes. +func ControllerRevisionName(prefix string, hash string) string { + if len(prefix) > 223 { + prefix = prefix[:223] + } + + return fmt.Sprintf("%s-%s", prefix, hash) +} + +// NewControllerRevision returns a ControllerRevision with a ControllerRef pointing to parent and indicating that +// parent is of parentKind. The ControllerRevision has labels matching template labels, contains Data equal to data, and +// has a Revision equal to revision. The collisionCount is used when creating the name of the ControllerRevision +// so the name is likely unique. If the returned error is nil, the returned ControllerRevision is valid. If the +// returned error is not nil, the returned ControllerRevision is invalid for use. +func NewControllerRevision(parent metav1.Object, + parentKind schema.GroupVersionKind, + templateLabels map[string]string, + data runtime.RawExtension, + revision int64, + collisionCount *int32) (*appsv1.ControllerRevision, error) { + labelMap := make(map[string]string) + for k, v := range templateLabels { + labelMap[k] = v + } + cr := &appsv1.ControllerRevision{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labelMap, + OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(parent, parentKind)}, + Namespace: parent.GetNamespace(), + }, + Data: data, + Revision: revision, + } + hash := HashControllerRevision(cr, collisionCount) + cr.Name = ControllerRevisionName(parent.GetName(), hash) + cr.Labels[ControllerRevisionHashLabel] = hash + return cr, nil +} + +// HashControllerRevision hashes the contents of revision's Data using FNV hashing. If probe is not nil, the byte value +// of probe is added written to the hash as well. The returned hash will be a safe encoded string to avoid bad words. +func HashControllerRevision(revision *appsv1.ControllerRevision, probe *int32) string { + hf := fnv.New32() + if len(revision.Data.Raw) > 0 { + hf.Write(revision.Data.Raw) + } + if revision.Data.Object != nil { + DeepHashObject(hf, revision.Data.Object) + } + if probe != nil { + hf.Write([]byte(strconv.FormatInt(int64(*probe), 10))) + } + return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) +} + +// SortControllerRevisions sorts revisions by their Revision. +func SortControllerRevisions(revisions []*appsv1.ControllerRevision) { + sort.Stable(byRevision(revisions)) +} + +// EqualRevision returns true if lhs and rhs are either both nil, or both point to non-nil ControllerRevisions that +// contain semantically equivalent data. Otherwise this method returns false. 
+func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool { + var lhsHash, rhsHash *uint32 + if lhs == nil || rhs == nil { + return lhs == rhs + } + if hs, found := lhs.Labels[ControllerRevisionHashLabel]; found { + hash, err := strconv.ParseInt(hs, 10, 32) + if err == nil { + lhsHash = new(uint32) + *lhsHash = uint32(hash) + } + } + if hs, found := rhs.Labels[ControllerRevisionHashLabel]; found { + hash, err := strconv.ParseInt(hs, 10, 32) + if err == nil { + rhsHash = new(uint32) + *rhsHash = uint32(hash) + } + } + if lhsHash != nil && rhsHash != nil && *lhsHash != *rhsHash { + return false + } + return bytes.Equal(lhs.Data.Raw, rhs.Data.Raw) && apiequality.Semantic.DeepEqual(lhs.Data.Object, rhs.Data.Object) +} + +// FindEqualRevisions returns all ControllerRevisions in revisions that are equal to needle using EqualRevision as the +// equality test. The returned slice preserves the order of revisions. +func FindEqualRevisions(revisions []*appsv1.ControllerRevision, needle *appsv1.ControllerRevision) []*appsv1.ControllerRevision { + var eq []*appsv1.ControllerRevision + for i := range revisions { + if EqualRevision(revisions[i], needle) { + eq = append(eq, revisions[i]) + } + } + return eq +} + +// byRevision implements sort.Interface to allow ControllerRevisions to be sorted by Revision. +type byRevision []*appsv1.ControllerRevision + +func (br byRevision) Len() int { + return len(br) +} + +// Less breaks ties first by creation timestamp, then by name +func (br byRevision) Less(i, j int) bool { + if br[i].Revision == br[j].Revision { + if br[j].CreationTimestamp.Equal(&br[i].CreationTimestamp) { + return br[i].Name < br[j].Name + } + return br[j].CreationTimestamp.After(br[i].CreationTimestamp.Time) + } + return br[i].Revision < br[j].Revision +} + +func (br byRevision) Swap(i, j int) { + br[i], br[j] = br[j], br[i] +} + +// Interface provides an interface allowing for management of a Controller's history as realized by recorded +// ControllerRevisions. An instance of Interface can be retrieved from NewHistory. Implementations must treat all +// pointer parameters as "in" parameter, and they must not be mutated. +type Interface interface { + // ListControllerRevisions lists all ControllerRevisions matching selector and owned by parent or no other + // controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the + // returned error is not nil, the returned slice is not valid. + ListControllerRevisions(parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) + // CreateControllerRevision attempts to create the revision as owned by parent via a ControllerRef. If name + // collision occurs, collisionCount (incremented each time collision occurs except for the first time) is + // added to the hash of the revision and it is renamed using ControllerRevisionName. Implementations may + // cease to attempt to retry creation after some number of attempts and return an error. If the returned + // error is not nil, creation failed. If the returned error is nil, the returned ControllerRevision has been + // created. + // Callers must make sure that collisionCount is not nil. An error is returned if it is. + CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision, collisionCount *int32) (*appsv1.ControllerRevision, error) + // DeleteControllerRevision attempts to delete revision. If the returned error is not nil, deletion has failed. 
+ DeleteControllerRevision(revision *appsv1.ControllerRevision) error + // UpdateControllerRevision updates revision such that its Revision is equal to newRevision. Implementations + // may retry on conflict. If the returned error is nil, the update was successful and returned ControllerRevision + // is valid. If the returned error is not nil, the update failed and the returned ControllerRevision is invalid. + UpdateControllerRevision(revision *appsv1.ControllerRevision, newRevision int64) (*appsv1.ControllerRevision, error) + // AdoptControllerRevision attempts to adopt revision by adding a ControllerRef indicating that the parent + // Object of parentKind is the owner of revision. If revision is already owned, an error is returned. If the + // resource patch fails, an error is returned. If no error is returned, the returned ControllerRevision is + // valid. + AdoptControllerRevision(parent metav1.Object, parentKind schema.GroupVersionKind, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) + // ReleaseControllerRevision attempts to release parent's ownership of revision by removing parent from the + // OwnerReferences of revision. If an error is returned, parent remains the owner of revision. If no error is + // returned, the returned ControllerRevision is valid. + ReleaseControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) +} + +// NewHistory returns an instance of Interface that uses client to communicate with the API Server and lister to list +// ControllerRevisions. This method should be used to create an Interface for all scenarios other than testing. +func NewHistory(k8sclient client.Client, context context.Context) Interface { + return &realHistory{k8sclient, context} +} + +type realHistory struct { + client.Client + context context.Context +} + +func (rh *realHistory) ListControllerRevisions(parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { + // List all revisions in the namespace that match the selector + revisionList := new(appsv1.ControllerRevisionList) + err := rh.List(rh.context, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) + if err != nil { + return nil, err + } + history := revisionList.Items + var owned []*appsv1.ControllerRevision + for i := range history { + ref := metav1.GetControllerOfNoCopy(&history[i]) + if ref == nil || ref.UID == parent.GetUID() { + owned = append(owned, &history[i]) + } + + } + return owned, err +} + +func (rh *realHistory) CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision, collisionCount *int32) (*appsv1.ControllerRevision, error) { + if collisionCount == nil { + return nil, fmt.Errorf("collisionCount should not be nil") + } + + // Clone the input + clone := revision.DeepCopy() + + // Continue to attempt to create the revision updating the name with a new hash on each iteration + for { + hash := HashControllerRevision(revision, collisionCount) + // Update the revisions name + clone.Name = ControllerRevisionName(parent.GetName(), hash) + ns := parent.GetNamespace() + err := rh.Create(rh.context, clone) + if errors.IsAlreadyExists(err) { + exists := &appsv1.ControllerRevision{} + err := rh.Get(rh.context, types.NamespacedName{Namespace: ns, Name: clone.Name}, exists) + if err != nil { + return nil, err + } + if bytes.Equal(exists.Data.Raw, clone.Data.Raw) { + return exists, nil + } + *collisionCount++ + continue + } + return clone, 
err
+	}
+}
+
+func (rh *realHistory) UpdateControllerRevision(revision *appsv1.ControllerRevision, newRevision int64) (*appsv1.ControllerRevision, error) {
+	clone := revision.DeepCopy()
+	err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
+		if clone.Revision == newRevision {
+			return nil
+		}
+		clone.Revision = newRevision
+		updateErr := rh.Update(rh.context, clone)
+		if updateErr == nil {
+			return nil
+		}
+		updated := &appsv1.ControllerRevision{}
+		if err := rh.Get(rh.context, types.NamespacedName{Namespace: clone.Namespace, Name: clone.Name}, updated); err == nil {
+			// make a copy so we don't mutate the shared cache
+			clone = updated.DeepCopy()
+		}
+		return updateErr
+	})
+	return clone, err
+}
+
+func (rh *realHistory) DeleteControllerRevision(revision *appsv1.ControllerRevision) error {
+	return rh.Delete(rh.context, revision)
+}
+
+type objectForPatch struct {
+	Metadata objectMetaForPatch `json:"metadata"`
+}
+
+// objectMetaForPatch defines the object meta struct for patch operations
+type objectMetaForPatch struct {
+	OwnerReferences []metav1.OwnerReference `json:"ownerReferences"`
+	UID             types.UID               `json:"uid"`
+}
+
+func (rh *realHistory) AdoptControllerRevision(parent metav1.Object, parentKind schema.GroupVersionKind, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) {
+	blockOwnerDeletion := true
+	isController := true
+	// Return an error if the revision is not an orphan
+	if owner := metav1.GetControllerOfNoCopy(revision); owner != nil {
+		return nil, fmt.Errorf("attempt to adopt revision owned by %v", owner)
+	}
+	addControllerPatch := objectForPatch{
+		Metadata: objectMetaForPatch{
+			UID: revision.UID,
+			OwnerReferences: []metav1.OwnerReference{{
+				APIVersion:         parentKind.GroupVersion().String(),
+				Kind:               parentKind.Kind,
+				Name:               parent.GetName(),
+				UID:                parent.GetUID(),
+				Controller:         &isController,
+				BlockOwnerDeletion: &blockOwnerDeletion,
+			}},
+		},
+	}
+	patchBytes, err := json.Marshal(&addControllerPatch)
+	if err != nil {
+		return nil, err
+	}
+	// Use strategic merge patch to add an owner reference indicating a controller ref
+	err = rh.Patch(rh.context, revision, client.RawPatch(types.StrategicMergePatchType, patchBytes))
+	return revision, err
+}
+
+func (rh *realHistory) ReleaseControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) {
+	dataBytes := GenerateDeleteOwnerRefStrategicMergeBytes(revision.UID, parent.GetUID())
+	// Use strategic merge patch to remove the owner reference indicating a controller ref
+	err := rh.Patch(rh.context, revision, client.RawPatch(types.StrategicMergePatchType, dataBytes))
+
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// We ignore deleted revisions
+			return nil, nil
+		}
+		if errors.IsInvalid(err) {
+			// We ignore cases where the parent no longer owns the revision or where the revision has no
+			// owner.
+ return nil, nil + } + } + return revision, err +} + +func DeepHashObject(hasher hash.Hash, objectToWrite interface{}) { + hasher.Reset() + printer := spew.ConfigState{ + Indent: " ", + SortKeys: true, + DisableMethods: true, + SpewKeys: true, + } + _, err := printer.Fprintf(hasher, "%#v", objectToWrite) + if err != nil { + return + } +} + +func GenerateDeleteOwnerRefStrategicMergeBytes(revisionUID types.UID, parentUID types.UID) []byte { + return []byte(fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, revisionUID, parentUID)) +} diff --git a/pkg/utils/controller/controller_utils.go b/pkg/utils/controller/controller_utils.go index de15acd2..3136b070 100644 --- a/pkg/utils/controller/controller_utils.go +++ b/pkg/utils/controller/controller_utils.go @@ -17,18 +17,28 @@ limitations under the License. package controller import ( + "bytes" "context" + "encoding/json" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/strategicpatch" + "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" + "sigs.k8s.io/lws/pkg/history" ) +// controllerKind contains the schema.GroupVersionKind for this controller type. +var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet") + func CreateHeadlessServiceIfNotExists(ctx context.Context, k8sClient client.Client, Scheme *runtime.Scheme, lws *leaderworkerset.LeaderWorkerSet, serviceName string, serviceSelector map[string]string, owner metav1.Object) error { log := ctrl.LoggerFrom(ctx) // If the headless service does not exist in the namespace, create it. @@ -61,3 +71,189 @@ func CreateHeadlessServiceIfNotExists(ctx context.Context, k8sClient client.Clie } return nil } + +// getStatefulSetRevisions returns the current and update ControllerRevisions for set. It also +// returns a collision count that records the number of name collisions set saw when creating +// new ControllerRevisions. This count is incremented on every name collision and is used in +// building the ControllerRevision names for name collision avoidance. This method may create +// a new revision, or modify the Revision of an existing revision if an update to set is detected. +// This method expects that revisions is sorted when supplied. +func GetStatefulSetRevisions( + ctx context.Context, + k8sClient client.Client, + lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, *appsv1.ControllerRevision, int32, error) { + var currentRevision *appsv1.ControllerRevision + log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) + ctx = ctrl.LoggerInto(ctx, log) + controllerHistory := history.NewHistory(k8sClient, ctx) + selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{}) + if err != nil { + return nil, nil, int32(-1), err + } + revisions, err := controllerHistory.ListControllerRevisions(lws, selector) + if err != nil { + log.Error(err, "Listing all controller revisions") + return nil, nil, int32(-1), err + } + revisionCount := len(revisions) + history.SortControllerRevisions(revisions) + + // Use a local copy of set.Status.CollisionCount to avoid modifying set.Status directly. + // This copy is returned so the value gets carried over to set.Status in updateStatefulSet. 
+ var collisionCount int32 + if lws.Status.CollisionCount != nil { + collisionCount = *lws.Status.CollisionCount + } + + // create a new revision from the current set + updateRevision, err := NewRevision(lws, NextRevision(revisions), &collisionCount) + if err != nil { + log.Error(err, "Creating new revision for lws") + return nil, nil, collisionCount, err + } + + // find any equivalent revisions + equalRevisions := history.FindEqualRevisions(revisions, updateRevision) + equalCount := len(equalRevisions) + + if equalCount > 0 && history.EqualRevision(revisions[revisionCount-1], equalRevisions[equalCount-1]) { + // if the equivalent revision is immediately prior the update revision has not changed + updateRevision = revisions[revisionCount-1] + } else if equalCount > 0 { + // if the equivalent revision is not immediately prior we will roll back by incrementing the + // Revision of the equivalent revision + updateRevision, err = controllerHistory.UpdateControllerRevision( + equalRevisions[equalCount-1], + updateRevision.Revision) + if err != nil { + log.Error(err, "updating controller revision") + return nil, nil, collisionCount, err + } + } else { + //if there is no equivalent revision we create a new one + updateRevision, err = controllerHistory.CreateControllerRevision(lws, updateRevision, &collisionCount) + if err != nil { + log.Error(err, "Creating new controller revision for lws") + return nil, nil, collisionCount, err + } + } + + // attempt to find the revision that corresponds to the current revision + for i := range revisions { + if revisions[i].Name == lws.Status.CurrentRevision { + currentRevision = revisions[i] + break + } + } + + // if the current revision is nil we initialize the history by setting it to the update revision + if currentRevision == nil { + currentRevision = updateRevision + } + + return currentRevision, updateRevision, collisionCount, nil +} + +// getPatch returns a strategic merge patch that can be applied to restore a StatefulSet to a +// previous version. If the returned error is nil the patch is valid. The current state that we save is just the +// PodSpecTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously +// recorded patches. +func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { + str := &bytes.Buffer{} + err := unstructured.UnstructuredJSONScheme.Encode(lws, str) + if err != nil { + return nil, err + } + var raw map[string]interface{} + err = json.Unmarshal(str.Bytes(), &raw) + if err != nil { + return nil, err + } + objCopy := make(map[string]interface{}) + specCopy := make(map[string]interface{}) + spec := raw["spec"].(map[string]interface{}) + template := spec["leaderWorkerTemplate"].(map[string]interface{}) + specCopy["leaderWorkerTemplate"] = template + template["$patch"] = "replace" + objCopy["spec"] = specCopy + patch, err := json.Marshal(objCopy) + return patch, err +} + +// newRevision creates a new ControllerRevision containing a patch that reapplies the target state of set. +// The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned +// ControllerRevision is valid. StatefulSet revisions are stored as patches that re-apply the current state of set +// to a new StatefulSet using a strategic merge patch to replace the saved state of the new StatefulSet. 
+func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, collisionCount *int32) (*appsv1.ControllerRevision, error) { + patch, err := getPatch(lws) + if err != nil { + return nil, err + } + combinedLabels := make(map[string]string) + for k, v := range lws.Spec.LeaderWorkerTemplate.LeaderTemplate.Labels { + combinedLabels[k] = v + } + for k, v := range lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Labels { + combinedLabels[k] = v + } + cr, err := history.NewControllerRevision(lws, + controllerKind, + combinedLabels, + runtime.RawExtension{Raw: patch}, + revision, + collisionCount) + if err != nil { + return nil, err + } + if cr.ObjectMeta.Annotations == nil { + cr.ObjectMeta.Annotations = make(map[string]string) + } + for key, value := range lws.Annotations { + cr.ObjectMeta.Annotations[key] = value + } + return cr, nil +} + +// ApplyRevision returns a new StatefulSet constructed by restoring the state in revision to set. If the returned error +// is nil, the returned StatefulSet is valid. +func ApplyRevision(lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*leaderworkerset.LeaderWorkerSet, error) { + clone := lws.DeepCopy() + str := &bytes.Buffer{} + err := unstructured.UnstructuredJSONScheme.Encode(lws, str) + if err != nil { + return nil, err + } + patched, err := strategicpatch.StrategicMergePatch(str.Bytes(), revision.Data.Raw, clone) + if err != nil { + return nil, err + } + restoredLws := &leaderworkerset.LeaderWorkerSet{} + err = json.Unmarshal(patched, restoredLws) + if err != nil { + return nil, err + } + return restoredLws, nil +} + +// nextRevision finds the next valid revision number based on revisions. If the length of revisions +// is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. This method +// assumes that revisions has been sorted by Revision. 
+func NextRevision(revisions []*appsv1.ControllerRevision) int64 {
+	count := len(revisions)
+	if count <= 0 {
+		return 1
+	}
+	return revisions[count-1].Revision + 1
+}
+
+// TruncateHistory cleans up all other controller revisions except the currentRevision and updateRevision
+func TruncateHistory(history history.Interface, revisions []*appsv1.ControllerRevision, updateRevision *appsv1.ControllerRevision, currentRevision *appsv1.ControllerRevision) error {
+	for i, revision := range revisions {
+		if revision.Name != updateRevision.Name && revision.Name != currentRevision.Name {
+			if err := history.DeleteControllerRevision(revisions[i]); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}

From df4872bb889cea792370f52bc2b3853becd530ee Mon Sep 17 00:00:00 2001
From: Edwinhr716 
Date: Fri, 15 Nov 2024 00:23:10 +0000
Subject: [PATCH 02/27] update value of currentRevision after update is done

---
 pkg/controllers/leaderworkerset_controller.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go
index 1bf6b5ec..5d246fd9 100644
--- a/pkg/controllers/leaderworkerset_controller.go
+++ b/pkg/controllers/leaderworkerset_controller.go
@@ -400,6 +400,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l
 		conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetUpgradeInProgress))
 	} else if updatedAndReadyCount == int(*lws.Spec.Replicas) {
 		conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetAvailable))
+		lws.Status.CurrentRevision = lws.Status.UpdateRevision
 	} else {
 		conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetProgressing))
 	}

From de9dd703b3e57f9ac9de6ed5fdaf909c0108d51e Mon Sep 17 00:00:00 2001
From: Edwinhr716 
Date: Fri, 15 Nov 2024 18:37:47 +0000
Subject: [PATCH 03/27] switched names from sts to lws

---
 pkg/controllers/leaderworkerset_controller.go |  2 +-
 pkg/controllers/pod_controller.go             |  2 +-
 pkg/utils/controller/controller_utils.go      | 20 +++++++++----------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go
index 5d246fd9..7cd234e3 100644
--- a/pkg/controllers/leaderworkerset_controller.go
+++ b/pkg/controllers/leaderworkerset_controller.go
@@ -99,7 +99,7 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 	log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws))
 	ctx = ctrl.LoggerInto(ctx, log)
 
-	currentRevision, updateRevision, collisionCount, err := controllerutils.GetStatefulSetRevisions(ctx, r.Client, lws)
+	currentRevision, updateRevision, collisionCount, err := controllerutils.GetLeaderWorkerSetRevisions(ctx, r.Client, lws)
 	if err != nil {
 		log.Error(err, "Getting StatefulSet revisions")
 		return ctrl.Result{}, err
diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go
index 7b0a1663..a851fe6e 100644
--- a/pkg/controllers/pod_controller.go
+++ b/pkg/controllers/pod_controller.go
@@ -119,7 +119,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
 		log.V(2).Info("defer the creation of the worker statefulset because leader pod is not ready.")
 		return ctrl.Result{}, nil
 	}
-	currentRevision, _, _, err := controllerutils.GetStatefulSetRevisions(ctx, r.Client, &leaderWorkerSet)
+	currentRevision, _, _, err := controllerutils.GetLeaderWorkerSetRevisions(ctx, r.Client, &leaderWorkerSet)
 	if err != 
nil { log.Error(err, "Getting StatefulSet revisions") return ctrl.Result{}, err diff --git a/pkg/utils/controller/controller_utils.go b/pkg/utils/controller/controller_utils.go index 3136b070..85aa632b 100644 --- a/pkg/utils/controller/controller_utils.go +++ b/pkg/utils/controller/controller_utils.go @@ -72,13 +72,13 @@ func CreateHeadlessServiceIfNotExists(ctx context.Context, k8sClient client.Clie return nil } -// getStatefulSetRevisions returns the current and update ControllerRevisions for set. It also +// GetLeaderWorkerSetRevisions returns the current and update ControllerRevisions for leaerWorkerSet. It also // returns a collision count that records the number of name collisions set saw when creating // new ControllerRevisions. This count is incremented on every name collision and is used in // building the ControllerRevision names for name collision avoidance. This method may create // a new revision, or modify the Revision of an existing revision if an update to set is detected. // This method expects that revisions is sorted when supplied. -func GetStatefulSetRevisions( +func GetLeaderWorkerSetRevisions( ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, *appsv1.ControllerRevision, int32, error) { @@ -154,9 +154,9 @@ func GetStatefulSetRevisions( return currentRevision, updateRevision, collisionCount, nil } -// getPatch returns a strategic merge patch that can be applied to restore a StatefulSet to a -// previous version. If the returned error is nil the patch is valid. The current state that we save is just the -// PodSpecTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously +// getPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a +// previous version. If the returned error is nil the patch is valid. The current state that we save is the +// leaderWorkerTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously // recorded patches. func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { str := &bytes.Buffer{} @@ -180,10 +180,10 @@ func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { return patch, err } -// newRevision creates a new ControllerRevision containing a patch that reapplies the target state of set. +// newRevision creates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet. // The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned -// ControllerRevision is valid. StatefulSet revisions are stored as patches that re-apply the current state of set -// to a new StatefulSet using a strategic merge patch to replace the saved state of the new StatefulSet. +// ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set +// to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet. func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, collisionCount *int32) (*appsv1.ControllerRevision, error) { patch, err := getPatch(lws) if err != nil { @@ -214,8 +214,8 @@ func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, collision return cr, nil } -// ApplyRevision returns a new StatefulSet constructed by restoring the state in revision to set. If the returned error -// is nil, the returned StatefulSet is valid. 
+// ApplyRevision returns a new LeaderWorkerSet constructed by restoring the state in revision to set. If the returned error +// is nil, the returned LeaderWorkerSet is valid. func ApplyRevision(lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*leaderworkerset.LeaderWorkerSet, error) { clone := lws.DeepCopy() str := &bytes.Buffer{} From 267af378dde33e7011204dfdc337abef8fe830b1 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Wed, 27 Nov 2024 00:35:44 +0000 Subject: [PATCH 04/27] test changes --- pkg/controllers/leaderworkerset_controller.go | 1 - pkg/utils/controller/controller_utils.go | 2 +- pkg/utils/controller/controller_utils_test.go | 34 +++++++++++++++++++ test/testutils/wrappers.go | 1 + 4 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 pkg/utils/controller/controller_utils_test.go diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 7cd234e3..af3c0dac 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -561,7 +561,6 @@ func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWor leaderworkerset.WorkerIndexLabelKey: "0", leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.TemplateRevisionHashKey: templateHash, - "leaderworkerset.controllerRevision": "lws-123", }) podAnnotations := make(map[string]string) podAnnotations[leaderworkerset.SizeAnnotationKey] = strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)) diff --git a/pkg/utils/controller/controller_utils.go b/pkg/utils/controller/controller_utils.go index 85aa632b..e894a1e1 100644 --- a/pkg/utils/controller/controller_utils.go +++ b/pkg/utils/controller/controller_utils.go @@ -72,7 +72,7 @@ func CreateHeadlessServiceIfNotExists(ctx context.Context, k8sClient client.Clie return nil } -// GetLeaderWorkerSetRevisions returns the current and update ControllerRevisions for leaerWorkerSet. It also +// GetLeaderWorkerSetRevisions returns the current and update ControllerRevisions for leaderWorkerSet. It also // returns a collision count that records the number of name collisions set saw when creating // new ControllerRevisions. This count is incremented on every name collision and is used in // building the ControllerRevision names for name collision avoidance. This method may create diff --git a/pkg/utils/controller/controller_utils_test.go b/pkg/utils/controller/controller_utils_test.go new file mode 100644 index 00000000..ce81ef6a --- /dev/null +++ b/pkg/utils/controller/controller_utils_test.go @@ -0,0 +1,34 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package controller
+
+import (
+	"testing"
+
+	"sigs.k8s.io/lws/test/testutils"
+)
+
+func TestCreateApplyRevision(t *testing.T) {
+	lws := testutils.BuildLeaderWorkerSet("default").Obj()
+	lws.Status.CollisionCount = new(int32)
+	revision, err := NewRevision(lws, 1, lws.Status.CollisionCount)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	lws.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Name = "update-name"
+
+	restoredLws, err := ApplyRevision(lws, revision)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if restoredLws.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Name == "update-name" {
+		t.Error("expected ApplyRevision to restore the original leader container name")
+	}
+}
diff --git a/test/testutils/wrappers.go b/test/testutils/wrappers.go
index c23e5c95..f10852d7 100644
--- a/test/testutils/wrappers.go
+++ b/test/testutils/wrappers.go
@@ -154,6 +154,7 @@ func BuildLeaderWorkerSet(nsName string) *LeaderWorkerSetWrapper {
 	lws.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{
 		SubdomainPolicy: &subdomainPolicy,
 	}
+
 	return &LeaderWorkerSetWrapper{
 		lws,
 	}

From 7389ea10be2fb25e23f24b1d1766a181221987e6 Mon Sep 17 00:00:00 2001
From: Edwinhr716 
Date: Thu, 5 Dec 2024 23:36:52 +0000
Subject: [PATCH 05/27] added unit, integration, and e2e tests

---
 config/manager/kustomization.yaml             |   4 +-
 pkg/controllers/pod_controller_test.go        | 168 ++++++++++++++----
 pkg/history/controller_history_test.go        | 137 ++++++++++++++
 pkg/utils/controller/controller_utils_test.go |  36 +++++-
 test/e2e/e2e_test.go                          |  31 ++++
 .../controllers/leaderworkerset_test.go       |  56 ++++++
 test/testutils/validators.go                  |  20 +++
 test/testutils/wrappers.go                    |  30 ++++
 8 files changed, 439 insertions(+), 43 deletions(-)
 create mode 100644 pkg/history/controller_history_test.go

diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
index 5eb6a5a3..e6ccc001 100644
--- a/config/manager/kustomization.yaml
+++ b/config/manager/kustomization.yaml
@@ -4,5 +4,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 images:
 - name: controller
-  newName: us-central1-docker.pkg.dev/edwinhernandez-gke-dev/lws/lws/lws
-  newTag: latest
+  newName: gcr.io/k8s-staging-lws/lws
+  newTag: main
diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go
index 3fa5c1f5..034e553b 100644
--- a/pkg/controllers/pod_controller_test.go
+++ b/pkg/controllers/pod_controller_test.go
@@ -28,18 +28,33 @@ import (
 	metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1"
 	"k8s.io/utils/ptr"
 	leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1"
+	"sigs.k8s.io/lws/pkg/history"
+	"sigs.k8s.io/lws/pkg/utils"
 	testutils "sigs.k8s.io/lws/test/testutils"
 )
 
 func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) {
+	parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet")
+	lws := testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Replica(1).WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Size(1).Obj()
+	updateTemplateHash := utils.LeaderWorkerTemplateHash(lws)
+	lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker"
+	lws.Status.CollisionCount = new(int32)
+	currentRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws, t), 1, lws.Status.CollisionCount)
+	if err != nil {
+		t.Fatal(err)
+	}
+	currentTemplateHash := utils.LeaderWorkerTemplateHash(lws)
+
 	tests := []struct {
 		name                  string
 		pod                   *corev1.Pod
 		lws                   *leaderworkerset.LeaderWorkerSet
 		wantStatefulSetConfig *appsapplyv1.StatefulSetApplyConfiguration
+		revision              *appsv1.ControllerRevision
 	}{
 		{
-			name: "1 replica, size 1, exclusive placement disabled",
+			name:     "1 replica, size 1, exclusive placement disabled",
+			revision: currentRevision,
 			pod: &corev1.Pod{
 				ObjectMeta: v1.ObjectMeta{
 					Name:      
"test-sample", @@ -49,6 +64,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { "leaderworkerset.sigs.k8s.io/name": "test-sample", "leaderworkerset.sigs.k8s.io/group-index": "1", "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, }, @@ -65,10 +81,10 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: ptr.To[string]("test-sample"), Namespace: ptr.To[string]("default"), Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - "leaderworkerset.sigs.k8s.io/template-revision-hash": "", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -83,10 +99,10 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - "leaderworkerset.sigs.k8s.io/template-revision-hash": "", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", @@ -111,17 +127,18 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { }, }, { - name: "1 replica, size 2, exclusive placement enabled", + name: "1 replica, size 2, exclusive placement enabled", + revision: currentRevision, pod: &corev1.Pod{ ObjectMeta: v1.ObjectMeta{ Name: "test-sample", Namespace: "default", Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - "leaderworkerset.sigs.k8s.io/template-revision-hash": "", + "leaderworkerset.sigs.k8s.io/worker-index": "0", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, }, @@ -140,10 +157,10 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: ptr.To[string]("test-sample"), Namespace: ptr.To[string]("default"), Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - "leaderworkerset.sigs.k8s.io/template-revision-hash": "", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -158,10 +175,10 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ 
ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - "leaderworkerset.sigs.k8s.io/template-revision-hash": "", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -187,17 +204,18 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { }, }, { - name: "1 replica, size 2, subgroupsize 2, exclusive placement enabled", + name: "1 replica, size 2, subgroupsize 2, exclusive placement enabled", + revision: currentRevision, pod: &corev1.Pod{ ObjectMeta: v1.ObjectMeta{ Name: "test-sample", Namespace: "default", Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - "leaderworkerset.sigs.k8s.io/template-revision-hash": "", + "leaderworkerset.sigs.k8s.io/worker-index": "0", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, }, @@ -216,10 +234,10 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: ptr.To[string]("test-sample"), Namespace: ptr.To[string]("default"), Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/template-revision-hash": "", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + "leaderworkerset.sigs.k8s.io/group-key": "test-key", }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -234,10 +252,10 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/template-revision-hash": "", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + "leaderworkerset.sigs.k8s.io/group-key": "test-key", }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -263,11 +281,85 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { }, }, }, + { + name: "LeaderPod has a different template hash than one generated by lws object, use podTemplateSpec from revision", + revision: currentRevision, + pod: &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{ + Name: "test-sample", + Namespace: "default", + Labels: map[string]string{ + "leaderworkerset.sigs.k8s.io/worker-index": "0", + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + 
"leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + }, + }, + }, + lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). + Replica(1). + WorkerTemplateSpec(testutils.MakeWorkerPodSpec()). + Size(1).Obj(), + wantStatefulSetConfig: &appsapplyv1.StatefulSetApplyConfiguration{ + TypeMetaApplyConfiguration: metaapplyv1.TypeMetaApplyConfiguration{ + Kind: ptr.To[string]("StatefulSet"), + APIVersion: ptr.To[string]("apps/v1"), + }, + ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ + Name: ptr.To[string]("test-sample"), + Namespace: ptr.To[string]("default"), + Labels: map[string]string{ + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + }, + }, + Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ + Replicas: ptr.To[int32](0), + Selector: &metaapplyv1.LabelSelectorApplyConfiguration{ + MatchLabels: map[string]string{ + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + }, + }, + Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ + ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ + Labels: map[string]string{ + "leaderworkerset.sigs.k8s.io/name": "test-sample", + "leaderworkerset.sigs.k8s.io/group-index": "1", + "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + }, + Annotations: map[string]string{ + "leaderworkerset.sigs.k8s.io/size": "1", + "leaderworkerset.sigs.k8s.io/leader-name": "test-sample", + }, + }, + Spec: &coreapplyv1.PodSpecApplyConfiguration{ + Containers: []coreapplyv1.ContainerApplyConfiguration{ + { + Name: ptr.To[string]("worker"), + Image: ptr.To[string]("nginx:1.14.2"), + Ports: []coreapplyv1.ContainerPortApplyConfiguration{{ContainerPort: ptr.To[int32](8080), Protocol: ptr.To[corev1.Protocol](corev1.ProtocolTCP)}}, + Resources: &coreapplyv1.ResourceRequirementsApplyConfiguration{}, + }, + }, + }, + }, + Ordinals: &appsapplyv1.StatefulSetOrdinalsApplyConfiguration{Start: ptr.To[int32](1)}, + ServiceName: ptr.To[string]("test-sample"), + PodManagementPolicy: ptr.To[appsv1.PodManagementPolicyType](appsv1.ParallelPodManagement), + }, + }, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - statefulSetConfig, err := constructWorkerStatefulSetApplyConfiguration(*tc.pod, *tc.lws) + statefulSetConfig, err := constructWorkerStatefulSetApplyConfiguration(*tc.pod, *tc.lws, tc.revision) if err != nil { t.Errorf("failed with error %s", err.Error()) } diff --git a/pkg/history/controller_history_test.go b/pkg/history/controller_history_test.go new file mode 100644 index 00000000..ce352d75 --- /dev/null +++ b/pkg/history/controller_history_test.go @@ -0,0 +1,137 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ +package history + +import ( + "testing" + "time" + + "github.com/google/go-cmp/cmp" + + apps "k8s.io/api/apps/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/lws/test/testutils" +) + +var parentKind = apps.SchemeGroupVersion.WithKind("LeaderWorkerSet") + +func TestFindEqualRevisions(t *testing.T) { + lws1 := testutils.BuildLeaderWorkerSet("test-sample").Obj() + lws2 := testutils.BuildLeaderWorkerSet("test-sample").LeaderTemplateSpec(testutils.MakeLeaderPodSpecWithTPUResource()).Obj() + + lws1Revision, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1, t), 1, lws1.Status.CollisionCount) + if err != nil { + t.Fatal(err) + } + + lws2Revision, err := NewControllerRevision(lws2, parentKind, lws2.Labels, testutils.RawLWSTemplate(lws2, t), 1, lws2.Status.CollisionCount) + if err != nil { + t.Fatal(err) + } + + lws1.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Name = "update-name" + lws1Revision2, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1, t), 1, lws1.Status.CollisionCount) + if err != nil { + t.Fatal(err) + } + + tests := []struct { + name string + revision *apps.ControllerRevision + revisions []*apps.ControllerRevision + want map[string]bool + }{ + { + name: "finds nothing with no matches", + revision: lws1Revision, + revisions: []*apps.ControllerRevision{lws1Revision2, lws2Revision}, + want: map[string]bool{}, + }, + { + name: "finds nothing when empty", + revision: lws1Revision, + revisions: []*apps.ControllerRevision{}, + want: map[string]bool{}, + }, + { + name: "finds equivalent", + revision: lws1Revision, + revisions: []*apps.ControllerRevision{lws1Revision, lws1Revision2, lws2Revision}, + want: map[string]bool{lws1Revision.Name: true}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + revisions := FindEqualRevisions(tc.revisions, tc.revision) + if len(revisions) != len(tc.want) { + t.Errorf("want %d revisions, got %d revisions", len(tc.want), len(revisions)) + } + for i := range revisions { + if !tc.want[revisions[i].Name] { + t.Errorf("Wanted: %s, got: %s", tc.revision.Name, revisions[i].Name) + } + } + }) + } +} + +func TestSortControllerRevisions(t *testing.T) { + lws := testutils.BuildLeaderWorkerSet("test-sample").Obj() + lwsRevision1, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws, t), 1, lws.Status.CollisionCount) + if err != nil { + t.Fatal(err) + } + lwsRevision2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws, t), 2, lws.Status.CollisionCount) + if err != nil { + t.Fatal(err) + } + lwsRevision1Time2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws, t), 1, lws.Status.CollisionCount) + if err != nil { + t.Fatal(err) + } + lwsRevision1Time2.CreationTimestamp = v1.Time{Time: lwsRevision1.CreationTimestamp.Add(time.Second)} + + tests := []struct { + name string + revisions []*apps.ControllerRevision + want []*apps.ControllerRevision + }{ + { + name: "already sorted", + revisions: []*apps.ControllerRevision{lwsRevision1, lwsRevision2}, + want: []*apps.ControllerRevision{lwsRevision1, lwsRevision2}, + }, + { + name: "inverted sorted", + revisions: []*apps.ControllerRevision{lwsRevision2, lwsRevision1}, + want: []*apps.ControllerRevision{lwsRevision1, lwsRevision2}, + }, + { + name: "same revision name, different 
timestamp", + revisions: []*apps.ControllerRevision{lwsRevision1, lwsRevision2, lwsRevision1Time2}, + want: []*apps.ControllerRevision{lwsRevision1, lwsRevision1Time2, lwsRevision2}, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + SortControllerRevisions(tc.revisions) + if diff := cmp.Diff(tc.revisions, tc.want); diff != "" { + t.Errorf("error sorting revisions %s", diff) + } + }) + } +} diff --git a/pkg/utils/controller/controller_utils_test.go b/pkg/utils/controller/controller_utils_test.go index ce81ef6a..d833bd14 100644 --- a/pkg/utils/controller/controller_utils_test.go +++ b/pkg/utils/controller/controller_utils_test.go @@ -19,16 +19,46 @@ package controller import ( "testing" - "sigs.k8s.io/lws/test/testutils" + "github.com/google/go-cmp/cmp" + leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" + "sigs.k8s.io/lws/pkg/history" + testutils "sigs.k8s.io/lws/test/testutils" ) -func TestCreateApplyRevision(t *testing.T) { +func TestApplyRevision(t *testing.T) { + lws := testutils.BuildLeaderWorkerSet("default").Obj() lws.Status.CollisionCount = new(int32) revision, err := NewRevision(lws, 1, lws.Status.CollisionCount) + currentLws := lws.DeepCopy() + if err != nil { + t.Fatal(err) + } lws.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Name = "update-name" + subdomainPolicy := leaderworkerset.SubdomainUniquePerReplica + lws.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{ + SubdomainPolicy: &subdomainPolicy, + } + restoredLws, err := ApplyRevision(lws, revision) + if err != nil { + t.Fatal(err) + } + + restoredRevision, err := NewRevision(restoredLws, 2, restoredLws.Status.CollisionCount) + if err != nil { + t.Fatal(err) + } + + if !history.EqualRevision(revision, restoredRevision) { + t.Errorf("expected value %v, got %v", revision, restoredRevision) + } - restoredLws, err := ApplyRevision(lws, re) + if diff := cmp.Diff(currentLws.Spec.LeaderWorkerTemplate, restoredLws.Spec.LeaderWorkerTemplate); diff != "" { + t.Errorf("unexpected restored LeaderWorkerTemplate: %s", diff) + } + if diff := cmp.Diff(currentLws, restoredLws); diff == "" { + t.Errorf("LWS Spec fields should not be restored") + } } diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ea7b1170..bf214971 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -16,6 +16,8 @@ limitations under the License. 
package e2e

import (
+	"fmt"
+
	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	appsv1 "k8s.io/api/apps/v1"
@@ -301,4 +303,33 @@ var _ = ginkgo.Describe("leaderWorkerSet e2e tests", func() {
			return numberOfPodsInCommon, nil
		}, timeout, interval).Should(gomega.Equal(0))
	})
+	ginkgo.It("unupdated worker StatefulSet restarted during rolling update will be restored with old worker spec", func() {
+		lws = testing.BuildLeaderWorkerSet(ns.Name).Replica(2).Size(2).Obj()
+		testing.MustCreateLws(ctx, k8sClient, lws)
+		testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready")
+
+		testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2)
+		testing.UpdateWorkerTemplate(ctx, k8sClient, lws)
+		gomega.Expect(k8sClient.Delete(ctx, &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: lws.Namespace, Name: lws.Name + "-0-1"}})).To(gomega.Succeed())
+		gomega.Eventually(func() error {
+			var sts appsv1.StatefulSet
+			if err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name + "-0", Namespace: lws.Namespace}, &sts); err != nil {
+				return err
+			}
+			// use the original lws object instead of newest, since we are comparing with the old worker template
+			podTemplateSpec := *lws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy()
+			if sts.Spec.Template.Spec.Containers[0].Name != podTemplateSpec.Spec.Containers[0].Name {
+				return fmt.Errorf("StatefulSet did not have the expected container name: " + sts.Spec.Template.Spec.Containers[0].Name)
+			}
+			return nil
+		}, timeout, interval).Should(gomega.Succeed())
+
+		// Rolling update finishes
+		testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2)
+		// All worker statefulsets have the updated version
+		testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true)
+		testing.ExpectValidPods(ctx, k8sClient, lws, &corev1.PodList{})
+		testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready")
+
+	})
 })
diff --git a/test/integration/controllers/leaderworkerset_test.go b/test/integration/controllers/leaderworkerset_test.go
index d063db5e..db3039b6 100644
--- a/test/integration/controllers/leaderworkerset_test.go
+++ b/test/integration/controllers/leaderworkerset_test.go
@@ -504,6 +504,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
			testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4)
			testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true)
			testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4)
+			testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws)
		},
	},
	{
@@ -520,6 +521,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
			// soon updated to 3 (replicas-maxUnavailable), it's fine here.
testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -534,6 +536,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 1) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -548,6 +551,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 2) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -565,6 +569,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) // 3-index status is unready but template already updated. testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 2) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -580,6 +585,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -600,6 +606,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -614,6 +621,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -628,6 +636,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 1) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -642,6 +651,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 2) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -658,6 +668,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling 
Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -678,6 +689,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -692,6 +704,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -707,6 +720,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -727,6 +741,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -752,6 +767,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 4) // We haven't set the replica-4, replica-5 to ready, so the readyReplicas is 4, the updatedReplicas is 0. 
testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -767,6 +783,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 6) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -787,6 +804,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -801,6 +819,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -815,6 +834,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 1) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -844,6 +864,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 3) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -859,6 +880,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 6) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -879,6 +901,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 6) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 6) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -900,6 +923,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 5) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -924,6 +948,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in 
progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -939,6 +964,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 3) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -959,6 +985,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -985,6 +1012,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -999,6 +1027,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 5, 1) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1013,6 +1042,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 5, 2) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1027,6 +1057,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 5, 3) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1043,6 +1074,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 3) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1058,6 +1090,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1078,6 +1111,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet 
controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1104,6 +1138,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1127,6 +1162,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1147,6 +1183,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 6) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 6) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1167,6 +1204,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUnavailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) // Partition is updated from 3 to 2. 
testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) }, @@ -1193,6 +1231,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 3) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1213,6 +1252,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1238,6 +1278,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1249,6 +1290,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1263,6 +1305,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1283,6 +1326,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1309,6 +1353,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1336,6 +1381,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) // The last 2 replicas are updated but not ready. 
testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 2) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1351,6 +1397,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 2) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1375,6 +1422,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1389,6 +1437,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 1) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1405,6 +1454,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 1) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1420,6 +1470,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1440,6 +1491,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1466,6 +1518,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1481,6 +1534,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 2) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1503,6 +1557,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { // Partition will transit from 4 to 3. 
testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 0) + testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1527,6 +1582,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) + testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, diff --git a/test/testutils/validators.go b/test/testutils/validators.go index a72fa94b..0f51c031 100644 --- a/test/testutils/validators.go +++ b/test/testutils/validators.go @@ -439,3 +439,23 @@ func ExpectSpecifiedWorkerStatefulSetsNotCreated(ctx context.Context, k8sClient return true }, Timeout, Interval).Should(gomega.Equal(true)) } + +func ExpectCurrentRevisionToEqualUpdateRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { + gomega.Eventually(func() bool { + var fetchedLWS leaderworkerset.LeaderWorkerSet + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLWS); err != nil { + return false + } + return fetchedLWS.Status.CurrentRevision == fetchedLWS.Status.UpdateRevision + }, Timeout, Interval).Should(gomega.Equal(true)) +} + +func ExpectCurrentRevisionToNotEqualUpdateRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { + gomega.Eventually(func() bool { + var fetchedLWS leaderworkerset.LeaderWorkerSet + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLWS); err != nil { + return false + } + return fetchedLWS.Status.CurrentRevision != fetchedLWS.Status.UpdateRevision + }, Timeout, Interval).Should(gomega.Equal(true)) +} diff --git a/test/testutils/wrappers.go b/test/testutils/wrappers.go index f10852d7..407e7863 100644 --- a/test/testutils/wrappers.go +++ b/test/testutils/wrappers.go @@ -15,12 +15,17 @@ limitations under the License. 
package testutils import ( + "bytes" + "encoding/json" "fmt" "strconv" + "testing" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" @@ -289,3 +294,28 @@ func MakeLeaderPodSpecWithTPUResource() corev1.PodSpec { Subdomain: "default", } } + +func RawLWSTemplate(lws *leaderworkerset.LeaderWorkerSet, t *testing.T) runtime.RawExtension { + str := &bytes.Buffer{} + err := unstructured.UnstructuredJSONScheme.Encode(lws, str) + if err != nil { + t.Fatal(err) + } + var raw map[string]interface{} + err = json.Unmarshal(str.Bytes(), &raw) + if err != nil { + t.Fatal(err) + } + objCopy := make(map[string]interface{}) + specCopy := make(map[string]interface{}) + spec := raw["spec"].(map[string]interface{}) + template := spec["leaderWorkerTemplate"].(map[string]interface{}) + specCopy["leaderWorkerTemplate"] = template + template["$patch"] = "replace" + objCopy["spec"] = specCopy + patch, err := json.Marshal(objCopy) + if err != nil { + t.Fatal(err) + } + return runtime.RawExtension{Raw: patch} +} From 5a49f88f5f116e386ef048b9e1d4bf6941bca616 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Fri, 6 Dec 2024 18:50:43 +0000 Subject: [PATCH 06/27] cleanup and added integration test for collisionCount --- .../v1/leaderworkerset_types.go | 2 +- ...erworkerset.x-k8s.io_leaderworkersets.yaml | 2 +- pkg/controllers/leaderworkerset_controller.go | 2 +- pkg/controllers/pod_controller.go | 2 +- pkg/controllers/pod_controller_test.go | 2 +- pkg/history/controller_history_test.go | 12 ++-- pkg/utils/controller/controller_utils.go | 4 +- .../controllers/leaderworkerset_test.go | 49 +++++++++++++++ test/testutils/util.go | 63 +++++++++++++++++++ test/testutils/validators.go | 22 +++++-- test/testutils/wrappers.go | 9 ++- 11 files changed, 145 insertions(+), 24 deletions(-) diff --git a/api/leaderworkerset/v1/leaderworkerset_types.go b/api/leaderworkerset/v1/leaderworkerset_types.go index 2f939cab..9224aad5 100644 --- a/api/leaderworkerset/v1/leaderworkerset_types.go +++ b/api/leaderworkerset/v1/leaderworkerset_types.go @@ -312,7 +312,7 @@ type LeaderWorkerSetStatus struct { // used to generate the worker pods in sequence [replicas-updatedReplicas,replicas) UpdateRevision string `json:"updateRevision,omitempty"` - // collisionCount is the count of hash collisions for the StatefulSet. The StatefulSet controller + // collisionCount is the count of hash collisions for lws. The lws controller // uses this field as a collision avoidance mechanism when it needs to create the name for the // newest ControllerRevision. // +optional diff --git a/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml b/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml index c313fe46..518fe6a4 100644 --- a/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml +++ b/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml @@ -16206,7 +16206,7 @@ spec: properties: collisionCount: description: |- - collisionCount is the count of hash collisions for the StatefulSet. The StatefulSet controller + collisionCount is the count of hash collisions for lws. The lws controller uses this field as a collision avoidance mechanism when it needs to create the name for the newest ControllerRevision. 
format: int32 diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index af3c0dac..e7af7720 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -101,7 +101,7 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ currentRevision, updateRevision, collisionCount, err := controllerutils.GetLeaderWorkerSetRevisions(ctx, r.Client, lws) if err != nil { - log.Error(err, "Getting StatefulSet revisions") + log.Error(err, "Getting lws revisions") return ctrl.Result{}, err } lws.Status.CurrentRevision = currentRevision.Name diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go index a851fe6e..a6d5b193 100644 --- a/pkg/controllers/pod_controller.go +++ b/pkg/controllers/pod_controller.go @@ -121,7 +121,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R } currentRevision, _, _, err := controllerutils.GetLeaderWorkerSetRevisions(ctx, r.Client, &leaderWorkerSet) if err != nil { - log.Error(err, "Getting StatefulSet revisions") + log.Error(err, "Getting lws revisions") return ctrl.Result{}, err } statefulSet, err := constructWorkerStatefulSetApplyConfiguration(pod, leaderWorkerSet, currentRevision) diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index 034e553b..b885b72e 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -39,7 +39,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { updateTemplateHash := utils.LeaderWorkerTemplateHash(lws) lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker" lws.Status.CollisionCount = new(int32) - currentRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws, t), 1, lws.Status.CollisionCount) + currentRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1, lws.Status.CollisionCount) if err != nil { t.Fatal(err) } diff --git a/pkg/history/controller_history_test.go b/pkg/history/controller_history_test.go index ce352d75..1b7a71b2 100644 --- a/pkg/history/controller_history_test.go +++ b/pkg/history/controller_history_test.go @@ -32,18 +32,18 @@ func TestFindEqualRevisions(t *testing.T) { lws1 := testutils.BuildLeaderWorkerSet("test-sample").Obj() lws2 := testutils.BuildLeaderWorkerSet("test-sample").LeaderTemplateSpec(testutils.MakeLeaderPodSpecWithTPUResource()).Obj() - lws1Revision, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1, t), 1, lws1.Status.CollisionCount) + lws1Revision, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1), 1, lws1.Status.CollisionCount) if err != nil { t.Fatal(err) } - lws2Revision, err := NewControllerRevision(lws2, parentKind, lws2.Labels, testutils.RawLWSTemplate(lws2, t), 1, lws2.Status.CollisionCount) + lws2Revision, err := NewControllerRevision(lws2, parentKind, lws2.Labels, testutils.RawLWSTemplate(lws2), 1, lws2.Status.CollisionCount) if err != nil { t.Fatal(err) } lws1.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Name = "update-name" - lws1Revision2, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1, t), 1, lws1.Status.CollisionCount) + lws1Revision2, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1), 1, 
lws1.Status.CollisionCount)
	if err != nil {
		t.Fatal(err)
	}
@@ -91,15 +91,15 @@ func TestFindEqualRevisions(t *testing.T) {
 func TestSortControllerRevisions(t *testing.T) {
 	lws := testutils.BuildLeaderWorkerSet("test-sample").Obj()
-	lwsRevision1, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws, t), 1, lws.Status.CollisionCount)
+	lwsRevision1, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1, lws.Status.CollisionCount)
 	if err != nil {
 		t.Fatal(err)
 	}
-	lwsRevision2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws, t), 2, lws.Status.CollisionCount)
+	lwsRevision2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 2, lws.Status.CollisionCount)
 	if err != nil {
 		t.Fatal(err)
 	}
-	lwsRevision1Time2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws, t), 1, lws.Status.CollisionCount)
+	lwsRevision1Time2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1, lws.Status.CollisionCount)
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/pkg/utils/controller/controller_utils.go b/pkg/utils/controller/controller_utils.go
index e894a1e1..38e2f8b5 100644
--- a/pkg/utils/controller/controller_utils.go
+++ b/pkg/utils/controller/controller_utils.go
@@ -98,8 +98,8 @@ func GetLeaderWorkerSetRevisions(
 	revisionCount := len(revisions)
 	history.SortControllerRevisions(revisions)
-	// Use a local copy of set.Status.CollisionCount to avoid modifying set.Status directly.
-	// This copy is returned so the value gets carried over to set.Status in updateStatefulSet.
+	// Use a local copy of lws.Status.CollisionCount to avoid modifying lws.Status directly.
+	// This copy is returned so the value gets carried over to lws.Status during reconciliation.
var collisionCount int32
 	if lws.Status.CollisionCount != nil {
 		collisionCount = *lws.Status.CollisionCount
diff --git a/test/integration/controllers/leaderworkerset_test.go b/test/integration/controllers/leaderworkerset_test.go
index db3039b6..f93b9ba5 100644
--- a/test/integration/controllers/leaderworkerset_test.go
+++ b/test/integration/controllers/leaderworkerset_test.go
@@ -1587,6 +1587,55 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 				},
 			},
 		}),
+		ginkgo.Entry("lws can create controllerRevision even if there is a hash collision", &testCase{
+			makeLeaderWorkerSet: testing.BuildLeaderWorkerSet,
+			updates: []*update{
+				{
+					lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
+						testing.SetPodGroupsToReady(ctx, k8sClient, lws, 2)
+						lws.Status.CollisionCount = new(int32)
+						lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "new-worker-name"
+						// Create a controller revision that will cause a collision when the worker template is updated
+						testing.CreateControllerRevisionForHashCollision(ctx, k8sClient, lws)
+					},
+					checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
+						testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready")
+						testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2)
+						testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true)
+						testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2)
+						testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws)
+					},
+				},
+				{
+					lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
+						testing.UpdateWorkerTemplate(ctx, k8sClient, lws)
+
+					},
+					checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
+						testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2)
+						testing.ExpectLeaderWorkerSetUnavailable(ctx, k8sClient, lws, "All replicas are ready")
+						testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing")
+						testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress")
+						testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1)
+						testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 0)
+						testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws)
+						testing.ExpectCollisionCountEqualTo(ctx, k8sClient, lws, 1)
+					},
+				},
+				{
+					// Set all groups to ready.
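+					// Once all groups are ready, the rolling update completes and currentRevision converges to updateRevision.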
+					lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
+						testing.SetPodGroupsToReady(ctx, k8sClient, lws, 2)
+					},
+					checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
+						testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready")
+						testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing")
+						testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress")
+						testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws)
+					},
+				},
+			},
+		}),
 		ginkgo.Entry("create a leaderworkerset with spec.startupPolicy=LeaderReady", &testCase{
 			makeLeaderWorkerSet: func(nsName string) *testing.LeaderWorkerSetWrapper {
 				return testing.BuildLeaderWorkerSet(nsName).Replica(4).StartupPolicy(leaderworkerset.LeaderReadyStartupPolicy)
diff --git a/test/testutils/util.go b/test/testutils/util.go
index c2ec3b41..6c8fe582 100644
--- a/test/testutils/util.go
+++ b/test/testutils/util.go
@@ -18,14 +18,18 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"hash"
+	"hash/fnv"
 	"strconv"

+	"github.com/davecgh/go-spew/spew"
 	"github.com/onsi/gomega"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/rand"
 	"k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/utils/ptr"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -540,6 +544,33 @@ func SetLeaderPodsToReady(ctx context.Context, k8sClient client.Client, lws *lea
 	}, Timeout, Interval).Should(gomega.Succeed())
 }

+func CreateControllerRevisionForHashCollision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) {
+	parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet")
+	controllerRevisionHashLabel := "controller.kubernetes.io/hash"
+	labels := lws.Labels
+	if lws.Labels == nil {
+		labels = make(map[string]string)
+	}
+	cr := &appsv1.ControllerRevision{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels:          labels,
+			OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(lws, parentKind)},
+			Namespace:       lws.GetNamespace(),
+		},
+		Data:     RawLWSTemplate(lws),
+		Revision: 1,
+	}
+	hash := hashControllerRevision(cr, lws.Status.CollisionCount)
+	cr.Name = controllerRevisionName(lws.GetName(), hash)
+	cr.Labels[controllerRevisionHashLabel] = hash
+	// Change the lws that is used for the data. This creates a controller revision
+	// with the same name but different contents, triggering a hash collision.
+	modifiedLws := lws.DeepCopy()
+	modifiedLws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "hash-collision"
+	cr.Data = RawLWSTemplate(modifiedLws)
+	gomega.Expect(k8sClient.Create(ctx, cr)).Should(gomega.Succeed())
+}
+
 func deleteWorkerStatefulSetIfExists(ctx context.Context, k8sClient client.Client, statefulsetName string, lws *leaderworkerset.LeaderWorkerSet) {
 	// in cases where size = 1, the workerstatefulset does not exist
 	gomega.Eventually(func() error {
@@ -553,3 +584,35 @@ func deleteWorkerStatefulSetIfExists(ctx context.Context, k8sClient client.Clien
 		return k8sClient.Delete(ctx, &sts)
 	}, Timeout, Interval).Should(gomega.Succeed())
 }
+
+func hashControllerRevision(revision *appsv1.ControllerRevision, probe *int32) string {
+	hf := fnv.New32()
+	if len(revision.Data.Raw) > 0 {
+		hf.Write(revision.Data.Raw)
+	}
+	if revision.Data.Object != nil {
+		deepHashObject(hf, revision.Data.Object)
+	}
+	if probe != nil {
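+		// The collision-count probe is folded into the hash so that, after a name
+		// collision, the next attempt produces a different name for the same content.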
hf.Write([]byte(strconv.FormatInt(int64(*probe), 10))) + } + return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) +} + +func controllerRevisionName(prefix string, hash string) string { + return fmt.Sprintf("%s-%s", prefix, hash) +} + +func deepHashObject(hasher hash.Hash, objectToWrite interface{}) { + hasher.Reset() + printer := spew.ConfigState{ + Indent: " ", + SortKeys: true, + DisableMethods: true, + SpewKeys: true, + } + _, err := printer.Fprintf(hasher, "%#v", objectToWrite) + if err != nil { + return + } +} diff --git a/test/testutils/validators.go b/test/testutils/validators.go index 0f51c031..9b29e72d 100644 --- a/test/testutils/validators.go +++ b/test/testutils/validators.go @@ -442,20 +442,30 @@ func ExpectSpecifiedWorkerStatefulSetsNotCreated(ctx context.Context, k8sClient func ExpectCurrentRevisionToEqualUpdateRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { gomega.Eventually(func() bool { - var fetchedLWS leaderworkerset.LeaderWorkerSet - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLWS); err != nil { + var fetchedLws leaderworkerset.LeaderWorkerSet + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLws); err != nil { return false } - return fetchedLWS.Status.CurrentRevision == fetchedLWS.Status.UpdateRevision + return fetchedLws.Status.CurrentRevision == fetchedLws.Status.UpdateRevision }, Timeout, Interval).Should(gomega.Equal(true)) } func ExpectCurrentRevisionToNotEqualUpdateRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { gomega.Eventually(func() bool { - var fetchedLWS leaderworkerset.LeaderWorkerSet - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLWS); err != nil { + var fetchedLws leaderworkerset.LeaderWorkerSet + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLws); err != nil { return false } - return fetchedLWS.Status.CurrentRevision != fetchedLWS.Status.UpdateRevision + return fetchedLws.Status.CurrentRevision != fetchedLws.Status.UpdateRevision }, Timeout, Interval).Should(gomega.Equal(true)) } + +func ExpectCollisionCountEqualTo(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, collisionCount int32) { + gomega.Eventually(func() int32 { + var fetchedLws leaderworkerset.LeaderWorkerSet + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLws); err != nil { + return -1 + } + return *fetchedLws.Status.CollisionCount + }, Timeout, Interval).Should(gomega.Equal(collisionCount)) +} diff --git a/test/testutils/wrappers.go b/test/testutils/wrappers.go index 407e7863..8bb4dbcd 100644 --- a/test/testutils/wrappers.go +++ b/test/testutils/wrappers.go @@ -19,7 +19,6 @@ import ( "encoding/json" "fmt" "strconv" - "testing" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -295,16 +294,16 @@ func MakeLeaderPodSpecWithTPUResource() corev1.PodSpec { } } -func RawLWSTemplate(lws *leaderworkerset.LeaderWorkerSet, t *testing.T) runtime.RawExtension { +func RawLWSTemplate(lws *leaderworkerset.LeaderWorkerSet) runtime.RawExtension { str := &bytes.Buffer{} err := unstructured.UnstructuredJSONScheme.Encode(lws, str) if err != nil { - t.Fatal(err) + panic(err) } var raw map[string]interface{} err = json.Unmarshal(str.Bytes(), &raw) if err != nil { - t.Fatal(err) + panic(err) } 
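 	// Only spec.leaderWorkerTemplate is kept in the patch; the "$patch": "replace"
 	// directive set on the template makes a strategic merge patch replace the whole
 	// template rather than merging it field by field.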
objCopy := make(map[string]interface{}) specCopy := make(map[string]interface{}) @@ -315,7 +314,7 @@ func RawLWSTemplate(lws *leaderworkerset.LeaderWorkerSet, t *testing.T) runtime. objCopy["spec"] = specCopy patch, err := json.Marshal(objCopy) if err != nil { - t.Fatal(err) + panic(err) } return runtime.RawExtension{Raw: patch} } From f28995ac8c1b883c4fbdc7f10e80c370e17dcabb Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Fri, 6 Dec 2024 19:21:27 +0000 Subject: [PATCH 07/27] fix lint --- test/e2e/e2e_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index bf214971..474eda2e 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -319,7 +319,7 @@ var _ = ginkgo.Describe("leaderWorkerSet e2e tests", func() { // use the original lws object instead of newest, since we are comparing with the old worker template podTemplateSpec := *lws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy() if sts.Spec.Template.Spec.Containers[0].Name != podTemplateSpec.Spec.Containers[0].Name { - return fmt.Errorf("StatefulSet did not have the expected container name: " + sts.Spec.Template.Spec.Containers[0].Name) + return fmt.Errorf("StatefulSet did not have the expected container name") } return nil }, timeout, interval).Should(gomega.Succeed()) From 1f5adfae97dd4d9c7d063b293cde0f1f2b20706e Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Mon, 9 Dec 2024 19:34:08 +0000 Subject: [PATCH 08/27] changed from string to const on labels --- pkg/controllers/pod_controller_test.go | 128 ++++++++++++------------- test/testutils/util.go | 2 +- 2 files changed, 65 insertions(+), 65 deletions(-) diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index b885b72e..78aaf7c0 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -60,11 +60,11 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: "test-sample", Namespace: "default", Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.WorkerIndexLabelKey: "0", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, }, @@ -81,28 +81,28 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: ptr.To[string]("test-sample"), Namespace: ptr.To[string]("default"), Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ Replicas: ptr.To[int32](0), Selector: &metaapplyv1.LabelSelectorApplyConfiguration{ MatchLabels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", + 
leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, }, Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", @@ -134,11 +134,11 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: "test-sample", Namespace: "default", Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.WorkerIndexLabelKey: "0", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, }, @@ -157,28 +157,28 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: ptr.To[string]("test-sample"), Namespace: ptr.To[string]("default"), Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ Replicas: ptr.To[int32](1), Selector: &metaapplyv1.LabelSelectorApplyConfiguration{ MatchLabels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, }, Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -211,11 +211,11 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: "test-sample", Namespace: "default", Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/name": 
"test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.WorkerIndexLabelKey: "0", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, }, }, }, @@ -234,28 +234,28 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: ptr.To[string]("test-sample"), Namespace: ptr.To[string]("default"), Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, - "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ Replicas: ptr.To[int32](1), Selector: &metaapplyv1.LabelSelectorApplyConfiguration{ MatchLabels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, }, Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, - "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -289,11 +289,11 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: "test-sample", Namespace: "default", Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + leaderworkerset.WorkerIndexLabelKey: "0", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, }, }, }, @@ -310,28 +310,28 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Name: ptr.To[string]("test-sample"), Namespace: ptr.To[string]("default"), Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, 
}, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ Replicas: ptr.To[int32](0), Selector: &metaapplyv1.LabelSelectorApplyConfiguration{ MatchLabels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, }, Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ Labels: map[string]string{ - "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/group-index": "1", - "leaderworkerset.sigs.k8s.io/group-key": "test-key", - leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + leaderworkerset.SetNameLabelKey: "test-sample", + leaderworkerset.GroupIndexLabelKey: "1", + leaderworkerset.GroupUniqueHashLabelKey: "test-key", + leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", diff --git a/test/testutils/util.go b/test/testutils/util.go index 6c8fe582..0b18f81f 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -563,7 +563,7 @@ func CreateControllerRevisionForHashCollision(ctx context.Context, k8sClient cli hash := hashControllerRevision(cr, lws.Status.CollisionCount) cr.Name = controllerRevisionName(lws.GetName(), hash) cr.Labels[controllerRevisionHashLabel] = hash - // Change the lws that is used for the data, This create a controller revision + // Change the lws that is used for the data, This creates a controller revision // with same name but different contents, triggering a hash collision modifiedLws := lws.DeepCopy() modifiedLws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "hash-collision" From 1b978cf62a161c0cf54ca9032da5caffeac1840d Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Mon, 16 Dec 2024 16:27:20 +0000 Subject: [PATCH 09/27] changed revision logic based on updated design --- pkg/controllers/leaderworkerset_controller.go | 107 ++++++++--- .../leaderworkerset_controller_test.go | 2 +- pkg/controllers/pod_controller.go | 15 +- pkg/history/controller_history.go | 8 +- pkg/utils/controller/controller_utils.go | 172 +++++++++--------- pkg/utils/controller/controller_utils_test.go | 4 +- pkg/utils/utils.go | 16 ++ test/testutils/util.go | 2 +- 8 files changed, 204 insertions(+), 122 deletions(-) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index e7af7720..08a61aa7 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -99,16 +99,6 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) - currentRevision, updateRevision, collisionCount, err := controllerutils.GetLeaderWorkerSetRevisions(ctx, r.Client, lws) - if err != nil { - log.Error(err, "Getting lws revisions") - return ctrl.Result{}, err - } - lws.Status.CurrentRevision = currentRevision.Name - lws.Status.UpdateRevision = updateRevision.Name - lws.Status.CollisionCount = new(int32) - lws.Status.CollisionCount = &collisionCount - partition, replicas, err := r.rollingUpdateParameters(ctx, lws) if err != nil { log.Error(err, "Rolling partition error") @@ -204,19 +194,31 @@ func 
SetupIndexes(indexer client.FieldIndexer) error {
 // - One exception here is when unready replicas of leaderWorkerSet is equal to MaxSurge,
 //   we should reclaim the extra replicas gradually to accommodate for the new replicas.
 func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet) (int32, int32, error) {
+	log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws))
+	ctx = ctrl.LoggerInto(ctx, log)
 	lwsReplicas := *lws.Spec.Replicas
-	sts := &appsv1.StatefulSet{}
-	err := r.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, sts)
+	stsExists, sts, err := stsCreated(ctx, r.Client, lws)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	if !stsExists {
+		return 0, lwsReplicas, nil
+	}
+
+	existingControllerRevisions, err := controllerutils.ExistingControllerRevisions(ctx, r.Client, lws)
 	if err != nil {
-		// Case 1:
-		// If sts not created yet, all partitions should be updated,
-		// replicas should not change.
-		if apierrors.IsNotFound(err) {
-			return 0, lwsReplicas, nil
-		}
 		return 0, 0, err
 	}
+	if !existingControllerRevisions {
+		// Updating from version that did not support Controller Revision. Need to create one first before checking if template has been updated
+		log.V(2).Info(fmt.Sprintf("Creating new controller revision create/update operation for %+v ", lws))
+		if err := controllerutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]); err != nil {
+			return 0, 0, err
+		}
+		return 0, lwsReplicas, nil
+	}
 
 	stsReplicas := *sts.Spec.Replicas
 	maxSurge, err := intstr.GetValueFromIntOrPercent(&lws.Spec.RolloutStrategy.RollingUpdateConfiguration.MaxSurge, int(lwsReplicas), true)
@@ -242,7 +244,11 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context,
 
 	// Case 2:
 	// Indicates a new rolling update here.
-	if templateUpdated(sts, lws) {
+	hasTemplateUpdated, err := templateUpdated(ctx, r.Client, sts, lws)
+	if err != nil {
+		return 0, 0, err
+	}
+	if hasTemplateUpdated {
 		// Processing scaling up/down first prior to rolling update.
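For orientation, the early-return cases above reduce to a small decision table. The sketch below is illustrative only: stsExists, revisionsExist, and templateChanged stand in for the checks done by stsCreated, ExistingControllerRevisions, and templateUpdated, and the maxSurge/maxUnavailable bookkeeping (wantReplicas) that the real function performs afterwards is elided.

// Hypothetical condensation of rollingUpdateParameters' case analysis.
func partitionAndReplicas(stsExists, revisionsExist, templateChanged bool, lwsReplicas, stsReplicas int32) (int32, int32) {
	if !stsExists || !revisionsExist {
		// Fresh install, or upgrade from a controller without revisions:
		// everything counts as updated and replicas stay as requested.
		return 0, lwsReplicas
	}
	if templateChanged {
		// New rollout: settle scale up/down before rolling the update.
		return min(lwsReplicas, stsReplicas), lwsReplicas
	}
	// No template change: keep every replica on the newest revision.
	return 0, lwsReplicas
}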
return min(lwsReplicas, stsReplicas), wantReplicas(lwsReplicas), nil } @@ -290,8 +296,32 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32) error { log := ctrl.LoggerFrom(ctx) + // templateHash is not a reliable way to determine whether or not an lws object has been updated as seen in + // https://github.com/kubernetes-sigs/lws/issues/281 + // If a leader sts already exists, but the template has not been updated, the templateHash of the leader is + // used to keep consistency in cases where two different templateHashes are calculated from the same LWS object + stsExists, sts, err := stsCreated(ctx, r.Client, lws) + if err != nil { + return err + } + templateHash := utils.LeaderWorkerTemplateHash(lws) + if stsExists { + templateUpdated, err := templateUpdated(ctx, r.Client, sts, lws) + if err != nil { + return err + } + if !templateUpdated { + templateHash = sts.Labels[leaderworkerset.TemplateRevisionHashKey] + } + } + + if err = controllerutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, templateHash); err != nil { + log.Error(err, "Creating LWS Revision") + return err + } + // construct the statefulset apply configuration - leaderStatefulSetApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(lws, partition, replicas) + leaderStatefulSetApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(lws, partition, replicas, templateHash) if err != nil { log.Error(err, "Constructing StatefulSet apply configuration.") return err @@ -400,7 +430,6 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetUpgradeInProgress)) } else if updatedAndReadyCount == int(*lws.Spec.Replicas) { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetAvailable)) - lws.Status.CurrentRevision = lws.Status.UpdateRevision } else { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetProgressing)) } @@ -538,7 +567,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le } // constructLeaderStatefulSetApplyConfiguration constructs the applied configuration for the leader StatefulSet -func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32) (*appsapplyv1.StatefulSetApplyConfiguration, error) { +func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32, templateHash string) (*appsapplyv1.StatefulSetApplyConfiguration, error) { var podTemplateSpec corev1.PodTemplateSpec if lws.Spec.LeaderWorkerTemplate.LeaderTemplate != nil { podTemplateSpec = *lws.Spec.LeaderWorkerTemplate.LeaderTemplate.DeepCopy() @@ -556,7 +585,6 @@ func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWor return nil, err } - templateHash := utils.LeaderWorkerTemplateHash(lws) podTemplateApplyConfiguration.WithLabels(map[string]string{ leaderworkerset.WorkerIndexLabelKey: "0", leaderworkerset.SetNameLabelKey: lws.Name, @@ -689,6 +717,35 @@ func exclusiveConditionTypes(condition1 metav1.Condition, condition2 metav1.Cond return false } -func templateUpdated(sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) bool { - return sts.Labels[leaderworkerset.TemplateRevisionHashKey] != utils.LeaderWorkerTemplateHash(lws) +func 
templateUpdated(ctx context.Context, k8sClient client.Client, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (bool, error) { + log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) + ctx = ctrl.LoggerInto(ctx, log) + controllerRevision, err := controllerutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, k8sClient, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) + if err != nil { + return false, err + } + + baselineLws, err := controllerutils.ApplyRevision(lws, controllerRevision) + if err != nil { + return false, err + } + log.V(2).Info(fmt.Sprintf("comparing networkConfig %s, with %s", string(*lws.Spec.NetworkConfig.SubdomainPolicy), string(*baselineLws.Spec.NetworkConfig.SubdomainPolicy))) + log.V(2).Info(fmt.Sprintf("Fetching controller revision with hash %s", sts.Labels[leaderworkerset.TemplateRevisionHashKey])) + return !utils.EqualLeaderWorkerTemplates(baselineLws, lws), nil +} + +func stsCreated(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) (bool, *appsv1.StatefulSet, error) { + sts := &appsv1.StatefulSet{} + err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, sts) + if err != nil { + // Case 1: + // If sts not created yet, all partitions should be updated, + // replicas should not change. + if apierrors.IsNotFound(err) { + return false, nil, nil + } + return false, nil, err + } + + return true, sts, nil } diff --git a/pkg/controllers/leaderworkerset_controller_test.go b/pkg/controllers/leaderworkerset_controller_test.go index 84082843..c38ab0bc 100644 --- a/pkg/controllers/leaderworkerset_controller_test.go +++ b/pkg/controllers/leaderworkerset_controller_test.go @@ -383,7 +383,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - stsApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(tc.lws, 0, *tc.lws.Spec.Replicas) + stsApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(tc.lws, 0, *tc.lws.Spec.Replicas, "") if err != nil { t.Errorf("failed with error: %s", err.Error()) } diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go index a6d5b193..77e03c0d 100644 --- a/pkg/controllers/pod_controller.go +++ b/pkg/controllers/pod_controller.go @@ -38,7 +38,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/utils" acceleratorutils "sigs.k8s.io/lws/pkg/utils/accelerators" controllerutils "sigs.k8s.io/lws/pkg/utils/controller" podutils "sigs.k8s.io/lws/pkg/utils/pod" @@ -119,7 +118,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R log.V(2).Info("defer the creation of the worker statefulset because leader pod is not ready.") return ctrl.Result{}, nil } - currentRevision, _, _, err := controllerutils.GetLeaderWorkerSetRevisions(ctx, r.Client, &leaderWorkerSet) + currentRevision, err := controllerutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.TemplateRevisionHashKey]) if err != nil { log.Error(err, "Getting lws revisions") return ctrl.Result{}, err @@ -265,15 +264,11 @@ func setControllerReferenceWithStatefulSet(owner metav1.Object, sts *appsapplyv1 // constructWorkerStatefulSetApplyConfiguration constructs the applied configuration for the leader StatefulSet func constructWorkerStatefulSetApplyConfiguration(leaderPod corev1.Pod, lws 
leaderworkerset.LeaderWorkerSet, currentRevision *appsv1.ControllerRevision) (*appsapplyv1.StatefulSetApplyConfiguration, error) {
-	updatedTemplateHash := utils.LeaderWorkerTemplateHash(&lws)
-	podTemplateSpec := *lws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy()
-	if updatedTemplateHash != leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] {
-		originalLws, err := controllerutils.ApplyRevision(&lws, currentRevision)
-		if err != nil {
-			return nil, err
-		}
-		podTemplateSpec = *originalLws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy()
+	currentLws, err := controllerutils.ApplyRevision(&lws, currentRevision)
+	if err != nil {
+		return nil, err
 	}
+	podTemplateSpec := *currentLws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy()
 	// construct pod template spec configuration
 	obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&podTemplateSpec)
 	if err != nil {
diff --git a/pkg/history/controller_history.go b/pkg/history/controller_history.go
index f4b207eb..ec7d91d4 100644
--- a/pkg/history/controller_history.go
+++ b/pkg/history/controller_history.go
@@ -29,6 +29,7 @@ import (
 	"github.com/davecgh/go-spew/spew"
 	appsv1 "k8s.io/api/apps/v1"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1"
 
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	"k8s.io/apimachinery/pkg/api/errors"
@@ -110,6 +111,12 @@ func SortControllerRevisions(revisions []*appsv1.ControllerRevision) {
 // contain semantically equivalent data. Otherwise this method returns false.
 func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool {
 	var lhsHash, rhsHash *uint32
+
+	if lhs != nil && rhs != nil &&
+		lhs.Labels[leaderworkerset.TemplateRevisionHashKey] == rhs.Labels[leaderworkerset.TemplateRevisionHashKey] {
+		return true
+	}
+
 	if lhs == nil || rhs == nil {
 		return lhs == rhs
 	}
@@ -202,7 +208,7 @@ type Interface interface {
 
 // NewHistory returns an instance of Interface that uses client to communicate with the API Server and lister to list
 // ControllerRevisions. This method should be used to create an Interface for all scenarios other than testing.
-func NewHistory(k8sclient client.Client, context context.Context) Interface {
+func NewHistory(context context.Context, k8sclient client.Client) Interface {
 	return &realHistory{k8sclient, context}
 }
diff --git a/pkg/utils/controller/controller_utils.go b/pkg/utils/controller/controller_utils.go
index 38e2f8b5..995177d5 100644
--- a/pkg/utils/controller/controller_utils.go
+++ b/pkg/utils/controller/controller_utils.go
@@ -20,6 +20,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"fmt"
 
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
@@ -72,95 +73,60 @@ func CreateHeadlessServiceIfNotExists(ctx context.Context, k8sClient client.Clie
 	return nil
 }
 
-// GetLeaderWorkerSetRevisions returns the current and update ControllerRevisions for leaderWorkerSet. It also
-// returns a collision count that records the number of name collisions set saw when creating
-// new ControllerRevisions. This count is incremented on every name collision and is used in
-// building the ControllerRevision names for name collision avoidance. This method may create
-// a new revision, or modify the Revision of an existing revision if an update to set is detected.
-// This method expects that revisions is sorted when supplied.
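Once revision names are a pure function of the revision payload, equality checks can short-circuit on the template hash label as above. A minimal sketch of the content-only hashing this series moves toward (payloadHash is a hypothetical stand-in for HashControllerRevision, with the probe/collision parameter dropped and Data.Object handling elided):

package main

import (
	"fmt"
	"hash/fnv"

	"k8s.io/apimachinery/pkg/util/rand"
)

// payloadHash computes FNV-32 over the serialized revision data and encodes
// it with SafeEncodeString so the resulting name contains only safe runes.
func payloadHash(raw []byte) string {
	hf := fnv.New32()
	if len(raw) > 0 {
		hf.Write(raw)
	}
	return rand.SafeEncodeString(fmt.Sprint(hf.Sum32()))
}

func main() {
	// Identical payloads always map to the identical name suffix.
	fmt.Println(payloadHash([]byte(`{"spec":{}}`)))
}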
-func GetLeaderWorkerSetRevisions( - ctx context.Context, - k8sClient client.Client, - lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, *appsv1.ControllerRevision, int32, error) { - var currentRevision *appsv1.ControllerRevision +func GetLeaderWorkerSetRevisionFromTemplateHash(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) { log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) - controllerHistory := history.NewHistory(k8sClient, ctx) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{}) + controllerHistory := history.NewHistory(ctx, k8sClient) + selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ + leaderworkerset.TemplateRevisionHashKey: templateHash, + }}) if err != nil { - return nil, nil, int32(-1), err + return nil, err } revisions, err := controllerHistory.ListControllerRevisions(lws, selector) if err != nil { log.Error(err, "Listing all controller revisions") - return nil, nil, int32(-1), err + return nil, err } - revisionCount := len(revisions) - history.SortControllerRevisions(revisions) - // Use a local copy of lws.Status.CollisionCount to avoid modifying lws.Status directly. - // This copy is returned so the value gets carried over to lws reconcile. - var collisionCount int32 - if lws.Status.CollisionCount != nil { - collisionCount = *lws.Status.CollisionCount + if len(revisions) == 0 { + return nil, fmt.Errorf("could not find LWS revision based on %s", templateHash) } - // create a new revision from the current set - updateRevision, err := NewRevision(lws, NextRevision(revisions), &collisionCount) - if err != nil { - log.Error(err, "Creating new revision for lws") - return nil, nil, collisionCount, err + if len(revisions) > 1 { + // Since we only create a controllerRevision when the template hash changes, only one should match + return nil, fmt.Errorf("found more than one revision matching templateHash %s", templateHash) } - // find any equivalent revisions - equalRevisions := history.FindEqualRevisions(revisions, updateRevision) - equalCount := len(equalRevisions) - - if equalCount > 0 && history.EqualRevision(revisions[revisionCount-1], equalRevisions[equalCount-1]) { - // if the equivalent revision is immediately prior the update revision has not changed - updateRevision = revisions[revisionCount-1] - } else if equalCount > 0 { - // if the equivalent revision is not immediately prior we will roll back by incrementing the - // Revision of the equivalent revision - updateRevision, err = controllerHistory.UpdateControllerRevision( - equalRevisions[equalCount-1], - updateRevision.Revision) - if err != nil { - log.Error(err, "updating controller revision") - return nil, nil, collisionCount, err - } - } else { - //if there is no equivalent revision we create a new one - updateRevision, err = controllerHistory.CreateControllerRevision(lws, updateRevision, &collisionCount) - if err != nil { - log.Error(err, "Creating new controller revision for lws") - return nil, nil, collisionCount, err - } - } + return revisions[0], nil +} - // attempt to find the revision that corresponds to the current revision - for i := range revisions { - if revisions[i].Name == lws.Status.CurrentRevision { - currentRevision = revisions[i] - break - } +func ExistingControllerRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) (bool, 
error) {
+	log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws))
+	ctx = ctrl.LoggerInto(ctx, log)
+	controllerHistory := history.NewHistory(ctx, k8sClient)
+	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{})
+	if err != nil {
+		return false, err
+	}
+	revisions, err := controllerHistory.ListControllerRevisions(lws, selector)
+	if err != nil {
+		return false, err
+	}
+	return len(revisions) > 0, nil
 }
 
 // getPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a
 // previous version. If the returned error is nil the patch is valid. The current state that we save is the
 // leaderWorkerTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously
 // recorded patches.
 func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) {
 	str := &bytes.Buffer{}
-	err := unstructured.UnstructuredJSONScheme.Encode(lws, str)
+	clone := lws.DeepCopy()
+	// ResourceVersion will always be different even if the underlying LWS object is the same.
+	clone.ResourceVersion = ""
+	err := unstructured.UnstructuredJSONScheme.Encode(clone, str)
 	if err != nil {
 		return nil, err
 	}
@@ -169,36 +135,78 @@ func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) {
 	if err != nil {
 		return nil, err
 	}
-	objCopy := make(map[string]interface{})
-	specCopy := make(map[string]interface{})
-	spec := raw["spec"].(map[string]interface{})
-	template := spec["leaderWorkerTemplate"].(map[string]interface{})
-	specCopy["leaderWorkerTemplate"] = template
-	template["$patch"] = "replace"
-	objCopy["spec"] = specCopy
-	patch, err := json.Marshal(objCopy)
+	patch, err := json.Marshal(raw)
 	return patch, err
 }
 
+func CreateLeaderWorkerSetRevision(
+	ctx context.Context,
+	k8sClient client.Client,
+	lws *leaderworkerset.LeaderWorkerSet,
+	templateHash string) error {
+	log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws))
+	ctx = ctrl.LoggerInto(ctx, log)
+	controllerHistory := history.NewHistory(ctx, k8sClient)
+	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{})
+	if err != nil {
+		return err
+	}
+	revisions, err := controllerHistory.ListControllerRevisions(lws, selector)
+	if err != nil {
+		log.Error(err, "Listing all controller revisions")
+		return err
+	}
+	revisionCount := len(revisions)
+	history.SortControllerRevisions(revisions)
+
+	currentRevision, err := NewRevision(lws, NextRevision(revisions), new(int32), templateHash)
+	if err != nil {
+		log.Error(err, "Creating new revision for lws")
+		return err
+	}
+
+	equalRevisions := history.FindEqualRevisions(revisions, currentRevision)
+	equalCount := len(equalRevisions)
+	log.V(2).Info(fmt.Sprintf("found %d equal revisions", equalCount))
+	if len(equalRevisions) > 0 && history.EqualRevision(revisions[revisionCount-1], equalRevisions[equalCount-1]) {
+		return nil
+	}
+
+	if len(equalRevisions) > 0 {
+		// if the equivalent revision is not immediately prior we will roll back by incrementing the
+		// Revision of the equivalent revision
+		_, err = controllerHistory.UpdateControllerRevision(
+			equalRevisions[equalCount-1],
+			currentRevision.Revision)
+		if err != nil {
+			log.Error(err, "updating controller revision")
+			return err
+		}
+		return nil
+	}
+
+	_, err = 
controllerHistory.CreateControllerRevision(lws, currentRevision, new(int32)) + log.V(2).Info("Created new controller revision") + if err != nil { + log.Error(err, "Creating new controller revision for lws") + return err + } + + return nil +} + // newRevision creates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet. // The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned // ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set // to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet. -func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, collisionCount *int32) (*appsv1.ControllerRevision, error) { +func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, collisionCount *int32, templateHash string) (*appsv1.ControllerRevision, error) { patch, err := getPatch(lws) if err != nil { return nil, err } - combinedLabels := make(map[string]string) - for k, v := range lws.Spec.LeaderWorkerTemplate.LeaderTemplate.Labels { - combinedLabels[k] = v - } - for k, v := range lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Labels { - combinedLabels[k] = v - } cr, err := history.NewControllerRevision(lws, controllerKind, - combinedLabels, + map[string]string{leaderworkerset.TemplateRevisionHashKey: templateHash}, runtime.RawExtension{Raw: patch}, revision, collisionCount) diff --git a/pkg/utils/controller/controller_utils_test.go b/pkg/utils/controller/controller_utils_test.go index d833bd14..89df6eff 100644 --- a/pkg/utils/controller/controller_utils_test.go +++ b/pkg/utils/controller/controller_utils_test.go @@ -29,7 +29,7 @@ func TestApplyRevision(t *testing.T) { lws := testutils.BuildLeaderWorkerSet("default").Obj() lws.Status.CollisionCount = new(int32) - revision, err := NewRevision(lws, 1, lws.Status.CollisionCount) + revision, err := NewRevision(lws, 1, lws.Status.CollisionCount, "") currentLws := lws.DeepCopy() if err != nil { t.Fatal(err) @@ -45,7 +45,7 @@ func TestApplyRevision(t *testing.T) { t.Fatal(err) } - restoredRevision, err := NewRevision(restoredLws, 2, restoredLws.Status.CollisionCount) + restoredRevision, err := NewRevision(restoredLws, 2, restoredLws.Status.CollisionCount, "") if err != nil { t.Fatal(err) } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 94abe9af..fc2fbad4 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -20,6 +20,7 @@ import ( "crypto/sha1" "encoding/hex" "os" + "reflect" "strings" appsv1 "k8s.io/api/apps/v1" @@ -46,6 +47,21 @@ func NonZeroValue(value int32) int32 { return value } +func EqualLeaderWorkerTemplates(lhs *leaderworkerset.LeaderWorkerSet, rhs *leaderworkerset.LeaderWorkerSet) bool { + if !reflect.DeepEqual(lhs.Spec.LeaderWorkerTemplate, rhs.Spec.LeaderWorkerTemplate) { + return false + } + if (lhs.Spec.NetworkConfig == nil || string(*lhs.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared)) && (rhs.Spec.NetworkConfig == nil || string(*rhs.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared)) { + return true + } + + if lhs.Spec.NetworkConfig == nil || rhs.Spec.NetworkConfig == nil { + return false + } + + return string(*lhs.Spec.NetworkConfig.SubdomainPolicy) == string(*rhs.Spec.NetworkConfig.SubdomainPolicy) +} + func LeaderWorkerTemplateHash(lws *leaderworkerset.LeaderWorkerSet) string { if lws.Spec.NetworkConfig == nil || 
string(*lws.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared) { return Sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + diff --git a/test/testutils/util.go b/test/testutils/util.go index 0b18f81f..6023a491 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -177,7 +177,7 @@ func ExpectValidPods(ctx context.Context, k8sClient client.Client, lws *leaderwo } if len(podList.Items) != int((*lws.Spec.Replicas)*(*lws.Spec.LeaderWorkerTemplate.Size)) { - return errors.New("pod number not right") + return fmt.Errorf("expected %d pods, got %d", (int((*lws.Spec.Replicas) * (*lws.Spec.LeaderWorkerTemplate.Size))), len(podList.Items)) } var leaderTemplateSpec corev1.PodTemplateSpec From 522f1b09f7f8b15ae29fb84184dbf64e17f2fc65 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Fri, 20 Dec 2024 22:06:17 +0000 Subject: [PATCH 10/27] removed status changes, cleaned up tests that referenced them. Implemented TruncateHistory and fixed getPatch --- .../v1/leaderworkerset_types.go | 14 --- .../v1/zz_generated.deepcopy.go | 5 - .../v1/leaderworkersetstatus.go | 27 ----- ...erworkerset.x-k8s.io_leaderworkersets.yaml | 17 --- pkg/controllers/leaderworkerset_controller.go | 10 +- pkg/controllers/pod_controller_test.go | 15 ++- pkg/history/controller_history.go | 67 +++++------ pkg/history/controller_history_test.go | 12 +- pkg/utils/controller/controller_utils.go | 41 +++++-- pkg/utils/controller/controller_utils_test.go | 7 +- test/e2e/e2e_test.go | 2 +- .../controllers/leaderworkerset_test.go | 105 ------------------ test/testutils/util.go | 11 +- test/testutils/validators.go | 30 ----- test/testutils/wrappers.go | 14 +-- 15 files changed, 89 insertions(+), 288 deletions(-) diff --git a/api/leaderworkerset/v1/leaderworkerset_types.go b/api/leaderworkerset/v1/leaderworkerset_types.go index 9224aad5..cfbae2c3 100644 --- a/api/leaderworkerset/v1/leaderworkerset_types.go +++ b/api/leaderworkerset/v1/leaderworkerset_types.go @@ -303,20 +303,6 @@ type LeaderWorkerSetStatus struct { // needed for HPA to know what pods belong to the LeaderWorkerSet object. Here // we only select the leader pods. HPAPodSelector string `json:"hpaPodSelector,omitempty"` - - // currentRevision, if not empty, indicates the version of the worker StatefulSet - // used to generate the worker pods in sequence [0,currentReplicas) - CurrentRevision string `json:"currentRevision,omitempty"` - - // updateRevision, if not empty, indicates the version of the worker StatefulSet - // used to generate the worker pods in sequence [replicas-updatedReplicas,replicas) - UpdateRevision string `json:"updateRevision,omitempty"` - - // collisionCount is the count of hash collisions for lws. The lws controller - // uses this field as a collision avoidance mechanism when it needs to create the name for the - // newest ControllerRevision. 
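Dropping collisionCount works because a revision's name is now derived purely from its content: a name collision either means the identical revision already exists (reuse it) or a stored revision was mutated out of band (fail). A hedged sketch of that create path, patterned on CreateControllerRevision (createRevision and the naming are simplified stand-ins):

package revisionsketch

import (
	"bytes"
	"context"
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// createRevision creates rev, adopting an existing object only when its
// payload is byte-identical; otherwise the collision is a hard error.
func createRevision(ctx context.Context, c client.Client, rev *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) {
	clone := rev.DeepCopy()
	err := c.Create(ctx, clone)
	if apierrors.IsAlreadyExists(err) {
		existing := &appsv1.ControllerRevision{}
		if getErr := c.Get(ctx, types.NamespacedName{Namespace: clone.Namespace, Name: clone.Name}, existing); getErr != nil {
			return nil, getErr
		}
		if bytes.Equal(existing.Data.Raw, clone.Data.Raw) {
			return existing, nil // same name, same content: safe to reuse
		}
		return nil, fmt.Errorf("controller revision %s exists with different content", clone.Name)
	}
	return clone, err
}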
- // +optional - CollisionCount *int32 `json:"collisionCount,omitempty"` } type LeaderWorkerSetConditionType string diff --git a/api/leaderworkerset/v1/zz_generated.deepcopy.go b/api/leaderworkerset/v1/zz_generated.deepcopy.go index 6014f924..1baaab4f 100644 --- a/api/leaderworkerset/v1/zz_generated.deepcopy.go +++ b/api/leaderworkerset/v1/zz_generated.deepcopy.go @@ -122,11 +122,6 @@ func (in *LeaderWorkerSetStatus) DeepCopyInto(out *LeaderWorkerSetStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.CollisionCount != nil { - in, out := &in.CollisionCount, &out.CollisionCount - *out = new(int32) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LeaderWorkerSetStatus. diff --git a/client-go/applyconfiguration/leaderworkerset/v1/leaderworkersetstatus.go b/client-go/applyconfiguration/leaderworkerset/v1/leaderworkersetstatus.go index ce81a4c0..42f68aed 100644 --- a/client-go/applyconfiguration/leaderworkerset/v1/leaderworkersetstatus.go +++ b/client-go/applyconfiguration/leaderworkerset/v1/leaderworkersetstatus.go @@ -29,9 +29,6 @@ type LeaderWorkerSetStatusApplyConfiguration struct { UpdatedReplicas *int32 `json:"updatedReplicas,omitempty"` Replicas *int32 `json:"replicas,omitempty"` HPAPodSelector *string `json:"hpaPodSelector,omitempty"` - CurrentRevision *string `json:"currentRevision,omitempty"` - UpdateRevision *string `json:"updateRevision,omitempty"` - CollisionCount *int32 `json:"collisionCount,omitempty"` } // LeaderWorkerSetStatusApplyConfiguration constructs a declarative configuration of the LeaderWorkerSetStatus type for use with @@ -84,27 +81,3 @@ func (b *LeaderWorkerSetStatusApplyConfiguration) WithHPAPodSelector(value strin b.HPAPodSelector = &value return b } - -// WithCurrentRevision sets the CurrentRevision field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CurrentRevision field is set to the value of the last call. -func (b *LeaderWorkerSetStatusApplyConfiguration) WithCurrentRevision(value string) *LeaderWorkerSetStatusApplyConfiguration { - b.CurrentRevision = &value - return b -} - -// WithUpdateRevision sets the UpdateRevision field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UpdateRevision field is set to the value of the last call. -func (b *LeaderWorkerSetStatusApplyConfiguration) WithUpdateRevision(value string) *LeaderWorkerSetStatusApplyConfiguration { - b.UpdateRevision = &value - return b -} - -// WithCollisionCount sets the CollisionCount field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CollisionCount field is set to the value of the last call. 
-func (b *LeaderWorkerSetStatusApplyConfiguration) WithCollisionCount(value int32) *LeaderWorkerSetStatusApplyConfiguration { - b.CollisionCount = &value - return b -} diff --git a/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml b/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml index 518fe6a4..1feb3171 100644 --- a/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml +++ b/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml @@ -16204,13 +16204,6 @@ spec: status: description: LeaderWorkerSetStatus defines the observed state of LeaderWorkerSet properties: - collisionCount: - description: |- - collisionCount is the count of hash collisions for lws. The lws controller - uses this field as a collision avoidance mechanism when it needs to create the name for the - newest ControllerRevision. - format: int32 - type: integer conditions: description: Conditions track the condition of the leaderworkerset. items: @@ -16268,11 +16261,6 @@ spec: - type type: object type: array - currentRevision: - description: |- - currentRevision, if not empty, indicates the version of the worker StatefulSet - used to generate the worker pods in sequence [0,currentReplicas) - type: string hpaPodSelector: description: |- HPAPodSelector for pods that belong to the LeaderWorkerSet object, this is @@ -16289,11 +16277,6 @@ spec: created (updated or not, ready or not) format: int32 type: integer - updateRevision: - description: |- - updateRevision, if not empty, indicates the version of the worker StatefulSet - used to generate the worker pods in sequence [replicas-updatedReplicas,replicas) - type: string updatedReplicas: description: UpdatedReplicas track the number of groups that have been updated (ready or not). diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 08a61aa7..5a6c82fa 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -198,6 +198,9 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, ctx = ctrl.LoggerInto(ctx, log) lwsReplicas := *lws.Spec.Replicas + // Case 1: + // If sts not created yet, all partitions should be updated, + // replicas should not change. stsExists, sts, err := stsCreated(ctx, r.Client, lws) if err != nil { return 0, 0, err @@ -211,6 +214,7 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, if err != nil { return 0, 0, err } + if !existingControllerRevisions { // Updating from version that did not support Controller Revision. 
Need to create one first before checking if template has been updated log.V(2).Info(fmt.Sprintf("Creating new controller revision create/update operation for %+v ", lws)) @@ -430,6 +434,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetUpgradeInProgress)) } else if updatedAndReadyCount == int(*lws.Spec.Replicas) { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetAvailable)) + controllerutils.TruncateHistory(ctx, r.Client, lws, templateHash) } else { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetProgressing)) } @@ -729,8 +734,6 @@ func templateUpdated(ctx context.Context, k8sClient client.Client, sts *appsv1.S if err != nil { return false, err } - log.V(2).Info(fmt.Sprintf("comparing networkConfig %s, with %s", string(*lws.Spec.NetworkConfig.SubdomainPolicy), string(*baselineLws.Spec.NetworkConfig.SubdomainPolicy))) - log.V(2).Info(fmt.Sprintf("Fetching controller revision with hash %s", sts.Labels[leaderworkerset.TemplateRevisionHashKey])) return !utils.EqualLeaderWorkerTemplates(baselineLws, lws), nil } @@ -738,9 +741,6 @@ func stsCreated(ctx context.Context, k8sClient client.Client, lws *leaderworkers sts := &appsv1.StatefulSet{} err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, sts) if err != nil { - // Case 1: - // If sts not created yet, all partitions should be updated, - // replicas should not change. if apierrors.IsNotFound(err) { return false, nil, nil } diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index 78aaf7c0..d359954d 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -37,9 +37,12 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet") lws := testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Replica(1).WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Size(1).Obj() updateTemplateHash := utils.LeaderWorkerTemplateHash(lws) + updateRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1) + if err != nil { + t.Fatal(err) + } lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker" - lws.Status.CollisionCount = new(int32) - currentRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1, lws.Status.CollisionCount) + currentRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 2) if err != nil { t.Fatal(err) } @@ -54,7 +57,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { }{ { name: "1 replica, size 1, exclusive placement disabled", - revision: currentRevision, + revision: updateRevision, pod: &corev1.Pod{ ObjectMeta: v1.ObjectMeta{ Name: "test-sample", @@ -128,7 +131,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { }, { name: "1 replica, size 2, exclusive placement enabled", - revision: currentRevision, + revision: updateRevision, pod: &corev1.Pod{ ObjectMeta: v1.ObjectMeta{ Name: "test-sample", @@ -205,7 +208,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { }, { name: "1 replica, size 2, subgroupsize 2, exclusive placement enabled", - revision: currentRevision, + revision: updateRevision, pod: &corev1.Pod{ ObjectMeta: 
v1.ObjectMeta{ Name: "test-sample", @@ -282,7 +285,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { }, }, { - name: "LeaderPod has a different template hash than one generated by lws object, use podTemplateSpec from revision", + name: "revision is before update, will use that and the old templateHash to create the worker statefulset configuration", revision: currentRevision, pod: &corev1.Pod{ ObjectMeta: v1.ObjectMeta{ diff --git a/pkg/history/controller_history.go b/pkg/history/controller_history.go index ec7d91d4..5c4e5d49 100644 --- a/pkg/history/controller_history.go +++ b/pkg/history/controller_history.go @@ -58,15 +58,13 @@ func ControllerRevisionName(prefix string, hash string) string { // NewControllerRevision returns a ControllerRevision with a ControllerRef pointing to parent and indicating that // parent is of parentKind. The ControllerRevision has labels matching template labels, contains Data equal to data, and -// has a Revision equal to revision. The collisionCount is used when creating the name of the ControllerRevision -// so the name is likely unique. If the returned error is nil, the returned ControllerRevision is valid. If the +// has a Revision equal to revision. If the returned error is nil, the returned ControllerRevision is valid. If the // returned error is not nil, the returned ControllerRevision is invalid for use. func NewControllerRevision(parent metav1.Object, parentKind schema.GroupVersionKind, templateLabels map[string]string, data runtime.RawExtension, - revision int64, - collisionCount *int32) (*appsv1.ControllerRevision, error) { + revision int64) (*appsv1.ControllerRevision, error) { labelMap := make(map[string]string) for k, v := range templateLabels { labelMap[k] = v @@ -80,15 +78,15 @@ func NewControllerRevision(parent metav1.Object, Data: data, Revision: revision, } - hash := HashControllerRevision(cr, collisionCount) + hash := HashControllerRevision(cr) cr.Name = ControllerRevisionName(parent.GetName(), hash) cr.Labels[ControllerRevisionHashLabel] = hash return cr, nil } -// HashControllerRevision hashes the contents of revision's Data using FNV hashing. If probe is not nil, the byte value -// of probe is added written to the hash as well. The returned hash will be a safe encoded string to avoid bad words. -func HashControllerRevision(revision *appsv1.ControllerRevision, probe *int32) string { +// HashControllerRevision hashes the contents of revision's Data using FNV hashing. +// The returned hash will be a safe encoded string to avoid bad words. +func HashControllerRevision(revision *appsv1.ControllerRevision) string { hf := fnv.New32() if len(revision.Data.Raw) > 0 { hf.Write(revision.Data.Raw) @@ -96,9 +94,6 @@ func HashControllerRevision(revision *appsv1.ControllerRevision, probe *int32) s if revision.Data.Object != nil { DeepHashObject(hf, revision.Data.Object) } - if probe != nil { - hf.Write([]byte(strconv.FormatInt(int64(*probe), 10))) - } return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) } @@ -181,14 +176,11 @@ type Interface interface { // controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the // returned error is not nil, the returned slice is not valid. ListControllerRevisions(parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) - // CreateControllerRevision attempts to create the revision as owned by parent via a ControllerRef. 
If name - // collision occurs, collisionCount (incremented each time collision occurs except for the first time) is - // added to the hash of the revision and it is renamed using ControllerRevisionName. Implementations may + // CreateControllerRevision attempts to create the revision as owned by parent via a ControllerRef. Implementations may // cease to attempt to retry creation after some number of attempts and return an error. If the returned // error is not nil, creation failed. If the returned error is nil, the returned ControllerRevision has been // created. - // Callers must make sure that collisionCount is not nil. An error is returned if it is. - CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision, collisionCount *int32) (*appsv1.ControllerRevision, error) + CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) // DeleteControllerRevision attempts to delete revision. If the returned error is not nil, deletion has failed. DeleteControllerRevision(revision *appsv1.ControllerRevision) error // UpdateControllerRevision updates revision such that its Revision is equal to newRevision. Implementations @@ -236,35 +228,30 @@ func (rh *realHistory) ListControllerRevisions(parent metav1.Object, selector la return owned, err } -func (rh *realHistory) CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision, collisionCount *int32) (*appsv1.ControllerRevision, error) { - if collisionCount == nil { - return nil, fmt.Errorf("collisionCount should not be nil") - } - +func (rh *realHistory) CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { // Clone the input clone := revision.DeepCopy() - // Continue to attempt to create the revision updating the name with a new hash on each iteration - for { - hash := HashControllerRevision(revision, collisionCount) - // Update the revisions name - clone.Name = ControllerRevisionName(parent.GetName(), hash) - ns := parent.GetNamespace() - err := rh.Create(rh.context, clone) - if errors.IsAlreadyExists(err) { - exists := &appsv1.ControllerRevision{} - err := rh.Get(rh.context, types.NamespacedName{Namespace: ns, Name: clone.Name}, exists) - if err != nil { - return nil, err - } - if bytes.Equal(exists.Data.Raw, clone.Data.Raw) { - return exists, nil - } - *collisionCount++ - continue + hash := HashControllerRevision(revision) + // Update the revisions name + clone.Name = ControllerRevisionName(parent.GetName(), hash) + ns := parent.GetNamespace() + err := rh.Create(rh.context, clone) + if errors.IsAlreadyExists(err) { + exists := &appsv1.ControllerRevision{} + err := rh.Get(rh.context, types.NamespacedName{Namespace: ns, Name: clone.Name}, exists) + if err != nil { + return nil, err + } + if bytes.Equal(exists.Data.Raw, clone.Data.Raw) { + return exists, nil + } else { + // Since the contents of the revision are used to create the hash, the only way this + // happens is if the contents of the revision were changed, which is unintended behavior + return nil, fmt.Errorf("controller Revision with same name but different content exists") } - return clone, err } + return clone, err } func (rh *realHistory) UpdateControllerRevision(revision *appsv1.ControllerRevision, newRevision int64) (*appsv1.ControllerRevision, error) { diff --git a/pkg/history/controller_history_test.go b/pkg/history/controller_history_test.go index 1b7a71b2..09b823e4 100644 --- 
a/pkg/history/controller_history_test.go +++ b/pkg/history/controller_history_test.go @@ -32,18 +32,18 @@ func TestFindEqualRevisions(t *testing.T) { lws1 := testutils.BuildLeaderWorkerSet("test-sample").Obj() lws2 := testutils.BuildLeaderWorkerSet("test-sample").LeaderTemplateSpec(testutils.MakeLeaderPodSpecWithTPUResource()).Obj() - lws1Revision, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1), 1, lws1.Status.CollisionCount) + lws1Revision, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1), 1) if err != nil { t.Fatal(err) } - lws2Revision, err := NewControllerRevision(lws2, parentKind, lws2.Labels, testutils.RawLWSTemplate(lws2), 1, lws2.Status.CollisionCount) + lws2Revision, err := NewControllerRevision(lws2, parentKind, lws2.Labels, testutils.RawLWSTemplate(lws2), 1) if err != nil { t.Fatal(err) } lws1.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Name = "update-name" - lws1Revision2, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1), 1, lws1.Status.CollisionCount) + lws1Revision2, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1), 1) if err != nil { t.Fatal(err) } @@ -91,15 +91,15 @@ func TestFindEqualRevisions(t *testing.T) { func TestSortControllerRevisions(t *testing.T) { lws := testutils.BuildLeaderWorkerSet("test-sample").Obj() - lwsRevision1, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1, lws.Status.CollisionCount) + lwsRevision1, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1) if err != nil { t.Fatal(err) } - lwsRevision2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 2, lws.Status.CollisionCount) + lwsRevision2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 2) if err != nil { t.Fatal(err) } - lwsRevision1Time2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1, lws.Status.CollisionCount) + lwsRevision1Time2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1) if err != nil { t.Fatal(err) } diff --git a/pkg/utils/controller/controller_utils.go b/pkg/utils/controller/controller_utils.go index 995177d5..04819e08 100644 --- a/pkg/utils/controller/controller_utils.go +++ b/pkg/utils/controller/controller_utils.go @@ -124,18 +124,22 @@ func ExistingControllerRevisions(ctx context.Context, k8sClient client.Client, l func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { str := &bytes.Buffer{} clone := lws.DeepCopy() - // ResourceVersion will always be different even if the underlying LWS object is the same. 
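After this change the payload that getPatch stores is just the object's spec subtree plus a strategic-merge directive, so re-applying a revision replaces the whole spec instead of merging into it. A minimal sketch of that shape (specReplacePatch is a hypothetical helper; the real code first round-trips the full object through unstructured JSON):

package main

import (
	"encoding/json"
	"fmt"
)

// specReplacePatch tags the spec with "$patch": "replace" so a strategic
// merge swaps the entire spec back rather than merging field by field.
func specReplacePatch(spec map[string]interface{}) ([]byte, error) {
	spec["$patch"] = "replace"
	return json.Marshal(map[string]interface{}{"spec": spec})
}

func main() {
	p, _ := specReplacePatch(map[string]interface{}{"leaderWorkerTemplate": map[string]interface{}{"size": 2}})
	fmt.Println(string(p)) // {"spec":{"$patch":"replace","leaderWorkerTemplate":{"size":2}}}
}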
-	clone.ResourceVersion = ""
 	err := unstructured.UnstructuredJSONScheme.Encode(clone, str)
 	if err != nil {
 		return nil, err
 	}
 	var raw map[string]interface{}
 	err = json.Unmarshal(str.Bytes(), &raw)
 	if err != nil {
 		return nil, err
 	}
-	patch, err := json.Marshal(raw)
+	objCopy := make(map[string]interface{})
+	spec := raw["spec"].(map[string]interface{})
+	spec["$patch"] = "replace"
+	objCopy["spec"] = spec
+	patch, err := json.Marshal(objCopy)
 	return patch, err
 }
@@ -159,7 +163,7 @@ func CreateLeaderWorkerSetRevision(
 	revisionCount := len(revisions)
 	history.SortControllerRevisions(revisions)
 
-	currentRevision, err := NewRevision(lws, NextRevision(revisions), new(int32), templateHash)
+	currentRevision, err := NewRevision(lws, NextRevision(revisions), templateHash)
 	if err != nil {
 		log.Error(err, "Creating new revision for lws")
 		return err
@@ -185,7 +189,7 @@ func CreateLeaderWorkerSetRevision(
 		return nil
 	}
 
-	_, err = controllerHistory.CreateControllerRevision(lws, currentRevision, new(int32))
+	_, err = controllerHistory.CreateControllerRevision(lws, currentRevision)
 	log.V(2).Info("Created new controller revision")
 	if err != nil {
 		log.Error(err, "Creating new controller revision for lws")
@@ -199,7 +203,7 @@ func CreateLeaderWorkerSetRevision(
 // The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned
 // ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set
 // to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet.
-func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, collisionCount *int32, templateHash string) (*appsv1.ControllerRevision, error) {
+func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, templateHash string) (*appsv1.ControllerRevision, error) {
 	patch, err := getPatch(lws)
 	if err != nil {
 		return nil, err
@@ -208,8 +212,7 @@ func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, collision
 		controllerKind,
 		map[string]string{leaderworkerset.TemplateRevisionHashKey: templateHash},
 		runtime.RawExtension{Raw: patch},
-		revision,
-		collisionCount)
+		revision)
 	if err != nil {
 		return nil, err
 	}
@@ -254,11 +257,25 @@ func NextRevision(revisions []*appsv1.ControllerRevision) int64 {
 	return revisions[count-1].Revision + 1
 }
 
-// TruncateHistory cleans up all other controller revisions expect the currentRevision and updateRevision
-func TruncateHistory(history history.Interface, revisions []*appsv1.ControllerRevision, updateRevision *appsv1.ControllerRevision, currentRevision *appsv1.ControllerRevision) error {
+// TruncateHistory cleans up all other controller revisions except the currentRevision.
+// currentRevision is the one that matches the templateHash that is passed +func TruncateHistory(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error { + controllerHistory := history.NewHistory(ctx, k8sClient) + selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{}) + if err != nil { + return err + } + revisions, err := controllerHistory.ListControllerRevisions(lws, selector) + if err != nil { + return err + } + currentRevision, err := GetLeaderWorkerSetRevisionFromTemplateHash(ctx, k8sClient, lws, templateHash) + if err != nil { + return err + } for i, revision := range revisions { - if revision.Name != updateRevision.Name && revision.Name != currentRevision.Name { - if err := history.DeleteControllerRevision(revisions[i]); err != nil { + if revision.Name != currentRevision.Name { + if err := controllerHistory.DeleteControllerRevision(revisions[i]); err != nil { return err } } diff --git a/pkg/utils/controller/controller_utils_test.go b/pkg/utils/controller/controller_utils_test.go index 89df6eff..65ed288b 100644 --- a/pkg/utils/controller/controller_utils_test.go +++ b/pkg/utils/controller/controller_utils_test.go @@ -28,8 +28,7 @@ import ( func TestApplyRevision(t *testing.T) { lws := testutils.BuildLeaderWorkerSet("default").Obj() - lws.Status.CollisionCount = new(int32) - revision, err := NewRevision(lws, 1, lws.Status.CollisionCount, "") + revision, err := NewRevision(lws, 1, "") currentLws := lws.DeepCopy() if err != nil { t.Fatal(err) @@ -45,7 +44,7 @@ func TestApplyRevision(t *testing.T) { t.Fatal(err) } - restoredRevision, err := NewRevision(restoredLws, 2, restoredLws.Status.CollisionCount, "") + restoredRevision, err := NewRevision(restoredLws, 2, "") if err != nil { t.Fatal(err) } @@ -58,7 +57,7 @@ func TestApplyRevision(t *testing.T) { t.Errorf("unexpected restored LeaderWorkerTemplate: %s", diff) } - if diff := cmp.Diff(currentLws, restoredLws); diff == "" { + if diff := cmp.Diff(currentLws, restoredLws); diff != "" { t.Errorf("LWS Spec fields should not be restored") } } diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 474eda2e..3832a121 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -303,7 +303,7 @@ var _ = ginkgo.Describe("leaderWorkerSet e2e tests", func() { return numberOfPodsInCommon, nil }, timeout, interval).Should(gomega.Equal(0)) }) - ginkgo.It("unupdated worker StatefulSet restarted during rolling update will be restored with old worker spec", func() { + ginkgo.It("Not updated worker StatefulSet restarted during rolling update will be restored with old worker spec", func() { lws = testing.BuildLeaderWorkerSet(ns.Name).Replica(2).Size(2).Obj() testing.MustCreateLws(ctx, k8sClient, lws) testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") diff --git a/test/integration/controllers/leaderworkerset_test.go b/test/integration/controllers/leaderworkerset_test.go index f93b9ba5..d063db5e 100644 --- a/test/integration/controllers/leaderworkerset_test.go +++ b/test/integration/controllers/leaderworkerset_test.go @@ -504,7 +504,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -521,7 +520,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet 
controller", func() { // soon updated to 3 (replicas-maxUnavailable), it's fine here. testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -536,7 +534,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 1) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -551,7 +548,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 2) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -569,7 +565,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) // 3-index status is unready but template already updated. testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 2) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -585,7 +580,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -606,7 +600,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -621,7 +614,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -636,7 +628,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 1) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -651,7 +642,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 2) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -668,7 +658,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", 
func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -689,7 +678,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -704,7 +692,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -720,7 +707,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -741,7 +727,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -767,7 +752,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 4) // We haven't set the replica-4, replica-5 to ready, so the readyReplicas is 4, the updatedReplicas is 0. 
testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -783,7 +767,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 6) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -804,7 +787,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -819,7 +801,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -834,7 +815,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 1) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -864,7 +844,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 3) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -880,7 +859,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 6) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -901,7 +879,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 6) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 6) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -923,7 +900,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 5) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -948,7 +924,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in 
progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -964,7 +939,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 3) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -985,7 +959,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1012,7 +985,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1027,7 +999,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 5, 1) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1042,7 +1013,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 5, 2) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1057,7 +1027,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 5, 3) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1074,7 +1043,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 3) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1090,7 +1058,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1111,7 +1078,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet 
controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1138,7 +1104,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1162,7 +1127,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1183,7 +1147,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 6) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 6) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1204,7 +1167,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUnavailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) // Partition is updated from 3 to 2. 
testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2) }, @@ -1231,7 +1193,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 3) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1252,7 +1213,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1278,7 +1238,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1290,7 +1249,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1305,7 +1263,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1326,7 +1283,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1353,7 +1309,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1381,7 +1336,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) // The last 2 replicas are updated but not ready. 
testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 2) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1397,7 +1351,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 2) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1422,7 +1375,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1437,7 +1389,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 1) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1454,7 +1405,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 1) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1470,7 +1420,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, @@ -1491,7 +1440,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 4) testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1518,7 +1466,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1534,7 +1481,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 2) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1557,7 +1503,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { // Partition will transit from 4 to 3. 
testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 3) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 6, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) }, }, { @@ -1582,56 +1527,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() { testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0) testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 4, 4) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) - }, - }, - }, - }), - ginkgo.Entry("lws can create controllerRevision even if there is a hash collision", &testCase{ - makeLeaderWorkerSet: testing.BuildLeaderWorkerSet, - updates: []*update{ - { - lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) { - testing.SetPodGroupsToReady(ctx, k8sClient, lws, 2) - lws.Status.CollisionCount = new(int32) - lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "new-worker-name" - // Create a controller revision that will cause a collision when worker is updated - testing.CreateControllerRevisionForHashCollision(ctx, k8sClient, lws) - }, - checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) { - testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") - testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2) - testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) - testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 2) - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) - }, - }, - { - lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) { - testing.UpdateWorkerTemplate(ctx, k8sClient, lws) - - }, - checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) { - testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2) - testing.ExpectLeaderWorkerSetUnavailable(ctx, k8sClient, lws, "All replicas are ready") - testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing") - testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") - testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1) - testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 2, 0) - testing.ExpectCurrentRevisionToNotEqualUpdateRevision(ctx, k8sClient, lws) - testing.ExpectCollisionCountEqualTo(ctx, k8sClient, lws, 1) - }, - }, - { - // Set all groups to ready. 
- lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) { - testing.SetPodGroupsToReady(ctx, k8sClient, lws, 2) - }, - checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) { - testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") - testing.ExpectLeaderWorkerSetNotProgressing(ctx, k8sClient, lws, "Replicas are progressing") - testing.ExpectLeaderWorkerSetNoUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress") - testing.ExpectCurrentRevisionToEqualUpdateRevision(ctx, k8sClient, lws) }, }, }, diff --git a/test/testutils/util.go b/test/testutils/util.go index 6023a491..08ca1d21 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -557,17 +557,17 @@ func CreateControllerRevisionForHashCollision(ctx context.Context, k8sClient cli OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(lws, parentKind)}, Namespace: lws.GetNamespace(), }, - Data: RawLWSTemplate(lws), + // Data: RawLWSTemplate(lws), Revision: 1, } - hash := hashControllerRevision(cr, lws.Status.CollisionCount) + hash := hashControllerRevision(cr) cr.Name = controllerRevisionName(lws.GetName(), hash) cr.Labels[controllerRevisionHashLabel] = hash // Change the lws that is used for the data, This creates a controller revision // with same name but different contents, triggering a hash collision modifiedLws := lws.DeepCopy() modifiedLws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "hash-collision" - cr.Data = RawLWSTemplate(modifiedLws) + // cr.Data = RawLWSTemplate(modifiedLws) gomega.Expect(k8sClient.Create(ctx, cr)).Should(gomega.Succeed()) } @@ -585,7 +585,7 @@ func deleteWorkerStatefulSetIfExists(ctx context.Context, k8sClient client.Clien }, Timeout, Interval).Should(gomega.Succeed()) } -func hashControllerRevision(revision *appsv1.ControllerRevision, probe *int32) string { +func hashControllerRevision(revision *appsv1.ControllerRevision) string { hf := fnv.New32() if len(revision.Data.Raw) > 0 { hf.Write(revision.Data.Raw) @@ -593,9 +593,6 @@ func hashControllerRevision(revision *appsv1.ControllerRevision, probe *int32) s if revision.Data.Object != nil { deepHashObject(hf, revision.Data.Object) } - if probe != nil { - hf.Write([]byte(strconv.FormatInt(int64(*probe), 10))) - } return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) } diff --git a/test/testutils/validators.go b/test/testutils/validators.go index 9b29e72d..a72fa94b 100644 --- a/test/testutils/validators.go +++ b/test/testutils/validators.go @@ -439,33 +439,3 @@ func ExpectSpecifiedWorkerStatefulSetsNotCreated(ctx context.Context, k8sClient return true }, Timeout, Interval).Should(gomega.Equal(true)) } - -func ExpectCurrentRevisionToEqualUpdateRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { - gomega.Eventually(func() bool { - var fetchedLws leaderworkerset.LeaderWorkerSet - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLws); err != nil { - return false - } - return fetchedLws.Status.CurrentRevision == fetchedLws.Status.UpdateRevision - }, Timeout, Interval).Should(gomega.Equal(true)) -} - -func ExpectCurrentRevisionToNotEqualUpdateRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { - gomega.Eventually(func() bool { - var fetchedLws leaderworkerset.LeaderWorkerSet - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLws); err != nil { - return false - } - return 
fetchedLws.Status.CurrentRevision != fetchedLws.Status.UpdateRevision - }, Timeout, Interval).Should(gomega.Equal(true)) -} - -func ExpectCollisionCountEqualTo(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, collisionCount int32) { - gomega.Eventually(func() int32 { - var fetchedLws leaderworkerset.LeaderWorkerSet - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: lws.Name}, &fetchedLws); err != nil { - return -1 - } - return *fetchedLws.Status.CollisionCount - }, Timeout, Interval).Should(gomega.Equal(collisionCount)) -} diff --git a/test/testutils/wrappers.go b/test/testutils/wrappers.go index 8bb4dbcd..f4d0d283 100644 --- a/test/testutils/wrappers.go +++ b/test/testutils/wrappers.go @@ -295,23 +295,19 @@ func MakeLeaderPodSpecWithTPUResource() corev1.PodSpec { } func RawLWSTemplate(lws *leaderworkerset.LeaderWorkerSet) runtime.RawExtension { + clone := lws.DeepCopy() str := &bytes.Buffer{} - err := unstructured.UnstructuredJSONScheme.Encode(lws, str) + err := unstructured.UnstructuredJSONScheme.Encode(clone, str) if err != nil { panic(err) } var raw map[string]interface{} err = json.Unmarshal(str.Bytes(), &raw) - if err != nil { - panic(err) - } objCopy := make(map[string]interface{}) - specCopy := make(map[string]interface{}) spec := raw["spec"].(map[string]interface{}) - template := spec["leaderWorkerTemplate"].(map[string]interface{}) - specCopy["leaderWorkerTemplate"] = template - template["$patch"] = "replace" - objCopy["spec"] = specCopy + specCopy := spec + specCopy["$patch"] = "replace" + objCopy["spec"] = spec patch, err := json.Marshal(objCopy) if err != nil { panic(err) From 9436aeb51d1af98c496ac9e20155cc1ece88cce5 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Mon, 23 Dec 2024 18:52:29 +0000 Subject: [PATCH 11/27] addressed comments, refactored --- pkg/controllers/leaderworkerset_controller.go | 180 +++++++------ .../leaderworkerset_controller_test.go | 7 +- pkg/controllers/pod_controller.go | 5 +- pkg/controllers/pod_controller_test.go | 19 +- pkg/history/controller_history.go | 183 ++----------- pkg/history/controller_history_test.go | 57 +--- pkg/utils/controller/controller_utils.go | 221 --------------- pkg/utils/revision/revision_utils.go | 252 ++++++++++++++++++ .../revision_utils_test.go} | 10 +- pkg/utils/utils.go | 25 -- pkg/webhooks/pod_webhook.go | 4 +- test/testutils/util.go | 77 ++---- test/testutils/validators.go | 7 +- test/testutils/wrappers.go | 6 +- 14 files changed, 422 insertions(+), 631 deletions(-) create mode 100644 pkg/utils/revision/revision_utils.go rename pkg/utils/{controller/controller_utils_test.go => revision/revision_utils_test.go} (82%) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 5a6c82fa..97111d85 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -44,6 +44,7 @@ import ( "sigs.k8s.io/lws/pkg/utils" controllerutils "sigs.k8s.io/lws/pkg/utils/controller" podutils "sigs.k8s.io/lws/pkg/utils/pod" + revisionutils "sigs.k8s.io/lws/pkg/utils/revision" statefulsetutils "sigs.k8s.io/lws/pkg/utils/statefulset" ) @@ -99,13 +100,31 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) - partition, replicas, err := r.rollingUpdateParameters(ctx, lws) + leaderSts, err := r.getLeaderStatefulSet(ctx, lws) + if err != nil { + 
log.Error(err, "Fetching leader statefulset") + return ctrl.Result{}, err + } + + if err := r.createControllerRevisionIfNonExist(ctx, leaderSts, lws); err != nil { + log.Error(err, "Creating controller revision") + return ctrl.Result{}, err + } + + lwsUpdated, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws) + if err != nil { + log.Error(err, "Validating if LWS has been updated") + return ctrl.Result{}, err + } + + templateHash := getLeaderWorkerTemplateHash(leaderSts, lws, lwsUpdated) + partition, replicas, err := r.rollingUpdateParameters(ctx, lws, leaderSts, lwsUpdated) if err != nil { log.Error(err, "Rolling partition error") return ctrl.Result{}, err } - if err := r.SSAWithStatefulset(ctx, lws, partition, replicas); err != nil { + if err := r.SSAWithStatefulset(ctx, lws, partition, replicas, templateHash); err != nil { return ctrl.Result{}, err } @@ -117,7 +136,7 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, err } - err = r.updateStatus(ctx, lws) + err = r.updateStatus(ctx, lws, templateHash) if err != nil { return ctrl.Result{}, err } @@ -193,7 +212,7 @@ func SetupIndexes(indexer client.FieldIndexer) error { // - Otherwise, Replicas is equal to spec.Replicas // - One exception here is when unready replicas of leaderWorkerSet is equal to MaxSurge, // we should reclaim the extra replicas gradually to accommodate for the new replicas. -func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet) (int32, int32, error) { +func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, sts *appsv1.StatefulSet, leaderWorkerSetUpdated bool) (int32, int32, error) { log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) lwsReplicas := *lws.Spec.Replicas @@ -201,26 +220,7 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, // Case 1: // If sts not created yet, all partitions should be updated, // replicas should not change. - stsExists, sts, err := stsCreated(ctx, r.Client, lws) - if err != nil { - return 0, 0, err - } - - if !stsExists { - return 0, lwsReplicas, nil - } - - existingControllerRevisions, err := controllerutils.ExistingControllerRevisions(ctx, r.Client, lws) - if err != nil { - return 0, 0, err - } - - if !existingControllerRevisions { - // Updating from version that did not support Controller Revision. Need to create one first before checking if template has been updated - log.V(2).Info(fmt.Sprintf("Creating new controller revision create/update operation for %+v ", lws)) - if err := controllerutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]); err != nil { - return 0, 0, nil - } + if sts == nil { return 0, lwsReplicas, nil } @@ -248,11 +248,7 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, // Case 2: // Indicates a new rolling update here. - hasTemplateUdated, err := templateUpdated(ctx, r.Client, sts, lws) - if err != nil { - return 0, 0, err - } - if hasTemplateUdated { + if leaderWorkerSetUpdated { // Processing scaling up/down first prior to rolling update. 
return min(lwsReplicas, stsReplicas), wantReplicas(lwsReplicas), nil } @@ -297,29 +293,10 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, return min(partition, utils.NonZeroValue(stsReplicas-int32(rollingStep)-continuousReadyReplicas)), wantReplicas(lwsUnreadyReplicas), nil } -func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32) error { +func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32, templateHash string) error { log := ctrl.LoggerFrom(ctx) - // templateHash is not a reliable way to determine whether or not an lws object has been updated as seen in - // https://github.com/kubernetes-sigs/lws/issues/281 - // If a leader sts already exists, but the template has not been updated, the templateHash of the leader is - // used to keep consistency in cases where two different templateHashes are calculated from the same LWS object - stsExists, sts, err := stsCreated(ctx, r.Client, lws) - if err != nil { - return err - } - templateHash := utils.LeaderWorkerTemplateHash(lws) - if stsExists { - templateUpdated, err := templateUpdated(ctx, r.Client, sts, lws) - if err != nil { - return err - } - if !templateUpdated { - templateHash = sts.Labels[leaderworkerset.TemplateRevisionHashKey] - } - } - - if err = controllerutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, templateHash); err != nil { + if err := revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, templateHash); err != nil { log.Error(err, "Creating LWS Revision") return err } @@ -359,7 +336,7 @@ func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws } // updates the condition of the leaderworkerset to either Progressing or Available. -func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet) (bool, error) { +func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (bool, error) { log := ctrl.LoggerFrom(ctx) podSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, @@ -373,7 +350,6 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l updateStatus := false readyCount, updatedCount, updatedNonBurstWorkerCount, currentNonBurstWorkerCount, updatedAndReadyCount := 0, 0, 0, 0, 0 - templateHash := utils.LeaderWorkerTemplateHash(lws) noWorkerSts := *lws.Spec.LeaderWorkerTemplate.Size == 1 // Iterate through all leaderPods. @@ -434,7 +410,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetUpgradeInProgress)) } else if updatedAndReadyCount == int(*lws.Spec.Replicas) { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetAvailable)) - controllerutils.TruncateHistory(ctx, r.Client, lws, templateHash) + revisionutils.TruncateHistory(ctx, r.Client, lws, templateHash) } else { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetProgressing)) } @@ -448,7 +424,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l } // Updates status and condition of LeaderWorkerSet and returns whether or not an update actually occurred. 
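// templateHash is threaded through so that the replica counting in updateConditions and the history truncation agree on which revision is current.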
-func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet) error { +func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error { updateStatus := false log := ctrl.LoggerFrom(ctx) @@ -484,7 +460,7 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade } // check if an update is needed - updateConditions, err := r.updateConditions(ctx, lws) + updateConditions, err := r.updateConditions(ctx, lws, templateHash) if err != nil { return err } @@ -529,7 +505,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return strconv.Atoi(sts.Labels[leaderworkerset.GroupIndexLabelKey]) }, stsList.Items, int(stsReplicas)) - templateHash := utils.LeaderWorkerTemplateHash(lws) + templateHash := revisionutils.LeaderWorkerTemplateHash(lws) // Once size==1, no worker statefulSets will be created. noWorkerSts := *lws.Spec.LeaderWorkerTemplate.Size == 1 processReplica := func(index int32) (ready bool) { @@ -571,6 +547,70 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return continuousReadyReplicas, lwsUnreadyReplicas, nil } +func (r *LeaderWorkerSetReconciler) getLeaderStatefulSet(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.StatefulSet, error) { + sts := &appsv1.StatefulSet{} + err := r.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, sts) + if err != nil { + if apierrors.IsNotFound(err) { + return nil, nil + } + return nil, err + } + + return sts, nil +} + +// Creates a Controller Revision if the leader statefulset exists but no revisions have been created yet. This happens when updating from a version that doesn't +// support controller revision +func (r *LeaderWorkerSetReconciler) createControllerRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) error { + + if sts == nil { + return nil + } + + existingControllerRevisions, err := revisionutils.ExistingControllerRevisions(ctx, r.Client, lws) + if err != nil { + return err + } + + if !existingControllerRevisions { + return revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) + } + + return nil +} + +func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (bool, error) { + + if sts == nil { + return false, nil + } + + controllerRevision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) + if err != nil { + return false, err + } + baselineLws, err := revisionutils.ApplyRevision(lws, controllerRevision) + if err != nil { + return false, err + } + return !revisionutils.EqualLeaderWorkerTemplates(baselineLws, lws), nil +} + +// templateHash is not a reliable way to determine whether or not an lws object has been updated as seen in https://github.com/kubernetes-sigs/lws/issues/281 +// If a leader sts already exists, but the template has not been updated, the templateHash of the leader is used to keep consistency in cases where two +// different templateHashes are calculated from the same LWS object +func getLeaderWorkerTemplateHash(sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, leaderWorkerSetUpdated bool) string { + + if sts != nil { + if !leaderWorkerSetUpdated { + return 
sts.Labels[leaderworkerset.TemplateRevisionHashKey] + } + } + + return revisionutils.LeaderWorkerTemplateHash(lws) +} + // constructLeaderStatefulSetApplyConfiguration constructs the applied configuration for the leader StatefulSet func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32, templateHash string) (*appsapplyv1.StatefulSetApplyConfiguration, error) { var podTemplateSpec corev1.PodTemplateSpec @@ -721,31 +761,3 @@ func exclusiveConditionTypes(condition1 metav1.Condition, condition2 metav1.Cond return false } - -func templateUpdated(ctx context.Context, k8sClient client.Client, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (bool, error) { - log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) - ctx = ctrl.LoggerInto(ctx, log) - controllerRevision, err := controllerutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, k8sClient, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) - if err != nil { - return false, err - } - - baselineLws, err := controllerutils.ApplyRevision(lws, controllerRevision) - if err != nil { - return false, err - } - return !utils.EqualLeaderWorkerTemplates(baselineLws, lws), nil -} - -func stsCreated(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) (bool, *appsv1.StatefulSet, error) { - sts := &appsv1.StatefulSet{} - err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, sts) - if err != nil { - if apierrors.IsNotFound(err) { - return false, nil, nil - } - return false, nil, err - } - - return true, sts, nil -} diff --git a/pkg/controllers/leaderworkerset_controller_test.go b/pkg/controllers/leaderworkerset_controller_test.go index c38ab0bc..44a2843e 100644 --- a/pkg/controllers/leaderworkerset_controller_test.go +++ b/pkg/controllers/leaderworkerset_controller_test.go @@ -30,15 +30,16 @@ import ( "k8s.io/utils/ptr" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/utils" + + revisionutils "sigs.k8s.io/lws/pkg/utils/revision" testutils "sigs.k8s.io/lws/test/testutils" ) func TestLeaderStatefulSetApplyConfig(t *testing.T) { - hash1 := utils.LeaderWorkerTemplateHash(testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). + hash1 := revisionutils.LeaderWorkerTemplateHash(testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). LeaderTemplateSpec(testutils.MakeLeaderPodSpec()). WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj()) - hash2 := utils.LeaderWorkerTemplateHash(testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). + hash2 := revisionutils.LeaderWorkerTemplateHash(testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). 
WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj()) tests := []struct { diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go index 77e03c0d..3d201458 100644 --- a/pkg/controllers/pod_controller.go +++ b/pkg/controllers/pod_controller.go @@ -41,6 +41,7 @@ import ( acceleratorutils "sigs.k8s.io/lws/pkg/utils/accelerators" controllerutils "sigs.k8s.io/lws/pkg/utils/controller" podutils "sigs.k8s.io/lws/pkg/utils/pod" + revisionutils "sigs.k8s.io/lws/pkg/utils/revision" statefulsetutils "sigs.k8s.io/lws/pkg/utils/statefulset" ) @@ -118,7 +119,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R log.V(2).Info("defer the creation of the worker statefulset because leader pod is not ready.") return ctrl.Result{}, nil } - currentRevision, err := controllerutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.TemplateRevisionHashKey]) + currentRevision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.TemplateRevisionHashKey]) if err != nil { log.Error(err, "Getting lws revisions") return ctrl.Result{}, err @@ -264,7 +265,7 @@ func setControllerReferenceWithStatefulSet(owner metav1.Object, sts *appsapplyv1 // constructWorkerStatefulSetApplyConfiguration constructs the applied configuration for the leader StatefulSet func constructWorkerStatefulSetApplyConfiguration(leaderPod corev1.Pod, lws leaderworkerset.LeaderWorkerSet, currentRevision *appsv1.ControllerRevision) (*appsapplyv1.StatefulSetApplyConfiguration, error) { - currentLws, err := controllerutils.ApplyRevision(&lws, currentRevision) + currentLws, err := revisionutils.ApplyRevision(&lws, currentRevision) if err != nil { return nil, err } diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index d359954d..41e52730 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -23,30 +23,39 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" appsapplyv1 "k8s.io/client-go/applyconfigurations/apps/v1" coreapplyv1 "k8s.io/client-go/applyconfigurations/core/v1" metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1" "k8s.io/utils/ptr" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" "sigs.k8s.io/lws/pkg/history" - "sigs.k8s.io/lws/pkg/utils" + revisionutils "sigs.k8s.io/lws/pkg/utils/revision" testutils "sigs.k8s.io/lws/test/testutils" ) func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet") lws := testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Replica(1).WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Size(1).Obj() - updateTemplateHash := utils.LeaderWorkerTemplateHash(lws) - updateRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1) + updateTemplateHash := revisionutils.LeaderWorkerTemplateHash(lws) + patch, err := revisionutils.GetPatch(lws) + if err != nil { + t.Fatal(err) + } + updateRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, runtime.RawExtension{Raw: patch}, 1) if err != nil { t.Fatal(err) } lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker" - currentRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, 
testutils.RawLWSTemplate(lws), 2) + patch, err = revisionutils.GetPatch(lws) + if err != nil { + t.Fatal(err) + } + currentRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, runtime.RawExtension{Raw: patch}, 2) if err != nil { t.Fatal(err) } - currentTemplateHash := utils.LeaderWorkerTemplateHash(lws) + currentTemplateHash := revisionutils.LeaderWorkerTemplateHash(lws) tests := []struct { name string diff --git a/pkg/history/controller_history.go b/pkg/history/controller_history.go index 5c4e5d49..674f28ea 100644 --- a/pkg/history/controller_history.go +++ b/pkg/history/controller_history.go @@ -14,16 +14,16 @@ See the License for the specific language governing permissions and limitations under the License. */ +// Adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/history/controller_history.go + package history import ( "bytes" "context" - "encoding/json" "fmt" "hash" "hash/fnv" - "sort" "strconv" "github.com/davecgh/go-spew/spew" @@ -40,7 +40,6 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/rand" - "k8s.io/client-go/util/retry" ) // ControllerRevisionHashLabel is the label used to indicate the hash value of a ControllerRevision's Data. @@ -97,11 +96,6 @@ func HashControllerRevision(revision *appsv1.ControllerRevision) string { return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) } -// SortControllerRevisions sorts revisions by their Revision. -func SortControllerRevisions(revisions []*appsv1.ControllerRevision) { - sort.Stable(byRevision(revisions)) -} - // EqualRevision returns true if lhs and rhs are either both nil, or both point to non-nil ControllerRevisions that // contain semantically equivalent data. Otherwise this method returns false. func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool { @@ -134,81 +128,20 @@ func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevisio return bytes.Equal(lhs.Data.Raw, rhs.Data.Raw) && apiequality.Semantic.DeepEqual(lhs.Data.Object, rhs.Data.Object) } -// FindEqualRevisions returns all ControllerRevisions in revisions that are equal to needle using EqualRevision as the -// equality test. The returned slice preserves the order of revisions. -func FindEqualRevisions(revisions []*appsv1.ControllerRevision, needle *appsv1.ControllerRevision) []*appsv1.ControllerRevision { - var eq []*appsv1.ControllerRevision - for i := range revisions { - if EqualRevision(revisions[i], needle) { - eq = append(eq, revisions[i]) - } - } - return eq -} - -// byRevision implements sort.Interface to allow ControllerRevisions to be sorted by Revision. -type byRevision []*appsv1.ControllerRevision - -func (br byRevision) Len() int { - return len(br) -} - -// Less breaks ties first by creation timestamp, then by name -func (br byRevision) Less(i, j int) bool { - if br[i].Revision == br[j].Revision { - if br[j].CreationTimestamp.Equal(&br[i].CreationTimestamp) { - return br[i].Name < br[j].Name - } - return br[j].CreationTimestamp.After(br[i].CreationTimestamp.Time) - } - return br[i].Revision < br[j].Revision -} - -func (br byRevision) Swap(i, j int) { - br[i], br[j] = br[j], br[i] -} - -// Interface provides an interface allowing for management of a Controller's history as realized by recorded -// ControllerRevisions. An instance of Interface can be retrieved from NewHistory. Implementations must treat all -// pointer parameters as "in" parameter, and they must not be mutated. 
-type Interface interface { - // ListControllerRevisions lists all ControllerRevisions matching selector and owned by parent or no other - // controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the - // returned error is not nil, the returned slice is not valid. - ListControllerRevisions(parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) - // CreateControllerRevision attempts to create the revision as owned by parent via a ControllerRef. Implementations may - // cease to attempt to retry creation after some number of attempts and return an error. If the returned - // error is not nil, creation failed. If the returned error is nil, the returned ControllerRevision has been - // created. - CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) - // DeleteControllerRevision attempts to delete revision. If the returned error is not nil, deletion has failed. - DeleteControllerRevision(revision *appsv1.ControllerRevision) error - // UpdateControllerRevision updates revision such that its Revision is equal to newRevision. Implementations - // may retry on conflict. If the returned error is nil, the update was successful and returned ControllerRevision - // is valid. If the returned error is not nil, the update failed and the returned ControllerRevision is invalid. - UpdateControllerRevision(revision *appsv1.ControllerRevision, newRevision int64) (*appsv1.ControllerRevision, error) - // AdoptControllerRevision attempts to adopt revision by adding a ControllerRef indicating that the parent - // Object of parentKind is the owner of revision. If revision is already owned, an error is returned. If the - // resource patch fails, an error is returned. If no error is returned, the returned ControllerRevision is - // valid. - AdoptControllerRevision(parent metav1.Object, parentKind schema.GroupVersionKind, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) - // ReleaseControllerRevision attempts to release parent's ownership of revision by removing parent from the - // OwnerReferences of revision. If an error is returned, parent remains the owner of revision. If no error is - // returned, the returned ControllerRevision is valid. - ReleaseControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) +type realHistory struct { + client.Client + context context.Context } // NewHistory returns an instance of Interface that uses client to communicate with the API Server and lister to list // ControllerRevisions. This method should be used to create an Interface for all scenarios other than testing. -func NewHistory(context context.Context, k8sclient client.Client) Interface { +func NewHistory(context context.Context, k8sclient client.Client) *realHistory { return &realHistory{k8sclient, context} } -type realHistory struct { - client.Client - context context.Context -} - +// ListControllerRevisions lists all ControllerRevisions matching selector and owned by parent or no other +// controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the +// returned error is not nil, the returned slice is not valid. 
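+//
+// Sketch of the call made by TruncateHistory in pkg/utils/revision (an empty
+// label selector, so every revision owned by the set is returned):
+//
+//	selector, _ := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{})
+//	revisions, err := controllerHistory.ListControllerRevisions(lws, selector)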
func (rh *realHistory) ListControllerRevisions(parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) {
	// List all revisions in the namespace that match the selector
	revisionList := new(appsv1.ControllerRevisionList)
@@ -228,22 +161,20 @@ func (rh *realHistory) ListControllerRevisions(parent metav1.Object, selector la
	return owned, err
}
+// CreateControllerRevision attempts to create revision, as named by the caller from its content hash, owned by
+// parent via a ControllerRef. If the returned error is nil, the revision was created, or an identical revision
+// already existed and is returned instead. A same-name revision with different content is reported as an error,
+// since that can only happen on a hash collision.
func (rh *realHistory) CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) {
-	// Clone the input
-	clone := revision.DeepCopy()
-
-	hash := HashControllerRevision(revision)
-	// Update the revisions name
-	clone.Name = ControllerRevisionName(parent.GetName(), hash)
	ns := parent.GetNamespace()
-	err := rh.Create(rh.context, clone)
+	err := rh.Create(rh.context, revision)
	if errors.IsAlreadyExists(err) {
		exists := &appsv1.ControllerRevision{}
-		err := rh.Get(rh.context, types.NamespacedName{Namespace: ns, Name: clone.Name}, exists)
+		err := rh.Get(rh.context, types.NamespacedName{Namespace: ns, Name: revision.Name}, exists)
		if err != nil {
			return nil, err
		}
-		if bytes.Equal(exists.Data.Raw, clone.Data.Raw) {
+		if bytes.Equal(exists.Data.Raw, revision.Data.Raw) {
			return exists, nil
		} else {
			// Since the contents of the revision are used to create the hash, the only way this
@@ -251,92 +182,14 @@ func (rh *realHistory) CreateControllerRevision(parent metav1.Object, revision *
			return nil, fmt.Errorf("controller Revision with same name but different content exists")
		}
	}
-	return clone, err
-}
-
-func (rh *realHistory) UpdateControllerRevision(revision *appsv1.ControllerRevision, newRevision int64) (*appsv1.ControllerRevision, error) {
-	clone := revision.DeepCopy()
-	err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
-		if clone.Revision == newRevision {
-			return nil
-		}
-		clone.Revision = newRevision
-		updateErr := rh.Update(rh.context, clone)
-		if updateErr == nil {
-			return nil
-		}
-		updated := &appsv1.ControllerRevision{}
-		if err := rh.Get(rh.context, types.NamespacedName{Namespace: clone.Namespace, Name: clone.Name}, updated); err == nil {
-			// make a copy so we don't mutate the shared cache
-			clone = updated.DeepCopy()
-		}
-		return updateErr
-	})
-	return clone, err
+	return revision, err
}
+// DeleteControllerRevision attempts to delete revision. If the returned error is not nil, deletion has failed.
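+// It is what TruncateHistory in pkg/utils/revision uses to drop superseded revisions once a rollout completes.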
func (rh *realHistory) DeleteControllerRevision(revision *appsv1.ControllerRevision) error { return rh.Delete(rh.context, revision) } -type objectForPatch struct { - Metadata objectMetaForPatch `json:"metadata"` -} - -// objectMetaForPatch define object meta struct for patch operation -type objectMetaForPatch struct { - OwnerReferences []metav1.OwnerReference `json:"ownerReferences"` - UID types.UID `json:"uid"` -} - -func (rh *realHistory) AdoptControllerRevision(parent metav1.Object, parentKind schema.GroupVersionKind, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { - blockOwnerDeletion := true - isController := true - // Return an error if the revision is not orphan - if owner := metav1.GetControllerOfNoCopy(revision); owner != nil { - return nil, fmt.Errorf("attempt to adopt revision owned by %v", owner) - } - addControllerPatch := objectForPatch{ - Metadata: objectMetaForPatch{ - UID: revision.UID, - OwnerReferences: []metav1.OwnerReference{{ - APIVersion: parentKind.GroupVersion().String(), - Kind: parentKind.Kind, - Name: parent.GetName(), - UID: parent.GetUID(), - Controller: &isController, - BlockOwnerDeletion: &blockOwnerDeletion, - }}, - }, - } - patchBytes, err := json.Marshal(&addControllerPatch) - if err != nil { - return nil, err - } - // Use strategic merge patch to add an owner reference indicating a controller ref - err = rh.Patch(rh.context, revision, client.RawPatch(types.StrategicMergePatchType, patchBytes)) - return revision, err -} - -func (rh *realHistory) ReleaseControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { - dataBytes := GenerateDeleteOwnerRefStrategicMergeBytes(revision.UID, parent.GetUID()) - // Use strategic merge patch to add an owner reference indicating a controller ref - err := rh.Patch(rh.context, revision, client.RawPatch(types.StrategicMergePatchType, dataBytes)) - - if err != nil { - if errors.IsNotFound(err) { - // We ignore deleted revisions - return nil, nil - } - if errors.IsInvalid(err) { - // We ignore cases where the parent no longer owns the revision or where the revision has no - // owner. - return nil, nil - } - } - return revision, err -} - func DeepHashObject(hasher hash.Hash, objectToWrite interface{}) { hasher.Reset() printer := spew.ConfigState{ diff --git a/pkg/history/controller_history_test.go b/pkg/history/controller_history_test.go index 09b823e4..0ba0139f 100644 --- a/pkg/history/controller_history_test.go +++ b/pkg/history/controller_history_test.go @@ -1,3 +1,5 @@ +package history + /* Copyright 2023. @@ -12,17 +14,14 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
-*/ + + package history import ( "testing" - "time" - - "github.com/google/go-cmp/cmp" apps "k8s.io/api/apps/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/lws/test/testutils" ) @@ -88,50 +87,4 @@ func TestFindEqualRevisions(t *testing.T) { }) } } - -func TestSortControllerRevisions(t *testing.T) { - lws := testutils.BuildLeaderWorkerSet("test-sample").Obj() - lwsRevision1, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1) - if err != nil { - t.Fatal(err) - } - lwsRevision2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 2) - if err != nil { - t.Fatal(err) - } - lwsRevision1Time2, err := NewControllerRevision(lws, parentKind, lws.Labels, testutils.RawLWSTemplate(lws), 1) - if err != nil { - t.Fatal(err) - } - lwsRevision1Time2.CreationTimestamp = v1.Time{Time: lwsRevision1.CreationTimestamp.Add(time.Second)} - - tests := []struct { - name string - revisions []*apps.ControllerRevision - want []*apps.ControllerRevision - }{ - { - name: "already sorted", - revisions: []*apps.ControllerRevision{lwsRevision1, lwsRevision2}, - want: []*apps.ControllerRevision{lwsRevision1, lwsRevision2}, - }, - { - name: "inverted sorted", - revisions: []*apps.ControllerRevision{lwsRevision2, lwsRevision1}, - want: []*apps.ControllerRevision{lwsRevision1, lwsRevision2}, - }, - { - name: "same revision name, different timestamp", - revisions: []*apps.ControllerRevision{lwsRevision1, lwsRevision2, lwsRevision1Time2}, - want: []*apps.ControllerRevision{lwsRevision1, lwsRevision1Time2, lwsRevision2}, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - SortControllerRevisions(tc.revisions) - if diff := cmp.Diff(tc.revisions, tc.want); diff != "" { - t.Errorf("error sorting revisions %s", diff) - } - }) - } -} +*/ diff --git a/pkg/utils/controller/controller_utils.go b/pkg/utils/controller/controller_utils.go index 04819e08..de15acd2 100644 --- a/pkg/utils/controller/controller_utils.go +++ b/pkg/utils/controller/controller_utils.go @@ -17,29 +17,18 @@ limitations under the License. package controller import ( - "bytes" "context" - "encoding/json" - "fmt" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/strategicpatch" - "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/history" ) -// controllerKind contains the schema.GroupVersionKind for this controller type. -var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet") - func CreateHeadlessServiceIfNotExists(ctx context.Context, k8sClient client.Client, Scheme *runtime.Scheme, lws *leaderworkerset.LeaderWorkerSet, serviceName string, serviceSelector map[string]string, owner metav1.Object) error { log := ctrl.LoggerFrom(ctx) // If the headless service does not exist in the namespace, create it. 
@@ -72,213 +61,3 @@ func CreateHeadlessServiceIfNotExists(ctx context.Context, k8sClient client.Clie } return nil } - -func GetLeaderWorkerSetRevisionFromTemplateHash(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) { - log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) - ctx = ctrl.LoggerInto(ctx, log) - controllerHistory := history.NewHistory(ctx, k8sClient) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ - leaderworkerset.TemplateRevisionHashKey: templateHash, - }}) - if err != nil { - return nil, err - } - revisions, err := controllerHistory.ListControllerRevisions(lws, selector) - if err != nil { - log.Error(err, "Listing all controller revisions") - return nil, err - } - - if len(revisions) == 0 { - return nil, fmt.Errorf("could not find LWS revision based on %s", templateHash) - } - - if len(revisions) > 1 { - // Since we only create a controllerRevision when the template hash changes, only one should match - return nil, fmt.Errorf("found more than one revision matching templateHash %s", templateHash) - } - - return revisions[0], nil -} - -func ExistingControllerRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) (bool, error) { - log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) - ctx = ctrl.LoggerInto(ctx, log) - controllerHistory := history.NewHistory(ctx, k8sClient) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{}) - if err != nil { - return false, err - } - revisions, err := controllerHistory.ListControllerRevisions(lws, selector) - if err != nil { - return false, err - } - return len(revisions) > 0, nil -} - -// getPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a -// previous version. If the returned error is nil the patch is valid. The current state that we save is the -// leaderWorkerTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously -// recorded patches. 
- -func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { - str := &bytes.Buffer{} - clone := lws.DeepCopy() - err := unstructured.UnstructuredJSONScheme.Encode(clone, str) - if err != nil { - return nil, err - } - var raw map[string]interface{} - err = json.Unmarshal(str.Bytes(), &raw) - objCopy := make(map[string]interface{}) - specCopy := make(map[string]interface{}) - spec := raw["spec"].(map[string]interface{}) - specCopy = spec - specCopy["$patch"] = "replace" - objCopy["spec"] = spec - if err != nil { - return nil, err - } - patch, err := json.Marshal(objCopy) - return patch, err -} - -func CreateLeaderWorkerSetRevision( - ctx context.Context, - k8sClient client.Client, - lws *leaderworkerset.LeaderWorkerSet, - templateHash string) error { - log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) - ctx = ctrl.LoggerInto(ctx, log) - controllerHistory := history.NewHistory(ctx, k8sClient) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{}) - if err != nil { - return err - } - revisions, err := controllerHistory.ListControllerRevisions(lws, selector) - if err != nil { - log.Error(err, "Listing all controller revisions") - return err - } - revisionCount := len(revisions) - history.SortControllerRevisions(revisions) - - currentRevision, err := NewRevision(lws, NextRevision(revisions), templateHash) - if err != nil { - log.Error(err, "Creating new revision for lws") - return err - } - - equalRevisions := history.FindEqualRevisions(revisions, currentRevision) - equalCount := len(equalRevisions) - log.V(2).Info(fmt.Sprintf("found %d equal revisions", equalCount)) - if len(equalRevisions) > 0 && history.EqualRevision(revisions[revisionCount-1], equalRevisions[equalCount-1]) { - return nil - } - - if len(equalRevisions) > 0 { - // if the equivalent revision is not immediately prior we will roll back by incrementing the - // Revision of the equivalent revision - _, err = controllerHistory.UpdateControllerRevision( - equalRevisions[equalCount-1], - currentRevision.Revision) - if err != nil { - log.Error(err, "updating controller revision") - return nil - } - return nil - } - - _, err = controllerHistory.CreateControllerRevision(lws, currentRevision) - log.V(2).Info("Created new controller revision") - if err != nil { - log.Error(err, "Creating new controller revision for lws") - return err - } - - return nil -} - -// newRevision creates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet. -// The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned -// ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set -// to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet. 
-func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, templateHash string) (*appsv1.ControllerRevision, error) { - patch, err := getPatch(lws) - if err != nil { - return nil, err - } - cr, err := history.NewControllerRevision(lws, - controllerKind, - map[string]string{leaderworkerset.TemplateRevisionHashKey: templateHash}, - runtime.RawExtension{Raw: patch}, - revision) - if err != nil { - return nil, err - } - if cr.ObjectMeta.Annotations == nil { - cr.ObjectMeta.Annotations = make(map[string]string) - } - for key, value := range lws.Annotations { - cr.ObjectMeta.Annotations[key] = value - } - return cr, nil -} - -// ApplyRevision returns a new LeaderWorkerSet constructed by restoring the state in revision to set. If the returned error -// is nil, the returned LeaderWorkerSet is valid. -func ApplyRevision(lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*leaderworkerset.LeaderWorkerSet, error) { - clone := lws.DeepCopy() - str := &bytes.Buffer{} - err := unstructured.UnstructuredJSONScheme.Encode(lws, str) - if err != nil { - return nil, err - } - patched, err := strategicpatch.StrategicMergePatch(str.Bytes(), revision.Data.Raw, clone) - if err != nil { - return nil, err - } - restoredLws := &leaderworkerset.LeaderWorkerSet{} - err = json.Unmarshal(patched, restoredLws) - if err != nil { - return nil, err - } - return restoredLws, nil -} - -// nextRevision finds the next valid revision number based on revisions. If the length of revisions -// is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. This method -// assumes that revisions has been sorted by Revision. -func NextRevision(revisions []*appsv1.ControllerRevision) int64 { - count := len(revisions) - if count <= 0 { - return 1 - } - return revisions[count-1].Revision + 1 -} - -// TruncateHistory cleans up all other controller revisions except the currentRevision. 
-// currentRevision is the one that matches the templateHash that is passed -func TruncateHistory(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error { - controllerHistory := history.NewHistory(ctx, k8sClient) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{}) - if err != nil { - return err - } - revisions, err := controllerHistory.ListControllerRevisions(lws, selector) - if err != nil { - return err - } - currentRevision, err := GetLeaderWorkerSetRevisionFromTemplateHash(ctx, k8sClient, lws, templateHash) - if err != nil { - return err - } - for i, revision := range revisions { - if revision.Name != currentRevision.Name { - if err := controllerHistory.DeleteControllerRevision(revisions[i]); err != nil { - return err - } - } - } - return nil -} diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go new file mode 100644 index 00000000..94e982d3 --- /dev/null +++ b/pkg/utils/revision/revision_utils.go @@ -0,0 +1,252 @@ +package revision + +import ( + "bytes" + "context" + "crypto/sha1" + "encoding/hex" + "encoding/json" + "fmt" + "reflect" + + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/strategicpatch" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" + "sigs.k8s.io/lws/pkg/history" +) + +// Functions in this package are adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/statefulset/ + +// controllerKind contains the schema.GroupVersionKind for this controller type. 
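+// For a LeaderWorkerSet parent this resolves to leaderworkerset.x-k8s.io/v1,
+// Kind=LeaderWorkerSet, and it is stamped onto each revision's ControllerRef
+// by NewRevision below.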
+var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet") + +func GetLeaderWorkerSetRevisionFromTemplateHash(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) { + log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) + ctx = ctrl.LoggerInto(ctx, log) + controllerHistory := history.NewHistory(ctx, k8sClient) + selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ + leaderworkerset.TemplateRevisionHashKey: templateHash, + }}) + if err != nil { + return nil, err + } + revisions, err := controllerHistory.ListControllerRevisions(lws, selector) + if err != nil { + log.Error(err, "Listing all controller revisions") + return nil, err + } + + if len(revisions) == 0 { + return nil, fmt.Errorf("could not find LWS revision based on %s", templateHash) + } + + if len(revisions) > 1 { + // Since we only create a controllerRevision when the template hash changes, only one should match + return nil, fmt.Errorf("found more than one revision matching templateHash %s", templateHash) + } + + return revisions[0], nil +} + +func ExistingControllerRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) (bool, error) { + log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) + ctx = ctrl.LoggerInto(ctx, log) + controllerHistory := history.NewHistory(ctx, k8sClient) + selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ + leaderworkerset.SetNameLabelKey: lws.Name, + }}) + if err != nil { + return false, err + } + revisions, err := controllerHistory.ListControllerRevisions(lws, selector) + if err != nil { + return false, err + } + return len(revisions) > 0, nil +} + +// getPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a +// previous version. If the returned error is nil the patch is valid. The current state that we save is the +// leaderWorkerTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously +// recorded patches. 
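+//
+// Roughly, the stored patch data looks like this (an illustrative sketch with
+// field contents abbreviated; see GetPatch below for the exact construction):
+//
+//	{
+//	  "spec": {
+//	    "$patch": "replace",
+//	    "networkConfig": { ... },
+//	    "leaderWorkerTemplate": { ... }
+//	  }
+//	}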
+ +func GetPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { + str := &bytes.Buffer{} + clone := lws.DeepCopy() + err := unstructured.UnstructuredJSONScheme.Encode(clone, str) + if err != nil { + return nil, err + } + var raw map[string]interface{} + err = json.Unmarshal(str.Bytes(), &raw) + objCopy := make(map[string]interface{}) + specCopy := make(map[string]interface{}) + spec := raw["spec"].(map[string]interface{}) + specCopy["networkConfig"] = spec["networkConfig"] + specCopy["leaderWorkerTemplate"] = spec["leaderWorkerTemplate"].(map[string]interface{}) + specCopy["$patch"] = "replace" + objCopy["spec"] = specCopy + if err != nil { + return nil, err + } + return json.Marshal(objCopy) +} + +func CreateLeaderWorkerSetRevision( + ctx context.Context, + k8sClient client.Client, + lws *leaderworkerset.LeaderWorkerSet, + templateHash string) error { + log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) + ctx = ctrl.LoggerInto(ctx, log) + controllerHistory := history.NewHistory(ctx, k8sClient) + selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ + leaderworkerset.SetNameLabelKey: lws.Name, + }}) + if err != nil { + return err + } + revisions, err := controllerHistory.ListControllerRevisions(lws, selector) + if err != nil { + log.Error(err, "Listing all controller revisions") + return err + } + + currentRevision, err := NewRevision(lws, NextRevision(revisions), templateHash) + if err != nil { + log.Error(err, "Creating new revision for lws") + return err + } + + _, err = controllerHistory.CreateControllerRevision(lws, currentRevision) + log.V(2).Info("Created new controller revision") + if err != nil { + log.Error(err, "Creating new controller revision for lws") + return err + } + + return nil +} + +// newRevision creates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet. +// The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned +// ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set +// to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet. +func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, templateHash string) (*appsv1.ControllerRevision, error) { + patch, err := GetPatch(lws) + if err != nil { + return nil, err + } + + return history.NewControllerRevision(lws, + controllerKind, + map[string]string{ + leaderworkerset.TemplateRevisionHashKey: templateHash, + leaderworkerset.SetNameLabelKey: lws.Name, + }, + runtime.RawExtension{Raw: patch}, + revision) +} + +// ApplyRevision returns a new LeaderWorkerSet constructed by restoring the state in revision to set. If the returned error +// is nil, the returned LeaderWorkerSet is valid. 
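+//
+// A round-trip sketch (illustrative only; oldLws, newLws, and templateHash are
+// stand-ins for caller-provided values):
+//
+//	rev, _ := NewRevision(oldLws, 1, templateHash)   // wraps the GetPatch snapshot in a ControllerRevision
+//	restored, _ := ApplyRevision(newLws, rev)        // restored carries oldLws's leaderWorkerTemplate/networkConfig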
+func ApplyRevision(lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*leaderworkerset.LeaderWorkerSet, error) {
+	// clone := lws.DeepCopy()
+	str := &bytes.Buffer{}
+	err := unstructured.UnstructuredJSONScheme.Encode(lws, str)
+	if err != nil {
+		return nil, err
+	}
+	patched, err := strategicpatch.StrategicMergePatch(str.Bytes(), revision.Data.Raw, lws)
+	if err != nil {
+		return nil, err
+	}
+	restoredLws := &leaderworkerset.LeaderWorkerSet{}
+	err = json.Unmarshal(patched, restoredLws)
+	if err != nil {
+		return nil, err
+	}
+	return restoredLws, nil
+}
+
+// NextRevision finds the next valid revision number based on revisions. If the length of revisions
+// is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. Unlike the
+// upstream StatefulSet helper, this does not assume revisions are sorted; it scans for the maximum.
+func NextRevision(revisions []*appsv1.ControllerRevision) int64 {
+	count := len(revisions)
+	if count <= 0 {
+		return 1
+	}
+
+	max := int64(1)
+	for _, revision := range revisions {
+		if max < revision.Revision {
+			max = revision.Revision
+		}
+	}
+	return max + 1
+}
+
+// TruncateHistory cleans up all controller revisions except the current one, i.e. the revision
+// whose TemplateRevisionHashKey label matches the templateHash that is passed in.
+func TruncateHistory(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error {
+	controllerHistory := history.NewHistory(ctx, k8sClient)
+	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{
+		leaderworkerset.SetNameLabelKey: lws.Name,
+	}})
+	if err != nil {
+		return err
+	}
+	revisions, err := controllerHistory.ListControllerRevisions(lws, selector)
+	if err != nil {
+		return err
+	}
+
+	for i, revision := range revisions {
+		if revision.Labels[leaderworkerset.TemplateRevisionHashKey] != templateHash {
+			if err := controllerHistory.DeleteControllerRevision(revisions[i]); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func EqualLeaderWorkerTemplates(lhs *leaderworkerset.LeaderWorkerSet, rhs *leaderworkerset.LeaderWorkerSet) bool {
+	if !reflect.DeepEqual(lhs.Spec.LeaderWorkerTemplate, rhs.Spec.LeaderWorkerTemplate) {
+		return false
+	}
+	if (lhs.Spec.NetworkConfig == nil || string(*lhs.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared)) && (rhs.Spec.NetworkConfig == nil || string(*rhs.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared)) {
+		return true
+	}
+
+	if lhs.Spec.NetworkConfig == nil || rhs.Spec.NetworkConfig == nil {
+		return false
+	}
+
+	return string(*lhs.Spec.NetworkConfig.SubdomainPolicy) == string(*rhs.Spec.NetworkConfig.SubdomainPolicy)
+}
+
+// Sha1Hash accepts an input string and returns the 40 character SHA1 hash digest of the input string.
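+// For example, Sha1Hash("") returns "da39a3ee5e6b4b0d3255bfef95601890afd80709".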
+func Sha1Hash(s string) string { + h := sha1.New() + h.Write([]byte(s)) + return hex.EncodeToString(h.Sum(nil)) +} + +func LeaderWorkerTemplateHash(lws *leaderworkerset.LeaderWorkerSet) string { + if lws.Spec.NetworkConfig == nil || string(*lws.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared) { + return Sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + + lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String()) + } + + return Sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + + lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String() + string(*lws.Spec.NetworkConfig.SubdomainPolicy)) +} diff --git a/pkg/utils/controller/controller_utils_test.go b/pkg/utils/revision/revision_utils_test.go similarity index 82% rename from pkg/utils/controller/controller_utils_test.go rename to pkg/utils/revision/revision_utils_test.go index 65ed288b..e6f341ff 100644 --- a/pkg/utils/controller/controller_utils_test.go +++ b/pkg/utils/revision/revision_utils_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package controller +package revision import ( "testing" @@ -28,7 +28,7 @@ import ( func TestApplyRevision(t *testing.T) { lws := testutils.BuildLeaderWorkerSet("default").Obj() - revision, err := NewRevision(lws, 1, "") + revision, err := NewRevision(lws, 1, LeaderWorkerTemplateHash(lws)) currentLws := lws.DeepCopy() if err != nil { t.Fatal(err) @@ -44,7 +44,7 @@ func TestApplyRevision(t *testing.T) { t.Fatal(err) } - restoredRevision, err := NewRevision(restoredLws, 2, "") + restoredRevision, err := NewRevision(restoredLws, 2, LeaderWorkerTemplateHash(restoredLws)) if err != nil { t.Fatal(err) } @@ -57,7 +57,7 @@ func TestApplyRevision(t *testing.T) { t.Errorf("unexpected restored LeaderWorkerTemplate: %s", diff) } - if diff := cmp.Diff(currentLws, restoredLws); diff != "" { - t.Errorf("LWS Spec fields should not be restored") + if diff := cmp.Diff(currentLws.Spec.NetworkConfig, restoredLws.Spec.NetworkConfig); diff != "" { + t.Errorf("NetworkConfig should be restored %s", diff) } } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index fc2fbad4..f3c4f91b 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -47,31 +47,6 @@ func NonZeroValue(value int32) int32 { return value } -func EqualLeaderWorkerTemplates(lhs *leaderworkerset.LeaderWorkerSet, rhs *leaderworkerset.LeaderWorkerSet) bool { - if !reflect.DeepEqual(lhs.Spec.LeaderWorkerTemplate, rhs.Spec.LeaderWorkerTemplate) { - return false - } - if (lhs.Spec.NetworkConfig == nil || string(*lhs.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared)) && (rhs.Spec.NetworkConfig == nil || string(*rhs.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared)) { - return true - } - - if lhs.Spec.NetworkConfig == nil || rhs.Spec.NetworkConfig == nil { - return false - } - - return string(*lhs.Spec.NetworkConfig.SubdomainPolicy) == string(*rhs.Spec.NetworkConfig.SubdomainPolicy) -} - -func LeaderWorkerTemplateHash(lws *leaderworkerset.LeaderWorkerSet) string { - if lws.Spec.NetworkConfig == nil || string(*lws.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared) { - return Sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + - lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String()) - } - - return Sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + - lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String() + 
string(*lws.Spec.NetworkConfig.SubdomainPolicy)) -} - // SortByIndex returns an ascending list, the length of the list is always specified by the parameter. func SortByIndex[T appsv1.StatefulSet | corev1.Pod | int](indexFunc func(T) (int, error), items []T, length int) []T { result := make([]T, length) diff --git a/pkg/webhooks/pod_webhook.go b/pkg/webhooks/pod_webhook.go index adbafa82..2a788808 100644 --- a/pkg/webhooks/pod_webhook.go +++ b/pkg/webhooks/pod_webhook.go @@ -28,9 +28,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/utils" acceleratorutils "sigs.k8s.io/lws/pkg/utils/accelerators" podutils "sigs.k8s.io/lws/pkg/utils/pod" + revisionutils "sigs.k8s.io/lws/pkg/utils/revision" statefulsetutils "sigs.k8s.io/lws/pkg/utils/statefulset" ) @@ -174,7 +174,7 @@ func (p *PodWebhook) Default(ctx context.Context, obj runtime.Object) error { } func genGroupUniqueKey(ns string, podName string) string { - return utils.Sha1Hash(fmt.Sprintf("%s/%s", ns, podName)) + return revisionutils.Sha1Hash(fmt.Sprintf("%s/%s", ns, podName)) } // SetExclusiveAffinities set the pod affinity/anti-affinity diff --git a/test/testutils/util.go b/test/testutils/util.go index 08ca1d21..c56377e2 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -16,27 +16,24 @@ package testutils import ( "context" + "crypto/sha1" + "encoding/hex" "errors" "fmt" - "hash" - "hash/fnv" "strconv" - "github.com/davecgh/go-spew/spew" "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/rand" "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/utils" acceleratorutils "sigs.k8s.io/lws/pkg/utils/accelerators" ) @@ -137,7 +134,7 @@ func CreateLeaderPods(ctx context.Context, leaderSts appsv1.StatefulSet, k8sClie leaderworkerset.WorkerIndexLabelKey: strconv.Itoa(0), leaderworkerset.GroupIndexLabelKey: strconv.Itoa(i), leaderworkerset.GroupUniqueHashLabelKey: "randomValue", - leaderworkerset.TemplateRevisionHashKey: utils.LeaderWorkerTemplateHash(lws), + leaderworkerset.TemplateRevisionHashKey: leaderWorkerTemplateHash(lws), }, Annotations: map[string]string{ leaderworkerset.SizeAnnotationKey: strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)), @@ -166,7 +163,7 @@ func ExpectValidPods(ctx context.Context, k8sClient client.Client, lws *leaderwo return err } - hash := utils.LeaderWorkerTemplateHash(lws) + hash := leaderWorkerTemplateHash(lws) labelSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.TemplateRevisionHashKey: hash, @@ -253,7 +250,7 @@ func SetLeaderPodToReady(ctx context.Context, k8sClient client.Client, podName s if err := k8sClient.Get(ctx, client.ObjectKey{Namespace: lws.Namespace, Name: lws.Name}, lws); err != nil { return err } - hash := utils.LeaderWorkerTemplateHash(lws) + hash := leaderWorkerTemplateHash(lws) leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] = hash return k8sClient.Update(ctx, &leaderPod) @@ -544,33 +541,6 @@ func SetLeaderPodsToReady(ctx context.Context, k8sClient client.Client, lws *lea }, Timeout, Interval).Should(gomega.Succeed()) } -func 
CreateControllerRevisionForHashCollision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { - parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet") - controllerRevisionHashLabel := "controller.kubernetes.io/hash" - labels := lws.Labels - if lws.Labels == nil { - labels = make(map[string]string) - } - cr := &appsv1.ControllerRevision{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labels, - OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(lws, parentKind)}, - Namespace: lws.GetNamespace(), - }, - // Data: RawLWSTemplate(lws), - Revision: 1, - } - hash := hashControllerRevision(cr) - cr.Name = controllerRevisionName(lws.GetName(), hash) - cr.Labels[controllerRevisionHashLabel] = hash - // Change the lws that is used for the data, This creates a controller revision - // with same name but different contents, triggering a hash collision - modifiedLws := lws.DeepCopy() - modifiedLws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "hash-collision" - // cr.Data = RawLWSTemplate(modifiedLws) - gomega.Expect(k8sClient.Create(ctx, cr)).Should(gomega.Succeed()) -} - func deleteWorkerStatefulSetIfExists(ctx context.Context, k8sClient client.Client, statefulsetName string, lws *leaderworkerset.LeaderWorkerSet) { // in cases where size = 1, the workerstatefulset does not exist gomega.Eventually(func() error { @@ -585,31 +555,20 @@ func deleteWorkerStatefulSetIfExists(ctx context.Context, k8sClient client.Clien }, Timeout, Interval).Should(gomega.Succeed()) } -func hashControllerRevision(revision *appsv1.ControllerRevision) string { - hf := fnv.New32() - if len(revision.Data.Raw) > 0 { - hf.Write(revision.Data.Raw) - } - if revision.Data.Object != nil { - deepHashObject(hf, revision.Data.Object) - } - return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) -} - -func controllerRevisionName(prefix string, hash string) string { - return fmt.Sprintf("%s-%s", prefix, hash) +// sha1Hash accepts an input string and returns the 40 character SHA1 hash digest of the input string. 
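+// It duplicates Sha1Hash from pkg/utils/revision; a private copy avoids the
+// import cycle described on leaderWorkerTemplateHash below.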
+func sha1Hash(s string) string { + h := sha1.New() + h.Write([]byte(s)) + return hex.EncodeToString(h.Sum(nil)) } -func deepHashObject(hasher hash.Hash, objectToWrite interface{}) { - hasher.Reset() - printer := spew.ConfigState{ - Indent: " ", - SortKeys: true, - DisableMethods: true, - SpewKeys: true, - } - _, err := printer.Fprintf(hasher, "%#v", objectToWrite) - if err != nil { - return +// added to avoid import cycle between testutils, pkg/history, and pkg/utils/revision +func leaderWorkerTemplateHash(lws *leaderworkerset.LeaderWorkerSet) string { + if lws.Spec.NetworkConfig == nil || string(*lws.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared) { + return sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + + lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String()) } + + return sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + + lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String() + string(*lws.Spec.NetworkConfig.SubdomainPolicy)) } diff --git a/test/testutils/validators.go b/test/testutils/validators.go index a72fa94b..d13b975a 100644 --- a/test/testutils/validators.go +++ b/test/testutils/validators.go @@ -33,7 +33,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/utils" statefulsetutils "sigs.k8s.io/lws/pkg/utils/statefulset" ) @@ -150,7 +149,7 @@ func ExpectValidLeaderStatefulSet(ctx context.Context, k8sClient client.Client, if sts.Spec.Template.Labels[leaderworkerset.SetNameLabelKey] == "" { return fmt.Errorf("leader statefulset pod template misses leaderworkerset label") } - hash := utils.LeaderWorkerTemplateHash(&lws) + hash := leaderWorkerTemplateHash(&lws) if sts.Labels[leaderworkerset.TemplateRevisionHashKey] != hash { return fmt.Errorf("mismatch template revision hash for leader statefulset, got: %s, want: %s", sts.Spec.Template.Labels[leaderworkerset.TemplateRevisionHashKey], hash) } @@ -182,7 +181,7 @@ func ExpectValidLeaderStatefulSet(ctx context.Context, k8sClient client.Client, if diff := cmp.Diff(sts.Spec.Template.Labels, map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.WorkerIndexLabelKey: "0", - leaderworkerset.TemplateRevisionHashKey: utils.LeaderWorkerTemplateHash(&lws), + leaderworkerset.TemplateRevisionHashKey: leaderWorkerTemplateHash(&lws), }); diff != "" { return errors.New("leader StatefulSet pod template doesn't have the correct labels: " + diff) } @@ -271,7 +270,7 @@ func ExpectValidWorkerStatefulSets(ctx context.Context, leaderWorkerSet *leaderw if lws.Annotations[leaderworkerset.ExclusiveKeyAnnotationKey] != sts.Spec.Template.Annotations[leaderworkerset.ExclusiveKeyAnnotationKey] { return fmt.Errorf("mismatch exclusive placement annotation between worker statefulset and leaderworkerset") } - hash := utils.LeaderWorkerTemplateHash(&lws) + hash := leaderWorkerTemplateHash(&lws) if sts.Labels[leaderworkerset.TemplateRevisionHashKey] != hash { return fmt.Errorf("mismatch template revision hash for worker statefulset, got: %s, want: %s", sts.Labels[leaderworkerset.TemplateRevisionHashKey], hash) } diff --git a/test/testutils/wrappers.go b/test/testutils/wrappers.go index f4d0d283..47635dd5 100644 --- a/test/testutils/wrappers.go +++ b/test/testutils/wrappers.go @@ -15,16 +15,12 @@ limitations under the License. 
package testutils import ( - "bytes" - "encoding/json" "fmt" "strconv" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" @@ -294,6 +290,7 @@ func MakeLeaderPodSpecWithTPUResource() corev1.PodSpec { } } +/* func RawLWSTemplate(lws *leaderworkerset.LeaderWorkerSet) runtime.RawExtension { clone := lws.DeepCopy() str := &bytes.Buffer{} @@ -314,3 +311,4 @@ func RawLWSTemplate(lws *leaderworkerset.LeaderWorkerSet) runtime.RawExtension { } return runtime.RawExtension{Raw: patch} } +*/ From 8d3da1983f9fc2e41dcf5f7e7cdd4bfa88ac800c Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Mon, 23 Dec 2024 19:51:17 +0000 Subject: [PATCH 12/27] rebased --- pkg/utils/utils.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index f3c4f91b..faedd0c2 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -20,13 +20,10 @@ import ( "crypto/sha1" "encoding/hex" "os" - "reflect" "strings" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - - leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" ) const ( From 5fa3b3422785b3047b7d186d9af12879cc825003 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Mon, 23 Dec 2024 20:21:53 +0000 Subject: [PATCH 13/27] fixed failing tests and lint error --- pkg/controllers/leaderworkerset_controller.go | 4 +++- .../leaderworkerset_controller_test.go | 18 ++++++++++++------ pkg/utils/revision/revision_utils.go | 16 +++++++--------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 97111d85..4baf6c59 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -410,7 +410,9 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetUpgradeInProgress)) } else if updatedAndReadyCount == int(*lws.Spec.Replicas) { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetAvailable)) - revisionutils.TruncateHistory(ctx, r.Client, lws, templateHash) + if err := revisionutils.TruncateHistory(ctx, r.Client, lws, templateHash); err != nil { + return false, err + } } else { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetProgressing)) } diff --git a/pkg/controllers/leaderworkerset_controller_test.go b/pkg/controllers/leaderworkerset_controller_test.go index 44a2843e..c807a061 100644 --- a/pkg/controllers/leaderworkerset_controller_test.go +++ b/pkg/controllers/leaderworkerset_controller_test.go @@ -44,11 +44,13 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { tests := []struct { name string + templateHash string lws *leaderworkerset.LeaderWorkerSet wantApplyConfig *appsapplyv1.StatefulSetApplyConfiguration }{ { - name: "1 replica, size 1, with empty leader template, exclusive placement disabled", + name: "1 replica, size 1, with empty leader template, exclusive placement disabled", + templateHash: hash2, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). Replica(1). 
RolloutStrategy(leaderworkerset.RolloutStrategy{ @@ -113,7 +115,8 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { }, }, { - name: "1 replica, size 2 , with empty leader template, exclusive placement enabled", + name: "1 replica, size 2 , with empty leader template, exclusive placement enabled", + templateHash: hash2, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). Annotation(map[string]string{ "leaderworkerset.sigs.k8s.io/exclusive-topology": "topologyKey", @@ -181,7 +184,8 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { }, }, { - name: "2 replica, size 2, with leader template, exclusive placement enabled", + name: "2 replica, size 2, with leader template, exclusive placement enabled", + templateHash: hash1, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Annotation(map[string]string{ "leaderworkerset.sigs.k8s.io/exclusive-topology": "topologyKey", }).Replica(2). @@ -248,7 +252,8 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { }, }, { - name: "2 maxUnavailable, 1 maxSurge, with empty leader template, exclusive placement disabled", + name: "2 maxUnavailable, 1 maxSurge, with empty leader template, exclusive placement disabled", + templateHash: hash2, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). Replica(1). RolloutStrategy(leaderworkerset.RolloutStrategy{ @@ -314,7 +319,8 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { }, }, { - name: "1 replica, size 2, with leader template, exclusive placement enabled, subgroupsize enabled", + name: "1 replica, size 2, with leader template, exclusive placement enabled, subgroupsize enabled", + templateHash: hash1, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Annotation(map[string]string{ leaderworkerset.SubGroupExclusiveKeyAnnotationKey: "topologyKey", }).SubGroupSize(2).Replica(1). @@ -384,7 +390,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - stsApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(tc.lws, 0, *tc.lws.Spec.Replicas, "") + stsApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(tc.lws, 0, *tc.lws.Spec.Replicas, tc.templateHash) if err != nil { t.Errorf("failed with error: %s", err.Error()) } diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index 94e982d3..d295f380 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -74,18 +74,20 @@ func ExistingControllerRevisions(ctx context.Context, k8sClient client.Client, l // getPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a // previous version. If the returned error is nil the patch is valid. The current state that we save is the -// leaderWorkerTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously -// recorded patches. +// leaderWorkerTemplate and NetworkConfig. We can modify this later to encompass more state (or less) and +// remain compatible with previously recorded patches. 
func GetPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { str := &bytes.Buffer{} clone := lws.DeepCopy() err := unstructured.UnstructuredJSONScheme.Encode(clone, str) - if err != nil { + if err := unstructured.UnstructuredJSONScheme.Encode(clone, str); err != nil { return nil, err } var raw map[string]interface{} - err = json.Unmarshal(str.Bytes(), &raw) + if err = json.Unmarshal(str.Bytes(), &raw); err != nil { + return nil, err + } objCopy := make(map[string]interface{}) specCopy := make(map[string]interface{}) spec := raw["spec"].(map[string]interface{}) @@ -93,9 +95,6 @@ func GetPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { specCopy["leaderWorkerTemplate"] = spec["leaderWorkerTemplate"].(map[string]interface{}) specCopy["$patch"] = "replace" objCopy["spec"] = specCopy - if err != nil { - return nil, err - } return json.Marshal(objCopy) } @@ -169,8 +168,7 @@ func ApplyRevision(lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.Contro return nil, err } restoredLws := &leaderworkerset.LeaderWorkerSet{} - err = json.Unmarshal(patched, restoredLws) - if err != nil { + if err = json.Unmarshal(patched, restoredLws); err != nil { return nil, err } return restoredLws, nil From 6a29f29b8a1d4aa34f3a8116f0c15fc020bfe8e3 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Mon, 23 Dec 2024 20:35:41 +0000 Subject: [PATCH 14/27] fixed lint, again --- pkg/utils/revision/revision_utils.go | 3 +-- pkg/utils/revision/revision_utils_test.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index d295f380..031ad98c 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -80,12 +80,11 @@ func ExistingControllerRevisions(ctx context.Context, k8sClient client.Client, l func GetPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { str := &bytes.Buffer{} clone := lws.DeepCopy() - err := unstructured.UnstructuredJSONScheme.Encode(clone, str) if err := unstructured.UnstructuredJSONScheme.Encode(clone, str); err != nil { return nil, err } var raw map[string]interface{} - if err = json.Unmarshal(str.Bytes(), &raw); err != nil { + if err := json.Unmarshal(str.Bytes(), &raw); err != nil { return nil, err } objCopy := make(map[string]interface{}) diff --git a/pkg/utils/revision/revision_utils_test.go b/pkg/utils/revision/revision_utils_test.go index e6f341ff..87e9b72f 100644 --- a/pkg/utils/revision/revision_utils_test.go +++ b/pkg/utils/revision/revision_utils_test.go @@ -29,10 +29,10 @@ func TestApplyRevision(t *testing.T) { lws := testutils.BuildLeaderWorkerSet("default").Obj() revision, err := NewRevision(lws, 1, LeaderWorkerTemplateHash(lws)) - currentLws := lws.DeepCopy() if err != nil { t.Fatal(err) } + currentLws := lws.DeepCopy() lws.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Name = "update-name" subdomainPolicy := leaderworkerset.SubdomainUniquePerReplica From e9d61310011fb6cec45a92c6c474159be16a5c76 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Thu, 26 Dec 2024 21:59:23 +0000 Subject: [PATCH 15/27] refactored revision code, added fix for PodGroupRestart bug and an e2e test --- Dockerfile | 1 - pkg/controllers/leaderworkerset_controller.go | 65 ++--- .../leaderworkerset_controller_test.go | 22 +- pkg/controllers/pod_controller.go | 7 +- pkg/controllers/pod_controller_test.go | 15 +- pkg/history/controller_history.go | 209 --------------- pkg/history/controller_history_test.go | 90 ------- 
pkg/utils/revision/revision_utils.go | 251 +++++++++++++----- pkg/utils/revision/revision_utils_test.go | 69 ++++- test/e2e/e2e_test.go | 27 ++ test/testutils/util.go | 77 ++++-- test/testutils/validators.go | 18 +- test/testutils/wrappers.go | 23 -- 13 files changed, 387 insertions(+), 487 deletions(-) delete mode 100644 pkg/history/controller_history.go delete mode 100644 pkg/history/controller_history_test.go diff --git a/Dockerfile b/Dockerfile index fc98835a..da2ff121 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,6 @@ RUN go mod download COPY cmd/main.go cmd/main.go COPY api/ api/ COPY pkg/controllers/ pkg/controllers/ -COPY pkg/history/ pkg/history/ COPY pkg/cert/ pkg/cert/ COPY pkg/webhooks/ pkg/webhooks/ COPY pkg/utils pkg/utils diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 4baf6c59..f74fa40a 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -106,25 +106,25 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, err } - if err := r.createControllerRevisionIfNonExist(ctx, leaderSts, lws); err != nil { + revision, err := r.createControllerRevisionIfNonExist(ctx, leaderSts, lws) + if err != nil { log.Error(err, "Creating controller revision") return ctrl.Result{}, err } - lwsUpdated, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws) + lwsUpdated, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws, revision) if err != nil { log.Error(err, "Validating if LWS has been updated") return ctrl.Result{}, err } - templateHash := getLeaderWorkerTemplateHash(leaderSts, lws, lwsUpdated) - partition, replicas, err := r.rollingUpdateParameters(ctx, lws, leaderSts, lwsUpdated) + partition, replicas, err := r.rollingUpdateParameters(ctx, lws, leaderSts, revision, lwsUpdated) if err != nil { log.Error(err, "Rolling partition error") return ctrl.Result{}, err } - if err := r.SSAWithStatefulset(ctx, lws, partition, replicas, templateHash); err != nil { + if err := r.SSAWithStatefulset(ctx, lws, partition, replicas, revision.Labels[leaderworkerset.TemplateRevisionHashKey]); err != nil { return ctrl.Result{}, err } @@ -136,7 +136,7 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, err } - err = r.updateStatus(ctx, lws, templateHash) + err = r.updateStatus(ctx, lws, revision.Labels[leaderworkerset.TemplateRevisionHashKey]) if err != nil { return ctrl.Result{}, err } @@ -212,7 +212,7 @@ func SetupIndexes(indexer client.FieldIndexer) error { // - Otherwise, Replicas is equal to spec.Replicas // - One exception here is when unready replicas of leaderWorkerSet is equal to MaxSurge, // we should reclaim the extra replicas gradually to accommodate for the new replicas. 
-func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, sts *appsv1.StatefulSet, leaderWorkerSetUpdated bool) (int32, int32, error) { +func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, sts *appsv1.StatefulSet, revision *appsv1.ControllerRevision, leaderWorkerSetUpdated bool) (int32, int32, error) { log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) lwsReplicas := *lws.Spec.Replicas @@ -261,7 +261,7 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, return 0, lwsReplicas, nil } - continuousReadyReplicas, lwsUnreadyReplicas, err := r.iterateReplicas(ctx, lws, stsReplicas) + continuousReadyReplicas, lwsUnreadyReplicas, err := r.iterateReplicas(ctx, lws, stsReplicas, revision) if err != nil { return 0, 0, err } @@ -296,11 +296,6 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32, templateHash string) error { log := ctrl.LoggerFrom(ctx) - if err := revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, templateHash); err != nil { - log.Error(err, "Creating LWS Revision") - return err - } - // construct the statefulset apply configuration leaderStatefulSetApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(lws, partition, replicas, templateHash) if err != nil { @@ -480,7 +475,7 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade // - The first value represents the number of continuous ready replicas ranging from the last index to 0, // to help us judge whether we can update the Partition or not. // - The second value represents the unready replicas whose index is smaller than leaderWorkerSet Replicas. -func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, stsReplicas int32) (int32, int32, error) { +func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, stsReplicas int32, revision *appsv1.ControllerRevision) (int32, int32, error) { podSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.WorkerIndexLabelKey: "0", @@ -507,7 +502,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return strconv.Atoi(sts.Labels[leaderworkerset.GroupIndexLabelKey]) }, stsList.Items, int(stsReplicas)) - templateHash := revisionutils.LeaderWorkerTemplateHash(lws) + templateHash := revision.Labels[leaderworkerset.TemplateRevisionHashKey] // Once size==1, no worker statefulSets will be created. noWorkerSts := *lws.Spec.LeaderWorkerTemplate.Size == 1 processReplica := func(index int32) (ready bool) { @@ -562,55 +557,37 @@ func (r *LeaderWorkerSetReconciler) getLeaderStatefulSet(ctx context.Context, lw return sts, nil } -// Creates a Controller Revision if the leader statefulset exists but no revisions have been created yet. 
This happens when updating from a version that doesn't -// support controller revision -func (r *LeaderWorkerSetReconciler) createControllerRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) error { - +func (r *LeaderWorkerSetReconciler) createControllerRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { if sts == nil { - return nil + return revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, "") } - existingControllerRevisions, err := revisionutils.ExistingControllerRevisions(ctx, r.Client, lws) + stsRevision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) if err != nil { - return err + return nil, err } - if !existingControllerRevisions { + if stsRevision == nil { return revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) } - return nil + return revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, "") } -func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (bool, error) { - +func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (bool, error) { if sts == nil { return false, nil } - controllerRevision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) - if err != nil { - return false, err - } - baselineLws, err := revisionutils.ApplyRevision(lws, controllerRevision) + stsRevision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) if err != nil { return false, err } - return !revisionutils.EqualLeaderWorkerTemplates(baselineLws, lws), nil -} - -// templateHash is not a reliable way to determine whether or not an lws object has been updated as seen in https://github.com/kubernetes-sigs/lws/issues/281 -// If a leader sts already exists, but the template has not been updated, the templateHash of the leader is used to keep consistency in cases where two -// different templateHashes are calculated from the same LWS object -func getLeaderWorkerTemplateHash(sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, leaderWorkerSetUpdated bool) string { - - if sts != nil { - if !leaderWorkerSetUpdated { - return sts.Labels[leaderworkerset.TemplateRevisionHashKey] - } + if stsRevision == nil { + return false, fmt.Errorf("did not find a revision for the existing leader sts") } - return revisionutils.LeaderWorkerTemplateHash(lws) + return !revisionutils.EqualRevision(stsRevision, revision), nil } // constructLeaderStatefulSetApplyConfiguration constructs the applied configuration for the leader StatefulSet diff --git a/pkg/controllers/leaderworkerset_controller_test.go b/pkg/controllers/leaderworkerset_controller_test.go index c807a061..917f99f7 100644 --- a/pkg/controllers/leaderworkerset_controller_test.go +++ b/pkg/controllers/leaderworkerset_controller_test.go @@ -23,6 +23,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" appsapplyv1 
"k8s.io/client-go/applyconfigurations/apps/v1" coreapplyv1 "k8s.io/client-go/applyconfigurations/core/v1" @@ -36,11 +37,24 @@ import ( ) func TestLeaderStatefulSetApplyConfig(t *testing.T) { - hash1 := revisionutils.LeaderWorkerTemplateHash(testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). + parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet") + lws1 := testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). LeaderTemplateSpec(testutils.MakeLeaderPodSpec()). - WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj()) - hash2 := revisionutils.LeaderWorkerTemplateHash(testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). - WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj()) + WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj() + patch, err := revisionutils.GetPatch(lws1) + if err != nil { + t.Fatal(err) + } + cr1 := revisionutils.NewControllerRevision(lws1, parentKind, lws1.Labels, runtime.RawExtension{Raw: patch}, 1) + hash1 := cr1.Labels[leaderworkerset.TemplateRevisionHashKey] + lws2 := testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). + WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj() + patch, err = revisionutils.GetPatch(lws2) + if err != nil { + t.Fatal(err) + } + cr2 := revisionutils.NewControllerRevision(lws2, parentKind, lws2.Labels, runtime.RawExtension{Raw: patch}, 1) + hash2 := cr2.Labels[leaderworkerset.TemplateRevisionHashKey] tests := []struct { name string diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go index 3d201458..528e57bb 100644 --- a/pkg/controllers/pod_controller.go +++ b/pkg/controllers/pod_controller.go @@ -119,12 +119,12 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R log.V(2).Info("defer the creation of the worker statefulset because leader pod is not ready.") return ctrl.Result{}, nil } - currentRevision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.TemplateRevisionHashKey]) + revision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.TemplateRevisionHashKey]) if err != nil { log.Error(err, "Getting lws revisions") return ctrl.Result{}, err } - statefulSet, err := constructWorkerStatefulSetApplyConfiguration(pod, leaderWorkerSet, currentRevision) + statefulSet, err := constructWorkerStatefulSetApplyConfiguration(pod, leaderWorkerSet, revision) if err != nil { return ctrl.Result{}, err } @@ -185,6 +185,9 @@ func (r *PodReconciler) handleRestartPolicy(ctx context.Context, pod corev1.Pod, if err := r.Get(ctx, types.NamespacedName{Name: leaderPodName, Namespace: pod.Namespace}, &leader); err != nil { return false, err } + if leader.Labels[leaderworkerset.TemplateRevisionHashKey] != pod.Labels[leaderworkerset.TemplateRevisionHashKey] { + return false, nil + } } else { leader = pod } diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index 41e52730..c0eb3742 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -29,7 +29,6 @@ import ( metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1" "k8s.io/utils/ptr" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/history" revisionutils "sigs.k8s.io/lws/pkg/utils/revision" testutils "sigs.k8s.io/lws/test/testutils" ) @@ -37,25 +36,19 @@ import ( func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { 
parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet") lws := testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Replica(1).WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Size(1).Obj() - updateTemplateHash := revisionutils.LeaderWorkerTemplateHash(lws) patch, err := revisionutils.GetPatch(lws) if err != nil { t.Fatal(err) } - updateRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, runtime.RawExtension{Raw: patch}, 1) - if err != nil { - t.Fatal(err) - } + updateRevision := revisionutils.NewControllerRevision(lws, parentKind, lws.Labels, runtime.RawExtension{Raw: patch}, 1) + updateTemplateHash := updateRevision.Labels[leaderworkerset.TemplateRevisionHashKey] lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker" patch, err = revisionutils.GetPatch(lws) if err != nil { t.Fatal(err) } - currentRevision, err := history.NewControllerRevision(lws, parentKind, lws.Labels, runtime.RawExtension{Raw: patch}, 2) - if err != nil { - t.Fatal(err) - } - currentTemplateHash := revisionutils.LeaderWorkerTemplateHash(lws) + currentRevision := revisionutils.NewControllerRevision(lws, parentKind, lws.Labels, runtime.RawExtension{Raw: patch}, 2) + currentTemplateHash := currentRevision.Labels[leaderworkerset.TemplateRevisionHashKey] tests := []struct { name string diff --git a/pkg/history/controller_history.go b/pkg/history/controller_history.go deleted file mode 100644 index 674f28ea..00000000 --- a/pkg/history/controller_history.go +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/history/controller_history.go - -package history - -import ( - "bytes" - "context" - "fmt" - "hash" - "hash/fnv" - "strconv" - - "github.com/davecgh/go-spew/spew" - appsv1 "k8s.io/api/apps/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - - apiequality "k8s.io/apimachinery/pkg/api/equality" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/util/rand" -) - -// ControllerRevisionHashLabel is the label used to indicate the hash value of a ControllerRevision's Data. -const ControllerRevisionHashLabel = "controller.kubernetes.io/hash" - -// ControllerRevisionName returns the Name for a ControllerRevision in the form prefix-hash. If the length -// of prefix is greater than 223 bytes, it is truncated to allow for a name that is no larger than 253 bytes. 
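As an aside on the naming contract in the comment above (which this series later moves into pkg/utils/revision): the 253-byte ceiling is the Kubernetes object-name limit, and it holds because the prefix is capped at 223 bytes, the dash adds one byte, and an FNV-32 hash encodes to at most 10 characters. A minimal standalone sketch of the arithmetic, with an invented hash value:

    package main

    import (
    	"fmt"
    	"strings"
    )

    // revisionName mirrors the prefix-hash scheme described above.
    func revisionName(prefix, hash string) string {
    	if len(prefix) > 223 {
    		prefix = prefix[:223]
    	}
    	return fmt.Sprintf("%s-%s", prefix, hash)
    }

    func main() {
    	// "58d4c7b6f5" stands in for a real encoded FNV-32 hash.
    	name := revisionName(strings.Repeat("a", 300), "58d4c7b6f5")
    	fmt.Println(len(name)) // 234 = 223 + 1 + 10, safely under 253
    }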
-func ControllerRevisionName(prefix string, hash string) string { - if len(prefix) > 223 { - prefix = prefix[:223] - } - - return fmt.Sprintf("%s-%s", prefix, hash) -} - -// NewControllerRevision returns a ControllerRevision with a ControllerRef pointing to parent and indicating that -// parent is of parentKind. The ControllerRevision has labels matching template labels, contains Data equal to data, and -// has a Revision equal to revision. If the returned error is nil, the returned ControllerRevision is valid. If the -// returned error is not nil, the returned ControllerRevision is invalid for use. -func NewControllerRevision(parent metav1.Object, - parentKind schema.GroupVersionKind, - templateLabels map[string]string, - data runtime.RawExtension, - revision int64) (*appsv1.ControllerRevision, error) { - labelMap := make(map[string]string) - for k, v := range templateLabels { - labelMap[k] = v - } - cr := &appsv1.ControllerRevision{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labelMap, - OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(parent, parentKind)}, - Namespace: parent.GetNamespace(), - }, - Data: data, - Revision: revision, - } - hash := HashControllerRevision(cr) - cr.Name = ControllerRevisionName(parent.GetName(), hash) - cr.Labels[ControllerRevisionHashLabel] = hash - return cr, nil -} - -// HashControllerRevision hashes the contents of revision's Data using FNV hashing. -// The returned hash will be a safe encoded string to avoid bad words. -func HashControllerRevision(revision *appsv1.ControllerRevision) string { - hf := fnv.New32() - if len(revision.Data.Raw) > 0 { - hf.Write(revision.Data.Raw) - } - if revision.Data.Object != nil { - DeepHashObject(hf, revision.Data.Object) - } - return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) -} - -// EqualRevision returns true if lhs and rhs are either both nil, or both point to non-nil ControllerRevisions that -// contain semantically equivalent data. Otherwise this method returns false. -func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool { - var lhsHash, rhsHash *uint32 - - if lhs.Labels[leaderworkerset.TemplateRevisionHashKey] == rhs.Labels[leaderworkerset.TemplateRevisionHashKey] { - return true - } - - if lhs == nil || rhs == nil { - return lhs == rhs - } - if hs, found := lhs.Labels[ControllerRevisionHashLabel]; found { - hash, err := strconv.ParseInt(hs, 10, 32) - if err == nil { - lhsHash = new(uint32) - *lhsHash = uint32(hash) - } - } - if hs, found := rhs.Labels[ControllerRevisionHashLabel]; found { - hash, err := strconv.ParseInt(hs, 10, 32) - if err == nil { - rhsHash = new(uint32) - *rhsHash = uint32(hash) - } - } - if lhsHash != nil && rhsHash != nil && *lhsHash != *rhsHash { - return false - } - return bytes.Equal(lhs.Data.Raw, rhs.Data.Raw) && apiequality.Semantic.DeepEqual(lhs.Data.Object, rhs.Data.Object) -} - -type realHistory struct { - client.Client - context context.Context -} - -// NewHistory returns an instance of Interface that uses client to communicate with the API Server and lister to list -// ControllerRevisions. This method should be used to create an Interface for all scenarios other than testing. -func NewHistory(context context.Context, k8sclient client.Client) *realHistory { - return &realHistory{k8sclient, context} -} - -// ListControllerRevisions lists all ControllerRevisions matching selector and owned by parent or no other -// controller. If the returned error is nil the returned slice of ControllerRevisions is valid. 
If the -// returned error is not nil, the returned slice is not valid. -func (rh *realHistory) ListControllerRevisions(parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { - // List all revisions in the namespace that match the selector - revisionList := new(appsv1.ControllerRevisionList) - err := rh.List(rh.context, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) - if err != nil { - return nil, err - } - history := revisionList.Items - var owned []*appsv1.ControllerRevision - for i := range history { - ref := metav1.GetControllerOfNoCopy(&history[i]) - if ref == nil || ref.UID == parent.GetUID() { - owned = append(owned, &history[i]) - } - - } - return owned, err -} - -// CreateControllerRevision attempts to create the revision as owned by parent via a ControllerRef. Implementations may -// cease to attempt to retry creation after some number of attempts and return an error. If the returned -// error is not nil, creation failed. If the returned error is nil, the returned ControllerRevision has been -// created. -func (rh *realHistory) CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { - ns := parent.GetNamespace() - err := rh.Create(rh.context, revision) - if errors.IsAlreadyExists(err) { - exists := &appsv1.ControllerRevision{} - err := rh.Get(rh.context, types.NamespacedName{Namespace: ns, Name: revision.Name}, exists) - if err != nil { - return nil, err - } - if bytes.Equal(exists.Data.Raw, revision.Data.Raw) { - return exists, nil - } else { - // Since the contents of the revision are used to create the hash, the only way this - // happens is if the contents of the revision were changed, which is unintended behavior - return nil, fmt.Errorf("controller Revision with same name but different content exists") - } - } - return revision, err -} - -// DeleteControllerRevision attempts to delete revision. If the returned error is not nil, deletion has failed. -func (rh *realHistory) DeleteControllerRevision(revision *appsv1.ControllerRevision) error { - return rh.Delete(rh.context, revision) -} - -func DeepHashObject(hasher hash.Hash, objectToWrite interface{}) { - hasher.Reset() - printer := spew.ConfigState{ - Indent: " ", - SortKeys: true, - DisableMethods: true, - SpewKeys: true, - } - _, err := printer.Fprintf(hasher, "%#v", objectToWrite) - if err != nil { - return - } -} - -func GenerateDeleteOwnerRefStrategicMergeBytes(revisionUID types.UID, parentUID types.UID) []byte { - return []byte(fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, revisionUID, parentUID)) -} diff --git a/pkg/history/controller_history_test.go b/pkg/history/controller_history_test.go deleted file mode 100644 index 0ba0139f..00000000 --- a/pkg/history/controller_history_test.go +++ /dev/null @@ -1,90 +0,0 @@ -package history - -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- - -package history - -import ( - "testing" - - apps "k8s.io/api/apps/v1" - "sigs.k8s.io/lws/test/testutils" -) - -var parentKind = apps.SchemeGroupVersion.WithKind("LeaderWorkerSet") - -func TestFindEqualRevisions(t *testing.T) { - lws1 := testutils.BuildLeaderWorkerSet("test-sample").Obj() - lws2 := testutils.BuildLeaderWorkerSet("test-sample").LeaderTemplateSpec(testutils.MakeLeaderPodSpecWithTPUResource()).Obj() - - lws1Revision, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1), 1) - if err != nil { - t.Fatal(err) - } - - lws2Revision, err := NewControllerRevision(lws2, parentKind, lws2.Labels, testutils.RawLWSTemplate(lws2), 1) - if err != nil { - t.Fatal(err) - } - - lws1.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Name = "update-name" - lws1Revision2, err := NewControllerRevision(lws1, parentKind, lws1.Labels, testutils.RawLWSTemplate(lws1), 1) - if err != nil { - t.Fatal(err) - } - - tests := []struct { - name string - revision *apps.ControllerRevision - revisions []*apps.ControllerRevision - want map[string]bool - }{ - { - name: "finds nothing with no matches", - revision: lws1Revision, - revisions: []*apps.ControllerRevision{lws1Revision2, lws2Revision}, - want: map[string]bool{}, - }, - { - name: "finds nothing when empty", - revision: lws1Revision, - revisions: []*apps.ControllerRevision{}, - want: map[string]bool{}, - }, - { - name: "finds equivalent", - revision: lws1Revision, - revisions: []*apps.ControllerRevision{lws1Revision, lws1Revision2, lws2Revision}, - want: map[string]bool{lws1Revision.Name: true}, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - revisions := FindEqualRevisions(tc.revisions, tc.revision) - if len(revisions) != len(tc.want) { - t.Errorf("want %d revisions, got %d revisions", len(tc.want), len(revisions)) - } - for i := range revisions { - if !tc.want[revisions[i].Name] { - t.Errorf("Wanted: %s, got: %s", tc.revision.Name, revisions[i].Name) - } - } - }) - } -} -*/ diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index 031ad98c..d7e9c998 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -7,30 +7,187 @@ import ( "encoding/hex" "encoding/json" "fmt" - "reflect" + "hash" + "hash/fnv" + "github.com/davecgh/go-spew/spew" appsv1 "k8s.io/api/apps/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/rand" "k8s.io/apimachinery/pkg/util/strategicpatch" "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/history" ) -// Functions in this package are adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/statefulset/ +// Functions in this package are adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/statefulset/ and +// https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/history/controller_history.go // controllerKind contains the schema.GroupVersionKind for this controller type. 
var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet") +// ControllerRevisionName returns the Name for a ControllerRevision in the form prefix-hash. If the length +// of prefix is greater than 223 bytes, it is truncated to allow for a name that is no larger than 253 bytes. +func ControllerRevisionName(prefix string, hash string) string { + if len(prefix) > 223 { + prefix = prefix[:223] + } + + return fmt.Sprintf("%s-%s", prefix, hash) +} + +// NewControllerRevision returns a ControllerRevision with a ControllerRef pointing to parent and indicating that +// parent is of parentKind. The ControllerRevision has labels matching template labels, contains Data equal to data, and +// has a Revision equal to revision. If the returned error is nil, the returned ControllerRevision is valid. If the +// returned error is not nil, the returned ControllerRevision is invalid for use. +func NewControllerRevision(parent metav1.Object, + parentKind schema.GroupVersionKind, + templateLabels map[string]string, + data runtime.RawExtension, + revision int64) *appsv1.ControllerRevision { + labelMap := make(map[string]string) + for k, v := range templateLabels { + labelMap[k] = v + } + cr := &appsv1.ControllerRevision{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labelMap, + OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(parent, parentKind)}, + Namespace: parent.GetNamespace(), + }, + Data: data, + Revision: revision, + } + hash := HashControllerRevision(cr) + cr.Name = ControllerRevisionName(parent.GetName(), hash) + if cr.Labels[leaderworkerset.TemplateRevisionHashKey] == "" { + cr.Labels[leaderworkerset.TemplateRevisionHashKey] = hash + } + return cr +} + +// HashControllerRevision hashes the contents of revision's Data using FNV hashing. +// The returned hash will be a safe encoded string to avoid bad words. +func HashControllerRevision(revision *appsv1.ControllerRevision) string { + hf := fnv.New32() + if len(revision.Data.Raw) > 0 { + hf.Write(revision.Data.Raw) + } + if revision.Data.Object != nil { + DeepHashObject(hf, revision.Data.Object) + } + return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) +} + +// EqualRevision returns true if lhs and rhs are either both nil, if the templateRevisionHash is the same, +// or if they are semantically equivalent. +func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool { + + if lhs == nil || rhs == nil { + return lhs == rhs + } + + if lhs.Labels[leaderworkerset.TemplateRevisionHashKey] == rhs.Labels[leaderworkerset.TemplateRevisionHashKey] { + return true + } + + return bytes.Equal(lhs.Data.Raw, rhs.Data.Raw) && apiequality.Semantic.DeepEqual(lhs.Data.Object, rhs.Data.Object) +} + +type History struct { + client.Client + context context.Context +} + +// ListControllerRevisions lists all ControllerRevisions matching selector and owned by parent or no other +// controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the +// returned error is not nil, the returned slice is not valid. 
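Before the listing helper that follows, a hedged usage fragment showing the lookup pattern it enables; k8sClient, ctx, lws, and templateHash are assumed to be in scope, and the selector key mirrors GetLeaderWorkerSetRevisionFromTemplateHash further down:

    h := History{Client: k8sClient, context: ctx}
    selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
    	MatchLabels: map[string]string{
    		leaderworkerset.TemplateRevisionHashKey: templateHash,
    	},
    })
    if err != nil {
    	return nil, err
    }
    // Owned revisions carrying this template hash; at most one is expected,
    // since a revision is only written when the template hash changes.
    revisions, err := h.ListControllerRevisions(lws, selector)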
+func (h *History) ListControllerRevisions(parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) {
+	// List all revisions in the namespace that match the selector
+	revisionList := new(appsv1.ControllerRevisionList)
+	err := h.List(h.context, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector})
+	if err != nil {
+		return nil, err
+	}
+	history := revisionList.Items
+	var owned []*appsv1.ControllerRevision
+	for i := range history {
+		ref := metav1.GetControllerOfNoCopy(&history[i])
+		if ref == nil || ref.UID == parent.GetUID() {
+			owned = append(owned, &history[i])
+		}
+
+	}
+	return owned, err
+}
+
+// CreateControllerRevision attempts to create the revision as owned by parent via a ControllerRef. If the returned
+// error is not nil, creation failed. If the returned error is nil, the returned ControllerRevision has been
+// created.
+func (h *History) CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) {
+	ns := parent.GetNamespace()
+	err := h.Create(h.context, revision)
+	if errors.IsAlreadyExists(err) {
+		exists := &appsv1.ControllerRevision{}
+		err := h.Get(h.context, types.NamespacedName{Namespace: ns, Name: revision.Name}, exists)
+		if err != nil {
+			return nil, err
+		}
+		if bytes.Equal(exists.Data.Raw, revision.Data.Raw) {
+			return exists, nil
+		} else {
+			// Since the contents of the revision are used to create the hash, the only way this
+			// happens is if the contents of the revision were changed, which is unintended behavior
+			return nil, fmt.Errorf("controller Revision with same name but different content exists")
+		}
+	}
+	if err != nil {
+		return nil, err
+	}
+	// Fetch the controller revision that was just created, in case the revision webhook modified it.
+	created := &appsv1.ControllerRevision{}
+	if err := h.Get(h.context, types.NamespacedName{Namespace: ns, Name: revision.Name}, created); err != nil {
+		return nil, err
+	}
+	return created, err
+}
+
+// DeleteControllerRevision attempts to delete revision. If the returned error is not nil, deletion has failed.
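One property of CreateControllerRevision above deserves a callout: the revision name embeds the content hash, so re-creating identical content is effectively idempotent. A sketch, assuming h, lws, and a revision rev built by NewControllerRevision are in scope:

    first, err := h.CreateControllerRevision(lws, rev)
    if err != nil {
    	return nil, err
    }
    // The second call hits the IsAlreadyExists branch, compares Data.Raw,
    // and returns the stored object instead of failing.
    again, err := h.CreateControllerRevision(lws, rev.DeepCopy())
    if err != nil {
    	return nil, err
    }
    fmt.Println(first.Name == again.Name) // true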
+func (h *History) DeleteControllerRevision(revision *appsv1.ControllerRevision) error { + return h.Delete(h.context, revision) +} + +func DeepHashObject(hasher hash.Hash, objectToWrite interface{}) { + hasher.Reset() + printer := spew.ConfigState{ + Indent: " ", + SortKeys: true, + DisableMethods: true, + SpewKeys: true, + } + _, err := printer.Fprintf(hasher, "%#v", objectToWrite) + if err != nil { + return + } +} + +func GenerateDeleteOwnerRefStrategicMergeBytes(revisionUID types.UID, parentUID types.UID) []byte { + return []byte(fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, revisionUID, parentUID)) +} + func GetLeaderWorkerSetRevisionFromTemplateHash(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) { log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) - controllerHistory := history.NewHistory(ctx, k8sClient) + controllerHistory := History{Client: k8sClient, context: ctx} selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ leaderworkerset.TemplateRevisionHashKey: templateHash, }}) @@ -44,42 +201,36 @@ func GetLeaderWorkerSetRevisionFromTemplateHash(ctx context.Context, k8sClient c } if len(revisions) == 0 { - return nil, fmt.Errorf("could not find LWS revision based on %s", templateHash) + return nil, nil } if len(revisions) > 1 { // Since we only create a controllerRevision when the template hash changes, only one should match - return nil, fmt.Errorf("found more than one revision matching templateHash %s", templateHash) + log.Error(err, "More than one revision exists for the given templateHash") + return revisions[len(revisions)-1], nil } return revisions[0], nil } -func ExistingControllerRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) (bool, error) { - log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) - ctx = ctrl.LoggerInto(ctx, log) - controllerHistory := history.NewHistory(ctx, k8sClient) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ - leaderworkerset.SetNameLabelKey: lws.Name, - }}) - if err != nil { - return false, err - } - revisions, err := controllerHistory.ListControllerRevisions(lws, selector) - if err != nil { - return false, err - } - return len(revisions) > 0, nil -} - -// getPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a +// GetPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a // previous version. If the returned error is nil the patch is valid. The current state that we save is the // leaderWorkerTemplate and NetworkConfig. We can modify this later to encompass more state (or less) and // remain compatible with previously recorded patches. - func GetPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { str := &bytes.Buffer{} clone := lws.DeepCopy() + // When upgrading from an LWS version that doesn't contain NetworkConfig, NetworkConfig will be nil + // until another field in the LWS object is changed triggering the LWS webhook. This allows the revision + // to be the same before and after the LWS webhook actually defaults the value. 
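The comment above states the invariant that the defaulting just below enforces: a nil NetworkConfig and an explicitly Shared one must serialize to byte-identical patches. An illustrative fragment, assuming an lws value in scope:

    a := lws.DeepCopy()
    a.Spec.NetworkConfig = nil // shape seen right after an upgrade, before the webhook runs
    b := lws.DeepCopy()
    shared := leaderworkerset.SubdomainShared
    b.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{SubdomainPolicy: &shared}

    pa, _ := GetPatch(a)
    pb, _ := GetPatch(b)
    fmt.Println(bytes.Equal(pa, pb)) // expected: true, thanks to the defaulting below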
+	if clone.Spec.NetworkConfig == nil {
+		subdomainPolicy := leaderworkerset.SubdomainShared
+		clone.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{
+			SubdomainPolicy: &subdomainPolicy,
+		}
+	}
+
 	if err := unstructured.UnstructuredJSONScheme.Encode(clone, str); err != nil {
 		return nil, err
 	}
@@ -101,36 +252,29 @@ func CreateLeaderWorkerSetRevision(
 	ctx context.Context,
 	k8sClient client.Client,
 	lws *leaderworkerset.LeaderWorkerSet,
-	templateHash string) error {
+	templateHash string) (*appsv1.ControllerRevision, error) {
 	log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws))
 	ctx = ctrl.LoggerInto(ctx, log)
-	controllerHistory := history.NewHistory(ctx, k8sClient)
+	controllerHistory := History{Client: k8sClient, context: ctx}
 	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{
 		leaderworkerset.SetNameLabelKey: lws.Name,
 	}})
 	if err != nil {
-		return err
+		return nil, err
 	}
 	revisions, err := controllerHistory.ListControllerRevisions(lws, selector)
 	if err != nil {
 		log.Error(err, "Listing all controller revisions")
-		return err
+		return nil, err
 	}
 	currentRevision, err := NewRevision(lws, NextRevision(revisions), templateHash)
 	if err != nil {
 		log.Error(err, "Creating new revision for lws")
-		return err
-	}
-
-	_, err = controllerHistory.CreateControllerRevision(lws, currentRevision)
-	log.V(2).Info("Created new controller revision")
-	if err != nil {
-		log.Error(err, "Creating new controller revision for lws")
-		return err
+		return nil, err
 	}
-	return nil
+	return controllerHistory.CreateControllerRevision(lws, currentRevision)
 }
 
 // newRevision creates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet.
@@ -143,14 +287,14 @@ func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, templateH
 		return nil, err
 	}
 
-	return history.NewControllerRevision(lws,
+	return NewControllerRevision(lws,
 		controllerKind,
 		map[string]string{
 			leaderworkerset.TemplateRevisionHashKey: templateHash,
 			leaderworkerset.SetNameLabelKey:         lws.Name,
 		},
 		runtime.RawExtension{Raw: patch},
-		revision)
+		revision), nil
 }
 
 // ApplyRevision returns a new LeaderWorkerSet constructed by restoring the state in revision to set. If the returned error
@@ -194,7 +338,7 @@ func NextRevision(revisions []*appsv1.ControllerRevision) int64 {
 
 // TruncateHistory cleans up all other controller revisions except the currentRevision.
// currentRevision is the one that matches the templateHash that is passed func TruncateHistory(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error { - controllerHistory := history.NewHistory(ctx, k8sClient) + controllerHistory := History{Client: k8sClient, context: ctx} selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, }}) @@ -216,34 +360,9 @@ func TruncateHistory(ctx context.Context, k8sClient client.Client, lws *leaderwo return nil } -func EqualLeaderWorkerTemplates(lhs *leaderworkerset.LeaderWorkerSet, rhs *leaderworkerset.LeaderWorkerSet) bool { - if !reflect.DeepEqual(lhs.Spec.LeaderWorkerTemplate, rhs.Spec.LeaderWorkerTemplate) { - return false - } - if (lhs.Spec.NetworkConfig == nil || string(*lhs.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared)) && (rhs.Spec.NetworkConfig == nil || string(*rhs.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared)) { - return true - } - - if lhs.Spec.NetworkConfig == nil || rhs.Spec.NetworkConfig == nil { - return false - } - - return string(*lhs.Spec.NetworkConfig.SubdomainPolicy) == string(*rhs.Spec.NetworkConfig.SubdomainPolicy) -} - // Sha1Hash accepts an input string and returns the 40 character SHA1 hash digest of the input string. func Sha1Hash(s string) string { h := sha1.New() h.Write([]byte(s)) return hex.EncodeToString(h.Sum(nil)) } - -func LeaderWorkerTemplateHash(lws *leaderworkerset.LeaderWorkerSet) string { - if lws.Spec.NetworkConfig == nil || string(*lws.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared) { - return Sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + - lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String()) - } - - return Sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + - lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String() + string(*lws.Spec.NetworkConfig.SubdomainPolicy)) -} diff --git a/pkg/utils/revision/revision_utils_test.go b/pkg/utils/revision/revision_utils_test.go index 87e9b72f..4076000c 100644 --- a/pkg/utils/revision/revision_utils_test.go +++ b/pkg/utils/revision/revision_utils_test.go @@ -20,15 +20,16 @@ import ( "testing" "github.com/google/go-cmp/cmp" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" - "sigs.k8s.io/lws/pkg/history" - testutils "sigs.k8s.io/lws/test/testutils" ) func TestApplyRevision(t *testing.T) { - lws := testutils.BuildLeaderWorkerSet("default").Obj() - revision, err := NewRevision(lws, 1, LeaderWorkerTemplateHash(lws)) + lws := BuildLeaderWorkerSet("default") + revision, err := NewRevision(lws, 1, "") if err != nil { t.Fatal(err) } @@ -44,12 +45,12 @@ func TestApplyRevision(t *testing.T) { t.Fatal(err) } - restoredRevision, err := NewRevision(restoredLws, 2, LeaderWorkerTemplateHash(restoredLws)) + restoredRevision, err := NewRevision(restoredLws, 2, "") if err != nil { t.Fatal(err) } - if !history.EqualRevision(revision, restoredRevision) { + if !EqualRevision(revision, restoredRevision) { t.Errorf("expected value %v, got %v", revision, restoredRevision) } @@ -61,3 +62,59 @@ func TestApplyRevision(t *testing.T) { t.Errorf("NetworkConfig should be restored %s", diff) } } + +func BuildLeaderWorkerSet(nsName string) *leaderworkerset.LeaderWorkerSet { + lws := leaderworkerset.LeaderWorkerSet{} + lws.Name = 
"test-sample" + lws.Namespace = nsName + lws.Spec = leaderworkerset.LeaderWorkerSetSpec{} + lws.Spec.Replicas = ptr.To[int32](2) + lws.Spec.LeaderWorkerTemplate = leaderworkerset.LeaderWorkerTemplate{RestartPolicy: leaderworkerset.RecreateGroupOnPodRestart} + lws.Spec.LeaderWorkerTemplate.Size = ptr.To[int32](2) + lws.Spec.LeaderWorkerTemplate.LeaderTemplate = &corev1.PodTemplateSpec{} + lws.Spec.LeaderWorkerTemplate.LeaderTemplate.Spec = MakeLeaderPodSpec() + lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec = MakeWorkerPodSpec() + // Manually set this for we didn't enable webhook in controller tests. + lws.Spec.RolloutStrategy = leaderworkerset.RolloutStrategy{ + Type: leaderworkerset.RollingUpdateStrategyType, + RollingUpdateConfiguration: &leaderworkerset.RollingUpdateConfiguration{ + MaxUnavailable: intstr.FromInt32(1), + MaxSurge: intstr.FromInt(0), + }, + } + lws.Spec.StartupPolicy = leaderworkerset.LeaderCreatedStartupPolicy + subdomainPolicy := leaderworkerset.SubdomainShared + lws.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{ + SubdomainPolicy: &subdomainPolicy, + } + + return &lws +} + +func MakeLeaderPodSpec() corev1.PodSpec { + return corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "worker", + Image: "nginx:1.14.2", + }, + }, + } +} + +func MakeWorkerPodSpec() corev1.PodSpec { + return corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "leader", + Image: "nginx:1.14.2", + Ports: []corev1.ContainerPort{ + { + ContainerPort: 8080, + Protocol: "TCP", + }, + }, + }, + }, + } +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 3832a121..42be85bd 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -332,4 +332,31 @@ var _ = ginkgo.Describe("leaderWorkerSet e2e tests", func() { testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") }) + ginkgo.It("Rolling update with restart policy RecreateGroupOnPodRestart only restarts the leader once", func() { + lws = testing.BuildLeaderWorkerSet(ns.Name).Replica(2).Size(2).MaxSurge(1).RestartPolicy(v1.RecreateGroupOnPodRestart).Obj() + testing.MustCreateLws(ctx, k8sClient, lws) + testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready") + + initialLeaderPod := &corev1.Pod{} + testing.GetLeaderPod(ctx, lws, k8sClient, initialLeaderPod) + testing.UpdateWorkerTemplate(ctx, k8sClient, lws) + + // Happens during update + testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 3) + midUpdateLeaderPod := &corev1.Pod{} + testing.GetLeaderPod(ctx, lws, k8sClient, midUpdateLeaderPod) + + gomega.Eventually(func() (bool, error) { + return initialLeaderPod.UID == midUpdateLeaderPod.UID, nil + }, timeout, interval).Should(gomega.Equal(false)) + + testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2) + testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true) + finalLeaderPod := &corev1.Pod{} + testing.GetLeaderPod(ctx, lws, k8sClient, finalLeaderPod) + + gomega.Eventually(func() (bool, error) { + return finalLeaderPod.UID == midUpdateLeaderPod.UID, nil + }, timeout, interval).Should(gomega.Equal(true)) + }) }) diff --git a/test/testutils/util.go b/test/testutils/util.go index c56377e2..95a11b2a 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -16,8 +16,6 @@ package testutils import ( "context" - "crypto/sha1" - "encoding/hex" "errors" "fmt" "strconv" @@ -27,6 +25,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + 
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" @@ -35,8 +34,11 @@ import ( leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" acceleratorutils "sigs.k8s.io/lws/pkg/utils/accelerators" + revisionutils "sigs.k8s.io/lws/pkg/utils/revision" ) +var parentKind = appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet") + func MustCreateLws(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { gomega.Expect(k8sClient.Create(ctx, lws)).Should(gomega.Succeed()) gomega.Eventually(func() error { @@ -55,9 +57,10 @@ func CreateWorkerPodsForLeaderPod(ctx context.Context, leaderPod corev1.Pod, k8s Name: leaderPod.Name + "-" + strconv.Itoa(i), Namespace: leaderPod.Namespace, Labels: map[string]string{ - leaderworkerset.SetNameLabelKey: lws.Name, - "worker.pod": "workers", - leaderworkerset.WorkerIndexLabelKey: strconv.Itoa(i), + leaderworkerset.SetNameLabelKey: lws.Name, + "worker.pod": "workers", + leaderworkerset.WorkerIndexLabelKey: strconv.Itoa(i), + leaderworkerset.TemplateRevisionHashKey: leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey], }, Annotations: map[string]string{ leaderworkerset.SizeAnnotationKey: strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)), @@ -124,6 +127,11 @@ func CreateLeaderPods(ctx context.Context, leaderSts appsv1.StatefulSet, k8sClie } else { podTemplateSpec = *lws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy() } + patch, err := revisionutils.GetPatch(lws) + if err != nil { + return err + } + cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) for i := start; i < end; i++ { pod := corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -134,7 +142,7 @@ func CreateLeaderPods(ctx context.Context, leaderSts appsv1.StatefulSet, k8sClie leaderworkerset.WorkerIndexLabelKey: strconv.Itoa(0), leaderworkerset.GroupIndexLabelKey: strconv.Itoa(i), leaderworkerset.GroupUniqueHashLabelKey: "randomValue", - leaderworkerset.TemplateRevisionHashKey: leaderWorkerTemplateHash(lws), + leaderworkerset.TemplateRevisionHashKey: cr.Labels[leaderworkerset.TemplateRevisionHashKey], }, Annotations: map[string]string{ leaderworkerset.SizeAnnotationKey: strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)), @@ -163,10 +171,14 @@ func ExpectValidPods(ctx context.Context, k8sClient client.Client, lws *leaderwo return err } - hash := leaderWorkerTemplateHash(lws) + patch, err := revisionutils.GetPatch(lws) + if err != nil { + return err + } + cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) labelSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, - leaderworkerset.TemplateRevisionHashKey: hash, + leaderworkerset.TemplateRevisionHashKey: cr.Labels[leaderworkerset.TemplateRevisionHashKey], }) if err := k8sClient.List(ctx, podList, labelSelector, client.InNamespace(lws.Namespace)); err != nil { @@ -195,6 +207,7 @@ func ExpectValidPods(ctx context.Context, k8sClient client.Client, lws *leaderwo return errors.New("container name not right") } } + return nil }, Timeout, Interval).Should(gomega.Succeed()) } @@ -208,6 +221,28 @@ func GetLeaderStatefulset(ctx context.Context, lws *leaderworkerset.LeaderWorker }, Timeout, Interval).Should(gomega.Succeed()) } +func GetLeaderPod(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, k8sClient client.Client, pod *corev1.Pod) { + gomega.Eventually(func() error { + if 
err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, lws); err != nil { + return err + } + leaderReplicaIndex := fmt.Sprintf("-%v", (int(*lws.Spec.Replicas) - 1)) + if err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name + leaderReplicaIndex, Namespace: lws.Namespace}, pod); err != nil { + return err + } + + patch, err := revisionutils.GetPatch(lws) + if err != nil { + return err + } + cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) + if cr.Labels[leaderworkerset.TemplateRevisionHashKey] != pod.Labels[leaderworkerset.TemplateRevisionHashKey] { + return fmt.Errorf("TemplateHash does not match, expected %s, got %s", cr.Labels[leaderworkerset.TemplateRevisionHashKey], pod.Labels[leaderworkerset.TemplateRevisionHashKey]) + } + return nil + }, Timeout, Interval).Should(gomega.Succeed()) +} + func GetStatefulSets(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, k8sClient client.Client, stsl *appsv1.StatefulSetList) { gomega.Eventually(func() (int, error) { if err := k8sClient.List(ctx, stsl, client.InNamespace(lws.Namespace)); err != nil { @@ -250,9 +285,13 @@ func SetLeaderPodToReady(ctx context.Context, k8sClient client.Client, podName s if err := k8sClient.Get(ctx, client.ObjectKey{Namespace: lws.Namespace, Name: lws.Name}, lws); err != nil { return err } - hash := leaderWorkerTemplateHash(lws) + patch, err := revisionutils.GetPatch(lws) + if err != nil { + return err + } + cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) - leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] = hash + leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] = cr.Labels[leaderworkerset.TemplateRevisionHashKey] return k8sClient.Update(ctx, &leaderPod) }, Timeout, Interval).Should(gomega.Succeed()) @@ -554,21 +593,3 @@ func deleteWorkerStatefulSetIfExists(ctx context.Context, k8sClient client.Clien return k8sClient.Delete(ctx, &sts) }, Timeout, Interval).Should(gomega.Succeed()) } - -// sha1Hash accepts an input string and returns the 40 character SHA1 hash digest of the input string. 
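The sha1-based helpers deleted below existed only to avoid an import cycle; with hashing now owned by pkg/utils/revision, every helper in this file derives the expected hash the same way. The recurring pattern, collected in one sketch (lws is assumed in scope; parentKind is the package-level variable added above):

    patch, err := revisionutils.GetPatch(lws)
    if err != nil {
    	return err
    }
    cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1)
    // NewControllerRevision stamps the hash label itself, so tests and
    // controllers can no longer disagree on how the hash is computed.
    expectedHash := cr.Labels[leaderworkerset.TemplateRevisionHashKey]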
-func sha1Hash(s string) string { - h := sha1.New() - h.Write([]byte(s)) - return hex.EncodeToString(h.Sum(nil)) -} - -// added to avoid import cycle between testutils, pkg/history, and pkg/utils/revision -func leaderWorkerTemplateHash(lws *leaderworkerset.LeaderWorkerSet) string { - if lws.Spec.NetworkConfig == nil || string(*lws.Spec.NetworkConfig.SubdomainPolicy) == string(leaderworkerset.SubdomainShared) { - return sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + - lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String()) - } - - return sha1Hash(lws.Spec.LeaderWorkerTemplate.LeaderTemplate.String() + - lws.Spec.LeaderWorkerTemplate.WorkerTemplate.String() + string(*lws.Spec.NetworkConfig.SubdomainPolicy)) -} diff --git a/test/testutils/validators.go b/test/testutils/validators.go index d13b975a..2d92c611 100644 --- a/test/testutils/validators.go +++ b/test/testutils/validators.go @@ -29,10 +29,12 @@ import ( eventsv1 "k8s.io/api/events/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" + revisionutils "sigs.k8s.io/lws/pkg/utils/revision" statefulsetutils "sigs.k8s.io/lws/pkg/utils/statefulset" ) @@ -149,7 +151,12 @@ func ExpectValidLeaderStatefulSet(ctx context.Context, k8sClient client.Client, if sts.Spec.Template.Labels[leaderworkerset.SetNameLabelKey] == "" { return fmt.Errorf("leader statefulset pod template misses leaderworkerset label") } - hash := leaderWorkerTemplateHash(&lws) + patch, err := revisionutils.GetPatch(&lws) + if err != nil { + return err + } + cr := revisionutils.NewControllerRevision(&lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) + hash := cr.Labels[leaderworkerset.TemplateRevisionHashKey] if sts.Labels[leaderworkerset.TemplateRevisionHashKey] != hash { return fmt.Errorf("mismatch template revision hash for leader statefulset, got: %s, want: %s", sts.Spec.Template.Labels[leaderworkerset.TemplateRevisionHashKey], hash) } @@ -181,7 +188,7 @@ func ExpectValidLeaderStatefulSet(ctx context.Context, k8sClient client.Client, if diff := cmp.Diff(sts.Spec.Template.Labels, map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.WorkerIndexLabelKey: "0", - leaderworkerset.TemplateRevisionHashKey: leaderWorkerTemplateHash(&lws), + leaderworkerset.TemplateRevisionHashKey: hash, }); diff != "" { return errors.New("leader StatefulSet pod template doesn't have the correct labels: " + diff) } @@ -270,7 +277,12 @@ func ExpectValidWorkerStatefulSets(ctx context.Context, leaderWorkerSet *leaderw if lws.Annotations[leaderworkerset.ExclusiveKeyAnnotationKey] != sts.Spec.Template.Annotations[leaderworkerset.ExclusiveKeyAnnotationKey] { return fmt.Errorf("mismatch exclusive placement annotation between worker statefulset and leaderworkerset") } - hash := leaderWorkerTemplateHash(&lws) + patch, err := revisionutils.GetPatch(&lws) + if err != nil { + return err + } + cr := revisionutils.NewControllerRevision(&lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) + hash := cr.Labels[leaderworkerset.TemplateRevisionHashKey] if sts.Labels[leaderworkerset.TemplateRevisionHashKey] != hash { return fmt.Errorf("mismatch template revision hash for worker statefulset, got: %s, want: %s", sts.Labels[leaderworkerset.TemplateRevisionHashKey], hash) } diff --git a/test/testutils/wrappers.go 
b/test/testutils/wrappers.go
index 47635dd5..f10852d7 100644
--- a/test/testutils/wrappers.go
+++ b/test/testutils/wrappers.go
@@ -289,26 +289,3 @@ func MakeLeaderPodSpecWithTPUResource() corev1.PodSpec {
 		Subdomain: "default",
 	}
 }
-
-/*
-func RawLWSTemplate(lws *leaderworkerset.LeaderWorkerSet) runtime.RawExtension {
-	clone := lws.DeepCopy()
-	str := &bytes.Buffer{}
-	err := unstructured.UnstructuredJSONScheme.Encode(clone, str)
-	if err != nil {
-		panic(err)
-	}
-	var raw map[string]interface{}
-	err = json.Unmarshal(str.Bytes(), &raw)
-	objCopy := make(map[string]interface{})
-	spec := raw["spec"].(map[string]interface{})
-	specCopy := spec
-	specCopy["$patch"] = "replace"
-	objCopy["spec"] = spec
-	patch, err := json.Marshal(objCopy)
-	if err != nil {
-		panic(err)
-	}
-	return runtime.RawExtension{Raw: patch}
-}
-*/

From 7a19d8581c2ad5e4cb28bc67d478028428c38600 Mon Sep 17 00:00:00 2001
From: Edwinhr716
Date: Fri, 27 Dec 2024 17:40:16 +0000
Subject: [PATCH 16/27] addressed third round of comments

---
 pkg/controllers/leaderworkerset_controller.go |  47 ++---
 .../leaderworkerset_controller_test.go        |  12 +-
 pkg/controllers/pod_controller.go             |   2 +-
 pkg/controllers/pod_controller_test.go        |  13 +-
 pkg/utils/revision/revision_utils.go          | 187 ++++++------------
 pkg/utils/revision/revision_utils_test.go     |   8 +-
 pkg/webhooks/pod_webhook.go                   |   4 +-
 test/testutils/util.go                        |  16 +-
 test/testutils/validators.go                  |   7 +-
 9 files changed, 113 insertions(+), 183 deletions(-)

diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go
index f74fa40a..c27dd5b7 100644
--- a/pkg/controllers/leaderworkerset_controller.go
+++ b/pkg/controllers/leaderworkerset_controller.go
@@ -101,6 +101,7 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 	ctx = ctrl.LoggerInto(ctx, log)
 	leaderSts, err := r.getLeaderStatefulSet(ctx, lws)
+	log.V(2).Info(fmt.Sprintf("leader sts %v", leaderSts))
 	if err != nil {
 		log.Error(err, "Fetching leader statefulset")
 		return ctrl.Result{}, err
 	}
@@ -112,7 +113,15 @@
 
-	lwsUpdated, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws, revision)
+	updatedRevision, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws, revision)
 	if err != nil {
 		log.Error(err, "Validating if LWS has been updated")
 		return ctrl.Result{}, err
 	}
+	lwsUpdated := updatedRevision != nil
+	if lwsUpdated {
+		revision, err = revisionutils.CreateRevision(ctx, r.Client, lws, updatedRevision.Labels[leaderworkerset.TemplateRevisionHashKey])
+		if err != nil {
+			log.Error(err, "Creating revisions for updated lws")
+			return ctrl.Result{}, err
+		}
+	}
 
@@ -405,7 +414,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l
 		conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetUpgradeInProgress))
 	} else if updatedAndReadyCount == int(*lws.Spec.Replicas) {
 		conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetAvailable))
-		if err := revisionutils.TruncateHistory(ctx, r.Client, lws, templateHash); err != nil {
+		if err := revisionutils.TruncateRevisions(ctx, r.Client, lws, templateHash); err != nil {
 			return false, err
 		}
 	} else {
@@ -553,41 +562,35 @@ func (r *LeaderWorkerSetReconciler) getLeaderStatefulSet(ctx context.Context, lw
 		}
 		return nil, err
 	}
-
 	return sts, nil
 }
 
 func (r *LeaderWorkerSetReconciler) createControllerRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws
*leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { - if sts == nil { - return revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, "") - } - - stsRevision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) - if err != nil { - return nil, err + templateHash := "" + if sts != nil && sts.Labels != nil { + templateHash = sts.Labels[leaderworkerset.TemplateRevisionHashKey] } - - if stsRevision == nil { - return revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) + if stsRevision, err := revisionutils.GetRevision(ctx, r.Client, lws, templateHash); sts != nil || err != nil { + return stsRevision, err } - - return revisionutils.CreateLeaderWorkerSetRevision(ctx, r.Client, lws, "") + return revisionutils.CreateRevision(ctx, r.Client, lws, templateHash) } -func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (bool, error) { +func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { if sts == nil { - return false, nil + return nil, nil } - stsRevision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, lws, sts.Labels[leaderworkerset.TemplateRevisionHashKey]) + currentRevision, err := revisionutils.NewRevision(ctx, r.Client, lws, "") if err != nil { - return false, err + return nil, err } - if stsRevision == nil { - return false, fmt.Errorf("did not find a revision for the existing leader sts") + + if !revisionutils.EqualRevision(currentRevision, revision) { + return currentRevision, nil } - return !revisionutils.EqualRevision(stsRevision, revision), nil + return nil, nil } // constructLeaderStatefulSetApplyConfiguration constructs the applied configuration for the leader StatefulSet diff --git a/pkg/controllers/leaderworkerset_controller_test.go b/pkg/controllers/leaderworkerset_controller_test.go index 917f99f7..b81cdc5d 100644 --- a/pkg/controllers/leaderworkerset_controller_test.go +++ b/pkg/controllers/leaderworkerset_controller_test.go @@ -17,13 +17,13 @@ limitations under the License. package controllers import ( + "context" "testing" "github.com/google/go-cmp/cmp" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" appsapplyv1 "k8s.io/client-go/applyconfigurations/apps/v1" coreapplyv1 "k8s.io/client-go/applyconfigurations/core/v1" @@ -32,28 +32,28 @@ import ( leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" revisionutils "sigs.k8s.io/lws/pkg/utils/revision" testutils "sigs.k8s.io/lws/test/testutils" ) func TestLeaderStatefulSetApplyConfig(t *testing.T) { - parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet") + client := fake.NewClientBuilder().Build() lws1 := testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). LeaderTemplateSpec(testutils.MakeLeaderPodSpec()). 
WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj() - patch, err := revisionutils.GetPatch(lws1) + cr1, err := revisionutils.NewRevision(context.TODO(), client, lws1, "") if err != nil { t.Fatal(err) } - cr1 := revisionutils.NewControllerRevision(lws1, parentKind, lws1.Labels, runtime.RawExtension{Raw: patch}, 1) hash1 := cr1.Labels[leaderworkerset.TemplateRevisionHashKey] + lws2 := testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj() - patch, err = revisionutils.GetPatch(lws2) + cr2, err := revisionutils.NewRevision(context.TODO(), client, lws2, "") if err != nil { t.Fatal(err) } - cr2 := revisionutils.NewControllerRevision(lws2, parentKind, lws2.Labels, runtime.RawExtension{Raw: patch}, 1) hash2 := cr2.Labels[leaderworkerset.TemplateRevisionHashKey] tests := []struct { diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go index 528e57bb..bff53cc9 100644 --- a/pkg/controllers/pod_controller.go +++ b/pkg/controllers/pod_controller.go @@ -119,7 +119,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R log.V(2).Info("defer the creation of the worker statefulset because leader pod is not ready.") return ctrl.Result{}, nil } - revision, err := revisionutils.GetLeaderWorkerSetRevisionFromTemplateHash(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.TemplateRevisionHashKey]) + revision, err := revisionutils.GetRevision(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.TemplateRevisionHashKey]) if err != nil { log.Error(err, "Getting lws revisions") return ctrl.Result{}, err diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index c0eb3742..a066319d 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -17,37 +17,38 @@ limitations under the License. 
 package controllers
 
 import (
+	"context"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/runtime"
 	appsapplyv1 "k8s.io/client-go/applyconfigurations/apps/v1"
 	coreapplyv1 "k8s.io/client-go/applyconfigurations/core/v1"
 	metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1"
 	"k8s.io/utils/ptr"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 	leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1"
 	revisionutils "sigs.k8s.io/lws/pkg/utils/revision"
 	testutils "sigs.k8s.io/lws/test/testutils"
 )
 
 func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) {
-	parentKind := appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet")
+	client := fake.NewClientBuilder().Build()
+
 	lws := testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Replica(1).WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Size(1).Obj()
-	patch, err := revisionutils.GetPatch(lws)
+	updateRevision, err := revisionutils.NewRevision(context.TODO(), client, lws, "")
 	if err != nil {
 		t.Fatal(err)
 	}
-	updateRevision := revisionutils.NewControllerRevision(lws, parentKind, lws.Labels, runtime.RawExtension{Raw: patch}, 1)
 	updateTemplateHash := updateRevision.Labels[leaderworkerset.TemplateRevisionHashKey]
+
 	lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker"
-	patch, err = revisionutils.GetPatch(lws)
+	currentRevision, err := revisionutils.NewRevision(context.TODO(), client, lws, "")
 	if err != nil {
 		t.Fatal(err)
 	}
-	currentRevision := revisionutils.NewControllerRevision(lws, parentKind, lws.Labels, runtime.RawExtension{Raw: patch}, 2)
 	currentTemplateHash := currentRevision.Labels[leaderworkerset.TemplateRevisionHashKey]
 
 	tests := []struct {
 		name string
diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go
index d7e9c998..db69c749 100644
--- a/pkg/utils/revision/revision_utils.go
+++ b/pkg/utils/revision/revision_utils.go
@@ -3,8 +3,6 @@ import (
 	"bytes"
 	"context"
-	"crypto/sha1"
-	"encoding/hex"
 	"encoding/json"
 	"fmt"
 	"hash"
@@ -13,12 +11,10 @@ import (
 	"github.com/davecgh/go-spew/spew"
 	appsv1 "k8s.io/api/apps/v1"
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
-	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/rand"
 	"k8s.io/apimachinery/pkg/util/strategicpatch"
@@ -32,52 +28,21 @@ import (
 // Functions in this package are adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/statefulset/ and
 // https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/history/controller_history.go
 
-// controllerKind contains the schema.GroupVersionKind for this controller type.
-var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet")
-
-// ControllerRevisionName returns the Name for a ControllerRevision in the form prefix-hash. If the length
+// RevisionName returns the Name for a ControllerRevision in the form prefix-hash-revisionnumber. If the length
+// of prefix is greater than 223 bytes, it is truncated to allow for a name that is no larger than 253 bytes.
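Concretely, the revision-number suffix implemented just below keeps names unique even when two revisions hash to the same value; with invented values:

    fmt.Println(RevisionName("mylws", "58d4c7b6f5", 1)) // "mylws-58d4c7b6f5-1"
    fmt.Println(RevisionName("mylws", "58d4c7b6f5", 2)) // "mylws-58d4c7b6f5-2"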
-func ControllerRevisionName(prefix string, hash string) string { +// revision-number allows us to avoid collisions if the created prefix-hash already exists in the history, since revision +// will be unique. +func RevisionName(prefix string, hash string, revisionNumber int64) string { if len(prefix) > 223 { prefix = prefix[:223] } - return fmt.Sprintf("%s-%s", prefix, hash) -} - -// NewControllerRevision returns a ControllerRevision with a ControllerRef pointing to parent and indicating that -// parent is of parentKind. The ControllerRevision has labels matching template labels, contains Data equal to data, and -// has a Revision equal to revision. If the returned error is nil, the returned ControllerRevision is valid. If the -// returned error is not nil, the returned ControllerRevision is invalid for use. -func NewControllerRevision(parent metav1.Object, - parentKind schema.GroupVersionKind, - templateLabels map[string]string, - data runtime.RawExtension, - revision int64) *appsv1.ControllerRevision { - labelMap := make(map[string]string) - for k, v := range templateLabels { - labelMap[k] = v - } - cr := &appsv1.ControllerRevision{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labelMap, - OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(parent, parentKind)}, - Namespace: parent.GetNamespace(), - }, - Data: data, - Revision: revision, - } - hash := HashControllerRevision(cr) - cr.Name = ControllerRevisionName(parent.GetName(), hash) - if cr.Labels[leaderworkerset.TemplateRevisionHashKey] == "" { - cr.Labels[leaderworkerset.TemplateRevisionHashKey] = hash - } - return cr + return fmt.Sprintf("%s-%s-%v", prefix, hash, revisionNumber) } // HashControllerRevision hashes the contents of revision's Data using FNV hashing. // The returned hash will be a safe encoded string to avoid bad words. -func HashControllerRevision(revision *appsv1.ControllerRevision) string { +func HashRevision(revision *appsv1.ControllerRevision) string { hf := fnv.New32() if len(revision.Data.Raw) > 0 { hf.Write(revision.Data.Raw) @@ -103,18 +68,13 @@ func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevisio return bytes.Equal(lhs.Data.Raw, rhs.Data.Raw) && apiequality.Semantic.DeepEqual(lhs.Data.Object, rhs.Data.Object) } -type History struct { - client.Client - context context.Context -} - // ListControllerRevisions lists all ControllerRevisions matching selector and owned by parent or no other // controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the // returned error is not nil, the returned slice is not valid. -func (h *History) ListControllerRevisions(parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { +func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { // List all revisions in the namespace that match the selector revisionList := new(appsv1.ControllerRevisionList) - err := h.List(h.context, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) + err := k8sClient.List(ctx, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) if err != nil { return nil, err } @@ -130,42 +90,6 @@ func (h *History) ListControllerRevisions(parent metav1.Object, selector labels. return owned, err } -// CreateControllerRevision attempts to create the revision as owned by parent via a ControllerRef. 
If the returned
-// error is not nil, creation failed. If the returned error is nil, the returned ControllerRevision has been
-// created.
-func (h *History) CreateControllerRevision(parent metav1.Object, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) {
-	ns := parent.GetNamespace()
-	err := h.Create(h.context, revision)
-	if errors.IsAlreadyExists(err) {
-		exists := &appsv1.ControllerRevision{}
-		err := h.Get(h.context, types.NamespacedName{Namespace: ns, Name: revision.Name}, exists)
-		if err != nil {
-			return nil, err
-		}
-		if bytes.Equal(exists.Data.Raw, revision.Data.Raw) {
-			return exists, nil
-		} else {
-			// Since the contents of the revision are used to create the hash, the only way this
-			// happens is if the contents of the revision were changed, which is unintended behavior
-			return nil, fmt.Errorf("controller Revision with same name but different content exists")
-		}
-	}
-	if err != nil {
-		return nil, err
-	}
-	// Fetched the controller revision that was created, in case the revision webhook modified it.
-	created := &appsv1.ControllerRevision{}
-	if err := h.Get(h.context, types.NamespacedName{Namespace: ns, Name: revision.Name}, created); err != nil {
-		return nil, err
-	}
-	return created, err
-}
-
-// DeleteControllerRevision attempts to delete revision. If the returned error is not nil, deletion has failed.
-func (h *History) DeleteControllerRevision(revision *appsv1.ControllerRevision) error {
-	return h.Delete(h.context, revision)
-}
-
 func DeepHashObject(hasher hash.Hash, objectToWrite interface{}) {
 	hasher.Reset()
 	printer := spew.ConfigState{
 		Indent:         " ",
 		SortKeys:       true,
 		DisableMethods: true,
 		SpewKeys:       true,
 	}
 	_, err := printer.Fprintf(hasher, "%#v", objectToWrite)
 	if err != nil {
 		return
 	}
 }
 
-func GenerateDeleteOwnerRefStrategicMergeBytes(revisionUID types.UID, parentUID types.UID) []byte {
-	return []byte(fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, revisionUID, parentUID))
-}
-
-func GetLeaderWorkerSetRevisionFromTemplateHash(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) {
+func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) {
 	log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws))
 	ctx = ctrl.LoggerInto(ctx, log)
-	controllerHistory := History{Client: k8sClient, context: ctx}
+	if templateHash == "" {
+		return nil, nil
+	}
 	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{
 		leaderworkerset.TemplateRevisionHashKey: templateHash,
 	}})
 	if err != nil {
 		return nil, err
 	}
-	revisions, err := controllerHistory.ListControllerRevisions(lws, selector)
+	revisions, err := ListRevisions(ctx, k8sClient, lws, selector)
 	if err != nil {
 		log.Error(err, "Listing all controller revisions")
 		return nil, err
 	}
@@ -206,7 +128,7 @@ func GetLeaderWorkerSetRevisionFromTemplateHash(ctx context.Context, k8sClient c
 	if len(revisions) > 1 {
 		// Since we only create a controllerRevision when the template hash changes, only one should match
-		log.Error(err, "More than one revision exists for the given templateHash")
+		log.Info("More than one revision exists for the given template hash; returning the latest revision")
 		return revisions[len(revisions)-1], nil
 	}
@@ -248,53 +170,72 @@ func GetPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) {
 	return json.Marshal(objCopy)
 }
 
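A condensed, self-contained sketch of the lookup pattern GetRevision implements above: build a selector from the template-hash label, list the matching ControllerRevisions, and fall back to the newest entry when more than one matches. The label key is written out inline, findRevisionByHash is an illustrative name rather than part of the package, and the owner filtering done by ListRevisions is omitted for brevity.

package sketch

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// findRevisionByHash lists ControllerRevisions carrying the given template-hash
// label and returns the last one listed, or nil when none match.
func findRevisionByHash(ctx context.Context, c client.Client, ns, templateHash string) (*appsv1.ControllerRevision, error) {
	if templateHash == "" {
		return nil, nil
	}
	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{
		"leaderworkerset.sigs.k8s.io/template-revision-hash": templateHash,
	}})
	if err != nil {
		return nil, err
	}
	revisions := &appsv1.ControllerRevisionList{}
	if err := c.List(ctx, revisions, client.InNamespace(ns), client.MatchingLabelsSelector{Selector: selector}); err != nil {
		return nil, err
	}
	if len(revisions.Items) == 0 {
		return nil, nil
	}
	// Revisions are only written when the hash changes, so more than one match
	// is an anomaly; prefer the newest entry, mirroring the fallback above.
	return &revisions.Items[len(revisions.Items)-1], nil
}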
-func CreateLeaderWorkerSetRevision(
+func CreateRevision(
 	ctx context.Context,
 	k8sClient client.Client,
 	lws *leaderworkerset.LeaderWorkerSet,
 	templateHash string) (*appsv1.ControllerRevision, error) {
 	log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws))
 	ctx = ctrl.LoggerInto(ctx, log)
-	controllerHistory := History{Client: k8sClient, context: ctx}
-	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{
-		leaderworkerset.SetNameLabelKey: lws.Name,
-	}})
+
+	revision, err := NewRevision(ctx, k8sClient, lws, templateHash)
 	if err != nil {
 		return nil, err
 	}
-	revisions, err := controllerHistory.ListControllerRevisions(lws, selector)
-	if err != nil {
-		log.Error(err, "Listing all controller revisions")
+	if err := k8sClient.Create(ctx, revision); err != nil {
+		log.Error(err, "Creating new revision for lws")
 		return nil, err
 	}
-
-	currentRevision, err := NewRevision(lws, NextRevision(revisions), templateHash)
-	if err != nil {
-		log.Error(err, "Creating new revision for lws")
+	created := &appsv1.ControllerRevision{}
+	if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: revision.Name}, created); err != nil {
 		return nil, err
 	}
-
-	return controllerHistory.CreateControllerRevision(lws, currentRevision)
+	return created, nil
 }
 
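For reference, a standalone sketch of how NewRevision (below) derives a ControllerRevision name from a prefix, a content hash, and a revision number. The helpers mirror RevisionName and HashRevision but are local to the example, and every value here is invented for illustration.

package main

import (
	"fmt"
	"hash/fnv"

	"k8s.io/apimachinery/pkg/util/rand"
)

// exampleRevisionName mirrors RevisionName: truncate an overlong prefix, then
// append the content hash and the revision number.
func exampleRevisionName(prefix, hash string, revision int64) string {
	if len(prefix) > 223 {
		prefix = prefix[:223]
	}
	return fmt.Sprintf("%s-%s-%v", prefix, hash, revision)
}

func main() {
	// Hash some revision data the way HashRevision does: FNV-32 over Data.Raw,
	// then safe-encoded to avoid unfortunate strings.
	hf := fnv.New32()
	hf.Write([]byte(`{"spec":{"leaderWorkerTemplate":{"$patch":"replace"}}}`))
	hash := rand.SafeEncodeString(fmt.Sprint(hf.Sum32()))

	// Identical data produces the same hash, but the unique revision number
	// keeps the object names distinct.
	fmt.Println(exampleRevisionName("my-lws", hash, 1))
	fmt.Println(exampleRevisionName("my-lws", hash, 2))
}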
-// newRevision creates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet.
+// NewRevision instantiates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet.
 // The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned
 // ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set
 // to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet.
-func NewRevision(lws *leaderworkerset.LeaderWorkerSet, revision int64, templateHash string) (*appsv1.ControllerRevision, error) {
+func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) {
+	var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet")
+	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{
+		leaderworkerset.SetNameLabelKey: lws.Name,
+	}})
+	if err != nil {
+		return nil, err
+	}
+	revisions, err := ListRevisions(ctx, k8sClient, lws, selector)
+	revision := NextRevision(revisions)
+	if err != nil {
+		return nil, err
+	}
 	patch, err := GetPatch(lws)
 	if err != nil {
 		return nil, err
 	}
-	return NewControllerRevision(lws,
-		controllerKind,
-		map[string]string{
-			leaderworkerset.TemplateRevisionHashKey: templateHash,
-			leaderworkerset.SetNameLabelKey:         lws.Name,
+	templateLabels := map[string]string{
+		leaderworkerset.TemplateRevisionHashKey: templateHash,
+		leaderworkerset.SetNameLabelKey:         lws.Name,
+	}
+
+	cr := &appsv1.ControllerRevision{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels:          templateLabels,
+			OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(lws, controllerKind)},
+			Namespace:       lws.Namespace,
 		},
-		runtime.RawExtension{Raw: patch},
-		revision), nil
+		Data:     runtime.RawExtension{Raw: patch},
+		Revision: revision,
+	}
+
+	hash := HashRevision(cr)
+	cr.Name = RevisionName(lws.Name, hash, revision)
+	if cr.Labels[leaderworkerset.TemplateRevisionHashKey] == "" {
+		cr.Labels[leaderworkerset.TemplateRevisionHashKey] = hash
+	}
+	return cr, nil
 }
 
 // ApplyRevision returns a new LeaderWorkerSet constructed by restoring the state in revision to set. If the returned error
@@ -335,34 +276,26 @@ func NextRevision(revisions []*appsv1.ControllerRevision) int64 {
 	return max + 1
 }
 
-// TruncateHistory cleans up all other controller revisions except the currentRevision.
+// TruncateRevisions cleans up all other controller revisions except the currentRevision.
 // currentRevision is the one that matches the templateHash that is passed
-func TruncateHistory(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error {
-	controllerHistory := History{Client: k8sClient, context: ctx}
+func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error {
 	selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{
 		leaderworkerset.SetNameLabelKey: lws.Name,
 	}})
 	if err != nil {
 		return err
 	}
-	revisions, err := controllerHistory.ListControllerRevisions(lws, selector)
+	revisions, err := ListRevisions(ctx, k8sClient, lws, selector)
 	if err != nil {
 		return err
 	}
 
 	for i, revision := range revisions {
 		if revision.Labels[leaderworkerset.TemplateRevisionHashKey] != templateHash {
-			if err := controllerHistory.DeleteControllerRevision(revisions[i]); err != nil {
+			if err := k8sClient.Delete(ctx, revisions[i]); err != nil {
 				return err
 			}
 		}
 	}
 	return nil
 }
-
-// Sha1Hash accepts an input string and returns the 40 character SHA1 hash digest of the input string.
-func Sha1Hash(s string) string { - h := sha1.New() - h.Write([]byte(s)) - return hex.EncodeToString(h.Sum(nil)) -} diff --git a/pkg/utils/revision/revision_utils_test.go b/pkg/utils/revision/revision_utils_test.go index 4076000c..8a969436 100644 --- a/pkg/utils/revision/revision_utils_test.go +++ b/pkg/utils/revision/revision_utils_test.go @@ -17,19 +17,23 @@ limitations under the License. package revision import ( + "context" "testing" "github.com/google/go-cmp/cmp" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client/fake" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" ) func TestApplyRevision(t *testing.T) { + client := fake.NewClientBuilder().Build() + lws := BuildLeaderWorkerSet("default") - revision, err := NewRevision(lws, 1, "") + revision, err := NewRevision(context.TODO(), client, lws, "") if err != nil { t.Fatal(err) } @@ -45,7 +49,7 @@ func TestApplyRevision(t *testing.T) { t.Fatal(err) } - restoredRevision, err := NewRevision(restoredLws, 2, "") + restoredRevision, err := NewRevision(context.TODO(), client, restoredLws, "") if err != nil { t.Fatal(err) } diff --git a/pkg/webhooks/pod_webhook.go b/pkg/webhooks/pod_webhook.go index 2a788808..adbafa82 100644 --- a/pkg/webhooks/pod_webhook.go +++ b/pkg/webhooks/pod_webhook.go @@ -28,9 +28,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1" + "sigs.k8s.io/lws/pkg/utils" acceleratorutils "sigs.k8s.io/lws/pkg/utils/accelerators" podutils "sigs.k8s.io/lws/pkg/utils/pod" - revisionutils "sigs.k8s.io/lws/pkg/utils/revision" statefulsetutils "sigs.k8s.io/lws/pkg/utils/statefulset" ) @@ -174,7 +174,7 @@ func (p *PodWebhook) Default(ctx context.Context, obj runtime.Object) error { } func genGroupUniqueKey(ns string, podName string) string { - return revisionutils.Sha1Hash(fmt.Sprintf("%s/%s", ns, podName)) + return utils.Sha1Hash(fmt.Sprintf("%s/%s", ns, podName)) } // SetExclusiveAffinities set the pod affinity/anti-affinity diff --git a/test/testutils/util.go b/test/testutils/util.go index 95a11b2a..1e10aa7b 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -25,7 +25,6 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" @@ -37,8 +36,6 @@ import ( revisionutils "sigs.k8s.io/lws/pkg/utils/revision" ) -var parentKind = appsv1.SchemeGroupVersion.WithKind("LeaderWorkerSet") - func MustCreateLws(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) { gomega.Expect(k8sClient.Create(ctx, lws)).Should(gomega.Succeed()) gomega.Eventually(func() error { @@ -127,11 +124,10 @@ func CreateLeaderPods(ctx context.Context, leaderSts appsv1.StatefulSet, k8sClie } else { podTemplateSpec = *lws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy() } - patch, err := revisionutils.GetPatch(lws) + cr, err := revisionutils.NewRevision(ctx, k8sClient, lws, "") if err != nil { return err } - cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) for i := start; i < end; i++ { pod := corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -170,12 +166,10 @@ func ExpectValidPods(ctx context.Context, k8sClient client.Client, lws *leaderwo if err := k8sClient.Get(ctx, types.NamespacedName{Name: 
lws.Name, Namespace: lws.Namespace}, lws); err != nil { return err } - - patch, err := revisionutils.GetPatch(lws) + cr, err := revisionutils.NewRevision(ctx, k8sClient, lws, "") if err != nil { return err } - cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) labelSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.TemplateRevisionHashKey: cr.Labels[leaderworkerset.TemplateRevisionHashKey], @@ -231,11 +225,10 @@ func GetLeaderPod(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, k8s return err } - patch, err := revisionutils.GetPatch(lws) + cr, err := revisionutils.NewRevision(ctx, k8sClient, lws, "") if err != nil { return err } - cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) if cr.Labels[leaderworkerset.TemplateRevisionHashKey] != pod.Labels[leaderworkerset.TemplateRevisionHashKey] { return fmt.Errorf("TemplateHash does not match, expected %s, got %s", cr.Labels[leaderworkerset.TemplateRevisionHashKey], pod.Labels[leaderworkerset.TemplateRevisionHashKey]) } @@ -285,11 +278,10 @@ func SetLeaderPodToReady(ctx context.Context, k8sClient client.Client, podName s if err := k8sClient.Get(ctx, client.ObjectKey{Namespace: lws.Namespace, Name: lws.Name}, lws); err != nil { return err } - patch, err := revisionutils.GetPatch(lws) + cr, err := revisionutils.NewRevision(ctx, k8sClient, lws, "") if err != nil { return err } - cr := revisionutils.NewControllerRevision(lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] = cr.Labels[leaderworkerset.TemplateRevisionHashKey] return k8sClient.Update(ctx, &leaderPod) diff --git a/test/testutils/validators.go b/test/testutils/validators.go index 2d92c611..26f24c4f 100644 --- a/test/testutils/validators.go +++ b/test/testutils/validators.go @@ -29,7 +29,6 @@ import ( eventsv1 "k8s.io/api/events/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -151,11 +150,10 @@ func ExpectValidLeaderStatefulSet(ctx context.Context, k8sClient client.Client, if sts.Spec.Template.Labels[leaderworkerset.SetNameLabelKey] == "" { return fmt.Errorf("leader statefulset pod template misses leaderworkerset label") } - patch, err := revisionutils.GetPatch(&lws) + cr, err := revisionutils.NewRevision(ctx, k8sClient, &lws, "") if err != nil { return err } - cr := revisionutils.NewControllerRevision(&lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) hash := cr.Labels[leaderworkerset.TemplateRevisionHashKey] if sts.Labels[leaderworkerset.TemplateRevisionHashKey] != hash { return fmt.Errorf("mismatch template revision hash for leader statefulset, got: %s, want: %s", sts.Spec.Template.Labels[leaderworkerset.TemplateRevisionHashKey], hash) @@ -277,11 +275,10 @@ func ExpectValidWorkerStatefulSets(ctx context.Context, leaderWorkerSet *leaderw if lws.Annotations[leaderworkerset.ExclusiveKeyAnnotationKey] != sts.Spec.Template.Annotations[leaderworkerset.ExclusiveKeyAnnotationKey] { return fmt.Errorf("mismatch exclusive placement annotation between worker statefulset and leaderworkerset") } - patch, err := revisionutils.GetPatch(&lws) + cr, err := revisionutils.NewRevision(ctx, k8sClient, &lws, "") if err != nil { 
return err } - cr := revisionutils.NewControllerRevision(&lws, parentKind, make(map[string]string), runtime.RawExtension{Raw: patch}, 1) hash := cr.Labels[leaderworkerset.TemplateRevisionHashKey] if sts.Labels[leaderworkerset.TemplateRevisionHashKey] != hash { return fmt.Errorf("mismatch template revision hash for worker statefulset, got: %s, want: %s", sts.Labels[leaderworkerset.TemplateRevisionHashKey], hash) From 8b66d18469d031e0022323cb54af0a5382ba0338 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Fri, 27 Dec 2024 17:42:43 +0000 Subject: [PATCH 17/27] removed blank space --- pkg/utils/revision/revision_utils_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/utils/revision/revision_utils_test.go b/pkg/utils/revision/revision_utils_test.go index 8a969436..92448e47 100644 --- a/pkg/utils/revision/revision_utils_test.go +++ b/pkg/utils/revision/revision_utils_test.go @@ -29,7 +29,6 @@ import ( ) func TestApplyRevision(t *testing.T) { - client := fake.NewClientBuilder().Build() lws := BuildLeaderWorkerSet("default") From 4d2ec479243acaccb33ebf9d8f49f1d25520488d Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Fri, 27 Dec 2024 21:24:01 +0000 Subject: [PATCH 18/27] addressed comments, round 4 --- .../v1/leaderworkerset_types.go | 6 +- pkg/controllers/leaderworkerset_controller.go | 93 +++---- .../leaderworkerset_controller_test.go | 4 +- pkg/controllers/pod_controller.go | 7 +- pkg/controllers/pod_controller_test.go | 28 +-- pkg/utils/revision/revision_utils.go | 226 +++++++++--------- pkg/utils/revision/revision_utils_test.go | 11 + test/e2e/e2e_test.go | 4 +- test/testutils/util.go | 20 +- test/testutils/validators.go | 18 +- 10 files changed, 219 insertions(+), 198 deletions(-) diff --git a/api/leaderworkerset/v1/leaderworkerset_types.go b/api/leaderworkerset/v1/leaderworkerset_types.go index cfbae2c3..180b5c48 100644 --- a/api/leaderworkerset/v1/leaderworkerset_types.go +++ b/api/leaderworkerset/v1/leaderworkerset_types.go @@ -58,10 +58,8 @@ const ( // Worker pods will have an annotation that is the leader pod's name. LeaderPodNameAnnotationKey string = "leaderworkerset.sigs.k8s.io/leader-name" - // SHAed leaderWorkerTemplate value for version tracking. - // This will be applied to all API objects including: - // leaderStatefulset, leaderPods, workerStatefulsets, workerPods. - TemplateRevisionHashKey string = "leaderworkerset.sigs.k8s.io/template-revision-hash" + // Hash to track the controller revision that matches an LWS object + RevisionKey string = "leaderworkerset.sigs.k8s.io/template-revision-hash" // Environment variable added to all containers in the LeaderWorkerSet to // address the leader via the headless service. 
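The renamed RevisionKey is read back through the GetRevisionKey accessor this patch adds to pkg/utils/revision (shown further down). A minimal sketch of that read path, with the label constant inlined for illustration:

package sketch

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// revisionKey inlines leaderworkerset.RevisionKey for the example.
const revisionKey = "leaderworkerset.sigs.k8s.io/template-revision-hash"

// getRevisionKey mirrors GetRevisionKey: any labeled API object (leader pod,
// worker StatefulSet, ControllerRevision) reports the revision it was built
// from, or "" when the label is missing.
func getRevisionKey(obj metav1.Object) string {
	if labels := obj.GetLabels(); labels != nil {
		return labels[revisionKey]
	}
	return ""
}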
diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index c27dd5b7..37f16d95 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -101,39 +101,41 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ ctx = ctrl.LoggerInto(ctx, log) leaderSts, err := r.getLeaderStatefulSet(ctx, lws) - log.V(2).Info(fmt.Sprintf("leader sts %v", leaderSts)) if err != nil { log.Error(err, "Fetching leader statefulset") return ctrl.Result{}, err } - revision, err := r.createControllerRevisionIfNonExist(ctx, leaderSts, lws) + // Handles two cases: + // Case 1: Upgrading the LWS controller from a version that doesn't support controller revision + // Case 2: Creating the controller revision for a newly created LWS object + revision, err := r.getOrCreateRevisionIfNonExist(ctx, leaderSts, lws) if err != nil { log.Error(err, "Creating controller revision") return ctrl.Result{}, err } updatedRevision, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws, revision) + if err != nil { + log.Error(err, "Validating if LWS has been updated") + return ctrl.Result{}, err + } lwsUpdated := updatedRevision != nil if lwsUpdated { - revision, err = revisionutils.CreateRevision(ctx, r.Client, lws, updatedRevision.Labels[leaderworkerset.TemplateRevisionHashKey]) + revision, err = revisionutils.CreateRevision(ctx, r.Client, updatedRevision) if err != nil { - log.Error(err, "Creating revisions for updated lws") + log.Error(err, "Creating revision for updated LWS") return ctrl.Result{}, err } } - if err != nil { - log.Error(err, "Validating if LWS has been updated") - return ctrl.Result{}, err - } - partition, replicas, err := r.rollingUpdateParameters(ctx, lws, leaderSts, revision, lwsUpdated) + partition, replicas, err := r.rollingUpdateParameters(ctx, lws, leaderSts, revisionutils.GetRevisionKey(revision), lwsUpdated) if err != nil { log.Error(err, "Rolling partition error") return ctrl.Result{}, err } - if err := r.SSAWithStatefulset(ctx, lws, partition, replicas, revision.Labels[leaderworkerset.TemplateRevisionHashKey]); err != nil { + if err := r.SSAWithStatefulset(ctx, lws, partition, replicas, revisionutils.GetRevisionKey(revision)); err != nil { return ctrl.Result{}, err } @@ -145,11 +147,16 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, err } - err = r.updateStatus(ctx, lws, revision.Labels[leaderworkerset.TemplateRevisionHashKey]) + updateDone, err := r.updateStatus(ctx, lws, revisionutils.GetRevisionKey(revision)) if err != nil { return ctrl.Result{}, err } + if updateDone { + if err := revisionutils.TruncateRevisions(ctx, r.Client, lws, revisionutils.GetRevisionKey(revision)); err != nil { + return ctrl.Result{}, err + } + } log.V(2).Info("Leader Reconcile completed.") return ctrl.Result{}, nil } @@ -221,7 +228,7 @@ func SetupIndexes(indexer client.FieldIndexer) error { // - Otherwise, Replicas is equal to spec.Replicas // - One exception here is when unready replicas of leaderWorkerSet is equal to MaxSurge, // we should reclaim the extra replicas gradually to accommodate for the new replicas. 
-func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, sts *appsv1.StatefulSet, revision *appsv1.ControllerRevision, leaderWorkerSetUpdated bool) (int32, int32, error) { +func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, sts *appsv1.StatefulSet, templateHash string, leaderWorkerSetUpdated bool) (int32, int32, error) { log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) lwsReplicas := *lws.Spec.Replicas @@ -270,7 +277,7 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, return 0, lwsReplicas, nil } - continuousReadyReplicas, lwsUnreadyReplicas, err := r.iterateReplicas(ctx, lws, stsReplicas, revision) + continuousReadyReplicas, lwsUnreadyReplicas, err := r.iterateReplicas(ctx, lws, stsReplicas, templateHash) if err != nil { return 0, 0, err } @@ -340,7 +347,7 @@ func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws } // updates the condition of the leaderworkerset to either Progressing or Available. -func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (bool, error) { +func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (bool, bool, error) { log := ctrl.LoggerFrom(ctx) podSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, @@ -349,7 +356,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l leaderPodList := &corev1.PodList{} if err := r.List(ctx, leaderPodList, podSelector, client.InNamespace(lws.Namespace)); err != nil { log.Error(err, "Fetching leaderPods") - return false, err + return false, false, err } updateStatus := false @@ -360,7 +367,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l for _, pod := range leaderPodList.Items { index, err := strconv.Atoi(pod.Labels[leaderworkerset.GroupIndexLabelKey]) if err != nil { - return false, err + return false, false, err } if index < int(*lws.Spec.Replicas) { currentNonBurstWorkerCount++ @@ -370,7 +377,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l if !noWorkerSts { if err := r.Get(ctx, client.ObjectKey{Namespace: lws.Namespace, Name: pod.Name}, &sts); err != nil { log.Error(err, "Fetching worker statefulSet") - return false, err + return false, false, err } } @@ -379,7 +386,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l ready = true readyCount++ } - if (noWorkerSts || sts.Labels[leaderworkerset.TemplateRevisionHashKey] == templateHash) && pod.Labels[leaderworkerset.TemplateRevisionHashKey] == templateHash { + if (noWorkerSts || sts.Labels[leaderworkerset.RevisionKey] == templateHash) && pod.Labels[leaderworkerset.RevisionKey] == templateHash { updated = true updatedCount++ if index < int(*lws.Spec.Replicas) { @@ -407,6 +414,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l } var conditions []metav1.Condition + updateDone := false if updatedNonBurstWorkerCount < currentNonBurstWorkerCount { // upgradeInProgress is true when the upgrade replicas is smaller than the expected // number of total replicas not including the burst replicas @@ -414,9 +422,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx 
context.Context, lws *l conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetUpgradeInProgress)) } else if updatedAndReadyCount == int(*lws.Spec.Replicas) { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetAvailable)) - if err := revisionutils.TruncateRevisions(ctx, r.Client, lws, templateHash); err != nil { - return false, err - } + updateDone = true } else { conditions = append(conditions, makeCondition(leaderworkerset.LeaderWorkerSetProgressing)) } @@ -426,11 +432,11 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l if updateCondition { r.Record.Eventf(lws, corev1.EventTypeNormal, conditions[0].Reason, conditions[0].Message+fmt.Sprintf(", with %d groups ready of total %d groups", readyCount, int(*lws.Spec.Replicas))) } - return updateStatus || updateCondition, nil + return updateStatus || updateCondition, updateDone, nil } // Updates status and condition of LeaderWorkerSet and returns whether or not an update actually occurred. -func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error { +func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (bool, error) { updateStatus := false log := ctrl.LoggerFrom(ctx) @@ -438,7 +444,7 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade sts := &appsv1.StatefulSet{} if err := r.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, sts); err != nil { log.Error(err, "Error retrieving leader StatefulSet") - return err + return false, err } // retrieve the current number of replicas -- the number of leaders @@ -458,7 +464,7 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade selector, err := metav1.LabelSelectorAsSelector(labelSelector) if err != nil { log.Error(err, "Converting label selector to selector") - return err + return false, err } lws.Status.HPAPodSelector = selector.String() @@ -466,17 +472,17 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade } // check if an update is needed - updateConditions, err := r.updateConditions(ctx, lws, templateHash) + updateConditions, updateDone, err := r.updateConditions(ctx, lws, templateHash) if err != nil { - return err + return false, err } if updateStatus || updateConditions { if err := r.Status().Update(ctx, lws); err != nil { log.Error(err, "Updating LeaderWorkerSet status and/or condition.") - return err + return false, err } } - return nil + return updateDone, nil } // iterateReplicas will iterate the leader pods together with corresponding worker statefulsets @@ -484,7 +490,7 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade // - The first value represents the number of continuous ready replicas ranging from the last index to 0, // to help us judge whether we can update the Partition or not. // - The second value represents the unready replicas whose index is smaller than leaderWorkerSet Replicas. 
-func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, stsReplicas int32, revision *appsv1.ControllerRevision) (int32, int32, error) { +func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, stsReplicas int32, templateHash string) (int32, int32, error) { podSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.WorkerIndexLabelKey: "0", @@ -511,7 +517,6 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return strconv.Atoi(sts.Labels[leaderworkerset.GroupIndexLabelKey]) }, stsList.Items, int(stsReplicas)) - templateHash := revision.Labels[leaderworkerset.TemplateRevisionHashKey] // Once size==1, no worker statefulSets will be created. noWorkerSts := *lws.Spec.LeaderWorkerTemplate.Size == 1 processReplica := func(index int32) (ready bool) { @@ -522,7 +527,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return false } - podTemplateHash := sortedPods[index].Labels[leaderworkerset.TemplateRevisionHashKey] + podTemplateHash := sortedPods[index].Labels[leaderworkerset.RevisionKey] if !(podTemplateHash == templateHash && podutils.PodRunningAndReady(sortedPods[index])) { return false } @@ -531,7 +536,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return true } - stsTemplateHash := sortedSts[index].Labels[leaderworkerset.TemplateRevisionHashKey] + stsTemplateHash := sortedSts[index].Labels[leaderworkerset.RevisionKey] return stsTemplateHash == templateHash && statefulsetutils.StatefulsetReady(sortedSts[index]) } @@ -565,15 +570,21 @@ func (r *LeaderWorkerSetReconciler) getLeaderStatefulSet(ctx context.Context, lw return sts, nil } -func (r *LeaderWorkerSetReconciler) createControllerRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { +func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { templateHash := "" if sts != nil && sts.Labels != nil { - templateHash = sts.Labels[leaderworkerset.TemplateRevisionHashKey] + // Uses the hash in the leader sts to avoid detecting update in the case where LWS controller is upgraded from a version where + // the templateHash was used to detect update instead of controller revision. 
+ templateHash = revisionutils.GetRevisionKey(sts) } if stsRevision, err := revisionutils.GetRevision(ctx, r.Client, lws, templateHash); sts != nil || err != nil { return stsRevision, err } - return revisionutils.CreateRevision(ctx, r.Client, lws, templateHash) + revision, err := revisionutils.NewRevision(ctx, r.Client, lws, templateHash) + if err != nil { + return nil, err + } + return revisionutils.CreateRevision(ctx, r.Client, revision) } func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { @@ -613,9 +624,9 @@ func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWor } podTemplateApplyConfiguration.WithLabels(map[string]string{ - leaderworkerset.WorkerIndexLabelKey: "0", - leaderworkerset.SetNameLabelKey: lws.Name, - leaderworkerset.TemplateRevisionHashKey: templateHash, + leaderworkerset.WorkerIndexLabelKey: "0", + leaderworkerset.SetNameLabelKey: lws.Name, + leaderworkerset.RevisionKey: templateHash, }) podAnnotations := make(map[string]string) podAnnotations[leaderworkerset.SizeAnnotationKey] = strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)) @@ -650,8 +661,8 @@ func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWor leaderworkerset.WorkerIndexLabelKey: "0", }))). WithLabels(map[string]string{ - leaderworkerset.SetNameLabelKey: lws.Name, - leaderworkerset.TemplateRevisionHashKey: templateHash, + leaderworkerset.SetNameLabelKey: lws.Name, + leaderworkerset.RevisionKey: templateHash, }). WithAnnotations(map[string]string{ leaderworkerset.ReplicasAnnotationKey: strconv.Itoa(int(*lws.Spec.Replicas)), diff --git a/pkg/controllers/leaderworkerset_controller_test.go b/pkg/controllers/leaderworkerset_controller_test.go index b81cdc5d..750cdb72 100644 --- a/pkg/controllers/leaderworkerset_controller_test.go +++ b/pkg/controllers/leaderworkerset_controller_test.go @@ -46,7 +46,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { if err != nil { t.Fatal(err) } - hash1 := cr1.Labels[leaderworkerset.TemplateRevisionHashKey] + hash1 := revisionutils.GetRevisionKey(cr1) lws2 := testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). 
WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj() @@ -54,7 +54,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { if err != nil { t.Fatal(err) } - hash2 := cr2.Labels[leaderworkerset.TemplateRevisionHashKey] + hash2 := revisionutils.GetRevisionKey(cr2) tests := []struct { name string diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go index bff53cc9..9a089ce7 100644 --- a/pkg/controllers/pod_controller.go +++ b/pkg/controllers/pod_controller.go @@ -119,7 +119,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R log.V(2).Info("defer the creation of the worker statefulset because leader pod is not ready.") return ctrl.Result{}, nil } - revision, err := revisionutils.GetRevision(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.TemplateRevisionHashKey]) + revision, err := revisionutils.GetRevision(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.RevisionKey]) if err != nil { log.Error(err, "Getting lws revisions") return ctrl.Result{}, err @@ -185,7 +185,8 @@ func (r *PodReconciler) handleRestartPolicy(ctx context.Context, pod corev1.Pod, if err := r.Get(ctx, types.NamespacedName{Name: leaderPodName, Namespace: pod.Namespace}, &leader); err != nil { return false, err } - if leader.Labels[leaderworkerset.TemplateRevisionHashKey] != pod.Labels[leaderworkerset.TemplateRevisionHashKey] { + // Different revision key means that this pod will be deleted soon and alternative will be created with the matching key + if leader.Labels[leaderworkerset.RevisionKey] != pod.Labels[leaderworkerset.RevisionKey] { return false, nil } } else { @@ -292,7 +293,7 @@ func constructWorkerStatefulSetApplyConfiguration(leaderPod corev1.Pod, lws lead leaderworkerset.GroupIndexLabelKey: leaderPod.Labels[leaderworkerset.GroupIndexLabelKey], leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.GroupUniqueHashLabelKey: leaderPod.Labels[leaderworkerset.GroupUniqueHashLabelKey], - leaderworkerset.TemplateRevisionHashKey: leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey], + leaderworkerset.RevisionKey: leaderPod.Labels[leaderworkerset.RevisionKey], } podTemplateApplyConfiguration.WithLabels(labelMap) diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index a066319d..4a0992ae 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -42,14 +42,14 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { if err != nil { t.Fatal(err) } - updateTemplateHash := updateRevision.Labels[leaderworkerset.TemplateRevisionHashKey] + updateTemplateHash := revisionutils.GetRevisionKey(updateRevision) lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker" currentRevision, err := revisionutils.NewRevision(context.TODO(), client, lws, "") if err != nil { t.Fatal(err) } - currentTemplateHash := currentRevision.Labels[leaderworkerset.TemplateRevisionHashKey] + currentTemplateHash := revisionutils.GetRevisionKey(currentRevision) tests := []struct { name string @@ -70,7 +70,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, }, }, }, @@ -90,7 +90,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { 
leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -108,7 +108,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", @@ -144,7 +144,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, }, }, }, @@ -166,7 +166,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -184,7 +184,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -221,7 +221,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, }, }, }, @@ -242,7 +242,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Labels: map[string]string{ leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, }, @@ -260,7 +260,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Labels: map[string]string{ leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", - leaderworkerset.TemplateRevisionHashKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateTemplateHash, leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, Annotations: map[string]string{ @@ -299,7 +299,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + leaderworkerset.RevisionKey: currentTemplateHash, }, }, }, @@ -319,7 +319,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: 
"test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + leaderworkerset.RevisionKey: currentTemplateHash, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -337,7 +337,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.TemplateRevisionHashKey: currentTemplateHash, + leaderworkerset.RevisionKey: currentTemplateHash, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index db69c749..ba9c70fb 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -28,47 +28,21 @@ import ( // Functions in this package are adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/statefulset/ and // https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/history/controller_history.go -// ControllerRevisionName returns the Name for a ControllerRevision in the form prefix-hash-revisionnumber. If the length -// of prefix is greater than 223 bytes, it is truncated to allow for a name that is no larger than 253 bytes. -// revision-number allows us to avoid collisions if the created prefix-hash already exists in the history, since revision -// will be unique. -func RevisionName(prefix string, hash string, revisionNumber int64) string { - if len(prefix) > 223 { - prefix = prefix[:223] - } - - return fmt.Sprintf("%s-%s-%v", prefix, hash, revisionNumber) -} - -// HashControllerRevision hashes the contents of revision's Data using FNV hashing. -// The returned hash will be a safe encoded string to avoid bad words. -func HashRevision(revision *appsv1.ControllerRevision) string { - hf := fnv.New32() - if len(revision.Data.Raw) > 0 { - hf.Write(revision.Data.Raw) - } - if revision.Data.Object != nil { - DeepHashObject(hf, revision.Data.Object) - } - return rand.SafeEncodeString(fmt.Sprint(hf.Sum32())) -} - // EqualRevision returns true if lhs and rhs are either both nil, if the templateRevisionHash is the same, // or if they are semantically equivalent. func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool { - if lhs == nil || rhs == nil { return lhs == rhs } - if lhs.Labels[leaderworkerset.TemplateRevisionHashKey] == rhs.Labels[leaderworkerset.TemplateRevisionHashKey] { + if GetRevisionKey(lhs) == GetRevisionKey(rhs) { return true } return bytes.Equal(lhs.Data.Raw, rhs.Data.Raw) && apiequality.Semantic.DeepEqual(lhs.Data.Object, rhs.Data.Object) } -// ListControllerRevisions lists all ControllerRevisions matching selector and owned by parent or no other +// ListRevisions lists all ControllerRevisions matching selector and owned by parent or no other // controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the // returned error is not nil, the returned slice is not valid. 
func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { @@ -90,20 +64,6 @@ func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.O return owned, err } -func DeepHashObject(hasher hash.Hash, objectToWrite interface{}) { - hasher.Reset() - printer := spew.ConfigState{ - Indent: " ", - SortKeys: true, - DisableMethods: true, - SpewKeys: true, - } - _, err := printer.Fprintf(hasher, "%#v", objectToWrite) - if err != nil { - return - } -} - func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) { log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) @@ -111,7 +71,7 @@ func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker return nil, nil } selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ - leaderworkerset.TemplateRevisionHashKey: templateHash, + leaderworkerset.RevisionKey: templateHash, }}) if err != nil { return nil, err @@ -135,59 +95,22 @@ func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker return revisions[0], nil } -// GetPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a -// previous version. If the returned error is nil the patch is valid. The current state that we save is the -// leaderWorkerTemplate and NetworkConfig. We can modify this later to encompass more state (or less) and -// remain compatible with previously recorded patches. -func GetPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { - str := &bytes.Buffer{} - clone := lws.DeepCopy() - // When upgrading from an LWS version that doesn't contain NetworkConfig, NetworkConfig will be nil - // until another field in the LWS object is changed triggering the LWS webhook. This allows the revision - // to be the same before and after the LWS webhook actually defaults the value. 
- if clone.Spec.NetworkConfig == nil { - clone.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{} - subdomainPolicy := leaderworkerset.SubdomainShared - clone.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{ - SubdomainPolicy: &subdomainPolicy, - } - } - - if err := unstructured.UnstructuredJSONScheme.Encode(clone, str); err != nil { - return nil, err - } - var raw map[string]interface{} - if err := json.Unmarshal(str.Bytes(), &raw); err != nil { - return nil, err +func GetRevisionKey(obj metav1.Object) string { + if obj.GetLabels() != nil { + return obj.GetLabels()[leaderworkerset.RevisionKey] } - objCopy := make(map[string]interface{}) - specCopy := make(map[string]interface{}) - spec := raw["spec"].(map[string]interface{}) - specCopy["networkConfig"] = spec["networkConfig"] - specCopy["leaderWorkerTemplate"] = spec["leaderWorkerTemplate"].(map[string]interface{}) - specCopy["$patch"] = "replace" - objCopy["spec"] = specCopy - return json.Marshal(objCopy) + return "" } func CreateRevision( ctx context.Context, k8sClient client.Client, - lws *leaderworkerset.LeaderWorkerSet, - templateHash string) (*appsv1.ControllerRevision, error) { - log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) - ctx = ctrl.LoggerInto(ctx, log) - - revision, err := NewRevision(ctx, k8sClient, lws, templateHash) - if err != nil { - return nil, err - } + revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { if err := k8sClient.Create(ctx, revision); err != nil { - log.Error(err, "Creating new revision for lws") return nil, err } created := &appsv1.ControllerRevision{} - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: lws.Namespace, Name: revision.Name}, created); err != nil { + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: revision.Namespace, Name: revision.Name}, created); err != nil { return nil, err } return created, nil @@ -206,18 +129,18 @@ func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker return nil, err } revisions, err := ListRevisions(ctx, k8sClient, lws, selector) - revision := NextRevision(revisions) + revision := nextRevision(revisions) if err != nil { return nil, err } - patch, err := GetPatch(lws) + patch, err := getPatch(lws) if err != nil { return nil, err } templateLabels := map[string]string{ - leaderworkerset.TemplateRevisionHashKey: templateHash, - leaderworkerset.SetNameLabelKey: lws.Name, + leaderworkerset.RevisionKey: templateHash, + leaderworkerset.SetNameLabelKey: lws.Name, } cr := &appsv1.ControllerRevision{ @@ -230,10 +153,10 @@ func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker Revision: revision, } - hash := HashRevision(cr) - cr.Name = RevisionName(lws.Name, hash, revision) - if cr.Labels[leaderworkerset.TemplateRevisionHashKey] == "" { - cr.Labels[leaderworkerset.TemplateRevisionHashKey] = hash + hash := hashRevision(cr) + cr.Name = revisionName(lws.Name, hash, revision) + if cr.Labels[leaderworkerset.RevisionKey] == "" { + cr.Labels[leaderworkerset.RevisionKey] = hash } return cr, nil } @@ -258,24 +181,6 @@ func ApplyRevision(lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.Contro return restoredLws, nil } -// nextRevision finds the next valid revision number based on revisions. If the length of revisions -// is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. This method -// assumes that revisions has been sorted by Revision. 
-func NextRevision(revisions []*appsv1.ControllerRevision) int64 {
-	count := len(revisions)
-	if count <= 0 {
-		return 1
-	}
-
-	max := int64(1)
-	for _, revision := range revisions {
-		if max < revision.Revision {
-			max = revision.Revision
-		}
-	}
-	return max + 1
-}
-
 // TruncateRevisions cleans up all other controller revisions except the currentRevision.
 // currentRevision is the one that matches the templateHash that is passed
 func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error {
@@ -291,7 +196,7 @@ func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leader
 	}
 
 	for i, revision := range revisions {
-		if revision.Labels[leaderworkerset.TemplateRevisionHashKey] != templateHash {
+		if revision.Labels[leaderworkerset.RevisionKey] != templateHash {
 			if err := k8sClient.Delete(ctx, revisions[i]); err != nil {
 				return err
 			}
@@ -299,3 +204,98 @@ func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leader
 	}
 	return nil
 }
+
+// getPatch returns a strategic merge patch that can be applied to restore a LeaderWorkerSet to a
+// previous version. If the returned error is nil the patch is valid. The current state that we save is the
+// leaderWorkerTemplate and NetworkConfig. We can modify this later to encompass more state (or less) and
+// remain compatible with previously recorded patches.
+func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) {
+	str := &bytes.Buffer{}
+	clone := lws.DeepCopy()
+	// When upgrading from an LWS version that doesn't contain NetworkConfig, NetworkConfig will be nil
+	// until another field in the LWS object is changed triggering the LWS webhook. This allows the revision
+	// to be the same before and after the LWS webhook actually defaults the value.
+	if clone.Spec.NetworkConfig == nil {
+		clone.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{}
+		subdomainPolicy := leaderworkerset.SubdomainShared
+		clone.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{
+			SubdomainPolicy: &subdomainPolicy,
+		}
+	}
+
+	if err := unstructured.UnstructuredJSONScheme.Encode(clone, str); err != nil {
+		return nil, err
+	}
+	var raw map[string]interface{}
+	if err := json.Unmarshal(str.Bytes(), &raw); err != nil {
+		return nil, err
+	}
+	objCopy := make(map[string]interface{})
+	specCopy := make(map[string]interface{})
+	spec := raw["spec"].(map[string]interface{})
+	networkConfig := spec["networkConfig"].(map[string]interface{})
+	specCopy["networkConfig"] = networkConfig
+	template := spec["leaderWorkerTemplate"].(map[string]interface{})
+	specCopy["leaderWorkerTemplate"] = template
+	networkConfig["$patch"] = "replace"
+	template["$patch"] = "replace"
+	objCopy["spec"] = specCopy
+	return json.Marshal(objCopy)
+}
+
+// nextRevision finds the next valid revision number based on revisions. If the length of revisions
+// is 0 this is 1. Otherwise, it is 1 greater than the largest Revision found in revisions, so the
+// slice does not need to be pre-sorted.
+func nextRevision(revisions []*appsv1.ControllerRevision) int64 {
+	count := len(revisions)
+	if count <= 0 {
+		return 1
+	}
+
+	max := int64(1)
+	for _, revision := range revisions {
+		if max < revision.Revision {
+			max = revision.Revision
+		}
+	}
+	return max + 1
+}
+
+// revisionName returns the Name for a ControllerRevision in the form prefix-hash-revisionnumber. If the length
+// of prefix is greater than 220 bytes, it is truncated to allow for a name that is no larger than 253 bytes.
+// The revision number allows us to avoid collisions if a name with the same prefix and hash already exists in
+// the history, since the revision number is unique.
+func revisionName(prefix string, hash string, revisionNumber int64) string {
+	if len(prefix) > 220 {
+		prefix = prefix[:220]
+	}
+
+	return fmt.Sprintf("%s-%s-%v", prefix, hash, revisionNumber)
+}
+
+// hashRevision hashes the contents of revision's Data using FNV hashing.
+// The returned hash will be a safe encoded string to avoid bad words.
+func hashRevision(revision *appsv1.ControllerRevision) string {
+	hf := fnv.New32()
+	if len(revision.Data.Raw) > 0 {
+		hf.Write(revision.Data.Raw)
+	}
+	if revision.Data.Object != nil {
+		deepHashObject(hf, revision.Data.Object)
+	}
+	return rand.SafeEncodeString(fmt.Sprint(hf.Sum32()))
+}
+
+func deepHashObject(hasher hash.Hash, objectToWrite interface{}) {
+	hasher.Reset()
+	printer := spew.ConfigState{
+		Indent:         " ",
+		SortKeys:       true,
+		DisableMethods: true,
+		SpewKeys:       true,
+	}
+	_, err := printer.Fprintf(hasher, "%#v", objectToWrite)
+	if err != nil {
+		return
+	}
+}
diff --git a/pkg/utils/revision/revision_utils_test.go b/pkg/utils/revision/revision_utils_test.go
index 92448e47..922f1bb8 100644
--- a/pkg/utils/revision/revision_utils_test.go
+++ b/pkg/utils/revision/revision_utils_test.go
@@ -43,6 +43,13 @@ func TestApplyRevision(t *testing.T) {
 	lws.Spec.NetworkConfig = &leaderworkerset.NetworkConfig{
 		SubdomainPolicy: &subdomainPolicy,
 	}
+	lws.Spec.RolloutStrategy = leaderworkerset.RolloutStrategy{
+		Type: leaderworkerset.RollingUpdateStrategyType,
+		RollingUpdateConfiguration: &leaderworkerset.RollingUpdateConfiguration{
+			MaxUnavailable: intstr.FromInt32(2),
+			MaxSurge:       intstr.FromInt(1),
+		},
+	}
 	restoredLws, err := ApplyRevision(lws, revision)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -64,6 +71,10 @@ func TestApplyRevision(t *testing.T) {
 	if diff := cmp.Diff(currentLws.Spec.NetworkConfig, restoredLws.Spec.NetworkConfig); diff != "" {
 		t.Errorf("NetworkConfig should be restored %s", diff)
 	}
+
+	if diff := cmp.Diff(lws.Spec.RolloutStrategy, restoredLws.Spec.RolloutStrategy); diff != "" {
+		t.Errorf("It should not restore or clear non-NetworkConfig spec fields: %s", diff)
+	}
 }
 
 func BuildLeaderWorkerSet(nsName string) *leaderworkerset.LeaderWorkerSet {
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 42be85bd..fce7ed54 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -73,7 +73,7 @@ var _ = ginkgo.Describe("leaderWorkerSet e2e tests", func() {
 			gomega.Expect(*lws.Spec.LeaderWorkerTemplate.Size).To(gomega.Equal(int32(5)))
 			gomega.Expect(lws.Spec.LeaderWorkerTemplate.RestartPolicy).To(gomega.Equal(v1.NoneRestartPolicy))
-			expectedLabels := []string{v1.SetNameLabelKey, v1.GroupIndexLabelKey, v1.WorkerIndexLabelKey, v1.TemplateRevisionHashKey}
+			expectedLabels := []string{v1.SetNameLabelKey, v1.GroupIndexLabelKey, v1.WorkerIndexLabelKey, v1.RevisionKey}
 			expectedAnnotations := []string{v1.LeaderPodNameAnnotationKey, v1.SizeAnnotationKey}
 			for _, pod := range pods.Items {
@@ -154,7 +154,7 @@ var _ = ginkgo.Describe("leaderWorkerSet e2e tests", func() {
 			lwsPods := &corev1.PodList{}
 			testing.ExpectValidPods(ctx, k8sClient, lws, lwsPods)
-			expectedLabels := []string{v1.SetNameLabelKey, v1.GroupIndexLabelKey,
v1.WorkerIndexLabelKey, v1.RevisionKey, v1.SubGroupIndexLabelKey} expectedAnnotations := []string{v1.LeaderPodNameAnnotationKey, v1.SizeAnnotationKey, v1.SubGroupSizeAnnotationKey} for _, pod := range lwsPods.Items { diff --git a/test/testutils/util.go b/test/testutils/util.go index 1e10aa7b..3dc9e173 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -54,10 +54,10 @@ func CreateWorkerPodsForLeaderPod(ctx context.Context, leaderPod corev1.Pod, k8s Name: leaderPod.Name + "-" + strconv.Itoa(i), Namespace: leaderPod.Namespace, Labels: map[string]string{ - leaderworkerset.SetNameLabelKey: lws.Name, - "worker.pod": "workers", - leaderworkerset.WorkerIndexLabelKey: strconv.Itoa(i), - leaderworkerset.TemplateRevisionHashKey: leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey], + leaderworkerset.SetNameLabelKey: lws.Name, + "worker.pod": "workers", + leaderworkerset.WorkerIndexLabelKey: strconv.Itoa(i), + leaderworkerset.RevisionKey: leaderPod.Labels[leaderworkerset.RevisionKey], }, Annotations: map[string]string{ leaderworkerset.SizeAnnotationKey: strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)), @@ -138,7 +138,7 @@ func CreateLeaderPods(ctx context.Context, leaderSts appsv1.StatefulSet, k8sClie leaderworkerset.WorkerIndexLabelKey: strconv.Itoa(0), leaderworkerset.GroupIndexLabelKey: strconv.Itoa(i), leaderworkerset.GroupUniqueHashLabelKey: "randomValue", - leaderworkerset.TemplateRevisionHashKey: cr.Labels[leaderworkerset.TemplateRevisionHashKey], + leaderworkerset.RevisionKey: revisionutils.GetRevisionKey(cr), }, Annotations: map[string]string{ leaderworkerset.SizeAnnotationKey: strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)), @@ -171,8 +171,8 @@ func ExpectValidPods(ctx context.Context, k8sClient client.Client, lws *leaderwo return err } labelSelector := client.MatchingLabels(map[string]string{ - leaderworkerset.SetNameLabelKey: lws.Name, - leaderworkerset.TemplateRevisionHashKey: cr.Labels[leaderworkerset.TemplateRevisionHashKey], + leaderworkerset.SetNameLabelKey: lws.Name, + leaderworkerset.RevisionKey: revisionutils.GetRevisionKey(cr), }) if err := k8sClient.List(ctx, podList, labelSelector, client.InNamespace(lws.Namespace)); err != nil { @@ -229,8 +229,8 @@ func GetLeaderPod(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, k8s if err != nil { return err } - if cr.Labels[leaderworkerset.TemplateRevisionHashKey] != pod.Labels[leaderworkerset.TemplateRevisionHashKey] { - return fmt.Errorf("TemplateHash does not match, expected %s, got %s", cr.Labels[leaderworkerset.TemplateRevisionHashKey], pod.Labels[leaderworkerset.TemplateRevisionHashKey]) + if revisionutils.GetRevisionKey(cr) != pod.Labels[leaderworkerset.RevisionKey] { + return fmt.Errorf("TemplateHash does not match, expected %s, got %s", revisionutils.GetRevisionKey(cr), pod.Labels[leaderworkerset.RevisionKey]) } return nil }, Timeout, Interval).Should(gomega.Succeed()) @@ -283,7 +283,7 @@ func SetLeaderPodToReady(ctx context.Context, k8sClient client.Client, podName s return err } - leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] = cr.Labels[leaderworkerset.TemplateRevisionHashKey] + leaderPod.Labels[leaderworkerset.RevisionKey] = revisionutils.GetRevisionKey(cr) return k8sClient.Update(ctx, &leaderPod) }, Timeout, Interval).Should(gomega.Succeed()) diff --git a/test/testutils/validators.go b/test/testutils/validators.go index 26f24c4f..d2644783 100644 --- a/test/testutils/validators.go +++ b/test/testutils/validators.go @@ -154,9 +154,9 @@ func ExpectValidLeaderStatefulSet(ctx 
context.Context, k8sClient client.Client, if err != nil { return err } - hash := cr.Labels[leaderworkerset.TemplateRevisionHashKey] - if sts.Labels[leaderworkerset.TemplateRevisionHashKey] != hash { - return fmt.Errorf("mismatch template revision hash for leader statefulset, got: %s, want: %s", sts.Spec.Template.Labels[leaderworkerset.TemplateRevisionHashKey], hash) + hash := revisionutils.GetRevisionKey(cr) + if revisionutils.GetRevisionKey(&sts) != hash { + return fmt.Errorf("mismatch template revision hash for leader statefulset, got: %s, want: %s", revisionutils.GetRevisionKey(&sts), hash) } if sts.Spec.ServiceName != lws.Name { return errors.New("leader StatefulSet service name should match leaderWorkerSet name") @@ -184,9 +184,9 @@ func ExpectValidLeaderStatefulSet(ctx context.Context, k8sClient client.Client, } // check pod template has correct label if diff := cmp.Diff(sts.Spec.Template.Labels, map[string]string{ - leaderworkerset.SetNameLabelKey: lws.Name, - leaderworkerset.WorkerIndexLabelKey: "0", - leaderworkerset.TemplateRevisionHashKey: hash, + leaderworkerset.SetNameLabelKey: lws.Name, + leaderworkerset.WorkerIndexLabelKey: "0", + leaderworkerset.RevisionKey: hash, }); diff != "" { return errors.New("leader StatefulSet pod template doesn't have the correct labels: " + diff) } @@ -279,9 +279,9 @@ func ExpectValidWorkerStatefulSets(ctx context.Context, leaderWorkerSet *leaderw if err != nil { return err } - hash := cr.Labels[leaderworkerset.TemplateRevisionHashKey] - if sts.Labels[leaderworkerset.TemplateRevisionHashKey] != hash { - return fmt.Errorf("mismatch template revision hash for worker statefulset, got: %s, want: %s", sts.Labels[leaderworkerset.TemplateRevisionHashKey], hash) + hash := revisionutils.GetRevisionKey(cr) + if sts.Labels[leaderworkerset.RevisionKey] != hash { + return fmt.Errorf("mismatch template revision hash for worker statefulset, got: %s, want: %s", revisionutils.GetRevisionKey(&sts), hash) } if *sts.Spec.Replicas != *lws.Spec.LeaderWorkerTemplate.Size-1 { return errors.New("worker StatefulSet replicas should match leaderWorkerSet replicas") From 949bc68040a90c8d5d6bf4cec8b3f2ada819b436 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Fri, 27 Dec 2024 21:38:52 +0000 Subject: [PATCH 19/27] further changes from templateHash to RevisionKey --- pkg/controllers/leaderworkerset_controller.go | 38 +++++++++---------- pkg/utils/revision/revision_utils.go | 31 +++++++-------- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 37f16d95..124a003c 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -228,7 +228,7 @@ func SetupIndexes(indexer client.FieldIndexer) error { // - Otherwise, Replicas is equal to spec.Replicas // - One exception here is when unready replicas of leaderWorkerSet is equal to MaxSurge, // we should reclaim the extra replicas gradually to accommodate for the new replicas. 
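The partition arithmetic that rollingUpdateParameters implements in the hunks below is easier to see with concrete numbers. The following standalone sketch mirrors the controller's min(partition, NonZeroValue(stsReplicas-rollingStep-continuousReadyReplicas)) bound; the values and the local nonZeroValue helper are illustrative stand-ins, not the controller's API.

package main

import "fmt"

// nonZeroValue clamps negatives to zero, standing in for the
// utils.NonZeroValue helper the controller calls.
func nonZeroValue(v int32) int32 {
	if v < 0 {
		return 0
	}
	return v
}

func main() {
	var (
		stsReplicas     int32 = 6 // leader StatefulSet size, possibly including surge
		rollingStep     int32 = 2 // rolling-update budget derived from maxUnavailable/maxSurge
		continuousReady int32 = 1 // ready, updated replicas counted from the last index down
		partition       int32 = 5 // partition carried over from the previous reconcile
	)
	// The partition only moves down, and only as far as the rolling
	// budget plus the trailing ready replicas allow.
	next := min(partition, nonZeroValue(stsReplicas-rollingStep-continuousReady))
	fmt.Println(next) // 3: replicas with index 3..5 may now be updated
}

With a larger continuousReady value the bound drops further, which is how readiness at the tail of the index range pulls the update window forward.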
-func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, sts *appsv1.StatefulSet, templateHash string, leaderWorkerSetUpdated bool) (int32, int32, error) { +func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, sts *appsv1.StatefulSet, revisionKey string, leaderWorkerSetUpdated bool) (int32, int32, error) { log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) lwsReplicas := *lws.Spec.Replicas @@ -277,7 +277,7 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, return 0, lwsReplicas, nil } - continuousReadyReplicas, lwsUnreadyReplicas, err := r.iterateReplicas(ctx, lws, stsReplicas, templateHash) + continuousReadyReplicas, lwsUnreadyReplicas, err := r.iterateReplicas(ctx, lws, stsReplicas, revisionKey) if err != nil { return 0, 0, err } @@ -309,11 +309,11 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context, return min(partition, utils.NonZeroValue(stsReplicas-int32(rollingStep)-continuousReadyReplicas)), wantReplicas(lwsUnreadyReplicas), nil } -func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32, templateHash string) error { +func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32, revisionKey string) error { log := ctrl.LoggerFrom(ctx) // construct the statefulset apply configuration - leaderStatefulSetApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(lws, partition, replicas, templateHash) + leaderStatefulSetApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(lws, partition, replicas, revisionKey) if err != nil { log.Error(err, "Constructing StatefulSet apply configuration.") return err @@ -347,7 +347,7 @@ func (r *LeaderWorkerSetReconciler) SSAWithStatefulset(ctx context.Context, lws } // updates the condition of the leaderworkerset to either Progressing or Available. -func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (bool, bool, error) { +func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, revisionKey string) (bool, bool, error) { log := ctrl.LoggerFrom(ctx) podSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, @@ -386,7 +386,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l ready = true readyCount++ } - if (noWorkerSts || sts.Labels[leaderworkerset.RevisionKey] == templateHash) && pod.Labels[leaderworkerset.RevisionKey] == templateHash { + if (noWorkerSts || sts.Labels[leaderworkerset.RevisionKey] == revisionKey) && pod.Labels[leaderworkerset.RevisionKey] == revisionKey { updated = true updatedCount++ if index < int(*lws.Spec.Replicas) { @@ -436,7 +436,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l } // Updates status and condition of LeaderWorkerSet and returns whether or not an update actually occurred. 
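The updateConditions hunk above only counts a replica as updated when both the leader pod and its worker StatefulSet carry the target revision key, with the noWorkerSts escape hatch covering size-1 groups that never create a worker StatefulSet. A condensed sketch of that predicate, written as a standalone helper rather than anything in this package:

package main

import "fmt"

// updatedReplica reports whether a replica counts as updated: the leader
// pod must carry the target revision key, and so must its worker
// StatefulSet unless the group has size 1 and no worker StatefulSet exists.
func updatedReplica(podKey, stsKey, targetKey string, noWorkerSts bool) bool {
	return (noWorkerSts || stsKey == targetKey) && podKey == targetKey
}

func main() {
	fmt.Println(updatedReplica("rev-b", "rev-a", "rev-b", false)) // false: worker StatefulSet lags behind
	fmt.Println(updatedReplica("rev-b", "", "rev-b", true))       // true: size-1 group, no worker StatefulSet
}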
-func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (bool, error) { +func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, revisionKey string) (bool, error) { updateStatus := false log := ctrl.LoggerFrom(ctx) @@ -472,7 +472,7 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade } // check if an update is needed - updateConditions, updateDone, err := r.updateConditions(ctx, lws, templateHash) + updateConditions, updateDone, err := r.updateConditions(ctx, lws, revisionKey) if err != nil { return false, err } @@ -490,7 +490,7 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade // - The first value represents the number of continuous ready replicas ranging from the last index to 0, // to help us judge whether we can update the Partition or not. // - The second value represents the unready replicas whose index is smaller than leaderWorkerSet Replicas. -func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, stsReplicas int32, templateHash string) (int32, int32, error) { +func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, stsReplicas int32, revisionKey string) (int32, int32, error) { podSelector := client.MatchingLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.WorkerIndexLabelKey: "0", @@ -528,7 +528,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le } podTemplateHash := sortedPods[index].Labels[leaderworkerset.RevisionKey] - if !(podTemplateHash == templateHash && podutils.PodRunningAndReady(sortedPods[index])) { + if !(podTemplateHash == revisionKey && podutils.PodRunningAndReady(sortedPods[index])) { return false } @@ -537,7 +537,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le } stsTemplateHash := sortedSts[index].Labels[leaderworkerset.RevisionKey] - return stsTemplateHash == templateHash && statefulsetutils.StatefulsetReady(sortedSts[index]) + return stsTemplateHash == revisionKey && statefulsetutils.StatefulsetReady(sortedSts[index]) } var skip bool @@ -571,16 +571,16 @@ func (r *LeaderWorkerSetReconciler) getLeaderStatefulSet(ctx context.Context, lw } func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { - templateHash := "" + revisionKey := "" if sts != nil && sts.Labels != nil { // Uses the hash in the leader sts to avoid detecting update in the case where LWS controller is upgraded from a version where - // the templateHash was used to detect update instead of controller revision. - templateHash = revisionutils.GetRevisionKey(sts) + // the revisionKey was used to detect update instead of controller revision. 
+ revisionKey = revisionutils.GetRevisionKey(sts) } - if stsRevision, err := revisionutils.GetRevision(ctx, r.Client, lws, templateHash); sts != nil || err != nil { + if stsRevision, err := revisionutils.GetRevision(ctx, r.Client, lws, revisionKey); sts != nil || err != nil { return stsRevision, err } - revision, err := revisionutils.NewRevision(ctx, r.Client, lws, templateHash) + revision, err := revisionutils.NewRevision(ctx, r.Client, lws, revisionKey) if err != nil { return nil, err } @@ -605,7 +605,7 @@ func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, } // constructLeaderStatefulSetApplyConfiguration constructs the applied configuration for the leader StatefulSet -func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32, templateHash string) (*appsapplyv1.StatefulSetApplyConfiguration, error) { +func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWorkerSet, partition, replicas int32, revisionKey string) (*appsapplyv1.StatefulSetApplyConfiguration, error) { var podTemplateSpec corev1.PodTemplateSpec if lws.Spec.LeaderWorkerTemplate.LeaderTemplate != nil { podTemplateSpec = *lws.Spec.LeaderWorkerTemplate.LeaderTemplate.DeepCopy() @@ -626,7 +626,7 @@ func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWor podTemplateApplyConfiguration.WithLabels(map[string]string{ leaderworkerset.WorkerIndexLabelKey: "0", leaderworkerset.SetNameLabelKey: lws.Name, - leaderworkerset.RevisionKey: templateHash, + leaderworkerset.RevisionKey: revisionKey, }) podAnnotations := make(map[string]string) podAnnotations[leaderworkerset.SizeAnnotationKey] = strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)) @@ -662,7 +662,7 @@ func constructLeaderStatefulSetApplyConfiguration(lws *leaderworkerset.LeaderWor }))). WithLabels(map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, - leaderworkerset.RevisionKey: templateHash, + leaderworkerset.RevisionKey: revisionKey, }). WithAnnotations(map[string]string{ leaderworkerset.ReplicasAnnotationKey: strconv.Itoa(int(*lws.Spec.Replicas)), diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index ba9c70fb..11b61bb8 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -28,7 +28,7 @@ import ( // Functions in this package are adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/statefulset/ and // https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/history/controller_history.go -// EqualRevision returns true if lhs and rhs are either both nil, if the templateRevisionHash is the same, +// EqualRevision returns true if lhs and rhs are either both nil, if the revisionKey is the same, // or if they are semantically equivalent. func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool { if lhs == nil || rhs == nil { @@ -64,14 +64,16 @@ func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.O return owned, err } -func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) { +// GetRevision returns the controllerRevision that matches the revisionKey that is passed. A nil controllerRevision will be returned if the passed revisionKey is nil, +// or if no controllerRevisions match the revisionKey passed. 
If more than one controllerRevision matches, the latest revision will be returned. +func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, revisionKey string) (*appsv1.ControllerRevision, error) { log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(lws)) ctx = ctrl.LoggerInto(ctx, log) - if templateHash == "" { + if revisionKey == "" { return nil, nil } selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ - leaderworkerset.RevisionKey: templateHash, + leaderworkerset.RevisionKey: revisionKey, }}) if err != nil { return nil, err } @@ -116,11 +118,11 @@ func CreateRevision( return created, nil } -// newRevision instantiates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet. +// NewRevision instantiates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet. // The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned // ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set // to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet. -func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) (*appsv1.ControllerRevision, error) { +func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, revisionKey string) (*appsv1.ControllerRevision, error) { var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet") selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, @@ -139,7 +141,7 @@ func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker } templateLabels := map[string]string{ - leaderworkerset.RevisionKey: templateHash, + leaderworkerset.RevisionKey: revisionKey, leaderworkerset.SetNameLabelKey: lws.Name, } @@ -182,8 +184,8 @@ func ApplyRevision(lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.Contro } // TruncateRevisions cleans up all other controller revisions except the currentRevision. -// currentRevision is the one that matches the templateHash that is passed -func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, templateHash string) error { +// currentRevision is the one that matches the revisionKey that is passed +func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, revisionKey string) error { selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ leaderworkerset.SetNameLabelKey: lws.Name, }}) @@ -196,7 +198,7 @@ func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leader } for i, revision := range revisions { - if revision.Labels[leaderworkerset.RevisionKey] != templateHash { + if revision.Labels[leaderworkerset.RevisionKey] != revisionKey { if err := k8sClient.Delete(ctx, revisions[i]); err != nil { return err } @@ -244,8 +246,7 @@ func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { } // nextRevision finds the next valid revision number based on revisions. If the length of revisions -is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision.
This method -// assumes that revisions has been sorted by Revision. +// is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. func nextRevision(revisions []*appsv1.ControllerRevision) int64 { count := len(revisions) if count <= 0 { @@ -261,8 +262,8 @@ func nextRevision(revisions []*appsv1.ControllerRevision) int64 { return max + 1 } -// RevisionName returns the Name for a ControllerRevision in the form prefix-hash-revisionnumber. If the length -// of prefix is greater than 223 bytes, it is truncated to allow for a name that is no larger than 253 bytes. +// revisionName returns the Name for a ControllerRevision in the form prefix-hash-revisionnumber. If the length +// of prefix is greater than 220 bytes, it is truncated to allow for a name that is no larger than 253 bytes. // revision-number allows us to avoid collisions if the created prefix-hash already exists in the history, since revision // will be unique. func revisionName(prefix string, hash string, revisionNumber int64) string { @@ -273,7 +274,7 @@ func revisionName(prefix string, hash string, revisionNumber int64) string { return fmt.Sprintf("%s-%s-%v", prefix, hash, revisionNumber) } -// HashRevision hashes the contents of revision's Data using FNV hashing. +// hashRevision hashes the contents of revision's Data using FNV hashing. // The returned hash will be a safe encoded string to avoid bad words. func hashRevision(revision *appsv1.ControllerRevision) string { hf := fnv.New32() From 6b292d738d3f832ed283f3b434f54236ea97bb0c Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Sat, 28 Dec 2024 00:25:09 +0000 Subject: [PATCH 20/27] further changing from templateHash to revisionKey --- pkg/controllers/leaderworkerset_controller.go | 8 +- .../leaderworkerset_controller_test.go | 48 ++--- pkg/controllers/pod_controller.go | 6 +- pkg/controllers/pod_controller_test.go | 28 +-- pkg/utils/revision/revision_utils.go | 185 +++++++++--------- test/testutils/util.go | 6 +- test/testutils/validators.go | 2 +- 7 files changed, 144 insertions(+), 139 deletions(-) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 124a003c..8f632ceb 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -386,7 +386,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l ready = true readyCount++ } - if (noWorkerSts || sts.Labels[leaderworkerset.RevisionKey] == revisionKey) && pod.Labels[leaderworkerset.RevisionKey] == revisionKey { + if (noWorkerSts || revisionutils.GetRevisionKey(&sts) == revisionKey) && revisionutils.GetRevisionKey(&pod) == revisionKey { updated = true updatedCount++ if index < int(*lws.Spec.Replicas) { @@ -527,7 +527,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return false } - podTemplateHash := sortedPods[index].Labels[leaderworkerset.RevisionKey] + podTemplateHash := revisionutils.GetRevisionKey(&sortedPods[index]) if !(podTemplateHash == revisionKey && podutils.PodRunningAndReady(sortedPods[index])) { return false } @@ -536,7 +536,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return true } - stsTemplateHash := sortedSts[index].Labels[leaderworkerset.RevisionKey] + stsTemplateHash := revisionutils.GetRevisionKey(&sortedSts[index]) return stsTemplateHash == revisionKey && statefulsetutils.StatefulsetReady(sortedSts[index]) } @@ -572,7 +572,7 @@ func (r *LeaderWorkerSetReconciler) 
getLeaderStatefulSet(ctx context.Context, lw func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { revisionKey := "" - if sts != nil && sts.Labels != nil { + if sts != nil { // Uses the hash in the leader sts to avoid detecting update in the case where LWS controller is upgraded from a version where // the revisionKey was used to detect update instead of controller revision. revisionKey = revisionutils.GetRevisionKey(sts) diff --git a/pkg/controllers/leaderworkerset_controller_test.go b/pkg/controllers/leaderworkerset_controller_test.go index 750cdb72..47ce07d9 100644 --- a/pkg/controllers/leaderworkerset_controller_test.go +++ b/pkg/controllers/leaderworkerset_controller_test.go @@ -46,7 +46,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { if err != nil { t.Fatal(err) } - hash1 := revisionutils.GetRevisionKey(cr1) + revisionKey1 := revisionutils.GetRevisionKey(cr1) lws2 := testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). WorkerTemplateSpec(testutils.MakeWorkerPodSpec()).Obj() @@ -54,17 +54,17 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { if err != nil { t.Fatal(err) } - hash2 := revisionutils.GetRevisionKey(cr2) + revisionKey2 := revisionutils.GetRevisionKey(cr2) tests := []struct { name string - templateHash string + revisionKey string lws *leaderworkerset.LeaderWorkerSet wantApplyConfig *appsapplyv1.StatefulSetApplyConfiguration }{ { - name: "1 replica, size 1, with empty leader template, exclusive placement disabled", - templateHash: hash2, + name: "1 replica, size 1, with empty leader template, exclusive placement disabled", + revisionKey: revisionKey2, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). Replica(1). RolloutStrategy(leaderworkerset.RolloutStrategy{ @@ -86,7 +86,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Namespace: ptr.To[string]("default"), Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash2, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey2, }, Annotations: map[string]string{"leaderworkerset.sigs.k8s.io/replicas": "1"}, }, @@ -103,7 +103,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash2, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey2, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", @@ -129,8 +129,8 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { }, }, { - name: "1 replica, size 2 , with empty leader template, exclusive placement enabled", - templateHash: hash2, + name: "1 replica, size 2 , with empty leader template, exclusive placement enabled", + revisionKey: revisionKey2, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). 
Annotation(map[string]string{ "leaderworkerset.sigs.k8s.io/exclusive-topology": "topologyKey", @@ -154,7 +154,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Namespace: ptr.To[string]("default"), Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash2, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey2, }, Annotations: map[string]string{"leaderworkerset.sigs.k8s.io/replicas": "1"}, }, @@ -171,7 +171,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash2, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey2, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -198,8 +198,8 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { }, }, { - name: "2 replica, size 2, with leader template, exclusive placement enabled", - templateHash: hash1, + name: "2 replica, size 2, with leader template, exclusive placement enabled", + revisionKey: revisionKey1, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Annotation(map[string]string{ "leaderworkerset.sigs.k8s.io/exclusive-topology": "topologyKey", }).Replica(2). @@ -223,7 +223,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Namespace: ptr.To[string]("default"), Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash1, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey1, }, Annotations: map[string]string{"leaderworkerset.sigs.k8s.io/replicas": "2"}, }, @@ -240,7 +240,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash1, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey1, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -266,8 +266,8 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { }, }, { - name: "2 maxUnavailable, 1 maxSurge, with empty leader template, exclusive placement disabled", - templateHash: hash2, + name: "2 maxUnavailable, 1 maxSurge, with empty leader template, exclusive placement disabled", + revisionKey: revisionKey2, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). Replica(1). 
RolloutStrategy(leaderworkerset.RolloutStrategy{ @@ -290,7 +290,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Namespace: ptr.To[string]("default"), Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash2, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey2, }, Annotations: map[string]string{"leaderworkerset.sigs.k8s.io/replicas": "1"}, }, @@ -307,7 +307,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash2, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey2, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", @@ -333,8 +333,8 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { }, }, { - name: "1 replica, size 2, with leader template, exclusive placement enabled, subgroupsize enabled", - templateHash: hash1, + name: "1 replica, size 2, with leader template, exclusive placement enabled, subgroupsize enabled", + revisionKey: revisionKey1, lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default").Annotation(map[string]string{ leaderworkerset.SubGroupExclusiveKeyAnnotationKey: "topologyKey", }).SubGroupSize(2).Replica(1). @@ -358,7 +358,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Namespace: ptr.To[string]("default"), Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash1, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey1, }, Annotations: map[string]string{"leaderworkerset.sigs.k8s.io/replicas": "1"}, }, @@ -375,7 +375,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { Labels: map[string]string{ "leaderworkerset.sigs.k8s.io/name": "test-sample", "leaderworkerset.sigs.k8s.io/worker-index": "0", - "leaderworkerset.sigs.k8s.io/template-revision-hash": hash1, + "leaderworkerset.sigs.k8s.io/template-revision-hash": revisionKey1, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -404,7 +404,7 @@ func TestLeaderStatefulSetApplyConfig(t *testing.T) { } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - stsApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(tc.lws, 0, *tc.lws.Spec.Replicas, tc.templateHash) + stsApplyConfig, err := constructLeaderStatefulSetApplyConfiguration(tc.lws, 0, *tc.lws.Spec.Replicas, tc.revisionKey) if err != nil { t.Errorf("failed with error: %s", err.Error()) } diff --git a/pkg/controllers/pod_controller.go b/pkg/controllers/pod_controller.go index 9a089ce7..4dac3714 100644 --- a/pkg/controllers/pod_controller.go +++ b/pkg/controllers/pod_controller.go @@ -119,7 +119,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R log.V(2).Info("defer the creation of the worker statefulset because leader pod is not ready.") return ctrl.Result{}, nil } - revision, err := revisionutils.GetRevision(ctx, r.Client, &leaderWorkerSet, pod.Labels[leaderworkerset.RevisionKey]) + revision, err := revisionutils.GetRevision(ctx, r.Client, &leaderWorkerSet, revisionutils.GetRevisionKey(&pod)) if err != nil { log.Error(err, "Getting lws revisions") return ctrl.Result{}, err @@ -186,7 +186,7 @@ func (r *PodReconciler) handleRestartPolicy(ctx context.Context, pod corev1.Pod, return false, err } // Different revision key means that this 
pod will be deleted soon and alternative will be created with the matching key - if leader.Labels[leaderworkerset.RevisionKey] != pod.Labels[leaderworkerset.RevisionKey] { + if revisionutils.GetRevisionKey(&leader) != revisionutils.GetRevisionKey(&pod) { return false, nil } } else { @@ -293,7 +293,7 @@ func constructWorkerStatefulSetApplyConfiguration(leaderPod corev1.Pod, lws lead leaderworkerset.GroupIndexLabelKey: leaderPod.Labels[leaderworkerset.GroupIndexLabelKey], leaderworkerset.SetNameLabelKey: lws.Name, leaderworkerset.GroupUniqueHashLabelKey: leaderPod.Labels[leaderworkerset.GroupUniqueHashLabelKey], - leaderworkerset.RevisionKey: leaderPod.Labels[leaderworkerset.RevisionKey], + leaderworkerset.RevisionKey: revisionutils.GetRevisionKey(&leaderPod), } podTemplateApplyConfiguration.WithLabels(labelMap) diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index 4a0992ae..3d8bb98f 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -42,14 +42,14 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { if err != nil { t.Fatal(err) } - updateTemplateHash := revisionutils.GetRevisionKey(updateRevision) + updateRevisionKey := revisionutils.GetRevisionKey(updateRevision) lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker" currentRevision, err := revisionutils.NewRevision(context.TODO(), client, lws, "") if err != nil { t.Fatal(err) } - currentTemplateHash := revisionutils.GetRevisionKey(currentRevision) + currentRevisionKey := revisionutils.GetRevisionKey(currentRevision) tests := []struct { name string @@ -70,7 +70,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, }, }, }, @@ -90,7 +90,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -108,7 +108,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", @@ -144,7 +144,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, }, }, }, @@ -166,7 +166,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -184,7 +184,7 
@@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "2", @@ -221,7 +221,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, }, }, }, @@ -242,7 +242,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Labels: map[string]string{ leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, }, @@ -260,7 +260,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { Labels: map[string]string{ leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", - leaderworkerset.RevisionKey: updateTemplateHash, + leaderworkerset.RevisionKey: updateRevisionKey, leaderworkerset.GroupUniqueHashLabelKey: "test-key", }, Annotations: map[string]string{ @@ -299,7 +299,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: currentTemplateHash, + leaderworkerset.RevisionKey: currentRevisionKey, }, }, }, @@ -319,7 +319,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: currentTemplateHash, + leaderworkerset.RevisionKey: currentRevisionKey, }, }, Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ @@ -337,7 +337,7 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { leaderworkerset.SetNameLabelKey: "test-sample", leaderworkerset.GroupIndexLabelKey: "1", leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: currentTemplateHash, + leaderworkerset.RevisionKey: currentRevisionKey, }, Annotations: map[string]string{ "leaderworkerset.sigs.k8s.io/size": "1", diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index 11b61bb8..f61c7649 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -28,40 +28,72 @@ import ( // Functions in this package are adapted from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/statefulset/ and // https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/history/controller_history.go -// EqualRevision returns true if lhs and rhs are either both nil, if the revisionKey is the same, -// or if they are semantically equivalent. -func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool { - if lhs == nil || rhs == nil { - return lhs == rhs +// NewRevision instantiates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet. 
+// The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned +// ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set +// to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet. +func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, revisionKey string) (*appsv1.ControllerRevision, error) { + var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet") + selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ + leaderworkerset.SetNameLabelKey: lws.Name, + }}) + if err != nil { + return nil, err + } + revisions, err := ListRevisions(ctx, k8sClient, lws, selector) + highestRevision := getHighestRevision(revisions) + revision := int64(1) + if highestRevision != nil { + revision = highestRevision.Revision + 1 + } + if err != nil { + return nil, err + } + patch, err := getPatch(lws) + if err != nil { + return nil, err } - if GetRevisionKey(lhs) == GetRevisionKey(rhs) { - return true + cr := &appsv1.ControllerRevision{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + leaderworkerset.SetNameLabelKey: lws.Name, + }, + OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(lws, controllerKind)}, + Namespace: lws.Namespace, + }, + Data: runtime.RawExtension{Raw: patch}, + Revision: revision, } - return bytes.Equal(lhs.Data.Raw, rhs.Data.Raw) && apiequality.Semantic.DeepEqual(lhs.Data.Object, rhs.Data.Object) + hash := hashRevision(cr) + if revisionKey == "" { + revisionKey = hash + } + cr.Name = revisionName(lws.Name, hash, revision) + cr.Labels[leaderworkerset.RevisionKey] = revisionKey + return cr, nil } -// ListRevisions lists all ControllerRevisions matching selector and owned by parent or no other -// controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the -// returned error is not nil, the returned slice is not valid. -func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { - // List all revisions in the namespace that match the selector - revisionList := new(appsv1.ControllerRevisionList) - err := k8sClient.List(ctx, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) - if err != nil { +func CreateRevision( + ctx context.Context, + k8sClient client.Client, + revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { + if err := k8sClient.Create(ctx, revision); err != nil { return nil, err } - history := revisionList.Items - var owned []*appsv1.ControllerRevision - for i := range history { - ref := metav1.GetControllerOfNoCopy(&history[i]) - if ref == nil || ref.UID == parent.GetUID() { - owned = append(owned, &history[i]) - } + created := &appsv1.ControllerRevision{} + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: revision.Namespace, Name: revision.Name}, created); err != nil { + return nil, err + } + return created, nil +} +func GetRevisionKey(obj metav1.Object) string { + if obj != nil && obj.GetLabels() != nil { + return obj.GetLabels()[leaderworkerset.RevisionKey] } - return owned, err + return "" } // GetRevision returns the controllerRevision that matches the revisionKey that is passed. 
A nil controllerRevision will be returned if the passed revisionKey is nil, @@ -91,76 +123,32 @@ func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker if len(revisions) > 1 { // Since we only create a controllerRevision when the template hash changes, only one should match log.Error(err, "More than one revision exists for the given template hash; returning the latest revision") - return revisions[len(revisions)-1], nil + return getHighestRevision(revisions), nil } return revisions[0], nil } -func GetRevisionKey(obj metav1.Object) string { - if obj.GetLabels() != nil { - return obj.GetLabels()[leaderworkerset.RevisionKey] - } - return "" -} - -func CreateRevision( - ctx context.Context, - k8sClient client.Client, - revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { - if err := k8sClient.Create(ctx, revision); err != nil { - return nil, err - } - created := &appsv1.ControllerRevision{} - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: revision.Namespace, Name: revision.Name}, created); err != nil { - return nil, err - } - return created, nil -} - -// NewRevision instantiates a new ControllerRevision containing a patch that reapplies the target state of LeaderWorkerSet. -// The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned -// ControllerRevision is valid. LeaderWorkerSet revisions are stored as patches that re-apply the current state of set -// to a new LeaderWorkerSet using a strategic merge patch to replace the saved state of the new LeaderWorkerSet. -func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, revisionKey string) (*appsv1.ControllerRevision, error) { - var controllerKind = leaderworkerset.GroupVersion.WithKind("LeaderWorkerSet") - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: map[string]string{ - leaderworkerset.SetNameLabelKey: lws.Name, - }}) - if err != nil { - return nil, err - } - revisions, err := ListRevisions(ctx, k8sClient, lws, selector) - revision := nextRevision(revisions) - if err != nil { - return nil, err - } - patch, err := getPatch(lws) +// ListRevisions lists all ControllerRevisions matching selector and owned by parent or no other +// controller. If the returned error is nil the returned slice of ControllerRevisions is valid. If the +// returned error is not nil, the returned slice is not valid. 
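The ownership rule ListRevisions applies is worth spelling out: revisions controlled by some other object are skipped, while orphans with no controller reference are kept, since they remain adoptable. A self-contained sketch of that rule with fabricated names and UIDs:

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

func main() {
	parentUID := types.UID("uid-parent")
	otherUID := types.UID("uid-other")
	isController := true

	revisions := []appsv1.ControllerRevision{
		{ObjectMeta: metav1.ObjectMeta{Name: "owned", OwnerReferences: []metav1.OwnerReference{{UID: parentUID, Controller: &isController}}}},
		{ObjectMeta: metav1.ObjectMeta{Name: "foreign", OwnerReferences: []metav1.OwnerReference{{UID: otherUID, Controller: &isController}}}},
		{ObjectMeta: metav1.ObjectMeta{Name: "orphan"}},
	}

	// Same rule as ListRevisions: keep revisions controlled by the parent,
	// plus orphans, which carry no controller reference at all.
	var owned []*appsv1.ControllerRevision
	for i := range revisions {
		ref := metav1.GetControllerOfNoCopy(&revisions[i])
		if ref == nil || ref.UID == parentUID {
			owned = append(owned, &revisions[i])
		}
	}
	for _, r := range owned {
		fmt.Println(r.Name) // owned, orphan
	}
}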
+func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { + // List all revisions in the namespace that match the selector + revisionList := new(appsv1.ControllerRevisionList) + err := k8sClient.List(ctx, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) if err != nil { return nil, err } + history := revisionList.Items + var owned []*appsv1.ControllerRevision + for i := range history { + ref := metav1.GetControllerOfNoCopy(&history[i]) + if ref == nil || ref.UID == parent.GetUID() { + owned = append(owned, &history[i]) + } - templateLabels := map[string]string{ - leaderworkerset.RevisionKey: revisionKey, - leaderworkerset.SetNameLabelKey: lws.Name, } - - cr := &appsv1.ControllerRevision{ - ObjectMeta: metav1.ObjectMeta{ - Labels: templateLabels, - OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(lws, controllerKind)}, - Namespace: lws.Namespace, - }, - Data: runtime.RawExtension{Raw: patch}, - Revision: revision, - } - - hash := hashRevision(cr) - cr.Name = revisionName(lws.Name, hash, revision) - if cr.Labels[leaderworkerset.RevisionKey] == "" { - cr.Labels[leaderworkerset.RevisionKey] = hash - } - return cr, nil + return owned, err } // ApplyRevision returns a new LeaderWorkerSet constructed by restoring the state in revision to set. If the returned error @@ -183,6 +171,20 @@ func ApplyRevision(lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.Contro return restoredLws, nil } +// EqualRevision returns true if lhs and rhs are either both nil, if the revisionKey is the same, +// or if they are semantically equivalent. +func EqualRevision(lhs *appsv1.ControllerRevision, rhs *appsv1.ControllerRevision) bool { + if lhs == nil || rhs == nil { + return lhs == rhs + } + + if GetRevisionKey(lhs) == GetRevisionKey(rhs) { + return true + } + + return bytes.Equal(lhs.Data.Raw, rhs.Data.Raw) && apiequality.Semantic.DeepEqual(lhs.Data.Object, rhs.Data.Object) +} + // TruncateRevisions cleans up all other controller revisions except the currentRevision. // currentRevision is the one that matches the revisionKey that is passed func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, revisionKey string) error { @@ -198,7 +200,7 @@ func TruncateRevisions(ctx context.Context, k8sClient client.Client, lws *leader } for i, revision := range revisions { - if revision.Labels[leaderworkerset.RevisionKey] != revisionKey { + if GetRevisionKey(revision) != revisionKey { if err := k8sClient.Delete(ctx, revisions[i]); err != nil { return err } @@ -245,21 +247,24 @@ func getPatch(lws *leaderworkerset.LeaderWorkerSet) ([]byte, error) { return json.Marshal(objCopy) } -// nextRevision finds the next valid revision number based on revisions. If the length of revisions -// is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. -func nextRevision(revisions []*appsv1.ControllerRevision) int64 { +// getHighestRevision returns the revision with the highest Revision value in revisions, or nil if +// revisions is empty. Callers derive the next revision number by adding 1 to the returned +// revision's Revision, treating an empty history as revision 1.
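getHighestRevision, defined next, is meant to hand back the element with the largest Revision so that NewRevision can derive the next revision number from it. A minimal sketch of that contract and the empty-history fallback (standalone code, not the patch's exact body; PATCH 24 below revisits this function's edge cases):

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
)

// highest returns the revision with the largest Revision value, or nil
// for an empty history.
func highest(revs []*appsv1.ControllerRevision) *appsv1.ControllerRevision {
	var best *appsv1.ControllerRevision
	for _, r := range revs {
		if best == nil || r.Revision > best.Revision {
			best = r
		}
	}
	return best
}

func main() {
	revs := []*appsv1.ControllerRevision{{Revision: 1}, {Revision: 3}, {Revision: 2}}
	next := int64(1) // an empty history starts at revision 1
	if h := highest(revs); h != nil {
		next = h.Revision + 1
	}
	fmt.Println(next) // 4
}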
+func getHighestRevision(revisions []*appsv1.ControllerRevision) *appsv1.ControllerRevision { count := len(revisions) if count <= 0 { - return 1 + return nil } max := int64(1) + var maxRevision *appsv1.ControllerRevision for _, revision := range revisions { if max < revision.Revision { max = revision.Revision + maxRevision = revision } } - return max + 1 + return maxRevision } // revisionName returns the Name for a ControllerRevision in the form prefix-hash-revisionnumber. If the length diff --git a/test/testutils/util.go b/test/testutils/util.go index 3dc9e173..d3197bfd 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -57,7 +57,7 @@ func CreateWorkerPodsForLeaderPod(ctx context.Context, leaderPod corev1.Pod, k8s leaderworkerset.SetNameLabelKey: lws.Name, "worker.pod": "workers", leaderworkerset.WorkerIndexLabelKey: strconv.Itoa(i), - leaderworkerset.RevisionKey: leaderPod.Labels[leaderworkerset.RevisionKey], + leaderworkerset.RevisionKey: revisionutils.GetRevisionKey(&leaderPod), }, Annotations: map[string]string{ leaderworkerset.SizeAnnotationKey: strconv.Itoa(int(*lws.Spec.LeaderWorkerTemplate.Size)), @@ -229,8 +229,8 @@ func GetLeaderPod(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, k8s if err != nil { return err } - if revisionutils.GetRevisionKey(cr) != pod.Labels[leaderworkerset.RevisionKey] { - return fmt.Errorf("TemplateHash does not match, expected %s, got %s", revisionutils.GetRevisionKey(cr), pod.Labels[leaderworkerset.RevisionKey]) + if revisionutils.GetRevisionKey(cr) != revisionutils.GetRevisionKey(pod) { + return fmt.Errorf("TemplateHash does not match, expected %s, got %s", revisionutils.GetRevisionKey(cr), revisionutils.GetRevisionKey(pod)) } return nil }, Timeout, Interval).Should(gomega.Succeed()) diff --git a/test/testutils/validators.go b/test/testutils/validators.go index d2644783..e05c0b21 100644 --- a/test/testutils/validators.go +++ b/test/testutils/validators.go @@ -280,7 +280,7 @@ func ExpectValidWorkerStatefulSets(ctx context.Context, leaderWorkerSet *leaderw return err } hash := revisionutils.GetRevisionKey(cr) - if sts.Labels[leaderworkerset.RevisionKey] != hash { + if revisionutils.GetRevisionKey(&sts) != hash { return fmt.Errorf("mismatch template revision hash for worker statefulset, got: %s, want: %s", revisionutils.GetRevisionKey(&sts), hash) } if *sts.Spec.Replicas != *lws.Spec.LeaderWorkerTemplate.Size-1 { From a065e9358e1d7b92b866d5b94689c41f6d5c92c7 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Sat, 28 Dec 2024 01:42:33 +0000 Subject: [PATCH 21/27] minor fixes --- pkg/controllers/leaderworkerset_controller.go | 17 ++-- pkg/controllers/pod_controller_test.go | 81 ------------------- pkg/utils/revision/revision_utils.go | 5 +- 3 files changed, 9 insertions(+), 94 deletions(-) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 8f632ceb..fdb07dd3 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -114,8 +114,9 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ log.Error(err, "Creating controller revision") return ctrl.Result{}, err } - - updatedRevision, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws, revision) + // none nil updatedRevision means that an update is detected. 
The revisionKey generated + // for updatedRevision will be applied to the leaderSts to trigger a rolling update + updatedRevision, err := r.getUpdateRevision(ctx, leaderSts, lws, revision) if err != nil { log.Error(err, "Validating if LWS has been updated") return ctrl.Result{}, err @@ -571,12 +572,10 @@ func (r *LeaderWorkerSetReconciler) getLeaderStatefulSet(ctx context.Context, lw } func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { - revisionKey := "" - if sts != nil { - // Uses the hash in the leader sts to avoid detecting update in the case where LWS controller is upgraded from a version where - // the revisionKey was used to detect update instead of controller revision. - revisionKey = revisionutils.GetRevisionKey(sts) - } + // Uses the revisionKey in the leader sts to avoid detecting update in the case where LWS controller is upgraded from a version where + // the revisionKey was used to detect update instead of controller revision. If the sts does not exist, the returned revisionKey will + // be nil. + revisionKey := revisionutils.GetRevisionKey(sts) if stsRevision, err := revisionutils.GetRevision(ctx, r.Client, lws, revisionKey); sts != nil || err != nil { return stsRevision, err } @@ -587,7 +586,7 @@ func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Co return revisionutils.CreateRevision(ctx, r.Client, revision) } -func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { +func (r *LeaderWorkerSetReconciler) getUpdateRevision(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { if sts == nil { return nil, nil } diff --git a/pkg/controllers/pod_controller_test.go b/pkg/controllers/pod_controller_test.go index 3d8bb98f..74238d71 100644 --- a/pkg/controllers/pod_controller_test.go +++ b/pkg/controllers/pod_controller_test.go @@ -44,13 +44,6 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { } updateRevisionKey := revisionutils.GetRevisionKey(updateRevision) - lws.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name = "worker" - currentRevision, err := revisionutils.NewRevision(context.TODO(), client, lws, "") - if err != nil { - t.Fatal(err) - } - currentRevisionKey := revisionutils.GetRevisionKey(currentRevision) - tests := []struct { name string pod *corev1.Pod @@ -287,80 +280,6 @@ func TestConstructWorkerStatefulSetApplyConfiguration(t *testing.T) { }, }, }, - { - name: "revision is before update, will use that and the old templateHash to create the worker statefulset configuration", - revision: currentRevision, - pod: &corev1.Pod{ - ObjectMeta: v1.ObjectMeta{ - Name: "test-sample", - Namespace: "default", - Labels: map[string]string{ - leaderworkerset.WorkerIndexLabelKey: "0", - leaderworkerset.SetNameLabelKey: "test-sample", - leaderworkerset.GroupIndexLabelKey: "1", - leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: currentRevisionKey, - }, - }, - }, - lws: testutils.BuildBasicLeaderWorkerSet("test-sample", "default"). - Replica(1). - WorkerTemplateSpec(testutils.MakeWorkerPodSpec()). 
- Size(1).Obj(), - wantStatefulSetConfig: &appsapplyv1.StatefulSetApplyConfiguration{ - TypeMetaApplyConfiguration: metaapplyv1.TypeMetaApplyConfiguration{ - Kind: ptr.To[string]("StatefulSet"), - APIVersion: ptr.To[string]("apps/v1"), - }, - ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ - Name: ptr.To[string]("test-sample"), - Namespace: ptr.To[string]("default"), - Labels: map[string]string{ - leaderworkerset.SetNameLabelKey: "test-sample", - leaderworkerset.GroupIndexLabelKey: "1", - leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: currentRevisionKey, - }, - }, - Spec: &appsapplyv1.StatefulSetSpecApplyConfiguration{ - Replicas: ptr.To[int32](0), - Selector: &metaapplyv1.LabelSelectorApplyConfiguration{ - MatchLabels: map[string]string{ - leaderworkerset.SetNameLabelKey: "test-sample", - leaderworkerset.GroupIndexLabelKey: "1", - leaderworkerset.GroupUniqueHashLabelKey: "test-key", - }, - }, - Template: &coreapplyv1.PodTemplateSpecApplyConfiguration{ - ObjectMetaApplyConfiguration: &metaapplyv1.ObjectMetaApplyConfiguration{ - Labels: map[string]string{ - leaderworkerset.SetNameLabelKey: "test-sample", - leaderworkerset.GroupIndexLabelKey: "1", - leaderworkerset.GroupUniqueHashLabelKey: "test-key", - leaderworkerset.RevisionKey: currentRevisionKey, - }, - Annotations: map[string]string{ - "leaderworkerset.sigs.k8s.io/size": "1", - "leaderworkerset.sigs.k8s.io/leader-name": "test-sample", - }, - }, - Spec: &coreapplyv1.PodSpecApplyConfiguration{ - Containers: []coreapplyv1.ContainerApplyConfiguration{ - { - Name: ptr.To[string]("worker"), - Image: ptr.To[string]("nginx:1.14.2"), - Ports: []coreapplyv1.ContainerPortApplyConfiguration{{ContainerPort: ptr.To[int32](8080), Protocol: ptr.To[corev1.Protocol](corev1.ProtocolTCP)}}, - Resources: &coreapplyv1.ResourceRequirementsApplyConfiguration{}, - }, - }, - }, - }, - Ordinals: &appsapplyv1.StatefulSetOrdinalsApplyConfiguration{Start: ptr.To[int32](1)}, - ServiceName: ptr.To[string]("test-sample"), - PodManagementPolicy: ptr.To[appsv1.PodManagementPolicyType](appsv1.ParallelPodManagement), - }, - }, - }, } for _, tc := range tests { diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index f61c7649..cf2d3007 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -75,10 +75,7 @@ func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker return cr, nil } -func CreateRevision( - ctx context.Context, - k8sClient client.Client, - revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { +func CreateRevision(ctx context.Context, k8sClient client.Client, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { if err := k8sClient.Create(ctx, revision); err != nil { return nil, err } From 8b62baa1b0f6d72200c737134b597b3a8d3ac4fc Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Sat, 28 Dec 2024 02:15:55 +0000 Subject: [PATCH 22/27] fixed all tests failing, still need to debug other tests --- pkg/controllers/leaderworkerset_controller.go | 23 ++++++++++--------- pkg/utils/revision/revision_utils.go | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index fdb07dd3..1137c037 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -114,9 +114,8 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx 
context.Context, req ctrl.Requ log.Error(err, "Creating controller revision") return ctrl.Result{}, err } - // none nil updatedRevision means that an update is detected. The revisionKey generated - // for updatedRevision will be applied to the leaderSts to trigger a rolling update - updatedRevision, err := r.getUpdateRevision(ctx, leaderSts, lws, revision) + + updatedRevision, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws, revision) if err != nil { log.Error(err, "Validating if LWS has been updated") return ctrl.Result{}, err @@ -387,7 +386,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l ready = true readyCount++ } - if (noWorkerSts || revisionutils.GetRevisionKey(&sts) == revisionKey) && revisionutils.GetRevisionKey(&pod) == revisionKey { + if (noWorkerSts || sts.Labels[leaderworkerset.RevisionKey] == revisionKey) && pod.Labels[leaderworkerset.RevisionKey] == revisionKey { updated = true updatedCount++ if index < int(*lws.Spec.Replicas) { @@ -528,7 +527,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return false } - podTemplateHash := revisionutils.GetRevisionKey(&sortedPods[index]) + podTemplateHash := sortedPods[index].Labels[leaderworkerset.RevisionKey] if !(podTemplateHash == revisionKey && podutils.PodRunningAndReady(sortedPods[index])) { return false } @@ -537,7 +536,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return true } - stsTemplateHash := revisionutils.GetRevisionKey(&sortedSts[index]) + stsTemplateHash := sortedSts[index].Labels[leaderworkerset.RevisionKey] return stsTemplateHash == revisionKey && statefulsetutils.StatefulsetReady(sortedSts[index]) } @@ -572,10 +571,12 @@ func (r *LeaderWorkerSetReconciler) getLeaderStatefulSet(ctx context.Context, lw } func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { - // Uses the revisionKey in the leader sts to avoid detecting update in the case where LWS controller is upgraded from a version where - // the revisionKey was used to detect update instead of controller revision. If the sts does not exist, the returned revisionKey will - // be nil. - revisionKey := revisionutils.GetRevisionKey(sts) + revisionKey := "" + if sts != nil { + // Uses the hash in the leader sts to avoid detecting update in the case where LWS controller is upgraded from a version where + // the revisionKey was used to detect update instead of controller revision. 
+ revisionKey = revisionutils.GetRevisionKey(sts) + } if stsRevision, err := revisionutils.GetRevision(ctx, r.Client, lws, revisionKey); sts != nil || err != nil { return stsRevision, err } @@ -586,7 +587,7 @@ func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Co return revisionutils.CreateRevision(ctx, r.Client, revision) } -func (r *LeaderWorkerSetReconciler) getUpdateRevision(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { +func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { if sts == nil { return nil, nil } diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index cf2d3007..39b9be63 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -87,7 +87,7 @@ func CreateRevision(ctx context.Context, k8sClient client.Client, revision *apps } func GetRevisionKey(obj metav1.Object) string { - if obj != nil && obj.GetLabels() != nil { + if obj.GetLabels() != nil { return obj.GetLabels()[leaderworkerset.RevisionKey] } return "" From 897e0092b1ac19dc35e3969cbc9582a9b6901d11 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Sat, 28 Dec 2024 02:47:31 +0000 Subject: [PATCH 23/27] added log messages to listRevision for debugging --- pkg/utils/revision/revision_utils.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index 39b9be63..763dabcc 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -131,12 +131,16 @@ func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker // returned error is not nil, the returned slice is not valid. 
func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { // List all revisions in the namespace that match the selector + log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(parent)) + ctx = ctrl.LoggerInto(ctx, log) + log.Error(nil, fmt.Sprintf("Looking up controller revision list with selector %v", selector)) revisionList := new(appsv1.ControllerRevisionList) err := k8sClient.List(ctx, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) if err != nil { return nil, err } history := revisionList.Items + log.Error(nil, fmt.Sprintf("Found %d items that matched the selector", len(history))) var owned []*appsv1.ControllerRevision for i := range history { ref := metav1.GetControllerOfNoCopy(&history[i]) @@ -145,6 +149,7 @@ func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.O } } + log.Error(nil, fmt.Sprintf("After filtering out the owned ones, we have %d", len(owned))) return owned, err } From 7822c18c36878594005d8c900b1b2a688d7b7d08 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Sat, 28 Dec 2024 03:12:06 +0000 Subject: [PATCH 24/27] fixed bug with getHighestRevision --- pkg/controllers/leaderworkerset_controller.go | 10 +++++----- pkg/utils/revision/revision_utils.go | 7 ++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 1137c037..36266766 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -115,7 +115,7 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, err } - updatedRevision, err := r.leaderWorkerSetUpdated(ctx, leaderSts, lws, revision) + updatedRevision, err := r.getUpdatedRevision(ctx, leaderSts, lws, revision) if err != nil { log.Error(err, "Validating if LWS has been updated") return ctrl.Result{}, err @@ -386,7 +386,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l ready = true readyCount++ } - if (noWorkerSts || sts.Labels[leaderworkerset.RevisionKey] == revisionKey) && pod.Labels[leaderworkerset.RevisionKey] == revisionKey { + if (noWorkerSts || revisionutils.GetRevisionKey(&sts) == revisionKey) && revisionutils.GetRevisionKey(&pod) == revisionKey { updated = true updatedCount++ if index < int(*lws.Spec.Replicas) { @@ -527,7 +527,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return false } - podTemplateHash := sortedPods[index].Labels[leaderworkerset.RevisionKey] + podTemplateHash := revisionutils.GetRevisionKey(&sortedPods[index]) if !(podTemplateHash == revisionKey && podutils.PodRunningAndReady(sortedPods[index])) { return false } @@ -536,7 +536,7 @@ func (r *LeaderWorkerSetReconciler) iterateReplicas(ctx context.Context, lws *le return true } - stsTemplateHash := sortedSts[index].Labels[leaderworkerset.RevisionKey] + stsTemplateHash := revisionutils.GetRevisionKey(&sortedSts[index]) return stsTemplateHash == revisionKey && statefulsetutils.StatefulsetReady(sortedSts[index]) } @@ -587,7 +587,7 @@ func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Co return revisionutils.CreateRevision(ctx, r.Client, revision) } -func (r *LeaderWorkerSetReconciler) leaderWorkerSetUpdated(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, 
revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { +func (r *LeaderWorkerSetReconciler) getUpdatedRevision(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { if sts == nil { return nil, nil } diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index 763dabcc..1a8c47de 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -133,14 +133,12 @@ func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.O // List all revisions in the namespace that match the selector log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(parent)) ctx = ctrl.LoggerInto(ctx, log) - log.Error(nil, fmt.Sprintf("Looking up controller revision list with selector %v", selector)) revisionList := new(appsv1.ControllerRevisionList) err := k8sClient.List(ctx, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) if err != nil { return nil, err } history := revisionList.Items - log.Error(nil, fmt.Sprintf("Found %d items that matched the selector", len(history))) var owned []*appsv1.ControllerRevision for i := range history { ref := metav1.GetControllerOfNoCopy(&history[i]) @@ -149,7 +147,6 @@ func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.O } } - log.Error(nil, fmt.Sprintf("After filtering out the owned ones, we have %d", len(owned))) return owned, err } @@ -258,10 +255,10 @@ func getHighestRevision(revisions []*appsv1.ControllerRevision) *appsv1.Controll return nil } - max := int64(1) + max := int64(0) var maxRevision *appsv1.ControllerRevision for _, revision := range revisions { - if max < revision.Revision { + if max <= revision.Revision { max = revision.Revision maxRevision = revision } From cafadfcfadad0f09e40107510fa99f584a08aa8c Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Sat, 28 Dec 2024 03:21:31 +0000 Subject: [PATCH 25/27] added log messages for create revision function --- pkg/utils/revision/revision_utils.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index 1a8c47de..350fe3d4 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -76,11 +76,14 @@ func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker } func CreateRevision(ctx context.Context, k8sClient client.Client, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { + log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(revision)) + ctx = ctrl.LoggerInto(ctx, log) if err := k8sClient.Create(ctx, revision); err != nil { return nil, err } created := &appsv1.ControllerRevision{} if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: revision.Namespace, Name: revision.Name}, created); err != nil { + log.Error(err, "failed to find the created revision") return nil, err } return created, nil @@ -131,8 +134,6 @@ func GetRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker // returned error is not nil, the returned slice is not valid. 
func ListRevisions(ctx context.Context, k8sClient client.Client, parent metav1.Object, selector labels.Selector) ([]*appsv1.ControllerRevision, error) { // List all revisions in the namespace that match the selector - log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(parent)) - ctx = ctrl.LoggerInto(ctx, log) revisionList := new(appsv1.ControllerRevisionList) err := k8sClient.List(ctx, revisionList, client.InNamespace(parent.GetNamespace()), client.MatchingLabelsSelector{Selector: selector}) if err != nil { From a856b3d3a7a7437f51f0a039e7d72f5c4f11d281 Mon Sep 17 00:00:00 2001 From: Edwinhr716 Date: Sat, 28 Dec 2024 03:43:26 +0000 Subject: [PATCH 26/27] removed fetch after creation, not needed --- pkg/controllers/leaderworkerset_controller.go | 4 ++-- pkg/utils/revision/revision_utils.go | 12 ++---------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/pkg/controllers/leaderworkerset_controller.go b/pkg/controllers/leaderworkerset_controller.go index 36266766..5b8d410e 100644 --- a/pkg/controllers/leaderworkerset_controller.go +++ b/pkg/controllers/leaderworkerset_controller.go @@ -122,7 +122,7 @@ func (r *LeaderWorkerSetReconciler) Reconcile(ctx context.Context, req ctrl.Requ } lwsUpdated := updatedRevision != nil if lwsUpdated { - revision, err = revisionutils.CreateRevision(ctx, r.Client, updatedRevision) + revision, err = revisionutils.CreateRevision(ctx, r.Client, updatedRevision, lws) if err != nil { log.Error(err, "Creating revision for updated LWS") return ctrl.Result{}, err @@ -584,7 +584,7 @@ func (r *LeaderWorkerSetReconciler) getOrCreateRevisionIfNonExist(ctx context.Co if err != nil { return nil, err } - return revisionutils.CreateRevision(ctx, r.Client, revision) + return revisionutils.CreateRevision(ctx, r.Client, revision, lws) } func (r *LeaderWorkerSetReconciler) getUpdatedRevision(ctx context.Context, sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { diff --git a/pkg/utils/revision/revision_utils.go b/pkg/utils/revision/revision_utils.go index 350fe3d4..b9ed627f 100644 --- a/pkg/utils/revision/revision_utils.go +++ b/pkg/utils/revision/revision_utils.go @@ -15,7 +15,6 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/rand" "k8s.io/apimachinery/pkg/util/strategicpatch" "k8s.io/klog/v2" @@ -75,18 +74,11 @@ func NewRevision(ctx context.Context, k8sClient client.Client, lws *leaderworker return cr, nil } -func CreateRevision(ctx context.Context, k8sClient client.Client, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) { - log := ctrl.LoggerFrom(ctx).WithValues("leaderworkerset", klog.KObj(revision)) - ctx = ctrl.LoggerInto(ctx, log) +func CreateRevision(ctx context.Context, k8sClient client.Client, revision *appsv1.ControllerRevision, lws *leaderworkerset.LeaderWorkerSet) (*appsv1.ControllerRevision, error) { if err := k8sClient.Create(ctx, revision); err != nil { return nil, err } - created := &appsv1.ControllerRevision{} - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: revision.Namespace, Name: revision.Name}, created); err != nil { - log.Error(err, "failed to find the created revision") - return nil, err - } - return created, nil + return revision, nil } func GetRevisionKey(obj metav1.Object) string { From 1faace7a6f53e937c4667d53116af6297a004830 Mon Sep 17 00:00:00 2001 
From: Edwinhr716
Date: Sat, 28 Dec 2024 04:21:28 +0000
Subject: [PATCH 27/27] removed e2e tests, will be added as integration tests instead

---
 test/e2e/e2e_test.go   | 58 ------------------------------------------
 test/testutils/util.go | 21 ---------------
 2 files changed, 79 deletions(-)

diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index fce7ed54..81416c9a 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -16,8 +16,6 @@ limitations under the License.
 package e2e
 
 import (
-	"fmt"
-
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
 	appsv1 "k8s.io/api/apps/v1"
@@ -303,60 +301,4 @@ var _ = ginkgo.Describe("leaderWorkerSet e2e tests", func() {
 			return numberOfPodsInCommon, nil
 		}, timeout, interval).Should(gomega.Equal(0))
 	})
-	ginkgo.It("Not updated worker StatefulSet restarted during rolling update will be restored with old worker spec", func() {
-		lws = testing.BuildLeaderWorkerSet(ns.Name).Replica(2).Size(2).Obj()
-		testing.MustCreateLws(ctx, k8sClient, lws)
-		testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready")
-
-		testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2)
-		testing.UpdateWorkerTemplate(ctx, k8sClient, lws)
-		gomega.Expect(k8sClient.Delete(ctx, &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: lws.Namespace, Name: lws.Name + "-0-1"}})).To(gomega.Succeed())
-		gomega.Eventually(func() error {
-			var sts appsv1.StatefulSet
-			if err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name + "-0", Namespace: lws.Namespace}, &sts); err != nil {
-				return err
-			}
-			// use the original lws object instead of newest, since we are comparing with the old worker template
-			podTemplateSpec := *lws.Spec.LeaderWorkerTemplate.WorkerTemplate.DeepCopy()
-			if sts.Spec.Template.Spec.Containers[0].Name != podTemplateSpec.Spec.Containers[0].Name {
-				return fmt.Errorf("StatefulSet did not have the expected container name")
-			}
-			return nil
-		}, timeout, interval).Should(gomega.Succeed())
-
-		// Rolling update finishes
-		testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2)
-		// All worker statfulsets have the updated version
-		testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true)
-		testing.ExpectValidPods(ctx, k8sClient, lws, &corev1.PodList{})
-		testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready")
-
-	})
-	ginkgo.It("Rolling update with restart policy RecreateGroupOnPodRestart only restarts the leader once", func() {
-		lws = testing.BuildLeaderWorkerSet(ns.Name).Replica(2).Size(2).MaxSurge(1).RestartPolicy(v1.RecreateGroupOnPodRestart).Obj()
-		testing.MustCreateLws(ctx, k8sClient, lws)
-		testing.ExpectLeaderWorkerSetAvailable(ctx, k8sClient, lws, "All replicas are ready")
-
-		initialLeaderPod := &corev1.Pod{}
-		testing.GetLeaderPod(ctx, lws, k8sClient, initialLeaderPod)
-		testing.UpdateWorkerTemplate(ctx, k8sClient, lws)
-
-		// Happens during update
-		testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 3)
-		midUpdateLeaderPod := &corev1.Pod{}
-		testing.GetLeaderPod(ctx, lws, k8sClient, midUpdateLeaderPod)
-
-		gomega.Eventually(func() (bool, error) {
-			return initialLeaderPod.UID == midUpdateLeaderPod.UID, nil
-		}, timeout, interval).Should(gomega.Equal(false))
-
-		testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2)
-		testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true)
-		finalLeaderPod := &corev1.Pod{}
-		testing.GetLeaderPod(ctx, lws, k8sClient, finalLeaderPod)
-
-		gomega.Eventually(func() (bool, error) {
-			return finalLeaderPod.UID == 
midUpdateLeaderPod.UID, nil - }, timeout, interval).Should(gomega.Equal(true)) - }) }) diff --git a/test/testutils/util.go b/test/testutils/util.go index d3197bfd..8c2c0060 100644 --- a/test/testutils/util.go +++ b/test/testutils/util.go @@ -215,27 +215,6 @@ func GetLeaderStatefulset(ctx context.Context, lws *leaderworkerset.LeaderWorker }, Timeout, Interval).Should(gomega.Succeed()) } -func GetLeaderPod(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, k8sClient client.Client, pod *corev1.Pod) { - gomega.Eventually(func() error { - if err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, lws); err != nil { - return err - } - leaderReplicaIndex := fmt.Sprintf("-%v", (int(*lws.Spec.Replicas) - 1)) - if err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name + leaderReplicaIndex, Namespace: lws.Namespace}, pod); err != nil { - return err - } - - cr, err := revisionutils.NewRevision(ctx, k8sClient, lws, "") - if err != nil { - return err - } - if revisionutils.GetRevisionKey(cr) != revisionutils.GetRevisionKey(pod) { - return fmt.Errorf("TemplateHash does not match, expected %s, got %s", revisionutils.GetRevisionKey(cr), revisionutils.GetRevisionKey(pod)) - } - return nil - }, Timeout, Interval).Should(gomega.Succeed()) -} - func GetStatefulSets(ctx context.Context, lws *leaderworkerset.LeaderWorkerSet, k8sClient client.Client, stsl *appsv1.StatefulSetList) { gomega.Eventually(func() (int, error) { if err := k8sClient.List(ctx, stsl, client.InNamespace(lws.Namespace)); err != nil {
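
A side note on the nil handling reshuffled in PATCH 22/27, where getOrCreateRevisionIfNonExist gained an explicit "if sts != nil" and GetRevisionKey lost its "obj != nil" guard: a Go interface value that wraps a typed nil pointer does not compare equal to nil, so the dropped guard could never have intercepted a nil *appsv1.StatefulSet passed as metav1.Object; only a caller-side pointer check can. A minimal, hypothetical demonstration of that gotcha (not code from this series):

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	var sts *appsv1.StatefulSet // typed nil pointer
	var obj metav1.Object = sts // the interface now wraps a typed nil
	fmt.Println(obj == nil)     // false: an "obj != nil" guard passes
	// obj.GetLabels() would panic with a nil pointer dereference here,
	// which is why the sts != nil check belongs in the caller.
}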
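
The getHighestRevision fix in PATCH 24/27 is easiest to see on the smallest possible history: seeded with max := int64(1) and a strict "<", a history whose only entry carries Revision == 1 never updates maxRevision, and the function returns nil even though a highest revision exists. Seeding with 0 and comparing with "<=" fixes that case and also lets the later of two equally numbered revisions win. A self-contained sketch of the corrected selection, with package scaffolding assumed (only the function body appears in the hunk):

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
)

// Mirrors the post-fix logic: start below every valid revision number and
// accept ties so the last matching candidate wins.
func getHighestRevision(revisions []*appsv1.ControllerRevision) *appsv1.ControllerRevision {
	if len(revisions) == 0 {
		return nil
	}
	max := int64(0)
	var maxRevision *appsv1.ControllerRevision
	for _, revision := range revisions {
		if max <= revision.Revision {
			max = revision.Revision
			maxRevision = revision
		}
	}
	return maxRevision
}

func main() {
	only := &appsv1.ControllerRevision{Revision: 1}
	fmt.Println(getHighestRevision([]*appsv1.ControllerRevision{only}).Revision)
	// Prints 1; with max seeded at 1 and a strict "<", this returned nil.
}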
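
The "removed fetch after creation, not needed" change in PATCH 26/27 relies on a controller-runtime behavior worth spelling out: client.Create decodes the API server's response back into the object it was handed, so the same pointer already carries the server-populated fields (UID, resourceVersion, defaulted values) and the follow-up Get was redundant. A minimal sketch under that assumption (the lws parameter added to CreateRevision in this patch is unused in the shown hunk and omitted here):

package revisionsketch

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// createRevision mirrors the post-PATCH-26 shape: after a successful Create,
// revision already reflects the persisted object, so it can be returned as-is.
func createRevision(ctx context.Context, c client.Client, revision *appsv1.ControllerRevision) (*appsv1.ControllerRevision, error) {
	if err := c.Create(ctx, revision); err != nil {
		return nil, err
	}
	return revision, nil
}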