Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix pod create failed with pd #286

Merged
merged 5 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 110 additions & 1 deletion pkg/controllers/collaset/collaset_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3237,7 +3237,7 @@ var _ = Describe("collaset controller", func() {
Expect(err).Should(BeNil())
}
return podDecoration.Status.UpdatedPods
}, 300*time.Second, 3*time.Second).Should(BeEquivalentTo(2))
}, 30*time.Second, 3*time.Second).Should(BeEquivalentTo(2))

Expect(expectedStatusReplicas(c, cs, 0, 0, 0, 2, 2, 0, 0, 0)).Should(BeNil())
Expect(c.List(context.TODO(), podList, client.InNamespace(cs.Namespace))).Should(BeNil())
Expand Down Expand Up @@ -3276,6 +3276,115 @@ var _ = Describe("collaset controller", func() {
return errors.IsNotFound(err)
}, 5*time.Second, 1*time.Second).Should(BeTrue())
})

It("pod create failed with PodDecoration", func() {
testcase := "test-pod-create-failed-with-poddecoration"
Expect(createNamespace(c, testcase)).Should(BeNil())

// create a bad podDecoration
podDecoration := &appsv1alpha1.PodDecoration{
ObjectMeta: metav1.ObjectMeta{
Namespace: testcase,
Name: "pd-foo",
},
Spec: appsv1alpha1.PodDecorationSpec{
HistoryLimit: 5,
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "foo",
},
},
Weight: int32Pointer(1),
UpdateStrategy: appsv1alpha1.PodDecorationUpdateStrategy{
RollingUpdate: &appsv1alpha1.PodDecorationRollingUpdate{
Partition: int32Pointer(0),
},
},
Template: appsv1alpha1.PodDecorationPodTemplate{
Containers: []*appsv1alpha1.ContainerPatch{
{
InjectPolicy: appsv1alpha1.AfterPrimaryContainer,
Container: corev1.Container{
Name: "sidecar",
Image: imageutils.GetE2EImage(imageutils.Nginx),
Command: []string{
"sleep",
"2h",
},
VolumeMounts: []corev1.VolumeMount{
{
Name: "non-exist",
MountPath: "/non/exist",
},
},
},
},
},
},
},
}
Expect(c.Create(context.TODO(), podDecoration)).Should(BeNil())

cs := &appsv1alpha1.CollaSet{
ObjectMeta: metav1.ObjectMeta{
Namespace: testcase,
Name: "foo",
},
Spec: appsv1alpha1.CollaSetSpec{
Replicas: int32Pointer(2),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "foo",
},
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"app": "foo",
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "foo",
Image: "nginx:v1",
},
},
},
},
},
}
Expect(c.Create(context.TODO(), cs)).Should(BeNil())

// create pod failed
Eventually(func() bool {
err := c.Get(context.TODO(), types.NamespacedName{Namespace: cs.Namespace, Name: cs.Name}, cs)
if errors.IsNotFound(err) {
return false
}
Expect(err).Should(BeNil())

for _, cond := range cs.Status.Conditions {
if cond.Reason == "ScaleOutFailed" {
return true
}
}
return false
}, 30*time.Second, 3*time.Second).Should(BeTrue())

// update podDecoration with good manner
Expect(c.Get(context.TODO(), types.NamespacedName{Namespace: podDecoration.Namespace, Name: podDecoration.Name}, podDecoration)).Should(BeNil())
podDecoration.Spec.Template.Containers[0].VolumeMounts = []corev1.VolumeMount{}
Expect(c.Update(ctx, podDecoration)).Should(BeNil())

// check pod create succeeded
podList := &corev1.PodList{}
Eventually(func() bool {
Expect(c.List(context.TODO(), podList, client.InNamespace(cs.Namespace))).Should(BeNil())
return len(podList.Items) == 2
}, 5*time.Second, 1*time.Second).Should(BeTrue())
Expect(c.Get(context.TODO(), types.NamespacedName{Namespace: cs.Namespace, Name: cs.Name}, cs)).Should(BeNil())
})
})

func expectedStatusReplicas(c client.Client, cls *appsv1alpha1.CollaSet, scheduledReplicas, readyReplicas, availableReplicas, replicas, updatedReplicas, operatingReplicas,
Expand Down
14 changes: 9 additions & 5 deletions pkg/controllers/collaset/synccontrol/sync_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"strconv"
"sync/atomic"
"time"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -316,12 +317,13 @@ func (r *RealSyncControl) Scale(
podInstanceIDSet := collasetutils.CollectPodInstanceID(activePods)
// find IDs and their contexts which have not been used by owned Pods
availableContext := extractAvailableContexts(diff, ownedIDs, podInstanceIDSet)
needUpdateContext := false
needUpdateContext := atomic.Bool{}
succCount, err := controllerutils.SlowStartBatch(diff, controllerutils.SlowStartInitialBatchSize, false, func(idx int, _ error) (err error) {
availableIDContext := availableContext[idx]
shouldInitPDContext := false
defer func() {
needUpdateContext = decideContextRevision(availableIDContext, resources.UpdatedRevision, err == nil) || shouldInitPDContext
if decideContextRevision(availableIDContext, resources.UpdatedRevision, err == nil) {
needUpdateContext.Store(true)
}
}()
// use revision recorded in Context
revision := resources.UpdatedRevision
Expand Down Expand Up @@ -349,7 +351,7 @@ func (r *RealSyncControl) Scale(
if localErr != nil {
return localErr
}
shouldInitPDContext = true
needUpdateContext.Store(true)
availableIDContext.Put(podcontext.PodDecorationRevisionKey, anno.GetDecorationInfoString(pds))
} else {
// upgrade by recreate pod case
Expand Down Expand Up @@ -385,7 +387,7 @@ func (r *RealSyncControl) Scale(
// add an expectation for this pod creation, before next reconciling
return collasetutils.ActiveExpectations.ExpectCreate(cls, expectations.Pod, pod.Name)
})
if needUpdateContext {
if needUpdateContext.Load() {
logger.V(1).Info("try to update ResourceContext for CollaSet after scaling out")
if updateContextErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
return podcontext.UpdateToPodContext(r.client, cls, ownedIDs)
Expand Down Expand Up @@ -599,9 +601,11 @@ func decideContextRevision(contextDetail *appsv1alpha1.ContextDetail, updatedRev
if contextDetail.Contains(podcontext.JustCreateContextDataKey, "true") {
// TODO choose just create pods' revision according to scaleStrategy
contextDetail.Put(podcontext.RevisionContextDataKey, updatedRevision.Name)
delete(contextDetail.Data, podcontext.PodDecorationRevisionKey)
needUpdateContext = true
} else if contextDetail.Contains(podcontext.RecreateUpdateContextDataKey, "true") {
contextDetail.Put(podcontext.RevisionContextDataKey, updatedRevision.Name)
delete(contextDetail.Data, podcontext.PodDecorationRevisionKey)
needUpdateContext = true
}
// if pod is delete and recreate, never change revisionKey
Expand Down
Loading