Skip to content

Commit

Permalink
status: detect when DaemonSets and Deployments are rolling out
Browse files Browse the repository at this point in the history
There are a few additional states that indicate a Deployment or
DaemonSet is progressing and not yet fully deployed:

- ObservedGeneration < Generation means the relevant controller has not
  yet observed the changes
- UpdatedReplicas < Replicas (Deployment) or UpdatedNumberScheduled <
  DesiredNumberScheduled (DaemonSet) means that the update has not fully
  rolled out.

Fixes: #413, which was somehow reverted in a refactor

Signed-off-by: Casey Callendrello <[email protected]>
  • Loading branch information
squeed committed Feb 13, 2023
1 parent a8bd8f1 commit b928b20
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 11 deletions.
25 changes: 23 additions & 2 deletions status/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ import (
"sync"
"time"

"github.com/cilium/cilium/api/v1/models"
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
"github.com/cilium/workerpool"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/cilium/cilium/api/v1/models"
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"

"github.com/cilium/cilium-cli/defaults"
)

Expand Down Expand Up @@ -168,6 +169,16 @@ func (k *K8sStatusCollector) deploymentStatus(ctx context.Context, status *Statu
status.AddAggregatedWarning(name, name, fmt.Errorf("%d pods of Deployment %s are not available", unavailable, name))
}

// ObservedGeneration behind: DeploymentController has not yet noticed the latest change
if d.Generation != d.Status.ObservedGeneration {
status.AddAggregatedError(name, name, fmt.Errorf("deployment %s is updated but rollout has not started", name))
}

// Deployment change is not fully rolled out
if d.Status.UpdatedReplicas < d.Status.Replicas {
status.AddAggregatedError(name, name, fmt.Errorf("deployment %s is rolling out - %d out of %d pods updated", name, d.Status.UpdatedReplicas, d.Status.Replicas))
}

return false, nil
}

Expand Down Expand Up @@ -233,6 +244,16 @@ func (k *K8sStatusCollector) daemonSetStatus(ctx context.Context, status *Status
status.AddAggregatedWarning(name, name, fmt.Errorf("%d pods of DaemonSet %s are not available", unavailable, name))
}

// ObservedGeneration behind: DaemonSetController has not yet noticed the latest change
if daemonSet.Generation != daemonSet.Status.ObservedGeneration {
status.AddAggregatedError(name, name, fmt.Errorf("daemonset %s is updated but rollout has not started", name))
}

// DaemonSet change is not fully rolled out
if daemonSet.Status.UpdatedNumberScheduled < daemonSet.Status.DesiredNumberScheduled {
status.AddAggregatedError(name, name, fmt.Errorf("daemonset %s is rolling out - %d out of %d pods updated", name, daemonSet.Status.UpdatedNumberScheduled, daemonSet.Status.DesiredNumberScheduled))
}

return nil
}

Expand Down
38 changes: 29 additions & 9 deletions status/k8s_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ import (
"testing"
"time"

"github.com/cilium/cilium/api/v1/models"
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
"github.com/go-openapi/strfmt"
"gopkg.in/check.v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/cilium/cilium/api/v1/models"
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"

"github.com/cilium/cilium-cli/defaults"
)

Expand Down Expand Up @@ -73,19 +74,22 @@ func (c *k8sStatusMockClient) addPod(namespace, name, filter string, containers
})
}

func (c *k8sStatusMockClient) setDaemonSet(namespace, name, filter string, desired, ready, available, unavailable int32) {
func (c *k8sStatusMockClient) setDaemonSet(namespace, name, filter string, desired, ready, available, unavailable, updated int32, generation, obvsGeneration int64) {
c.daemonSet = map[string]*appsv1.DaemonSet{}

c.daemonSet[namespace+"/"+name] = &appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: namespace,
Name: name,
Namespace: namespace,
Generation: generation,
},
Status: appsv1.DaemonSetStatus{
DesiredNumberScheduled: desired,
NumberReady: ready,
NumberAvailable: available,
NumberUnavailable: unavailable,
UpdatedNumberScheduled: updated,
ObservedGeneration: obvsGeneration,
},
}

Expand Down Expand Up @@ -166,7 +170,7 @@ func (b *StatusSuite) TestStatus(c *check.C) {
c.Assert(err, check.IsNil)
c.Assert(collector, check.Not(check.IsNil))

client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 10, 10, 10, 0)
client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 10, 10, 10, 0, 10, 1, 1)
status, err := collector.Status(context.Background())
c.Assert(err, check.IsNil)
c.Assert(status, check.Not(check.IsNil))
Expand All @@ -179,7 +183,7 @@ func (b *StatusSuite) TestStatus(c *check.C) {
c.Assert(len(status.CiliumStatus), check.Equals, 10)

client.reset()
client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 10, 5, 5, 5)
client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 10, 5, 5, 5, 10, 2, 2)
status, err = collector.Status(context.Background())
c.Assert(err, check.IsNil)
c.Assert(status, check.Not(check.IsNil))
Expand All @@ -192,7 +196,7 @@ func (b *StatusSuite) TestStatus(c *check.C) {
c.Assert(len(status.CiliumStatus), check.Equals, 5)

client.reset()
client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 10, 5, 5, 5)
client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 10, 5, 5, 5, 10, 3, 3)
delete(client.status, "cilium-2")
status, err = collector.Status(context.Background())
c.Assert(err, check.IsNil)
Expand All @@ -205,6 +209,22 @@ func (b *StatusSuite) TestStatus(c *check.C) {
c.Assert(status.PhaseCount[defaults.AgentDaemonSetName][string(corev1.PodFailed)], check.Equals, 5)
c.Assert(len(status.CiliumStatus), check.Equals, 5)
c.Assert(status.CiliumStatus["cilium-2"], check.IsNil)

client.reset()
// observed generation behind
client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 5, 5, 5, 5, 5, 3, 2)
status, err = collector.Status(context.Background())
c.Assert(err, check.IsNil)
c.Assert(status, check.Not(check.IsNil))
c.Assert(status.Errors["cilium"]["cilium"].Errors, check.HasLen, 1)
c.Assert(status.Errors["cilium"]["cilium"].Errors[0], check.ErrorMatches, ".*rollout has not started.*")

client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 5, 5, 5, 5, 1, 3, 3)
status, err = collector.Status(context.Background())
c.Assert(err, check.IsNil)
c.Assert(status, check.Not(check.IsNil))
c.Assert(status.Errors["cilium"]["cilium"].Errors, check.HasLen, 1)
c.Assert(status.Errors["cilium"]["cilium"].Errors[0], check.ErrorMatches, ".*is rolling out.*")
}

func (b *StatusSuite) TestFormat(c *check.C) {
Expand All @@ -215,7 +235,7 @@ func (b *StatusSuite) TestFormat(c *check.C) {
c.Assert(err, check.IsNil)
c.Assert(collector, check.Not(check.IsNil))

client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 10, 5, 5, 5)
client.setDaemonSet("kube-system", defaults.AgentDaemonSetName, defaults.AgentPodSelector, 10, 5, 5, 5, 10, 4, 4)
delete(client.status, "cilium-2")

client.addPod("kube-system", "cilium-operator-1", "k8s-app=cilium-operator", []corev1.Container{{Image: "cilium-operator:1.9"}}, corev1.PodStatus{Phase: corev1.PodRunning})
Expand Down

0 comments on commit b928b20

Please sign in to comment.