Skip to content

Commit

Permalink
install: don't delete the 'aws-node' DaemonSet
Browse files Browse the repository at this point in the history
Instead of deleting the 'aws-node' DaemonSet, which is permanent, we
can be gentler and just evict its pods by setting a node selector that
will never match any node (unless someone deliberately opts in by adding
the required label to a node).

This allows 'cilium uninstall' to gracefully revert the 'aws-node'
DaemonSet to its previous state, as it might contain customizations
that would otherwise be lost.

Signed-off-by: Bruno Miguel Custódio <[email protected]>
  • Loading branch information
bmcustodio committed Jun 24, 2021
1 parent dd0df19 commit 7c26ae4
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 4 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/eks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ jobs:
- name: Wait for test job
run: |
kubectl -n kube-system wait job/cilium-cli --for=condition=complete --timeout=10m
- name: Make sure the 'aws-node' DaemonSet exists but has no scheduled pods
run: |
[[ $(kubectl -n kube-system get ds/aws-node -o jsonpath='{.status.currentNumberScheduled}') == 0 ]]
- name: Post-test information gathering
if: ${{ failure() }}
Expand Down
22 changes: 22 additions & 0 deletions install/aws.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2021 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package install

// Identifiers of the 'aws-node' DaemonSet and of the node selector that is
// set on its pod template so that its pods are evicted without the DaemonSet
// itself being deleted.
const (
	// AwsNodeDaemonSetName is the name of the 'aws-node' DaemonSet.
	AwsNodeDaemonSetName = "aws-node"

	// AwsNodeDaemonSetNamespace is the namespace in which the 'aws-node'
	// DaemonSet is looked up and patched.
	AwsNodeDaemonSetNamespace = "kube-system"

	// AwsNodeDaemonSetNodeSelectorKey is the node selector key written to the
	// DaemonSet's pod template on install and removed again on uninstall.
	AwsNodeDaemonSetNodeSelectorKey = "io.cilium/aws-node-enabled"

	// AwsNodeDaemonSetNodeSelectorValue is the value paired with
	// AwsNodeDaemonSetNodeSelectorKey in the node selector.
	AwsNodeDaemonSetNodeSelectorValue = "true"
)
8 changes: 5 additions & 3 deletions install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -1506,9 +1506,11 @@ func (k *K8sInstaller) Install(ctx context.Context) error {

switch k.flavor.Kind {
case k8s.KindEKS:
if _, err := k.client.GetDaemonSet(ctx, "kube-system", "aws-node", metav1.GetOptions{}); err == nil {
k.Log("🔥 Deleting aws-node DaemonSet...")
if err := k.client.DeleteDaemonSet(ctx, "kube-system", "aws-node", metav1.DeleteOptions{}); err != nil {
if _, err := k.client.GetDaemonSet(ctx, AwsNodeDaemonSetNamespace, AwsNodeDaemonSetName, metav1.GetOptions{}); err == nil {
k.Log("🔥 Patching the %q DaemonSet to evict its pods...", AwsNodeDaemonSetName)
patch := []byte(fmt.Sprintf(`{"spec":{"template":{"spec":{"nodeSelector":{"%s":"%s"}}}}}`, AwsNodeDaemonSetNodeSelectorKey, AwsNodeDaemonSetNodeSelectorValue))
if _, err := k.client.PatchDaemonSet(ctx, AwsNodeDaemonSetNamespace, AwsNodeDaemonSetName, types.StrategicMergePatchType, patch, metav1.PatchOptions{}); err != nil {
k.Log("❌ Unable to patch the %q DaemonSet", AwsNodeDaemonSetName)
return err
}
}
Expand Down
8 changes: 7 additions & 1 deletion install/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@ import (
"context"
"fmt"
"io"
"strings"
"time"

"github.com/cilium/cilium-cli/clustermesh"
"github.com/cilium/cilium-cli/defaults"
"github.com/cilium/cilium-cli/internal/k8s"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)

var retryInterval = 2 * time.Second
Expand Down Expand Up @@ -86,7 +88,11 @@ func (k *K8sUninstaller) Uninstall(ctx context.Context) error {

switch k.flavor.Kind {
case k8s.KindEKS:
k.Log("⚠️ The aws-node DaemonSet will still be missing. You have to re-create it.")
bytes := []byte(fmt.Sprintf(`[{"op":"remove","path":"/spec/template/spec/nodeSelector/%s"}]`, strings.ReplaceAll(AwsNodeDaemonSetNodeSelectorKey, "/", "~1")))
k.Log("⏪ Undoing the changes to the %q DaemonSet...", AwsNodeDaemonSetName)
if _, err := k.client.PatchDaemonSet(ctx, AwsNodeDaemonSetNamespace, AwsNodeDaemonSetName, types.JSONPatchType, bytes, metav1.PatchOptions{}); err != nil {
k.Log("❌ Failed to patch the %q DaemonSet, please remove it's node selector manually", AwsNodeDaemonSetName)
}
case k8s.KindGKE:
k.Log("🔥 Deleting GKE Node Init DaemonSet...")
k.client.DeleteDaemonSet(ctx, k.params.Namespace, gkeInitName, metav1.DeleteOptions{})
Expand Down

0 comments on commit 7c26ae4

Please sign in to comment.