From 3d2fbdbecb22b37b32417b6a548be1ae212ba3e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Martins?=
Date: Mon, 5 Jul 2021 16:09:49 +0200
Subject: [PATCH] .github: create clusters with taints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As recommended to users, cilium-cli should also create its test clusters
with taints. This guarantees that Cilium is properly configured before
application pods are scheduled, so they can start successfully.

Signed-off-by: André Martins
---
 .github/workflows/aks.yaml               | 39 ++++++++++++++++++--
 .github/workflows/eks-tunnel.yaml        | 46 +++++++++++++++---------
 .github/workflows/eks.yaml               | 46 +++++++++++++++---------
 .github/workflows/externalworkloads.yaml |  1 +
 .github/workflows/gke.yaml               |  1 +
 .github/workflows/multicluster.yaml      |  2 ++
 defaults/defaults.go                     |  2 +-
 7 files changed, 100 insertions(+), 37 deletions(-)

diff --git a/.github/workflows/aks.yaml b/.github/workflows/aks.yaml
index 4a3ccee97e..5ce46578ef 100644
--- a/.github/workflows/aks.yaml
+++ b/.github/workflows/aks.yaml
@@ -68,22 +68,46 @@ jobs:
           echo ::set-output name=owner::${OWNER}
 
       - name: Create AKS cluster
+        id: cluster-creation
         run: |
+          # Create group
           az group create \
             --name ${{ env.name }} \
             --location ${{ env.location }} \
             --tags usage=${{ github.repository_owner }}-${{ github.event.repository.name }} owner=${{ steps.vars.outputs.owner }}
+
+          # Create the cluster with a node count of 1 (we will remove this
+          # node pool afterwards).
+          # Details: Basic load balancers are not supported with multiple node
+          # pools. Create a cluster with standard load balancer selected to use
+          # multiple node pools, learn more at https://aka.ms/aks/nodepools.
           az aks create \
             --resource-group ${{ env.name }} \
             --name ${{ env.name }} \
             --location ${{ env.location }} \
             --network-plugin azure \
-            --node-count 2 \
+            --node-count 1 \
+            --load-balancer-sku standard \
             --node-vm-size Standard_B2s \
             --node-osdisk-size 30 \
-            --load-balancer-sku basic \
             --generate-ssh-keys
+
+          # Get the name of the node pool that we will delete afterwards
+          echo ::set-output name=nodepool_to_delete::$(az aks nodepool list --cluster-name ${{ env.name }} -g ${{ env.name }} -o json | jq -r '.[0].name')
+
+          # Create a node pool with the taint 'node.cilium.io/agent-not-ready=true:NoSchedule'
+          # and with 'mode=system', as it is the same mode used for the node
+          # pool created with the cluster.
+          az aks nodepool add \
+            --name nodepool2 \
+            --cluster-name ${{ env.name }} \
+            --resource-group ${{ env.name }} \
+            --node-count 2 \
+            --node-vm-size Standard_B2s \
+            --node-osdisk-size 30 \
+            --mode system \
+            --node-taints node.cilium.io/agent-not-ready=true:NoSchedule
 
       - name: Get cluster credentials
         run: |
           az aks get-credentials \
@@ -98,6 +122,17 @@ jobs:
             --wait=false \
             --config monitor-aggregation=none
 
+      - name: Delete the first node pool
+        run: |
+          # We can only delete the first node pool after Cilium is installed
+          # because some pods have Pod Disruption Budgets set. If we tried to
+          # delete the first node pool before the second node pool was ready,
+          # AKS would not allow the pool deletion, because the affected
+          # Deployments could no longer run anywhere in the cluster.
+          az aks nodepool delete --name ${{ steps.cluster-creation.outputs.nodepool_to_delete }} \
+            --cluster-name ${{ env.name }} \
+            --resource-group ${{ env.name }}
+
       - name: Enable Relay
         run: |
           cilium hubble enable
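A quick way to sanity-check the AKS node-pool swap above outside CI. This is a sketch, not part of the patch: "$NAME" stands in for the workflow's ${{ env.name }}, and az, jq, and kubectl are assumed to be configured as in the workflow.

  # After "Delete the first node pool" runs, 'nodepool2' should be the only
  # pool left in the cluster.
  az aks nodepool list --cluster-name "$NAME" -g "$NAME" -o json | jq -r '.[].name'

  # Until the Cilium agent is up on a node, the node should still carry the
  # agent-not-ready taint, which keeps application pods Pending.
  kubectl get nodes -o custom-columns='NAME:.metadata.name,TAINTS:.spec.taints[*].key'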
diff --git a/.github/workflows/eks-tunnel.yaml b/.github/workflows/eks-tunnel.yaml
index afe33789af..0462766913 100644
--- a/.github/workflows/eks-tunnel.yaml
+++ b/.github/workflows/eks-tunnel.yaml
@@ -55,12 +55,36 @@ jobs:
           echo ::set-output name=sha::${SHA}
           echo ::set-output name=owner::${OWNER}
 
-      - name: Create EKS cluster without nodegroup
+      - name: Create EKS cluster with nodegroup
         run: |
-          eksctl create cluster \
-            --name ${{ env.clusterName }} \
-            --tags "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
-            --without-nodegroup
+          cat <<EOF > eks-config.yaml
+          apiVersion: eksctl.io/v1alpha5
+          kind: ClusterConfig
+
+          metadata:
+            name: ${{ env.clusterName }}
+            region: ${{ env.region }}
+            tags:
+              usage: "${{ github.repository_owner }}-${{ github.event.repository.name }}"
+              owner: "${{ steps.vars.outputs.owner }}"
+
+          managedNodeGroups:
+          - name: ng-1
+            instanceTypes:
+            - t3.medium
+            - t3a.medium
+            desiredCapacity: 2
+            spot: true
+            privateNetworking: true
+            volumeType: "gp3"
+            volumeSize: 10
+            taints:
+            - key: "node.cilium.io/agent-not-ready"
+              value: "true"
+              effect: "NoSchedule"
+          EOF
+
+          eksctl create cluster -f ./eks-config.yaml
 
       - name: Create kubeconfig and load it in configmap
         run: |
@@ -80,18 +104,6 @@ jobs:
             --set job_name=cilium-cli-install \
             --set test_script_cm=cilium-cli-test-script-install
 
-      - name: Add managed spot nodegroup
-        run: |
-          eksctl create nodegroup \
-            --cluster ${{ env.clusterName }} \
-            --nodes 2 \
-            --instance-types "t3.medium,t3a.medium" \
-            --node-volume-type gp3 \
-            --node-volume-size 10 \
-            --managed \
-            --spot \
-            --node-private-networking
-
       - name: Make sure the 'aws-node' DaemonSet exists but has no scheduled pods
         run: |
           [[ $(kubectl -n kube-system get ds/aws-node -o jsonpath='{.status.currentNumberScheduled}') == 0 ]]
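The eksctl config above asks EKS to create the managed nodegroup already tainted, so no application pod can land on a node before Cilium does. A minimal spot-check, not part of the patch, assuming kubectl already points at the new cluster:

  # Every ng-1 node should report the taint while Cilium is absent; per the
  # guidance this patch follows, Cilium lifts it once the agent is ready.
  kubectl describe nodes | grep Taints
  # expected before 'cilium install':
  #   Taints: node.cilium.io/agent-not-ready=true:NoSchedule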
"t3.medium,t3a.medium" \ - --node-volume-type gp3 \ - --node-volume-size 10 \ - --managed \ - --spot \ - --node-private-networking - - name: Make sure the 'aws-node' DaemonSet exists but has no scheduled pods run: | [[ $(kubectl -n kube-system get ds/aws-node -o jsonpath='{.status.currentNumberScheduled}') == 0 ]] diff --git a/.github/workflows/externalworkloads.yaml b/.github/workflows/externalworkloads.yaml index 455e6c72e7..198d1ed1e7 100644 --- a/.github/workflows/externalworkloads.yaml +++ b/.github/workflows/externalworkloads.yaml @@ -80,6 +80,7 @@ jobs: --machine-type e2-custom-2-4096 \ --disk-type pd-standard \ --disk-size 10GB \ + --node-taints node.cilium.io/agent-not-ready=true:NoSchedule \ --preemptible CLUSTER_CIDR=$(gcloud container clusters describe ${{ env.clusterName }} --zone ${{ env.zone }} --format="value(clusterIpv4Cidr)") echo ::set-output name=cluster_cidr::${CLUSTER_CIDR} diff --git a/.github/workflows/gke.yaml b/.github/workflows/gke.yaml index 7803dbd99b..2ead4c117a 100644 --- a/.github/workflows/gke.yaml +++ b/.github/workflows/gke.yaml @@ -64,6 +64,7 @@ jobs: --machine-type e2-custom-2-4096 \ --disk-type pd-standard \ --disk-size 10GB \ + --node-taints node.cilium.io/agent-not-ready=true:NoSchedule \ --preemptible CLUSTER_CIDR=$(gcloud container clusters describe ${{ env.clusterName }} --zone ${{ env.zone }} --format="value(clusterIpv4Cidr)") echo ::set-output name=cluster_cidr::${CLUSTER_CIDR} diff --git a/.github/workflows/multicluster.yaml b/.github/workflows/multicluster.yaml index c0a9068956..bd2ac02c59 100644 --- a/.github/workflows/multicluster.yaml +++ b/.github/workflows/multicluster.yaml @@ -65,6 +65,7 @@ jobs: --machine-type e2-custom-2-4096 \ --disk-type pd-standard \ --disk-size 10GB \ + --node-taints node.cilium.io/agent-not-ready=true:NoSchedule \ --preemptible \ --enable-ip-alias @@ -87,6 +88,7 @@ jobs: --machine-type e2-custom-2-4096 \ --disk-type pd-standard \ --disk-size 10GB \ + --node-taints node.cilium.io/agent-not-ready=true:NoSchedule \ --preemptible \ --enable-ip-alias diff --git a/defaults/defaults.go b/defaults/defaults.go index 1236891e0f..46fd3c09ef 100644 --- a/defaults/defaults.go +++ b/defaults/defaults.go @@ -76,7 +76,7 @@ const ( ConnectivityCheckJSONMockImage = "quay.io/cilium/json-mock:v1.3.0@sha256:2729064827fa9dbfface8d3df424feb6c792a0ba07117b844349635c93c06d2b" ConfigMapName = "cilium-config" - Version = "v1.10.2" + Version = "v1.10.3" TunnelType = "vxlan"