.github: create clusters with taints
As recommended to users, cilium-cli should also create its clusters with
taints. This guarantees that Cilium is properly configured before
application pods are scheduled, so they can start successfully.

Signed-off-by: André Martins <[email protected]>
aanm authored and tklauser committed Jul 16, 2021
1 parent d19efde commit 3d2fbdb
Showing 7 changed files with 100 additions and 37 deletions.
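For context: the node.cilium.io/agent-not-ready=true:NoSchedule taint keeps application pods from being scheduled onto a node until Cilium is running there; once the agent is ready, Cilium removes the taint. A minimal sketch for inspecting the taint on a freshly created cluster (the node name is a placeholder, not part of this change):

# List every node with its taints; new nodes should still carry
# node.cilium.io/agent-not-ready=true:NoSchedule until Cilium comes up.
kubectl get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints

# Inspect one node in detail (replace <node-name>).
kubectl describe node <node-name> | grep -A 2 Taints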
39 changes: 37 additions & 2 deletions .github/workflows/aks.yaml
@@ -68,22 +68,46 @@ jobs:
echo ::set-output name=owner::${OWNER}
- name: Create AKS cluster
id: cluster-creation
run: |
# Create group
az group create \
--name ${{ env.name }} \
--location ${{ env.location }} \
--tags usage=${{ github.repository_owner }}-${{ github.event.repository.name }} owner=${{ steps.vars.outputs.owner }}
# Create the cluster with a node count of 1 (we will remove this node pool
# afterwards)
# Details: Basic load balancers are not supported with multiple node
# pools. Create a cluster with standard load balancer selected to use
# multiple node pools, learn more at https://aka.ms/aks/nodepools.
az aks create \
--resource-group ${{ env.name }} \
--name ${{ env.name }} \
--location ${{ env.location }} \
--network-plugin azure \
--node-count 2 \
--node-count 1 \
--load-balancer-sku standard \
--node-vm-size Standard_B2s \
--node-osdisk-size 30 \
--load-balancer-sku basic \
--generate-ssh-keys
# Get the name of the node pool that we will delete afterwards
echo ::set-output name=nodepool_to_delete::$(az aks nodepool list --cluster-name ${{ env.name }} -g ${{ env.name }} -o json | jq -r '.[0].name')
# Create a node pool with the taint 'node.cilium.io/agent-not-ready=true:NoSchedule'
# and with 'mode=system' as it is the same mode used for the nodepool
# created with the cluster.
az aks nodepool add \
--name nodepool2 \
--cluster-name ${{ env.name }} \
--resource-group ${{ env.name }} \
--node-count 2 \
--node-vm-size Standard_B2s \
--node-osdisk-size 30 \
--mode system \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule
- name: Get cluster credentials
run: |
az aks get-credentials \
@@ -98,6 +122,17 @@ jobs:
--wait=false \
--config monitor-aggregation=none
- name: Delete the first node pool
run: |
# We can only delete the first node pool after Cilium is installed
# because some pods have Pod Disruption Budgets set. If we try to
# delete the first node pool before the second node pool is ready,
# AKS will fail to delete it because the Pod Disruption Budgets
# prevent the affected Deployments' pods from being evicted.
az aks nodepool delete --name ${{ steps.cluster-creation.outputs.nodepool_to_delete }} \
--cluster-name ${{ env.name }} \
--resource-group ${{ env.name }}
- name: Enable Relay
run: |
cilium hubble enable
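The AKS changes above follow the pattern recommended for AKS installs: create the cluster with a single throwaway node pool and a standard load balancer, add a tainted system-mode pool, install Cilium, then delete the original pool once the tainted nodes are ready. A condensed, standalone sketch of that flow, assuming $RG and $CLUSTER are set and that the pool created with the cluster is named nodepool1 (the workflow looks the name up with jq instead of assuming it):

# 1. Cluster with one temporary node; a standard load balancer is required
#    to run more than one node pool.
az aks create \
  --resource-group "$RG" \
  --name "$CLUSTER" \
  --network-plugin azure \
  --node-count 1 \
  --load-balancer-sku standard \
  --generate-ssh-keys

# 2. Add a system-mode pool whose nodes start with the agent-not-ready taint.
az aks nodepool add \
  --resource-group "$RG" \
  --cluster-name "$CLUSTER" \
  --name nodepool2 \
  --mode system \
  --node-count 2 \
  --node-taints node.cilium.io/agent-not-ready=true:NoSchedule

# 3. Install Cilium, check that no Pod Disruption Budget still blocks
#    eviction, then drop the untainted pool that came with the cluster.
az aks get-credentials --resource-group "$RG" --name "$CLUSTER"
cilium install
kubectl get poddisruptionbudgets --all-namespaces
az aks nodepool delete \
  --resource-group "$RG" \
  --cluster-name "$CLUSTER" \
  --name nodepool1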
46 changes: 29 additions & 17 deletions .github/workflows/eks-tunnel.yaml
@@ -55,12 +55,36 @@ jobs:
echo ::set-output name=sha::${SHA}
echo ::set-output name=owner::${OWNER}
- name: Create EKS cluster without nodegroup
- name: Create EKS cluster with nodegroup
run: |
eksctl create cluster \
--name ${{ env.clusterName }} \
--tags "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--without-nodegroup
cat <<EOF > eks-config.yaml
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
name: ${{ env.clusterName }}
region: ${{ env.region }}
tags:
usage: "${{ github.repository_owner }}-${{ github.event.repository.name }}"
owner: "${{ steps.vars.outputs.owner }}"
managedNodeGroups:
- name: ng-1
instanceTypes:
- t3.medium
- t3a.medium
desiredCapacity: 2
spot: true
privateNetworking: true
volumeType: "gp3"
volumeSize: 10
taints:
- key: "node.cilium.io/agent-not-ready"
value: "true"
effect: "NoSchedule"
EOF
eksctl create cluster -f ./eks-config.yaml
- name: Create kubeconfig and load it in configmap
run: |
@@ -80,18 +104,6 @@ jobs:
--set job_name=cilium-cli-install \
--set test_script_cm=cilium-cli-test-script-install
- name: Add managed spot nodegroup
run: |
eksctl create nodegroup \
--cluster ${{ env.clusterName }} \
--nodes 2 \
--instance-types "t3.medium,t3a.medium" \
--node-volume-type gp3 \
--node-volume-size 10 \
--managed \
--spot \
--node-private-networking
- name: Make sure the 'aws-node' DaemonSet exists but has no scheduled pods
run: |
[[ $(kubectl -n kube-system get ds/aws-node -o jsonpath='{.status.currentNumberScheduled}') == 0 ]]
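With eksctl, taints for a managed nodegroup are declared in the ClusterConfig file, which is presumably why the nodegroup is now created from eks-config.yaml together with the cluster instead of via CLI flags in a separate step. A quick way to confirm the nodes really come up tainted before Cilium is installed (a sketch; jq is assumed to be available, as it already is in these workflows):

# Print each node followed by its taints; every node should list
# node.cilium.io/agent-not-ready=true:NoSchedule until the agent is ready.
kubectl get nodes -o json \
  | jq -r '.items[] | .metadata.name + "  " + ((.spec.taints // []) | map(.key + "=" + (.value // "") + ":" + .effect) | join(","))'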
46 changes: 29 additions & 17 deletions .github/workflows/eks.yaml
@@ -55,12 +55,36 @@ jobs:
echo ::set-output name=sha::${SHA}
echo ::set-output name=owner::${OWNER}
- name: Create EKS cluster without nodegroup
- name: Create EKS cluster with nodegroup
run: |
eksctl create cluster \
--name ${{ env.clusterName }} \
--tags "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--without-nodegroup
cat <<EOF > eks-config.yaml
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
name: ${{ env.clusterName }}
region: ${{ env.region }}
tags:
usage: "${{ github.repository_owner }}-${{ github.event.repository.name }}"
owner: "${{ steps.vars.outputs.owner }}"
managedNodeGroups:
- name: ng-1
instanceTypes:
- t3.medium
- t3a.medium
desiredCapacity: 2
spot: true
privateNetworking: true
volumeType: "gp3"
volumeSize: 10
taints:
- key: "node.cilium.io/agent-not-ready"
value: "true"
effect: "NoSchedule"
EOF
eksctl create cluster -f ./eks-config.yaml
- name: Create kubeconfig and load it in configmap
run: |
@@ -80,18 +104,6 @@ jobs:
--set job_name=cilium-cli-install \
--set test_script_cm=cilium-cli-test-script-install
- name: Add managed spot nodegroup
run: |
eksctl create nodegroup \
--cluster ${{ env.clusterName }} \
--nodes 2 \
--instance-types "t3.medium,t3a.medium" \
--node-volume-type gp3 \
--node-volume-size 10 \
--managed \
--spot \
--node-private-networking
- name: Make sure the 'aws-node' DaemonSet exists but has no scheduled pods
run: |
[[ $(kubectl -n kube-system get ds/aws-node -o jsonpath='{.status.currentNumberScheduled}') == 0 ]]
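The eks.yaml change mirrors eks-tunnel.yaml. When editing a ClusterConfig like the one above locally, recent eksctl releases can expand and validate it without creating anything; a sketch, assuming the file is saved as eks-config.yaml and the installed eksctl supports the flag:

# Print the fully-expanded ClusterConfig (including defaults) and exit;
# nothing is created.
eksctl create cluster -f ./eks-config.yaml --dry-run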
1 change: 1 addition & 0 deletions .github/workflows/externalworkloads.yaml
@@ -80,6 +80,7 @@ jobs:
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule \
--preemptible
CLUSTER_CIDR=$(gcloud container clusters describe ${{ env.clusterName }} --zone ${{ env.zone }} --format="value(clusterIpv4Cidr)")
echo ::set-output name=cluster_cidr::${CLUSTER_CIDR}
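On GKE the taint is a single flag at cluster creation time, as the one-line additions to the gcloud invocations in this file and in gke.yaml/multicluster.yaml show. A minimal standalone sketch with placeholder name and zone (the machine-type and disk flags used by the workflows are omitted for brevity):

# Create a small GKE cluster whose default node pool starts tainted, so
# application pods cannot schedule before Cilium is ready (Cilium's own
# DaemonSet tolerates the taint).
gcloud container clusters create "$CLUSTER" \
  --zone "$ZONE" \
  --num-nodes 2 \
  --node-taints node.cilium.io/agent-not-ready=true:NoSchedule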
1 change: 1 addition & 0 deletions .github/workflows/gke.yaml
@@ -64,6 +64,7 @@ jobs:
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule \
--preemptible
CLUSTER_CIDR=$(gcloud container clusters describe ${{ env.clusterName }} --zone ${{ env.zone }} --format="value(clusterIpv4Cidr)")
echo ::set-output name=cluster_cidr::${CLUSTER_CIDR}
2 changes: 2 additions & 0 deletions .github/workflows/multicluster.yaml
@@ -65,6 +65,7 @@ jobs:
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule \
--preemptible \
--enable-ip-alias
@@ -87,6 +88,7 @@ jobs:
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule \
--preemptible \
--enable-ip-alias
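Since multicluster.yaml creates two tainted clusters, a quick sanity check can loop over both kube contexts; a sketch, assuming $CONTEXT1 and $CONTEXT2 hold the kubeconfig context names of the two clusters:

# Confirm that every node in both clusters carries the agent-not-ready
# taint until Cilium is installed and ready there.
for ctx in "$CONTEXT1" "$CONTEXT2"; do
  echo "== $ctx =="
  kubectl --context "$ctx" get nodes \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.taints[*].key}{"\n"}{end}'
done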
2 changes: 1 addition & 1 deletion defaults/defaults.go
@@ -76,7 +76,7 @@ const (
ConnectivityCheckJSONMockImage = "quay.io/cilium/json-mock:v1.3.0@sha256:2729064827fa9dbfface8d3df424feb6c792a0ba07117b844349635c93c06d2b"

ConfigMapName = "cilium-config"
Version = "v1.10.2"
Version = "v1.10.3"

TunnelType = "vxlan"

