Skip to content

Multicluster

Multicluster #11168

Workflow file for this run

name: Multicluster
# Any change in triggers needs to be reflected in the concurrency group.
on:
### FOR TESTING PURPOSES
# This workflow runs in the context of `main`, and ignores changes to
# workflow files in PRs. For testing changes to this workflow from a PR:
# - Make sure the PR uses a branch from the base repository (requires write
# privileges). It will not work with a branch from a fork (missing secrets).
# - Uncomment the `pull_request` event below, commit separately with a `DO
# NOT MERGE` message, and push to the PR. As long as the commit is present,
# any push to the PR will trigger this workflow.
# - Don't forget to remove the `DO NOT MERGE` commit once satisfied. The run
# will disappear from the PR checks: please provide a direct link to the
# successful workflow run (can be found from Actions tab) in a comment.
#
# pull_request: {}
###
pull_request_target: {}
# Run every 6 hours
schedule:
- cron: '0 3/6 * * *'
# By specifying the access of one of the scopes, all of those that are not
# specified are set to 'none'.
permissions:
# To be able to access the repository with actions/checkout
contents: read
# To be able to request the JWT from GitHub's OIDC provider
id-token: write
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || 'scheduled' }}
cancel-in-progress: true
env:
zone: us-west2-a
# renovate: datasource=github-releases depName=cilium/cilium
cilium_version: v1.16.4
kubectl_version: v1.23.6
USE_GKE_GCLOUD_AUTH_PLUGIN: True
jobs:
installation-and-connectivity:
name: Multicluster Installation and Connectivity Test
if: ${{ github.repository == 'cilium/cilium-cli' }}
runs-on: ubuntu-24.04
timeout-minutes: 45
strategy:
fail-fast: false
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
# Note: These names currently approach the limit of 40 characters
- name: Set mode-specific names
run: |
echo "clusterNameBase=${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}" >> $GITHUB_ENV
echo "clusterName1=${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-1" >> $GITHUB_ENV
echo "clusterName2=${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-2" >> $GITHUB_ENV
echo "firewallRuleName=${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}" >> $GITHUB_ENV
- name: Install kubectl
run: |
curl -sLO "https://dl.k8s.io/release/${{ env.kubectl_version }}/bin/linux/amd64/kubectl"
curl -sLO "https://dl.k8s.io/${{ env.kubectl_version }}/bin/linux/amd64/kubectl.sha256"
echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
kubectl version --client
- name: Set up gcloud credentials
id: 'auth'
uses: google-github-actions/auth@6fc4af4b145ae7821d527454aa9bd537d1f2dc5f # v2.1.7
with:
workload_identity_provider: ${{ secrets.GCP_PR_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GCP_PR_SA_CLI }}
create_credentials_file: true
export_environment_variables: true
- name: Set up gcloud CLI
uses: google-github-actions/setup-gcloud@6189d56e4096ee891640bb02ac264be376592d6a # v2.1.2
with:
project_id: ${{ secrets.GCP_PR_PROJECT_ID }}
version: "405.0.0"
- name: Install gke-gcloud-auth-plugin
run: |
gcloud components install gke-gcloud-auth-plugin
- name: Display gcloud CLI info
run: |
gcloud info
- name: Set up Go
uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
with:
# renovate: datasource=golang-version depName=go
go-version: 1.23.4
- name: Set up job variables
id: vars
run: |
if [ ${{ github.event.issue.pull_request || github.event.pull_request }} ]; then
PR_API_JSON=$(curl \
-H "Accept: application/vnd.github.v3+json" \
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
${{ github.event.issue.pull_request.url || github.event.pull_request.url }})
SHA=$(echo "$PR_API_JSON" | jq -r ".head.sha")
OWNER=$(echo "$PR_API_JSON" | jq -r ".number")
else
SHA=${{ github.sha }}
OWNER=${{ github.sha }}
fi
echo "sha=${SHA}" >> $GITHUB_OUTPUT
echo "owner=${OWNER}" >> $GITHUB_OUTPUT
- name: Create GKE cluster 2
run: |
gcloud container clusters create ${{ env.clusterName2 }} \
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--zone ${{ env.zone }} \
--enable-ip-alias \
--create-subnetwork="range=/26" \
--cluster-ipv4-cidr="/21" \
--services-ipv4-cidr="/24" \
--image-type COS_CONTAINERD \
--num-nodes 2 \
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 20GB \
--node-taints node.cilium.io/agent-not-ready=true:NoExecute \
--preemptible \
--async
- name: Create GKE cluster 1
run: |
gcloud container clusters create ${{ env.clusterName1 }} \
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--zone ${{ env.zone }} \
--enable-ip-alias \
--create-subnetwork="range=/26" \
--cluster-ipv4-cidr="/21" \
--services-ipv4-cidr="/24" \
--image-type COS_CONTAINERD \
--num-nodes 2 \
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 20GB \
--node-taints node.cilium.io/agent-not-ready=true:NoExecute \
--preemptible \
--async
- name: Wait for clusters to be provisioned
run: |
while [ "$(gcloud container operations list --filter="status=RUNNING AND targetLink~${{ env.clusterNameBase }}" --format="value(name)")" ];do
echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 10
done
- name: Get cluster credentials and save context names
id: contexts
run: |
gcloud container clusters get-credentials ${{ env.clusterName1 }} --zone ${{ env.zone }}
gcloud container clusters get-credentials ${{ env.clusterName2 }} --zone ${{ env.zone }}
CLUSTER1=$(kubectl config view | grep "${{ env.clusterName1 }}" | head -1 | awk '{print $2}')
CLUSTER2=$(kubectl config view | grep "${{ env.clusterName2 }}" | head -1 | awk '{print $2}')
echo "cluster1=${CLUSTER1}" >> $GITHUB_OUTPUT
echo "cluster2=${CLUSTER2}" >> $GITHUB_OUTPUT
- name: Allow cross-cluster traffic
run: |
TAG1=$(gcloud compute firewall-rules list --filter="name~^gke-${{ env.clusterName1 }}-[0-9a-z]*-all$" --format="value(name)")
TAG2=$(gcloud compute firewall-rules list --filter="name~^gke-${{ env.clusterName2 }}-[0-9a-z]*-all$" --format="value(name)")
gcloud compute firewall-rules describe $TAG1
gcloud compute firewall-rules describe $TAG2
gcloud compute firewall-rules create ${{ env.firewallRuleName }} --allow tcp,udp,icmp,sctp,esp,ah --priority=999 --source-ranges=10.0.0.0/9 --target-tags=${TAG1/-all/-node},${TAG2/-all/-node}
gcloud compute firewall-rules describe ${{ env.firewallRuleName }}
- name: Install Cilium CLI
uses: ./
with:
skip-build: 'true'
image-tag: ${{ steps.vars.outputs.sha }}
- name: Install Cilium and run tests
timeout-minutes: 60
run: |
# Install Cilium in cluster1
cilium install \
--version "${{ env.cilium_version }}" \
--context "${{ steps.contexts.outputs.cluster1 }}" \
--set loadBalancer.l7.backend=envoy \
--set tls.secretsBackend=k8s \
--set cluster.name="${{ env.clusterName1 }}" \
--set cluster.id=1 \
--set bpf.monitorAggregation=none \
--set ipv4NativeRoutingCIDR=10.0.0.0/9 \
--set hubble.eventQueueSize=65536
# Copy the CA cert from cluster1 to cluster2
kubectl --context ${{ steps.contexts.outputs.cluster1 }} get secrets -n kube-system cilium-ca -oyaml \
| kubectl --context ${{ steps.contexts.outputs.cluster2 }} apply -f -
# This seeds all CAs in cluster2 due to logic in the helm chart found here, e.g. for Hubble
# https://github.com/cilium/cilium/blob/8b6aa6eda91927275ae722ac020deeb5a9ce479d/install/kubernetes/cilium/templates/hubble/tls-helm/_helpers.tpl#L24-L33
# Install Cilium in cluster2
cilium install \
--version "${{ env.cilium_version }}" \
--context "${{ steps.contexts.outputs.cluster2 }}" \
--set loadBalancer.l7.backend=envoy \
--set tls.secretsBackend=k8s \
--set cluster.name="${{ env.clusterName2 }}" \
--set cluster.id=2 \
--set bpf.monitorAggregation=none \
--set ipv4NativeRoutingCIDR=10.0.0.0/9 \
--set hubble.eventQueueSize=65536
# Enable Relay
cilium --context "${{ steps.contexts.outputs.cluster1 }}" hubble enable
cilium --context "${{ steps.contexts.outputs.cluster2 }}" hubble enable --relay=false
# Wait for cilium and hubble relay to be ready
# NB: necessary to work against occassional flakes due to https://github.com/cilium/cilium-cli/issues/918
cilium --context "${{ steps.contexts.outputs.cluster1 }}" status --wait
cilium --context "${{ steps.contexts.outputs.cluster2 }}" status --wait
# Enable cluster mesh
# Test autodetection of service parameters for GKE
cilium --context "${{ steps.contexts.outputs.cluster1 }}" clustermesh enable
cilium --context "${{ steps.contexts.outputs.cluster2 }}" clustermesh enable
# Wait for cluster mesh status to be ready
cilium --context "${{ steps.contexts.outputs.cluster1 }}" clustermesh status --wait
cilium --context "${{ steps.contexts.outputs.cluster2 }}" clustermesh status --wait
# Print clustermesh Service annotations
printf "Service annotations for Cluster 1 %s\n" \
$(kubectl --context "${{ steps.contexts.outputs.cluster1 }}" get svc -n kube-system clustermesh-apiserver -o jsonpath='{.metadata.annotations}')
printf "Service annotations for Cluster 2 %s\n" \
$(kubectl --context "${{ steps.contexts.outputs.cluster2 }}" get svc -n kube-system clustermesh-apiserver -o jsonpath='{.metadata.annotations}')
# Connect clusters
cilium --context "${{ steps.contexts.outputs.cluster1 }}" clustermesh connect --destination-context "${{ steps.contexts.outputs.cluster2 }}"
# Wait for cluster mesh status to be ready
cilium --context "${{ steps.contexts.outputs.cluster1 }}" clustermesh status --wait
cilium --context "${{ steps.contexts.outputs.cluster2 }}" clustermesh status --wait
# Port forward Relay
cilium --context "${{ steps.contexts.outputs.cluster1 }}" hubble port-forward&
sleep 10s
nc -nvz 127.0.0.1 4245
# Run connectivity test
cilium --context "${{ steps.contexts.outputs.cluster1 }}" connectivity test --test-concurrency=5 \
--multi-cluster "${{ steps.contexts.outputs.cluster2 }}" --test '!/*-deny,!/pod-to-.*-nodeport' \
--all-flows --collect-sysdump-on-failure --external-target google.com. \
--log-check-levels error # don't check for warnings in logs, TODO: address warnings and include check again
- name: Post-test information gathering
if: ${{ !success() }}
run: |
echo "=== Retrieve cluster1 state ==="
kubectl --context "${{ steps.contexts.outputs.cluster1 }}" get pods --all-namespaces -o wide
cilium --context "${{ steps.contexts.outputs.cluster1 }}" status
cilium --context "${{ steps.contexts.outputs.cluster1 }}" clustermesh status
cilium --context "${{ steps.contexts.outputs.cluster1 }}" sysdump --output-filename cilium-sysdump-cluster1
echo "=== Retrieve cluster2 state ==="
kubectl --context "${{ steps.contexts.outputs.cluster2 }}" get pods --all-namespaces -o wide
cilium --context "${{ steps.contexts.outputs.cluster2 }}" status
cilium --context "${{ steps.contexts.outputs.cluster2 }}" clustermesh status
cilium --context "${{ steps.contexts.outputs.cluster2 }}" sysdump --output-filename cilium-sysdump-cluster2
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Clean up GKE
if: ${{ always() }}
run: |
while [ "$(gcloud container operations list --filter="status=RUNNING AND targetLink~${{ env.clusterNameBase }}" --format="value(name)")" ];do
echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 15
done
gcloud container clusters delete ${{ env.clusterName1 }} --zone ${{ env.zone }} --quiet --async
gcloud container clusters delete ${{ env.clusterName2 }} --zone ${{ env.zone }} --quiet --async
gcloud compute firewall-rules delete ${{ env.firewallRuleName }} --quiet
shell: bash {0} # Disable default fail-fast behavior so that all commands run independently
- name: Upload artifacts
if: ${{ !success() }}
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: cilium-sysdump-out.zip
path: |
cilium-sysdump-cluster1.zip
cilium-sysdump-cluster2.zip
retention-days: 5