Skip to content

Fix clusteremesh status retrieval on Cilium v1.14 when kvstoremesh is enabled #9898

Fix clusteremesh status retrieval on Cilium v1.14 when kvstoremesh is enabled

Fix clusteremesh status retrieval on Cilium v1.14 when kvstoremesh is enabled #9898

name: External Workloads
# Any change in triggers needs to be reflected in the concurrency group.
on:
### FOR TESTING PURPOSES
# This workflow runs in the context of `main`, and ignores changes to
# workflow files in PRs. For testing changes to this workflow from a PR:
# - Make sure the PR uses a branch from the base repository (requires write
# privileges). It will not work with a branch from a fork (missing secrets).
# - Uncomment the `pull_request` event below, commit separately with a `DO
# NOT MERGE` message, and push to the PR. As long as the commit is present,
# any push to the PR will trigger this workflow.
# - Don't forget to remove the `DO NOT MERGE` commit once satisfied. The run
# will disappear from the PR checks: please provide a direct link to the
# successful workflow run (can be found from Actions tab) in a comment.
#
# pull_request: {}
###
pull_request_target: {}
# Run every 6 hours
schedule:
- cron: '0 4/6 * * *'
# By specifying the access of one of the scopes, all of those that are not
# specified are set to 'none'.
permissions:
# To be able to access the repository with actions/checkout
contents: read
# To be able to request the JWT from GitHub's OIDC provider
id-token: write
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || 'scheduled' }}
cancel-in-progress: true
env:
zone: us-west2-a
vmStartupScript: .github/gcp-vm-startup.sh
# renovate: datasource=github-releases depName=cilium/cilium
cilium_version: v1.15.5
kubectl_version: v1.23.6
USE_GKE_GCLOUD_AUTH_PLUGIN: True
jobs:
installation-and-connectivity:
name: External Workloads Installation and Connectivity Test
if: ${{ github.repository == 'cilium/cilium-cli' }}
runs-on: ubuntu-22.04
timeout-minutes: 45
steps:
- name: Set cluster name
run: |
echo "clusterName=${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-vm${{ github.run_attempt }}" >> $GITHUB_ENV
echo "vmName=${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-vm${{ github.run_attempt }}" >> $GITHUB_ENV
- name: Checkout
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- name: Install kubectl
run: |
curl -sLO "https://dl.k8s.io/release/${{ env.kubectl_version }}/bin/linux/amd64/kubectl"
curl -sLO "https://dl.k8s.io/${{ env.kubectl_version }}/bin/linux/amd64/kubectl.sha256"
echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
kubectl version --client
- name: Set up gcloud credentials
id: 'auth'
uses: google-github-actions/auth@71fee32a0bb7e97b4d33d548e7d957010649d8fa # v2.1.3
with:
workload_identity_provider: ${{ secrets.GCP_PR_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GCP_PR_SA_CLI }}
create_credentials_file: true
export_environment_variables: true
- name: Set up gcloud CLI
uses: google-github-actions/setup-gcloud@98ddc00a17442e89a24bbf282954a3b65ce6d200 # v2.1.0
with:
project_id: ${{ secrets.GCP_PR_PROJECT_ID }}
version: "405.0.0"
- name: Install gke-gcloud-auth-plugin
run: |
gcloud components install gke-gcloud-auth-plugin
- name: Display gcloud CLI info
run: |
gcloud info
- name: Set up job variables
id: vars
run: |
if [ ${{ github.event.issue.pull_request || github.event.pull_request }} ]; then
PR_API_JSON=$(curl \
-H "Accept: application/vnd.github.v3+json" \
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
${{ github.event.issue.pull_request.url || github.event.pull_request.url }})
SHA=$(echo "$PR_API_JSON" | jq -r ".head.sha")
OWNER=$(echo "$PR_API_JSON" | jq -r ".number")
else
SHA=${{ github.sha }}
OWNER=${{ github.sha }}
fi
echo "sha=${SHA}" >> $GITHUB_OUTPUT
echo "owner=${OWNER}" >> $GITHUB_OUTPUT
- name: Create GCP VM
uses: nick-invision/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
with:
retry_on: error
timeout_minutes: 1
max_attempts: 10
command: |
gcloud compute instances create ${{ env.vmName }} \
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--zone ${{ env.zone }} \
--machine-type e2-custom-2-4096 \
--boot-disk-type pd-standard \
--boot-disk-size 15GB \
--preemptible \
--image-project ubuntu-os-cloud \
--image-family ubuntu-2004-lts \
--metadata hostname=${{ env.vmName }} \
--metadata-from-file startup-script=${{ env.vmStartupScript}}
- name: Create GKE cluster
id: cluster
run: |
gcloud container clusters create ${{ env.clusterName }} \
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--zone ${{ env.zone }} \
--enable-ip-alias \
--cluster-ipv4-cidr="/21" \
--services-ipv4-cidr="/24" \
--image-type COS_CONTAINERD \
--num-nodes 2 \
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 15GB \
--node-taints node.cilium.io/agent-not-ready=true:NoExecute \
--preemptible
CLUSTER_CIDR=$(gcloud container clusters describe ${{ env.clusterName }} --zone ${{ env.zone }} --format="value(clusterIpv4Cidr)")
echo "cluster_cidr=${CLUSTER_CIDR}" >> $GITHUB_OUTPUT
- name: Get cluster credentials
run: |
gcloud container clusters get-credentials ${{ env.clusterName }} --zone ${{ env.zone }}
- name: Create gcloud-free kubeconfig and load it in configmap
run: |
.github/get-kubeconfig.sh
kubectl create configmap cilium-cli-kubeconfig -n kube-system --from-file kubeconfig
- name: Load cilium install script in configmap
run: |
kubectl create configmap cilium-cli-test-script-install -n kube-system --from-file=in-cluster-test-script.sh=.github/in-cluster-test-scripts/external-workloads-install.sh
- name: Create cilium-cli install job
run: |
helm install .github/cilium-cli-test-job-chart \
--generate-name \
--set tag=${{ steps.vars.outputs.sha }} \
--set cilium_version=${{ env.cilium_version }} \
--set cluster_name=${{ env.clusterName }} \
--set job_name=cilium-cli-install \
--set test_script_cm=cilium-cli-test-script-install \
--set vm_name=${{ env.vmName }} \
--set cluster_cidr=${{ steps.cluster.outputs.cluster_cidr }}
- name: Wait for install job
env:
timeout: 10m
run: |
# Background wait for job to complete or timeout
kubectl -n kube-system wait job/cilium-cli-install --for=condition=complete --timeout=${{ env.timeout }} &
complete_pid=$!
# Background wait for job to fail
(kubectl -n kube-system wait job/cilium-cli-install --for=condition=failed --timeout=${{ env.timeout }} && exit 1) &
failed_pid=$!
# Active wait for whichever background process ends first
wait -n $complete_pid $failed_pid
EXIT_CODE=$?
# Retrieve job logs
kubectl logs --timestamps -n kube-system job/cilium-cli-install
exit ${EXIT_CODE}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Copy VM install script from cilium-cli-install pod
run: |
kubectl -n kube-system get configmap install-external-workload-script -o go-template='{{ .data.script }}' > install-external-workload.sh
chmod +x install-external-workload.sh
- name: Finish installing Cilium on VM
run: |
gcloud compute scp install-external-workload.sh ${{ env.vmName }}:~/ --zone ${{ env.zone }}
gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} \
--command "~/install-external-workload.sh"
sleep 5s
gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} \
--command "sudo cilium status"
- name: Verify cluster DNS on VM
run: |
gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} \
--command "nslookup -d2 -retry=10 -timeout=5 -norecurse clustermesh-apiserver.kube-system.svc.cluster.local \$(systemd-resolve --status | grep -m 1 \"Current DNS Server:\" | cut -d':' -f2)"
- name: Ping clustermesh-apiserver from VM
run: |
gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} \
--command "ping -c 3 \$(sudo cilium service list get -o jsonpath='{[?(@.spec.flags.name==\"clustermesh-apiserver\")].spec.backend-addresses[0].ip}')"
- name: Load cilium test script in configmap
run: |
kubectl create configmap cilium-cli-test-script -n kube-system --from-file=in-cluster-test-script.sh=.github/in-cluster-test-scripts/external-workloads.sh
- name: Create cilium-cli test job
run: |
helm install .github/cilium-cli-test-job-chart \
--generate-name \
--set tag=${{ steps.vars.outputs.sha }} \
--set job_name=cilium-cli \
--set test_script_cm=cilium-cli-test-script
- name: Wait for test job
env:
timeout: 15m
run: |
# Background wait for job to complete or timeout
kubectl -n kube-system wait job/cilium-cli --for=condition=complete --timeout=${{ env.timeout }} &
complete_pid=$!
# Background wait for job to fail
(kubectl -n kube-system wait job/cilium-cli --for=condition=failed --timeout=${{ env.timeout }} && exit 1) &
failed_pid=$!
# Active wait for whichever background process ends first
wait -n $complete_pid $failed_pid
EXIT_CODE=$?
# Retrieve job logs
kubectl logs --timestamps -n kube-system job/cilium-cli
exit ${EXIT_CODE}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Post-test information gathering
if: ${{ !success() }}
run: |
echo "=== Retrieve VM state ==="
gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} --command "sudo cilium status"
gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} --command "sudo docker logs cilium --timestamps"
echo "=== Install latest stable CLI ==="
curl -sSL --remote-name-all https://github.com/cilium/cilium-cli/releases/latest/download/cilium-linux-amd64.tar.gz{,.sha256sum}
sha256sum --check cilium-linux-amd64.tar.gz.sha256sum
sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/bin
rm cilium-linux-amd64.tar.gz{,.sha256sum}
cilium version
echo "=== Retrieve cluster state ==="
kubectl get pods --all-namespaces -o wide
kubectl get cew --all-namespaces -o wide
kubectl get cep --all-namespaces -o wide
cilium status
cilium clustermesh status
cilium clustermesh vm status
cilium sysdump --output-filename cilium-sysdump-out
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Clean up GKE
if: ${{ always() }}
run: |
while [ "$(gcloud container operations list --filter="status=RUNNING AND targetLink~${{ env.clusterName }}" --format="value(name)")" ];do
echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 15
done
gcloud container clusters delete ${{ env.clusterName }} --zone ${{ env.zone }} --quiet --async
gcloud compute instances delete ${{ env.vmName }} --zone ${{ env.zone }} --quiet
shell: bash {0} # Disable default fail-fast behavior so that all commands run independently
- name: Upload artifacts
if: ${{ !success() }}
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
with:
name: cilium-sysdump-out.zip
path: cilium-sysdump-out.zip
retention-days: 5