diff --git a/.github/workflows/soak-interrupts.yaml b/.github/workflows/soak-interrupts.yaml new file mode 100644 index 000000000..c1dc48cf5 --- /dev/null +++ b/.github/workflows/soak-interrupts.yaml @@ -0,0 +1,212 @@ +name: Soak Interrupts Test + +permissions: read-all +on: + workflow_dispatch: + inputs: + branch: + description: 'Branch to run the workflow on' + required: true + default: 'main' + http2: + description: 'use http2 watcher' + required: false + default: 'false' + schedule: + - cron: '0 4 * * *' # 12AM EST/9PM PST + +jobs: + pepr-build: + name: controller image + runs-on: ubuntu-latest + steps: + - name: Harden Runner + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 + with: + egress-policy: audit + + - name: clone pepr + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + repository: defenseunicorns/pepr + path: pepr + + - name: "set env: PEPR" + run: echo "PEPR=${GITHUB_WORKSPACE}/pepr" >> "$GITHUB_ENV" + + - name: setup node + uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + with: + node-version: 20 + cache: "npm" + cache-dependency-path: pepr + + - name: install pepr deps + run: | + cd "$PEPR" + npm ci + + - name: build pepr image + run: | + cd "$PEPR" + npm run build:image + + - name: tar pepr image + run: | + PEPR_TAR="${GITHUB_WORKSPACE}/pepr-img.tar" + echo "PEPR_TAR=${PEPR_TAR}" >> "$GITHUB_ENV" + docker image save --output "$PEPR_TAR" pepr:dev + + - name: upload image tar artifact + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: pepr-img.tar + path: pepr-img.tar + retention-days: 1 + + soak-interrupts-test: + name: soak-interrupts-test + runs-on: ubuntu-latest + needs: + - pepr-build + + steps: + - name: Harden Runner + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 + with: + egress-policy: audit + + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + + - name: "install k3d" + run: "curl -s https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash" + shell: bash + + - name: dowload image tar artifact + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: pepr-img.tar + path: ${{ github.workspace }} + + - name: import pepr image from tar + run: | + PEPR_TAR="${GITHUB_WORKSPACE}/pepr-img.tar" + echo "PEPR_TAR=${PEPR_TAR}" >> "$GITHUB_ENV" + docker image load --input "$PEPR_TAR" + + - name: Create k3d Cluster + run: "k3d cluster create" + shell: bash + + - name: Import pepr image into k3d + run: "k3d image import pepr:dev -c k3s-default" + shell: bash + + - name: Install istioctl + run: | + curl -L https://istio.io/downloadIstio | sh - + shell: bash + + - name: Install default profile + run: | + cd istio*/bin + ./istioctl install --set profile=demo -y + + - name: Set up Kubernetes + uses: azure/setup-kubectl@3e0aec4d80787158d308d7b364cb1b702e7feb7f # v4.0.0 + with: + version: 'latest' + + - name: Create logs directory + run: mkdir -p logs + + - name: Deploy Pepr + if: ${{ (github.event.inputs.http2 || 'none') != 'true' }} + run: | + kubectl apply -f hack/soak.ci.yaml + + - name: Deploy http2 Pepr + if: ${{ (github.event.inputs.http2 || 'none') == 'true' }} + run: | + kubectl apply -f hack/soak-http2.ci.yaml + + - name: Deploy applications + run: | + kubectl apply -f hack/auditor.ci.yaml + + - name: Wait for 15 minutes before starting pod checks + run: | + sleep 10s + kubectl wait --for=condition=ready -n istio-system pod -l istio=pilot + kubectl wait --for=condition=ready -n istio-system pod -l app=istio-ingressgateway + kubectl wait --for=condition=ready -n watch-auditor pod -l app=watch-auditor + kubectl wait --for=condition=ready -n pepr-system pod -l app=pepr-soak-ci-watcher + + - name: Run the soak test and collect metrics + run: | + # Initialize the map to store pod counts + declare -A pod_map + + update_pod_map() { + for pod in $(kubectl get pods -n pepr-demo -o jsonpath='{.items[*].metadata.name}'); do + count=${pod_map[$pod]} + if [ -z "$count" ]; then + pod_map[$pod]=1 + else + pod_map[$pod]=$((count + 1)) + fi + done + } + touch logs/auditor-log.txt + touch logs/informer-log.txt + + update_pod_map + + collect_metrics() { + kubectl exec metrics-collector -n watch-auditor -- curl watch-auditor:8080/metrics | grep watch_controller_failures_total > logs/auditor-log.txt || true + kubectl exec metrics-collector -n watch-auditor -- curl -k https://pepr-soak-ci-watcher.pepr-system.svc.cluster.local/metrics | egrep -E "pepr_cache_miss|pepr_resync_failure_count" > logs/informer-log.txt + kubectl logs -n pepr-system deploy/pepr-soak-ci-watcher > logs/watch-log.txt + } + + # Start collecting metrics every 5 minutes and checking pod counts every 30 minutes + for i in {1..13}; do # 13 iterations cover 65 minutes (1 hours and 5 minutes) (Every 5 mins x 13 = 65 mins = 1 hour 5 mins) + collect_metrics + cat logs/informer-log.txt + cat logs/auditor-log.txt + if [ $((i % 2)) -eq 0 ]; then # Every 10 minutes + update_pod_map + + # get a list of pods every 10 mins + kubectl get pods -n pepr-demo + kubectl top po -n pepr-system + kubectl get po -n pepr-system + + # Verify that no pod's count exceeds 1 + for pod in "${!pod_map[@]}"; do + echo "$pod: ${pod_map[$pod]}" + if [ "${pod_map[$pod]}" -gt 1 ]; then + echo "Test failed: Pod $pod has count ${pod_map[$pod]}" + exit 1 + fi + done + + # Every 20 minutes, scale up or down the `watch-auditor` deployment + if [ $((i % 4)) -eq 0 ]; then + echo "Scaling down the watch-auditor deployment to 0 replicas" + kubectl scale deploy/watch-auditor -n watch-auditor --replicas=0 + else + echo "Scaling up the watch-auditor deployment to 1 replica" + kubectl scale deploy/watch-auditor -n watch-auditor --replicas=1 + fi + fi + sleep 300s # Sleep for 5 minutes before the next iteration + done + + echo "Soak interrupt test passed successfully!" + shell: bash + + - name: Upload logs + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: soak-test-logs + path: logs