From 01c7660660da2da7989adef7c66ccf098287415c Mon Sep 17 00:00:00 2001
From: Case Wylie
Date: Thu, 23 Jan 2025 11:52:05 -0500
Subject: [PATCH] chore: remove soak-interrupts.yaml (#1714)

## Description

The intention behind the soak-interrupts test was to exercise the informer
pattern by disconnecting the watch and using `LAST_SEEN_TIME` and relist to
trigger fetch calls against watched resources, updating the ephemeral informer
cache and dispatching missed events. There is currently no way to "turn off"
the watch connection without also turning off the "list", since both
communicate with the kube-apiserver. If there were a way, it would need to be
a new feature in KFC.

As the test is written now, it scales down the Pepr pod, brings it back up,
and expects the missed events to be processed. That case is already exercised
by nearly every excellent example that has a watch and by the UDS test; a
minimal sketch of the check follows the diff below. There is nothing unique
about this test.

## Related Issue

Fixes #1708

Relates to #

## Type of change

- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [x] Other (security config, docs update, etc)

## Checklist before merging

- [x] Unit, [Journey](https://github.com/defenseunicorns/pepr/tree/main/journey), [E2E Tests](https://github.com/defenseunicorns/pepr-excellent-examples), [docs](https://github.com/defenseunicorns/pepr/tree/main/docs), [adr](https://github.com/defenseunicorns/pepr/tree/main/adr) added or updated as needed
- [x] [Contributor Guide Steps](https://docs.pepr.dev/main/contribute/#submitting-a-pull-request) followed

Signed-off-by: Case Wylie
---
 .github/workflows/soak-interrupts.yaml | 202 -------------------------
 1 file changed, 202 deletions(-)
 delete mode 100644 .github/workflows/soak-interrupts.yaml

diff --git a/.github/workflows/soak-interrupts.yaml b/.github/workflows/soak-interrupts.yaml
deleted file mode 100644
index 3b78c3b9f..000000000
--- a/.github/workflows/soak-interrupts.yaml
+++ /dev/null
@@ -1,202 +0,0 @@
-name: Soak Interrupts Test
-
-permissions: read-all
-on:
-  workflow_dispatch:
-    inputs:
-      branch:
-        description: 'Branch to run the workflow on'
-        required: true
-        default: 'main'
-  schedule:
-    - cron: '0 4 * * *' # 12AM EST/9PM PST
-
-jobs:
-  pepr-build:
-    name: controller image
-    runs-on: ubuntu-latest
-    steps:
-      - name: Harden Runner
-        uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
-        with:
-          egress-policy: audit
-
-      - name: clone pepr
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          repository: defenseunicorns/pepr
-          path: pepr
-
-      - name: "set env: PEPR"
-        run: echo "PEPR=${GITHUB_WORKSPACE}/pepr" >> "$GITHUB_ENV"
-
-      - name: setup node
-        uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
-        with:
-          node-version: 20
-          cache: "npm"
-          cache-dependency-path: pepr
-
-      - name: install pepr deps
-        run: |
-          cd "$PEPR"
-          npm ci
-
-      - name: build pepr image
-        run: |
-          cd "$PEPR"
-          npm run build:image
-
-      - name: tar pepr image
-        run: |
-          PEPR_TAR="${GITHUB_WORKSPACE}/pepr-img.tar"
-          echo "PEPR_TAR=${PEPR_TAR}" >> "$GITHUB_ENV"
-          docker image save --output "$PEPR_TAR" pepr:dev
-
-      - name: upload image tar artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        with:
-          name: pepr-img.tar
-          path: pepr-img.tar
-          retention-days: 1
-
-  soak-interrupts-test:
-    name: soak-interrupts-test
-    runs-on: ubuntu-latest
-    needs:
-      - pepr-build
-
-    steps:
-      - name: Harden Runner
-        uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: "install k3d"
-        run: "curl -s https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash"
-        shell: bash
-
-      - name: dowload image tar artifact
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: pepr-img.tar
-          path: ${{ github.workspace }}
-
-      - name: import pepr image from tar
-        run: |
-          PEPR_TAR="${GITHUB_WORKSPACE}/pepr-img.tar"
-          echo "PEPR_TAR=${PEPR_TAR}" >> "$GITHUB_ENV"
-          docker image load --input "$PEPR_TAR"
-
-      - name: Create k3d Cluster
-        run: "k3d cluster create"
-        shell: bash
-
-      - name: Import pepr image into k3d
-        run: "k3d image import pepr:dev -c k3s-default"
-        shell: bash
-
-      - name: Install istioctl
-        run: |
-          curl -L https://istio.io/downloadIstio | sh -
-        shell: bash
-
-      - name: Install default profile
-        run: |
-          cd istio*/bin
-          ./istioctl install --set profile=demo -y
-
-      - name: Set up Kubernetes
-        uses: azure/setup-kubectl@3e0aec4d80787158d308d7b364cb1b702e7feb7f # v4.0.0
-        with:
-          version: 'latest'
-
-      - name: Create logs directory
-        run: mkdir -p logs
-
-      - name: Deploy Pepr
-        run: |
-          kubectl apply -f hack/soak.ci.yaml
-
-      - name: Deploy applications
-        run: |
-          kubectl apply -f hack/auditor.ci.yaml
-
-      - name: Wait for 15 minutes before starting pod checks
-        run: |
-          sleep 10s
-          kubectl wait --for=condition=ready -n istio-system pod -l istio=pilot
-          kubectl wait --for=condition=ready -n istio-system pod -l app=istio-ingressgateway
-          kubectl wait --for=condition=ready -n watch-auditor pod -l app=watch-auditor
-          kubectl wait --for=condition=ready -n pepr-system pod -l app=pepr-soak-ci-watcher
-
-      - name: Run the soak test and collect metrics
-        run: |
-          # Initialize the map to store pod counts
-          declare -A pod_map
-
-          update_pod_map() {
-            for pod in $(kubectl get pods -n pepr-demo -o jsonpath='{.items[*].metadata.name}'); do
-              count=${pod_map[$pod]}
-              if [ -z "$count" ]; then
-                pod_map[$pod]=1
-              else
-                pod_map[$pod]=$((count + 1))
-              fi
-            done
-          }
-          touch logs/auditor-log.txt
-          touch logs/informer-log.txt
-
-          update_pod_map
-
-          collect_metrics() {
-            kubectl exec metrics-collector -n watch-auditor -- curl watch-auditor:8080/metrics | grep watch_controller_failures_total > logs/auditor-log.txt || true
-            kubectl exec metrics-collector -n watch-auditor -- curl -k https://pepr-soak-ci-watcher.pepr-system.svc.cluster.local/metrics | egrep -E "pepr_cache_miss|pepr_resync_failure_count" > logs/informer-log.txt
-            kubectl logs -n pepr-system deploy/pepr-soak-ci-watcher > logs/watch-log.txt
-          }
-
-          # Start collecting metrics every 5 minutes and checking pod counts every 30 minutes
-          for i in {1..13}; do # 13 iterations cover 65 minutes (1 hours and 5 minutes) (Every 5 mins x 13 = 65 mins = 1 hour 5 mins)
-            collect_metrics
-            cat logs/informer-log.txt
-            cat logs/auditor-log.txt
-            if [ $((i % 2)) -eq 0 ]; then # Every 10 minutes
-              update_pod_map
-
-              # get a list of pods every 10 mins
-              kubectl get pods -n pepr-demo
-              kubectl top po -n pepr-system
-              kubectl get po -n pepr-system
-
-              # Verify that no pod's count exceeds 1
-              for pod in "${!pod_map[@]}"; do
-                echo "$pod: ${pod_map[$pod]}"
-                if [ "${pod_map[$pod]}" -gt 1 ]; then
-                  echo "Test failed: Pod $pod has count ${pod_map[$pod]}"
-                  exit 1
-                fi
-              done
-
-              # Every 20 minutes, scale up or down the watcher deployment
-              if [ $((i % 4)) -eq 0 ]; then
-                echo "Scaling down the watch-auditor deployment to 0 replicas"
-                kubectl scale deploy/pepr-soak-ci-watcher -n pepr-system --replicas=0
-              else
-                echo "Scaling up the watch-auditor deployment to 1 replica"
-                kubectl scale deploy/pepr-soak-ci-watcher -n pepr-system --replicas=1
-              fi
-            fi
-            sleep 300s # Sleep for 5 minutes before the next iteration
-          done
-
-          echo "Soak interrupt test passed successfully!"
-        shell: bash
-
-      - name: Upload logs
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        with:
-          name: soak-test-logs
-          path: logs