Skip to content

Commit

Permalink
chore: soak interruptions for checkresync test (#1299)
Browse files Browse the repository at this point in the history
## Description

Adds a soak interrupts test that exercises the code branch where checkResync
runs. checkResync runs when the informer has not seen an event within the
LAST_SEEN limit. Since our http2Watch is not dropping connections, we
need a new test to ensure this branch of the code works.


## Related Issue

Fixes #1298 
<!-- or -->
Relates to #

## Type of change

- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [x] Other (security config, docs update, etc)

## Checklist before merging
- [x] Unit,
[Journey](https://github.com/defenseunicorns/pepr/tree/main/journey),
[E2E Tests](https://github.com/defenseunicorns/pepr-excellent-examples),
[docs](https://github.com/defenseunicorns/pepr/tree/main/docs),
[adr](https://github.com/defenseunicorns/pepr/tree/main/adr) added or
updated as needed
- [x] [Contributor Guide
Steps](https://docs.pepr.dev/main/contribute/#submitting-a-pull-request)
followed

---------

Signed-off-by: Case Wylie <[email protected]>
  • Loading branch information
cmwylie19 authored Oct 18, 2024
1 parent 900eb01 commit 0ff4a12
Showing 1 changed file with 212 additions and 0 deletions.
212 changes: 212 additions & 0 deletions .github/workflows/soak-interrupts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
name: Soak Interrupts Test

permissions: read-all

on:
  # Nightly run. GitHub evaluates cron schedules in UTC, so this fires at
  # 04:00 UTC (12AM EDT / 9PM PDT; one hour earlier on EST / PST wall clocks).
  schedule:
    - cron: "0 4 * * *"

  # Manual run from the Actions tab.
  workflow_dispatch:
    inputs:
      # NOTE(review): no step visible in this workflow reads `branch` —
      # confirm whether a checkout `ref` was meant to consume it.
      branch:
        description: "Branch to run the workflow on"
        required: true
        default: "main"
      # When set to 'true', the http2 soak manifest is deployed instead of
      # the default one.
      http2:
        description: "use http2 watcher"
        required: false
        default: "false"

jobs:
  # Builds the pepr:dev image from a fresh clone of defenseunicorns/pepr and
  # hands it to the soak job as a one-day artifact.
  pepr-build:
    name: controller image
    runs-on: ubuntu-latest
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
        with:
          egress-policy: audit

      - name: clone pepr
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
        with:
          repository: defenseunicorns/pepr
          path: pepr

      - name: "set env: PEPR"
        run: echo "PEPR=${GITHUB_WORKSPACE}/pepr" >> "$GITHUB_ENV"

      - name: setup node
        uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
        with:
          node-version: 20
          cache: "npm"
          cache-dependency-path: pepr

      - name: install pepr deps
        run: |
          cd "$PEPR"
          npm ci

      - name: build pepr image
        run: |
          cd "$PEPR"
          npm run build:image

      - name: tar pepr image
        run: |
          PEPR_TAR="${GITHUB_WORKSPACE}/pepr-img.tar"
          echo "PEPR_TAR=${PEPR_TAR}" >> "$GITHUB_ENV"
          docker image save --output "$PEPR_TAR" pepr:dev

      - name: upload image tar artifact
        uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
        with:
          name: pepr-img.tar
          path: pepr-img.tar
          retention-days: 1

  # Loads the image built above into a k3d cluster, deploys the soak
  # manifests, then runs a 65-minute loop that collects metrics and
  # periodically scales the watch-auditor deployment down and back up.
  soak-interrupts-test:
    name: soak-interrupts-test
    runs-on: ubuntu-latest
    needs:
      - pepr-build

    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
        with:
          egress-policy: audit

      - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: "install k3d"
        run: "curl -s https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash"
        shell: bash

      - name: download image tar artifact
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: pepr-img.tar
          path: ${{ github.workspace }}

      - name: import pepr image from tar
        run: |
          PEPR_TAR="${GITHUB_WORKSPACE}/pepr-img.tar"
          echo "PEPR_TAR=${PEPR_TAR}" >> "$GITHUB_ENV"
          docker image load --input "$PEPR_TAR"

      - name: Create k3d Cluster
        run: "k3d cluster create"
        shell: bash

      - name: Import pepr image into k3d
        run: "k3d image import pepr:dev -c k3s-default"
        shell: bash

      - name: Install istioctl
        run: |
          curl -L https://istio.io/downloadIstio | sh -
        shell: bash

      - name: Install default profile
        run: |
          cd istio*/bin
          ./istioctl install --set profile=demo -y

      - name: Set up Kubernetes
        uses: azure/setup-kubectl@3e0aec4d80787158d308d7b364cb1b702e7feb7f # v4.0.0
        with:
          version: 'latest'

      - name: Create logs directory
        run: mkdir -p logs

      # Exactly one of the two deploy steps below runs. `inputs.http2` is
      # empty on scheduled runs, so those fall through to the default manifest.
      - name: Deploy Pepr
        if: ${{ (github.event.inputs.http2 || 'none') != 'true' }}
        run: |
          kubectl apply -f hack/soak.ci.yaml

      - name: Deploy http2 Pepr
        if: ${{ (github.event.inputs.http2 || 'none') == 'true' }}
        run: |
          kubectl apply -f hack/soak-http2.ci.yaml

      - name: Deploy applications
        run: |
          kubectl apply -f hack/auditor.ci.yaml

      # Short pause so the freshly applied pods exist before `kubectl wait`
      # selects them, then block until each workload reports Ready.
      - name: Wait for workloads to become ready
        run: |
          sleep 10s
          kubectl wait --for=condition=ready -n istio-system pod -l istio=pilot
          kubectl wait --for=condition=ready -n istio-system pod -l app=istio-ingressgateway
          kubectl wait --for=condition=ready -n watch-auditor pod -l app=watch-auditor
          kubectl wait --for=condition=ready -n pepr-system pod -l app=pepr-soak-ci-watcher

      - name: Run the soak test and collect metrics
        run: |
          # pod_map: pod name -> number of pod checks in which it was seen.
          declare -A pod_map

          # Record one more sighting for every pod currently in pepr-demo.
          update_pod_map() {
            for pod in $(kubectl get pods -n pepr-demo -o jsonpath='{.items[*].metadata.name}'); do
              count=${pod_map[$pod]}
              if [ -z "$count" ]; then
                pod_map[$pod]=1
              else
                pod_map[$pod]=$((count + 1))
              fi
            done
          }

          touch logs/auditor-log.txt
          touch logs/informer-log.txt
          update_pod_map

          # Snapshot auditor metrics, informer metrics and watcher logs.
          collect_metrics() {
            # Tolerate failure here: watch-auditor is periodically scaled to 0 below.
            kubectl exec metrics-collector -n watch-auditor -- curl watch-auditor:8080/metrics | grep watch_controller_failures_total > logs/auditor-log.txt || true
            # No `|| true`: with `-o pipefail` a missing informer metric fails the step.
            kubectl exec metrics-collector -n watch-auditor -- curl -k https://pepr-soak-ci-watcher.pepr-system.svc.cluster.local/metrics | grep -E "pepr_cache_miss|pepr_resync_failure_count" > logs/informer-log.txt
            kubectl logs -n pepr-system deploy/pepr-soak-ci-watcher > logs/watch-log.txt
          }

          # Collect metrics every 5 minutes and check pod counts every 10 minutes.
          for i in {1..13}; do # 13 iterations x 5 minutes = 65 minutes
            collect_metrics
            cat logs/informer-log.txt
            cat logs/auditor-log.txt

            if [ $((i % 2)) -eq 0 ]; then # Every 10 minutes
              update_pod_map

              # get a list of pods every 10 mins
              kubectl get pods -n pepr-demo
              kubectl top po -n pepr-system
              kubectl get po -n pepr-system

              # Verify that no pod's count exceeds 1, i.e. no pepr-demo pod
              # was still present across two consecutive checks.
              for pod in "${!pod_map[@]}"; do
                echo "$pod: ${pod_map[$pod]}"
                if [ "${pod_map[$pod]}" -gt 1 ]; then
                  echo "Test failed: Pod $pod has count ${pod_map[$pod]}"
                  exit 1
                fi
              done

              # Interrupt the auditor: scale to 0 when i is a multiple of 4
              # (minutes 15, 35, 55) and back to 1 on the other even
              # iterations (minutes 5, 25, 45).
              if [ $((i % 4)) -eq 0 ]; then
                echo "Scaling down the watch-auditor deployment to 0 replicas"
                kubectl scale deploy/watch-auditor -n watch-auditor --replicas=0
              else
                echo "Scaling up the watch-auditor deployment to 1 replica"
                kubectl scale deploy/watch-auditor -n watch-auditor --replicas=1
              fi
            fi

            sleep 300s # Sleep for 5 minutes before the next iteration
          done

          echo "Soak interrupt test passed successfully!"
        shell: bash

      # Upload even when the soak step failed — that is when the logs matter.
      - name: Upload logs
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
        with:
          name: soak-test-logs
          path: logs

0 comments on commit 0ff4a12

Please sign in to comment.