Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: soak interruptions for checkresync test #1299

Merged
merged 12 commits into from
Oct 18, 2024
212 changes: 212 additions & 0 deletions .github/workflows/soak-interrupts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
# Soak test workflow: runs on demand (workflow_dispatch) and on a nightly
# schedule.
name: Soak Interrupts Test

permissions: read-all
on:
  workflow_dispatch:
    inputs:
      branch:
        # NOTE(review): no step in this workflow references this input —
        # confirm whether a checkout `ref` was meant to use it.
        description: 'Branch to run the workflow on'
        required: true
        default: 'main'
      http2:
        # Compared against the string 'true' by the deploy steps to choose
        # between the default and the http2 Pepr manifest.
        description: 'use http2 watcher'
        required: false
        default: 'false'
  schedule:
    # GitHub evaluates cron expressions in UTC.
    - cron: '0 4 * * *' # 04:00 UTC daily (12AM EDT / 9PM PDT)

jobs:
  # Builds the pepr:dev image from the defenseunicorns/pepr repository and
  # publishes it as a one-day artifact that the soak-test job downloads.
  pepr-build:
    name: controller image
    runs-on: ubuntu-latest
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
        with:
          egress-policy: audit

      - name: clone pepr
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
        with:
          repository: defenseunicorns/pepr
          path: pepr

      - name: "set env: PEPR"
        run: echo "PEPR=${GITHUB_WORKSPACE}/pepr" >> "$GITHUB_ENV"

      - name: setup node
        uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
        with:
          node-version: 20
          cache: "npm"
          cache-dependency-path: pepr

      - name: install pepr deps
        run: |
          cd "$PEPR"
          npm ci

      - name: build pepr image
        run: |
          cd "$PEPR"
          npm run build:image

      - name: tar pepr image
        run: |
          PEPR_TAR="${GITHUB_WORKSPACE}/pepr-img.tar"
          echo "PEPR_TAR=${PEPR_TAR}" >> "$GITHUB_ENV"
          docker image save --output "$PEPR_TAR" pepr:dev

      - name: upload image tar artifact
        uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
        with:
          name: pepr-img.tar
          path: pepr-img.tar
          retention-days: 1

  # Creates a k3d cluster with Istio, deploys Pepr plus the auditor workloads,
  # then polls metrics and pod names for roughly 65 minutes while toggling the
  # watch-auditor deployment between 0 and 1 replicas.
  soak-test:
    name: soak-test
    runs-on: ubuntu-latest
    needs:
      - pepr-build

    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
        with:
          egress-policy: audit

      - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: "install k3d"
        run: "curl -s https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash"
        shell: bash

      - name: download image tar artifact
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: pepr-img.tar
          path: ${{ github.workspace }}

      - name: import pepr image from tar
        run: |
          PEPR_TAR="${GITHUB_WORKSPACE}/pepr-img.tar"
          echo "PEPR_TAR=${PEPR_TAR}" >> "$GITHUB_ENV"
          docker image load --input "$PEPR_TAR"

      - name: Create k3d Cluster
        run: "k3d cluster create"
        shell: bash

      - name: Import pepr image into k3d
        run: "k3d image import pepr:dev -c k3s-default"
        shell: bash

      - name: Install istioctl
        run: |
          curl -L https://istio.io/downloadIstio | sh -
        shell: bash

      - name: Install default profile
        run: |
          cd istio*/bin
          ./istioctl install --set profile=demo -y

      - name: Set up Kubernetes
        uses: azure/setup-kubectl@3e0aec4d80787158d308d7b364cb1b702e7feb7f # v4.0.0
        with:
          version: 'latest'

      - name: Create logs directory
        run: mkdir -p logs

      # Exactly one of the two deploy steps runs: the http2 manifest when the
      # http2 input is the string 'true', the default manifest otherwise
      # (including scheduled runs, where the input is absent).
      - name: Deploy Pepr
        if: ${{ (github.event.inputs.http2 || 'none') != 'true' }}
        run: |
          kubectl apply -f hack/soak.ci.yaml

      - name: Deploy http2 Pepr
        if: ${{ (github.event.inputs.http2 || 'none') == 'true' }}
        run: |
          kubectl apply -f hack/soak-http2.ci.yaml

      - name: Deploy applications
        run: |
          kubectl apply -f hack/auditor.ci.yaml

      - name: Wait for workloads to be ready before starting pod checks
        run: |
          # Short pause before waiting on the pods applied by the previous steps.
          sleep 10s
          kubectl wait --for=condition=ready -n istio-system pod -l istio=pilot
          kubectl wait --for=condition=ready -n istio-system pod -l app=istio-ingressgateway
          kubectl wait --for=condition=ready -n watch-auditor pod -l app=watch-auditor
          kubectl wait --for=condition=ready -n pepr-system pod -l app=pepr-soak-ci-watcher

      - name: Run the soak test and collect metrics
        run: |
          # Map of pod name -> number of checks in which that name was seen
          declare -A pod_map

          # Increment the counter of every pod name currently in pepr-demo.
          update_pod_map() {
            for pod in $(kubectl get pods -n pepr-demo -o jsonpath='{.items[*].metadata.name}'); do
              count=${pod_map[$pod]}
              if [ -z "$count" ]; then
                pod_map[$pod]=1
              else
                pod_map[$pod]=$((count + 1))
              fi
            done
          }
          touch logs/auditor-log.txt
          touch logs/informer-log.txt

          # Baseline snapshot taken before the loop starts
          update_pod_map

          # Overwrite the three log files with the latest metrics and watcher logs.
          collect_metrics() {
            # '|| true': a failed or empty auditor scrape must not fail the step
            kubectl exec metrics-collector -n watch-auditor -- curl watch-auditor:8080/metrics | grep watch_controller_failures_total > logs/auditor-log.txt || true
            kubectl exec metrics-collector -n watch-auditor -- curl -k https://pepr-soak-ci-watcher.pepr-system.svc.cluster.local/metrics | grep -E "pepr_cache_miss|pepr_resync_failure_count" > logs/informer-log.txt
            kubectl logs -n pepr-system deploy/pepr-soak-ci-watcher > logs/watch-log.txt
          }

          # Collect metrics every 5 minutes and check pod counts every 10 minutes
          for i in {1..13}; do # 13 iterations x 5 minutes = 65 minutes (1 hour 5 minutes)
            collect_metrics
            cat logs/informer-log.txt
            cat logs/auditor-log.txt
            if [ $((i % 2)) -eq 0 ]; then # Every 10 minutes
              update_pod_map

              # get a list of pods every 10 mins
              kubectl get pods -n pepr-demo
              kubectl top po -n pepr-system
              kubectl get po -n pepr-system

              # Verify that no pod's count exceeds 1, i.e. no pod name was
              # seen in more than one check
              for pod in "${!pod_map[@]}"; do
                echo "$pod: ${pod_map[$pod]}"
                if [ "${pod_map[$pod]}" -gt 1 ]; then
                  echo "Test failed: Pod $pod has count ${pod_map[$pod]}"
                  exit 1
                fi
              done

              # Alternate the `watch-auditor` deployment on each 10-minute
              # check: down to 0 replicas when i is a multiple of 4, back up
              # to 1 replica otherwise
              if [ $((i % 4)) -eq 0 ]; then
                echo "Scaling down the watch-auditor deployment to 0 replicas"
                kubectl scale deploy/watch-auditor -n watch-auditor --replicas=0
              else
                echo "Scaling up the watch-auditor deployment to 1 replica"
                kubectl scale deploy/watch-auditor -n watch-auditor --replicas=1
              fi
            fi
            sleep 300s # Sleep for 5 minutes before the next iteration
          done

          echo "Soak test passed successfully!"
        shell: bash

      # Runs even when the soak script fails so the collected logs are still
      # available for debugging; skipped only if the run is cancelled.
      - name: Upload logs
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
        with:
          name: soak-test-logs
          path: logs
Loading