From cc6986c2728b8533831b15df0550775b472c9eb4 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 27 Nov 2024 17:33:57 +0900 Subject: [PATCH 01/19] Add Wave Autoscale resources including ExternalSecret, Namespace, HelmRepository, and HelmRelease --- .../STCLab/WaveAutoscale/external-secret.yaml | 19 +++++++++++ .../STCLab/WaveAutoscale/namespace.yaml | 8 +++++ .../WaveAutoscale/wave-autoscale-source.yaml | 9 +++++ .../STCLab/WaveAutoscale/wave-autoscale.yaml | 33 +++++++++++++++++++ 4 files changed, 69 insertions(+) create mode 100644 eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/external-secret.yaml create mode 100644 eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/namespace.yaml create mode 100644 eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml create mode 100644 eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/external-secret.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/external-secret.yaml new file mode 100644 index 00000000..fd4b48a8 --- /dev/null +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/external-secret.yaml @@ -0,0 +1,19 @@ +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: wa-external-secret + namespace: wave-autoscale +spec: + refreshInterval: 1m + secretStoreRef: + name: eksa-secret-store #The secret store name we have just created. 
+ kind: ClusterSecretStore + target: + name: wa-secret # Secret name in k8s + data: + - secretKey: WA_LICENSE + remoteRef: + key: WA_LICENSE + - secretKey: GHRC_TOKEN + remoteRef: + key: GHRC_TOKEN diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/namespace.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/namespace.yaml new file mode 100644 index 00000000..206e12f7 --- /dev/null +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/namespace.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: wave-autoscale + labels: + aws.conformance.vendor: STCLab + aws.conformance.vendor-solution: wave-autoscale + aws.conformance.vendor-solution-version: 1.10.0 diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml new file mode 100644 index 00000000..de568b21 --- /dev/null +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: wave-autoscale-charts + namespace: wave-autoscale +spec: + interval: 30s + url: https://release.waveautoscale.io/ghcr-helm/wa diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml new file mode 100644 index 00000000..4ae11691 --- /dev/null +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: wave-autoscale + namespace: wave-autoscale +spec: + chart: + spec: + chart: wave-autoscale-helm + reconcileStrategy: ChartVersion + sourceRef: + kind: HelmRepository + name: wave-autoscale-charts + namespace: flux-system + version: 1.10.0 + interval: 1m0s + targetNamespace: wave-autoscale + values: + 
spec: + core: + env: + - name: WA_LICENSE + value: "" + valuesFrom: + - kind: Secret + name: wa-secret + valuesKey: WA_LICENSE + targetPath: core.env.0.value + - kind: Secret + name: wa-secret + valuesKey: GHRC_TOKEN + targetPath: ghcr.dockerconfigjson From 2a93296587fcdd6fed07403396833590ef4939d1 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 27 Nov 2024 17:49:32 +0900 Subject: [PATCH 02/19] Rename HelmRepository from wave-autoscale-charts to wave-autoscale-helm --- .../Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml | 2 +- .../Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml index de568b21..35781a99 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml @@ -2,7 +2,7 @@ apiVersion: source.toolkit.fluxcd.io/v1beta2 kind: HelmRepository metadata: - name: wave-autoscale-charts + name: wave-autoscale-helm namespace: wave-autoscale spec: interval: 30s diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index 4ae11691..b8b91e8e 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -11,7 +11,7 @@ spec: reconcileStrategy: ChartVersion sourceRef: kind: HelmRepository - name: wave-autoscale-charts + name: wave-autoscale-helm namespace: flux-system version: 1.10.0 interval: 1m0s From 1a93771374771514fbda9ddea4761951f9f7e8c2 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 27 Nov 2024 17:54:38 +0900 Subject: [PATCH 03/19] Update HelmRepository namespace from 
wave-autoscale to flux-system --- .../Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml index 35781a99..a5931414 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-source.yaml @@ -3,7 +3,7 @@ apiVersion: source.toolkit.fluxcd.io/v1beta2 kind: HelmRepository metadata: name: wave-autoscale-helm - namespace: wave-autoscale + namespace: flux-system spec: interval: 30s url: https://release.waveautoscale.io/ghcr-helm/wa From 2a9a4ea73414d18b8753dda23288d8fb97f49bab Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 27 Nov 2024 17:58:10 +0900 Subject: [PATCH 04/19] Enable GitHub Container Registry in Wave Autoscale configuration --- .../Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index b8b91e8e..199613f5 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -17,6 +17,8 @@ spec: interval: 1m0s targetNamespace: wave-autoscale values: + ghcr: + enabled: true spec: core: env: From ee3607fe37cc26d632c9357c6abbff2289bd81f9 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 27 Nov 2024 18:00:06 +0900 Subject: [PATCH 05/19] Add WA_API_SERVER_HOST environment variable to Wave Autoscale configuration --- .../Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml 
b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index 199613f5..a39bd6b7 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -24,6 +24,8 @@ spec: env: - name: WA_LICENSE value: "" + - name: WA_API_SERVER_HOST + value: "0.0.0.0" valuesFrom: - kind: Secret name: wa-secret From 6be45bea3495e18d569ff98232b7102bea89dc5e Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 27 Nov 2024 18:30:20 +0900 Subject: [PATCH 06/19] Refactor WA_LICENSE environment variable to use secretKeyRef for enhanced security --- .../Partner/STCLab/WaveAutoscale/wave-autoscale.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index a39bd6b7..62038efb 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -23,14 +23,13 @@ spec: core: env: - name: WA_LICENSE - value: "" + valueFrom: + secretKeyRef: + name: wa-secret + key: WA_LICENSE - name: WA_API_SERVER_HOST value: "0.0.0.0" valuesFrom: - - kind: Secret - name: wa-secret - valuesKey: WA_LICENSE - targetPath: core.env.0.value - kind: Secret name: wa-secret valuesKey: GHRC_TOKEN From edc616391a4236d1563f22d6c21cf34dd88a3219 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Mon, 2 Dec 2024 09:42:18 +0900 Subject: [PATCH 07/19] Add CronJob for Wave Autoscale health checks --- .../WaveAutoscale/wave-autoscale-cronjob.yaml | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml diff --git a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml 
b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml new file mode 100644 index 00000000..10e08f8b --- /dev/null +++ b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml @@ -0,0 +1,64 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: wave-autoscale-healthcheck + namespace: wave-autoscale +spec: + schedule: "*/10 * * * *" + jobTemplate: + spec: + template: + spec: + containers: + - name: wave-autoscale-healthcheck + image: alpine/k8s:1.26.9 + command: + - /bin/sh + args: + - -c + - >- + echo Start a health check for Wave Autoscale; + response=$(curl -sL http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/); + code=$(echo ${response} | jq .code); + if [ "$code" -eq 200 ]; then + echo "Wave Autoscale Core working Successful." + exit 0 + else + echo "Failed to fetch Wave Autoscale Core. Response was $response" + exit 1 + fi + response=$(curl -sL http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/api/info); + code=$(echo ${response} | jq .code); + if [ "$code" -eq 200 ]; then + license=$(echo "$response" | jq -r '.license') + if [[ "$license" == "{}" ]]; then + echo "Wave Autoscale License is invalid: empty map detected." + exit 1 + else + echo "Wave Autoscale License is valid." + exit 0 + fi + else + echo "Wave Autoscale License is is invalid." + exit 1 + fi + response=$(curl -sL http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3025/); + code=$(echo ${response} | jq .code); + if [ "$code" -eq 200 ]; then + echo "Wave Autoscale Web Console working Successful." + exit 0 + else + echo "Failed to fetch Wave Autoscale Web Console. Response was $response" + exit 1 + fi + response=$(curl -sL http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3026/); + code=$(echo ${response} | jq .code); + if [ "$code" -eq 200 ]; then + echo "Wave Autoscale Autopilot working Successful." + exit 0 + else + echo "Failed to fetch Wave Autoscale Autopilot. 
Response was $response" + exit 1 + fi + restartPolicy: Never + backoffLimit: 1 From 0297c207c226374af5cf89ff0236ffa0da464d50 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Mon, 2 Dec 2024 10:19:04 +0900 Subject: [PATCH 08/19] Update Wave Autoscale health check schedule to run every minute and fix license validation condition --- .../Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml index 10e08f8b..8be15030 100644 --- a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml +++ b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml @@ -4,7 +4,7 @@ metadata: name: wave-autoscale-healthcheck namespace: wave-autoscale spec: - schedule: "*/10 * * * *" + schedule: "*/1 * * * *" jobTemplate: spec: template: @@ -31,7 +31,7 @@ spec: code=$(echo ${response} | jq .code); if [ "$code" -eq 200 ]; then license=$(echo "$response" | jq -r '.license') - if [[ "$license" == "{}" ]]; then + if [ "$license" == "{}" ]; then echo "Wave Autoscale License is invalid: empty map detected." 
exit 1 else From b146f060262e16afb4d4b1f329c3707a271fb283 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Mon, 2 Dec 2024 10:59:40 +0900 Subject: [PATCH 09/19] Update Wave Autoscale health check schedule to run every 10 minutes and enhance health check script for improved error handling --- .../WaveAutoscale/wave-autoscale-cronjob.yaml | 84 ++++++++++--------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml index 8be15030..c41213aa 100644 --- a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml +++ b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml @@ -4,7 +4,7 @@ metadata: name: wave-autoscale-healthcheck namespace: wave-autoscale spec: - schedule: "*/1 * * * *" + schedule: "*/10 * * * *" jobTemplate: spec: template: @@ -16,49 +16,51 @@ spec: - /bin/sh args: - -c - - >- - echo Start a health check for Wave Autoscale; - response=$(curl -sL http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/); - code=$(echo ${response} | jq .code); - if [ "$code" -eq 200 ]; then - echo "Wave Autoscale Core working Successful." - exit 0 - else - echo "Failed to fetch Wave Autoscale Core. Response was $response" - exit 1 + - | + echo "Start a health check for Wave Autoscale"; + + # Core health check + response=$(curl -s -w "\n200" http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/) + status_code=$(echo "$response" | tail -n1); + if [ "$status_code" -ne 200 ]; then + echo "Failed to fetch Wave Autoscale Core. 
Response was $response"; + exit 1; fi - response=$(curl -sL http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/api/info); - code=$(echo ${response} | jq .code); - if [ "$code" -eq 200 ]; then - license=$(echo "$response" | jq -r '.license') - if [ "$license" == "{}" ]; then - echo "Wave Autoscale License is invalid: empty map detected." - exit 1 - else - echo "Wave Autoscale License is valid." - exit 0 - fi - else - echo "Wave Autoscale License is is invalid." - exit 1 + echo "Wave Autoscale Core working Successful."; + + # License check + response=$(curl -s -w "\n200" http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/api/info); + status_code=$(echo "$response" | tail -n1); + body=$(echo "$response" | head -n-1); + if [ "$status_code" -ne 200 ]; then + echo "Wave Autoscale License is invalid. Response was $response"; + exit 1; fi - response=$(curl -sL http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3025/); - code=$(echo ${response} | jq .code); - if [ "$code" -eq 200 ]; then - echo "Wave Autoscale Web Console working Successful." - exit 0 - else - echo "Failed to fetch Wave Autoscale Web Console. Response was $response" - exit 1 + license=$(echo "$body" | jq -r '.license'); + if [ "$license" = "{}" ]; then + echo "Wave Autoscale License is invalid: empty map detected."; + exit 1; fi - response=$(curl -sL http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3026/); - code=$(echo ${response} | jq .code); - if [ "$code" -eq 200 ]; then - echo "Wave Autoscale Autopilot working Successful." - exit 0 - else - echo "Failed to fetch Wave Autoscale Autopilot. Response was $response" - exit 1 + echo "Wave Autoscale License is valid."; + + # Web Console health check + response=$(curl -s -w "\n200" http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3025/); + status_code=$(echo "$response" | tail -n1); + if [ "$status_code" -ne 200 ]; then + echo "Failed to fetch Wave Autoscale Web Console. 
Response was $response"; + exit 1; fi + echo "Wave Autoscale Web Console working Successful."; + + # Autopilot health check + response=$(curl -s -w "\n200" http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3026/); + status_code=$(echo "$response" | tail -n1); + if [ "$status_code" -ne 200 ]; then + echo "Failed to fetch Wave Autoscale Autopilot. Response was $response"; + exit 1; + fi + echo "Wave Autoscale Autopilot working Successful."; + + exit 0; restartPolicy: Never backoffLimit: 1 From 36942cd74f1bc8a450935b83edeb71955e0ec3ce Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Tue, 3 Dec 2024 09:01:20 +0900 Subject: [PATCH 10/19] Add resource requests for Wave Autoscale components to optimize resource allocation --- .../STCLab/WaveAutoscale/wave-autoscale.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index 62038efb..d49612d7 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -21,6 +21,10 @@ spec: enabled: true spec: core: + resources: + requests: + cpu: 250m + memory: 300Mi env: - name: WA_LICENSE valueFrom: @@ -29,6 +33,16 @@ spec: key: WA_LICENSE - name: WA_API_SERVER_HOST value: "0.0.0.0" + webConsole: + resources: + requests: + cpu: 300m + memory: 300Mi + autopilot: + resources: + requests: + cpu: 500m + memory: 300Mi valuesFrom: - kind: Secret name: wa-secret From 9f22fd7758036e7538b1382fb95644a6edd20df2 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Tue, 3 Dec 2024 09:15:56 +0900 Subject: [PATCH 11/19] Reduce CPU resource requests for Wave Autoscale components to optimize resource usage --- .../Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index d49612d7..34ec39cb 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -36,12 +36,12 @@ spec: webConsole: resources: requests: - cpu: 300m + cpu: 250m memory: 300Mi autopilot: resources: requests: - cpu: 500m + cpu: 250m memory: 300Mi valuesFrom: - kind: Secret From 7fd8487080f3729da67792d9aad1f74998ee8393 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 4 Dec 2024 10:14:12 +0900 Subject: [PATCH 12/19] Update Wave Autoscale health check schedule to run every 5 minutes and enhance health check logic --- .../WaveAutoscale/wave-autoscale-cronjob.yaml | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml index c41213aa..d0e683e8 100644 --- a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml +++ b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml @@ -4,7 +4,7 @@ metadata: name: wave-autoscale-healthcheck namespace: wave-autoscale spec: - schedule: "*/10 * * * *" + schedule: "*/5 * * * *" jobTemplate: spec: template: @@ -20,46 +20,44 @@ spec: echo "Start a health check for Wave Autoscale"; # Core health check - response=$(curl -s -w "\n200" http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/) - status_code=$(echo "$response" | tail -n1); - if [ "$status_code" -ne 200 ]; then + response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/) + name=$(echo "$response" | jq -r '.name') + if [ "$name" = "wa-api-server" ]; then + echo "Wave Autoscale Core working Successful."; + else echo "Failed to fetch Wave Autoscale 
Core. Response was $response"; exit 1; fi - echo "Wave Autoscale Core working Successful."; # License check - response=$(curl -s -w "\n200" http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/api/info); - status_code=$(echo "$response" | tail -n1); - body=$(echo "$response" | head -n-1); - if [ "$status_code" -ne 200 ]; then - echo "Wave Autoscale License is invalid. Response was $response"; - exit 1; - fi - license=$(echo "$body" | jq -r '.license'); + response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/api/info) + license=$(echo "$response" | jq -r '.license') if [ "$license" = "{}" ]; then echo "Wave Autoscale License is invalid: empty map detected."; exit 1; + else + echo "Wave Autoscale License is valid."; fi - echo "Wave Autoscale License is valid."; # Web Console health check - response=$(curl -s -w "\n200" http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3025/); - status_code=$(echo "$response" | tail -n1); - if [ "$status_code" -ne 200 ]; then + response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3025/); + redirect_path=$(echo "$response") + if [ "$redirect_path" = "/app/k8s" ]; then + echo "Wave Autoscale Web Console working Successful."; + else echo "Failed to fetch Wave Autoscale Web Console. Response was $response"; exit 1; fi - echo "Wave Autoscale Web Console working Successful."; # Autopilot health check - response=$(curl -s -w "\n200" http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3026/); - status_code=$(echo "$response" | tail -n1); - if [ "$status_code" -ne 200 ]; then + response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3026/) + name=$(echo "$response" | jq -r '.name') + if [ "$name" = "wa-autopilot" ]; then + echo "Wave Autoscale Autopilot working Successful."; + else echo "Failed to fetch Wave Autoscale Autopilot. 
Response was $response"; exit 1; fi - echo "Wave Autoscale Autopilot working Successful."; exit 0; restartPolicy: Never From 330092a8d39a02262ee7e27ac243b0c8fbe2429e Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 4 Dec 2024 10:25:17 +0900 Subject: [PATCH 13/19] Add self statefulset check to Wave Autoscale health check script --- .../STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml index d0e683e8..fdd40d78 100644 --- a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml +++ b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml @@ -39,6 +39,16 @@ spec: echo "Wave Autoscale License is valid."; fi + # Core - self statefulset check + response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/api/k8s/statefulsets/wave-autoscale/wave-autoscale) + statefulset_name=$(echo "$response" | jq -r '.k8s_definition.name') + if [ "$statefulset_name" = "wave-autoscale" ]; then + echo "Wave Autoscale Statefulset working Successful."; + else + echo "Failed to fetch Wave Autoscale Statefulset. 
Response was $response"; + exit 1; + fi + # Web Console health check response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3025/); redirect_path=$(echo "$response") From 34456fcbbe797727533d773e08857f79b7c0b3cd Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Tue, 10 Dec 2024 14:00:13 +0900 Subject: [PATCH 14/19] Add Helm repository and release configuration for Wave Autoscale agent --- .../wave-autoscale-agent-source.yaml | 9 +++++ .../WaveAutoscale/wave-autoscale-agent.yaml | 37 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-agent-source.yaml create mode 100644 eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-agent.yaml diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-agent-source.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-agent-source.yaml new file mode 100644 index 00000000..d3bc0972 --- /dev/null +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-agent-source.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: wave-autoscale-agent-helm + namespace: flux-system +spec: + interval: 30s + url: https://release.waveautoscale.io/ghcr-helm/wa-agent diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-agent.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-agent.yaml new file mode 100644 index 00000000..7dd7443a --- /dev/null +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale-agent.yaml @@ -0,0 +1,37 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: wave-autoscale-agent + namespace: wave-autoscale +spec: + chart: + spec: + chart: wave-autoscale-agent-helm + reconcileStrategy: ChartVersion + sourceRef: + kind: HelmRepository + name: 
wave-autoscale-agent-helm + namespace: flux-system + version: 1.8.0 + interval: 1m0s + targetNamespace: wave-autoscale + values: + ghcr: + enabled: true + spec: + agent: + resources: + requests: + cpu: 100m + memory: 50Mi + cadvisor: + resources: + requests: + cpu: 50m + memory: 50Mi + valuesFrom: + - kind: Secret + name: wa-secret + valuesKey: GHRC_TOKEN + targetPath: ghcr.dockerconfigjson From 0550fdda35da6184bb4fd246b2547638f799dc82 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Tue, 10 Dec 2024 14:27:35 +0900 Subject: [PATCH 15/19] Update Wave Autoscale Helm chart version to 1.10.2 --- .../Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index 34ec39cb..65c6ba86 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -13,7 +13,7 @@ spec: kind: HelmRepository name: wave-autoscale-helm namespace: flux-system - version: 1.10.0 + version: 1.10.2 interval: 1m0s targetNamespace: wave-autoscale values: From 169cb0e3a4cccca7e75d982355323a4cab6a7147 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Tue, 10 Dec 2024 15:04:16 +0900 Subject: [PATCH 16/19] Downgrade Wave Autoscale Helm chart version to 1.10.0 --- .../Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index 65c6ba86..34ec39cb 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -13,7 +13,7 @@ spec: kind: HelmRepository name: 
wave-autoscale-helm namespace: flux-system - version: 1.10.2 + version: 1.10.0 interval: 1m0s targetNamespace: wave-autoscale values: From a507c7d4c5c650ed416db0332c6832569c76119c Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Tue, 10 Dec 2024 17:04:49 +0900 Subject: [PATCH 17/19] Update Wave Autoscale Helm chart version to 1.10.2 and enhance health check logic --- .../Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml index 34ec39cb..65c6ba86 100644 --- a/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml +++ b/eks-anywhere-common/Addons/Partner/STCLab/WaveAutoscale/wave-autoscale.yaml @@ -13,7 +13,7 @@ spec: kind: HelmRepository name: wave-autoscale-helm namespace: flux-system - version: 1.10.0 + version: 1.10.2 interval: 1m0s targetNamespace: wave-autoscale values: From 48e5016608aa76f0ff9b8fa46a5da8402c285729 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Tue, 10 Dec 2024 19:02:07 +0900 Subject: [PATCH 18/19] Add service account and enhance health check for Wave Autoscale cronjob --- .../WaveAutoscale/wave-autoscale-cronjob.yaml | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml index fdd40d78..a6e7eec3 100644 --- a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml +++ b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml @@ -9,6 +9,7 @@ spec: spec: template: spec: + serviceAccountName: wave-autoscale-sa containers: - name: wave-autoscale-healthcheck image: alpine/k8s:1.26.9 @@ -17,6 +18,9 @@ spec: args: - -c - | + # + # Health check for Wave Autoscale + # echo "Start a health check for 
Wave Autoscale"; # Core health check @@ -69,6 +73,31 @@ spec: exit 1; fi + # + # Autopilot behavior check + # + echo "Start checking Autopilot behavior"; + + # 1. Agent Running Check + AGENT_POD_LIST=$(kubectl get pods -n wave-autoscale -l app.kubernetes.io/name=wave-autoscale-agent -o jsonpath='{.items[*].metadata.name}') + AGENT_ALL_RUNNING=true + for POD in $AGENT_POD_LIST; do + STATUS=$(kubectl get pod "$POD" -n wave-autoscale -o jsonpath='{.status.phase}') + if [ "$STATUS" != "Running" ]; then + echo "Agent Pod $POD is not running (current status: $STATUS)" + AGENT_ALL_RUNNING=false + else + echo "Agent Pod $POD is running.." + fi + done + if [ "$AGENT_ALL_RUNNING" = true ]; then + echo "All Agent Pods in DaemonSet '$DAEMONSET_NAME' are running." + else + echo "Some Agent Pods in DaemonSet '$DAEMONSET_NAME' are not running." + exit 1 + fi + + exit 0; restartPolicy: Never backoffLimit: 1 From 6faae64dc59db62a067b2b99797e31ab84cdfb95 Mon Sep 17 00:00:00 2001 From: Ari-suhyeon Date: Wed, 11 Dec 2024 20:11:25 +0900 Subject: [PATCH 19/19] Enhance Wave Autoscale cronjob with extended health checks and add test deployment configuration --- .../WaveAutoscale/wave-autoscale-cronjob.yaml | 208 ++++++++++++++++-- 1 file changed, 191 insertions(+), 17 deletions(-) diff --git a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml index a6e7eec3..35f0e3c3 100644 --- a/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml +++ b/eks-anywhere-common/Testers/STCLab/WaveAutoscale/wave-autoscale-cronjob.yaml @@ -4,7 +4,7 @@ metadata: name: wave-autoscale-healthcheck namespace: wave-autoscale spec: - schedule: "*/5 * * * *" + schedule: "*/10 * * * *" jobTemplate: spec: template: @@ -18,65 +18,69 @@ spec: args: - -c - | + API_SERVER="wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024" +
WEB_SERVER="wave-autoscale-svc.wave-autoscale.svc.cluster.local:3025" + ML_SERVER="wave-autoscale-svc.wave-autoscale.svc.cluster.local:3026" + # # Health check for Wave Autoscale # - echo "Start a health check for Wave Autoscale"; + echo " ## Start a health check for Wave Autoscale"; # Core health check - response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/) + response=$(curl -s http://$API_SERVER/) name=$(echo "$response" | jq -r '.name') if [ "$name" = "wa-api-server" ]; then echo "Wave Autoscale Core working Successful."; else - echo "Failed to fetch Wave Autoscale Core. Response was $response"; + echo "[ERROR] Failed to fetch Wave Autoscale Core. Response was $response"; exit 1; fi # License check - response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/api/info) + response=$(curl -s http://$API_SERVER/api/info) license=$(echo "$response" | jq -r '.license') if [ "$license" = "{}" ]; then - echo "Wave Autoscale License is invalid: empty map detected."; + echo "[ERROR] Wave Autoscale License is invalid: empty map detected."; exit 1; else echo "Wave Autoscale License is valid."; fi # Core - self statefulset check - response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3024/api/k8s/statefulsets/wave-autoscale/wave-autoscale) + response=$(curl -s http://$API_SERVER/api/k8s/statefulsets/wave-autoscale/wave-autoscale) statefulset_name=$(echo "$response" | jq -r '.k8s_definition.name') if [ "$statefulset_name" = "wave-autoscale" ]; then echo "Wave Autoscale Statefulset working Successful."; else - echo "Failed to fetch Wave Autoscale Statefulset. Response was $response"; + echo "[ERROR] Failed to fetch Wave Autoscale Statefulset. 
Response was $response"; exit 1; fi # Web Console health check - response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3025/); + response=$(curl -s http://$WEB_SERVER/); redirect_path=$(echo "$response") if [ "$redirect_path" = "/app/k8s" ]; then echo "Wave Autoscale Web Console working Successful."; else - echo "Failed to fetch Wave Autoscale Web Console. Response was $response"; + echo "[ERROR] Failed to fetch Wave Autoscale Web Console. Response was $response"; exit 1; fi # Autopilot health check - response=$(curl -s http://wave-autoscale-svc.wave-autoscale.svc.cluster.local:3026/) + response=$(curl -s http://$ML_SERVER/) name=$(echo "$response" | jq -r '.name') if [ "$name" = "wa-autopilot" ]; then echo "Wave Autoscale Autopilot working Successful."; else - echo "Failed to fetch Wave Autoscale Autopilot. Response was $response"; + echo "[ERROR] Failed to fetch Wave Autoscale Autopilot. Response was $response"; exit 1; fi # - # Autopilot behavior check + # Autopilot scaleout check # - echo "Start checking Autopilot behavior"; + echo " ## Start checking scaleout"; # 1. Agent Running Check AGENT_POD_LIST=$(kubectl get pods -n wave-autoscale -l app.kubernetes.io/name=wave-autoscale-agent -o jsonpath='{.items[*].metadata.name}') @@ -91,13 +95,183 @@ spec: fi done if [ "$AGENT_ALL_RUNNING" = true ]; then - echo "All Agent Pods in DaemonSet '$DAEMONSET_NAME' are running." + echo "All Agent Pods in DaemonSet are running." + else + echo "[ERROR] Some Agent Pods in DaemonSet are not running." + exit 1; + fi + + # 2. K8s agent setting save + response=$(curl -s -X POST -H "Content-Type: application/json" -d \ + '{ + "metrics_source": { + "kind": "wave-agent", + "wave_agent": { + "interval_seconds": 10 + } + } + }' http://$API_SERVER/api/k8s/config) + kind=$(echo "$response" | jq -r '.metrics_source.kind') + if [ "$kind" = "wave-agent" ]; then + echo "K8s agent setting save successful." + else + echo "[ERROR] Failed to save K8s agent setting. 
Response was $response"; + exit 1; + fi + + # 3. test deployment autopilot on + # 3.1. check the current setting before saving + response=$(curl -s http://$API_SERVER/api/k8s/deployments/wave-autoscale/wa-test-dp) + deployment_data=$(echo "$response" | jq -r '.k8s_deployment') + if [ "$deployment_data" = "null" ]; then + response=$(curl -s -X POST -H "Content-Type: application/json" -d \ + '{ + "k8s_deployment": { + "enabled":true, + "autopilot_enabled":true + }, + "k8s_deployment_config": { + "config_type":"default", + "strategy":"performance", + "application_type":"cpu_intensive", + "load":"network_in", + "forecast_horizon":1, + "min_replicas":1, + "max_replicas":3, + "fallback_cpu_utilization":50, + "threshold":{}, + "stabilization_window_seconds":60 + } + }' http://$API_SERVER/api/k8s/deployments/wave-autoscale/wa-test-dp/default) + if [ "$response" = "{}" ]; then + echo "Test deployment autopilot on successful."; + else + echo "[ERROR] Failed to turn on test deployment autopilot. Response was $response"; + exit 1; + fi + fi + + # 4. check test K8s Deployment replicas 1 (min replicas) + DEPLOYMENT_REPLICAS=$(kubectl get deployment wa-test-dp -n wave-autoscale -o jsonpath='{.status.availableReplicas}'); DEPLOYMENT_REPLICAS="${DEPLOYMENT_REPLICAS:-0}" + if [ "$DEPLOYMENT_REPLICAS" -eq 1 ]; then + echo "Test deployment replicas is $DEPLOYMENT_REPLICAS"; + else + echo "[ERROR] The test deployment replica must be the minimum replica. Current replicas: $DEPLOYMENT_REPLICAS"; + exit 1; + fi + + # 5. load test for test deployment + echo "Triggering K6 load test" > /shared/trigger; + echo "Waiting for 40 seconds..."; + sleep 40 + + # 6. check test K8s Deployment replicas + DEPLOYMENT_REPLICAS=$(kubectl get deployment wa-test-dp -n wave-autoscale -o jsonpath='{.status.availableReplicas}'); DEPLOYMENT_REPLICAS="${DEPLOYMENT_REPLICAS:-0}" + if [ "$DEPLOYMENT_REPLICAS" -gt 1 ]; then + echo "Test deployment replicas are scaled up. 
Current replicas: $DEPLOYMENT_REPLICAS"; else - echo "Some Agent Pods in DaemonSet '$DAEMONSET_NAME' are not running." 
- exit 1 + echo "[ERROR] Failed to scale up test deployment replicas. Current replicas: $DEPLOYMENT_REPLICAS"; + exit 1; fi exit 0; + volumeMounts: + - name: shared-volume + mountPath: /shared + - name: k6-container + image: grafana/k6:0.55.0 + ports: + - containerPort: 3030 + command: + - /bin/sh + - -c + - | + echo "Starting K6 container"; + WAITED=0; while [ "$WAITED" -lt 300 ]; do + if [ -f /shared/trigger ]; then + echo "Load test triggered!"; + rm /shared/trigger; + + echo " + import http from 'k6/http'; + export let options = { + vus: 10, + duration: '30s', + }; + const payload = JSON.stringify( + [ + { + 'id': 1, + 'cpu-bound': 12 + } + ] + ); + const params = { + headers: { + 'Content-Type': 'application/json', + } + }; + export default function () { + http.post('http://wa-test-svc.wave-autoscale.svc.cluster.local:3100/process', payload, params); + }" | k6 run - + break; + fi + sleep 1; WAITED=$((WAITED + 1)); + done + exit 0; + volumeMounts: + - name: shared-volume + mountPath: /shared restartPolicy: Never + volumes: + - name: shared-volume + emptyDir: {} backoffLimit: 1 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + namespace: wave-autoscale + name: wa-test-dp + labels: + app: wa-test-dp +spec: + replicas: 1 + selector: + matchLabels: + app: wa-test-dp + template: + metadata: + labels: + app: wa-test-dp + spec: + containers: + - name: wa-test-dp + image: public.ecr.aws/wave-autoscale/wa-simulation-app:latest + imagePullPolicy: Always + env: + - name: PORT + value: "3100" + resources: + requests: + cpu: "250m" + memory: "256Mi" + ports: + - containerPort: 3100 +--- +apiVersion: v1 +kind: Service +metadata: + namespace: wave-autoscale + name: wa-test-svc + labels: + app: wa-test-dp +spec: + selector: + app: wa-test-dp + ports: + - protocol: "TCP" + port: 3100 + targetPort: 3100 + name: wa-test-dp