From 101b30626887e5f497faeb071523115dfaec6e1f Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Fri, 14 Jun 2024 11:52:07 +0100 Subject: [PATCH 01/27] added script --- .../workflows/verify-dashboards-alerts.yaml | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 .github/workflows/verify-dashboards-alerts.yaml diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml new file mode 100644 index 000000000..d0729ce4a --- /dev/null +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -0,0 +1,106 @@ +name: Verify Dashboards and Alerts OK + +on: + push: + paths: + # Dashboards + - examples/dashboards/app_developer.json + - examples/dashboards/business_user.json + - examples/dashboards/platform_engineer.json + # Alerts + - examples/alerts/prometheusrules_policies_missing.yaml + - examples/alerts/slo-availability.yaml + - examples/alerts/slo-latency.yaml + +jobs: + verify-dashboards-alerts: + name: Verify Dashboards and Alerts OK + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@v2 + - name: Run Quickstart + run: | + bash ./hack/quickstart.sh + - name: Port forward grafana and check if contains dashboards. + run: | + # Port forward Grafana + kubectl -n monitoring port-forward service/grafana 3000:3000 & + grafana_process_id=$! + echo "Successfully port forwarded Grafana service." + + # Make API Call and save response to variable. + grafana_api_call=$(curl http://admin@admin:127.0.0.1:3000/api/search) + + # Compare the content in json file with field containing dashboard names + + app_developer=$(jq -r '.description' examples/dashboards/app_developer.json) + business_user=$(jq -r '.description' examples/dashboards/business_user.json) + platform_engineer=$(jq -r '.description' examples/dashboards/platform_engineer.json) + + declare -a missing_dashboards=() + + if [[ "$grafana_api_call" != *"$app_developer"* ]]; then + echo "Grafana does not have $app_developer dashboard." + missing_dashboards+=("$app_developer") + elif [[ "$grafana_api_call" != *"$business_user"* ]]; then + echo "Grafana does not have $business_user dashboard." + missing_dashboards+=("$app_developer") + elif [[ "$grafana_api_call" != *"$platform_engineer"*]]; then + echo "Grafana does not have $platform_engineer dashboard." + missing_dashboards+=("$app_developer") + fi + + if [ ${#missing_dashboards[@]} -gt 0 ]; then + echo "Grafana is missing the following dashboards:" + printf '%s\n' "${missing_dashboards[@]}" + echo "Exiting..." + exit 1 + fi + + echo "Grafana contains dashboards $app_developer, $business_user and $platform_engineer. Continuing to Prometheus..." + + # Close Grafana port-forward. + kill $grafana_process_id + echo "Stoppped port forwarding Grafana." + - name: Port forward Prometheus and check if contains alert rules. + run: | + # Port forward Prometheus + kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 & + prometheus_process_id=$! + echo "Successfully port forwarded Prometheus service." + + # Make API Call and save response to variable + prometheus_api_call=$(curl http://localhost:9090/api/v1/rules) + + # Compare the content in json file with field containing dashboard names. + + prometheusrules_policies_missing_alerts=$(yq e '.spec.groups[].rules[].alert' examples/alerts/prometheusrules_policies_missing.yaml) + slo_availability_alerts=$(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-availability.yaml) + slo_latency_alerts=$(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-latency.yaml) + + combined_alerts=("${prometheusrules_policies_missing_alerts[@]}" "${slo_availability_alerts[@]}" "${slo_latency_alerts[@]}") + + declare -a missing_alerts=() + + for alert in "${combined_alerts[@]}"; do + if [[ "$substring" != *"$alert"* ]]; then + echo "Prometheus does not have $alert rule." + missing_alerts+=("$alert") + fi + fi + + if [ ${#missing_alerts[@]} -gt 0 ]; then + echo "Prometheus is missing the following alerts:" + printf '%s\n' "${missing_alerts[@]}" + echo "Exiting..." + exit 1 + fi + + echo "Prometheus has all alert rules." + + # Close Prometheus port-forward + kill prometheus_process_id=$! + echo "Stopped port forwarding Prometheus." \ No newline at end of file From 7256b63adf855a99b3917ac5029b610fff192624 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Fri, 14 Jun 2024 15:36:00 +0100 Subject: [PATCH 02/27] added faulty code to github action --- .../workflows/verify-dashboards-alerts.yaml | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index d0729ce4a..e4946bd6d 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -20,10 +20,51 @@ jobs: run: shell: bash steps: + - name: Install expect + run: | + sudo apt-get update + sudo apt-get install expect -y + - name: Set up Docker + run: | + sudo apt-get update + sudo apt-get install -y \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg-agent \ + software-properties-common + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + sudo add-apt-repository -y \ + "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) \ + stable" + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io + sudo usermod -aG docker $USER + - name: Install kind + run: | + sudo curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-amd64 + sudo chmod +x ./kind + sudo mv ./kind /usr/local/bin/kind + - name: Install kubectl + run: | + sudo curl -LO https://dl.k8s.io/release/v1.30.0/bin/linux/amd64/kubectl + sudo chmod +x ./kubectl + sudo mv ./kubectl /usr/local/bin/kubectl - uses: actions/checkout@v2 - name: Run Quickstart run: | - bash ./hack/quickstart.sh + cat << 'EOF' > automate_setup.expect + #!/usr/bin/expect -f + spawn bash hack/quickstart-setup.sh + expect "Are you ready to begin? (y/n)" + send "y\r" + expect "Do you want to set up a DNS provider for use with Kuadrant's DNSPolicy API? (y/n)" + send "n\r" + expect eof + EOF + chmod +x automate_setup.expect + ./automate_setup.expect - name: Port forward grafana and check if contains dashboards. run: | # Port forward Grafana From 71ec54140ca1d2d46d735faf6862a679a5729944 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Mon, 17 Jun 2024 16:12:31 +0100 Subject: [PATCH 03/27] switched to podman (limited success) --- .../workflows/verify-dashboards-alerts.yaml | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index e4946bd6d..df577ac85 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -24,23 +24,11 @@ jobs: run: | sudo apt-get update sudo apt-get install expect -y - - name: Set up Docker + - name: Set up Podman run: | - sudo apt-get update - sudo apt-get install -y \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - sudo add-apt-repository -y \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ - stable" - sudo apt-get update - sudo apt-get install -y docker-ce docker-ce-cli containerd.io - sudo usermod -aG docker $USER + curl -LO https://github.com/mgoltzsche/podman-static/releases/download/v4.9.5/podman-linux-amd64.tar.gz + tar -xzf podman-linux-amd64.tar.gz + sudo cp -r podman-linux-amd64/usr podman-linux-amd64/etc / - name: Install kind run: | sudo curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-amd64 @@ -54,6 +42,9 @@ jobs: - uses: actions/checkout@v2 - name: Run Quickstart run: | + rm /usr/bin/docker + + sudo podman pull docker.io/kindest/node:v1.29.2 cat << 'EOF' > automate_setup.expect #!/usr/bin/expect -f spawn bash hack/quickstart-setup.sh From 6c8678e95a7d027de0174b99877ecbc5b4320209 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Tue, 18 Jun 2024 11:32:20 +0100 Subject: [PATCH 04/27] changed to self-hosted and removed install for packages --- .../workflows/verify-dashboards-alerts.yaml | 26 +++---------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index df577ac85..4ab5b7791 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -2,6 +2,8 @@ name: Verify Dashboards and Alerts OK on: push: + branches: + - 444-add-a-gh-action-for-deploying-and-verifying-dashboards-alerts-load-ok-with-quickstart paths: # Dashboards - examples/dashboards/app_developer.json @@ -15,36 +17,14 @@ on: jobs: verify-dashboards-alerts: name: Verify Dashboards and Alerts OK - runs-on: ubuntu-latest + runs-on: self-hosted defaults: run: shell: bash steps: - - name: Install expect - run: | - sudo apt-get update - sudo apt-get install expect -y - - name: Set up Podman - run: | - curl -LO https://github.com/mgoltzsche/podman-static/releases/download/v4.9.5/podman-linux-amd64.tar.gz - tar -xzf podman-linux-amd64.tar.gz - sudo cp -r podman-linux-amd64/usr podman-linux-amd64/etc / - - name: Install kind - run: | - sudo curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-amd64 - sudo chmod +x ./kind - sudo mv ./kind /usr/local/bin/kind - - name: Install kubectl - run: | - sudo curl -LO https://dl.k8s.io/release/v1.30.0/bin/linux/amd64/kubectl - sudo chmod +x ./kubectl - sudo mv ./kubectl /usr/local/bin/kubectl - uses: actions/checkout@v2 - name: Run Quickstart run: | - rm /usr/bin/docker - - sudo podman pull docker.io/kindest/node:v1.29.2 cat << 'EOF' > automate_setup.expect #!/usr/bin/expect -f spawn bash hack/quickstart-setup.sh From 6f4d502d8cf0aab6caa2d920b39cb8d009df96fa Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Tue, 18 Jun 2024 11:37:00 +0100 Subject: [PATCH 05/27] test push --- .github/workflows/verify-dashboards-alerts.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 4ab5b7791..887594121 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -25,6 +25,7 @@ jobs: - uses: actions/checkout@v2 - name: Run Quickstart run: | + cat << 'EOF' > automate_setup.expect #!/usr/bin/expect -f spawn bash hack/quickstart-setup.sh From a41016e2716e2b04230857197cf9a4062d1ea25a Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Tue, 18 Jun 2024 11:49:23 +0100 Subject: [PATCH 06/27] changed trigger to pull request + fix typos --- .../workflows/verify-dashboards-alerts.yaml | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 887594121..3a2e61786 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -1,18 +1,15 @@ name: Verify Dashboards and Alerts OK -on: - push: - branches: - - 444-add-a-gh-action-for-deploying-and-verifying-dashboards-alerts-load-ok-with-quickstart - paths: - # Dashboards - - examples/dashboards/app_developer.json - - examples/dashboards/business_user.json - - examples/dashboards/platform_engineer.json - # Alerts - - examples/alerts/prometheusrules_policies_missing.yaml - - examples/alerts/slo-availability.yaml - - examples/alerts/slo-latency.yaml +on: push + # paths: + # # Dashboards + # - examples/dashboards/app_developer.json + # - examples/dashboards/business_user.json + # - examples/dashboards/platform_engineer.json + # # Alerts + # - examples/alerts/prometheusrules_policies_missing.yaml + # - examples/alerts/slo-availability.yaml + # - examples/alerts/slo-latency.yaml jobs: verify-dashboards-alerts: @@ -25,48 +22,58 @@ jobs: - uses: actions/checkout@v2 - name: Run Quickstart run: | - + kind delete clusters kuadrant-local + kind delete clusters kuadrant-local + + export ISTIO_INSTALL_SAIL=true + cat << 'EOF' > automate_setup.expect #!/usr/bin/expect -f + set timeout -1 spawn bash hack/quickstart-setup.sh expect "Are you ready to begin? (y/n)" send "y\r" expect "Do you want to set up a DNS provider for use with Kuadrant's DNSPolicy API? (y/n)" send "n\r" - expect eof + expect "Thank you for using Kuadrant! If you have any questions or feedback, please reach out to our community." EOF chmod +x automate_setup.expect ./automate_setup.expect - name: Port forward grafana and check if contains dashboards. run: | # Port forward Grafana + kubectl -n monitoring wait --for=condition=available deployment grafana --timeout=300s kubectl -n monitoring port-forward service/grafana 3000:3000 & grafana_process_id=$! echo "Successfully port forwarded Grafana service." + sleep 1 + # Make API Call and save response to variable. - grafana_api_call=$(curl http://admin@admin:127.0.0.1:3000/api/search) + grafana_api_call=$(curl -u admin:admin http://127.0.0.1:3000/api/search) # Compare the content in json file with field containing dashboard names - app_developer=$(jq -r '.description' examples/dashboards/app_developer.json) - business_user=$(jq -r '.description' examples/dashboards/business_user.json) - platform_engineer=$(jq -r '.description' examples/dashboards/platform_engineer.json) + app_developer=$(jq -r '.panels[1].title' examples/dashboards/app_developer.json) + business_user=$(jq -r '.panels[1].title' examples/dashboards/business_user.json) + platform_engineer=$(jq -r '.panels[1].title' examples/dashboards/platform_engineer.json) declare -a missing_dashboards=() if [[ "$grafana_api_call" != *"$app_developer"* ]]; then echo "Grafana does not have $app_developer dashboard." missing_dashboards+=("$app_developer") - elif [[ "$grafana_api_call" != *"$business_user"* ]]; then + fi + if [[ "$grafana_api_call" != *"$business_user"* ]]; then echo "Grafana does not have $business_user dashboard." - missing_dashboards+=("$app_developer") - elif [[ "$grafana_api_call" != *"$platform_engineer"*]]; then + missing_dashboards+=("$business_user") + fi + if [[ "$grafana_api_call" != *"$platform_engineer"* ]]; then echo "Grafana does not have $platform_engineer dashboard." - missing_dashboards+=("$app_developer") + missing_dashboards+=("$platform_engineer") fi - if [ ${#missing_dashboards[@]} -gt 0 ]; then + if [[ ${#missing_dashboards[@]} -gt 0 ]]; then echo "Grafana is missing the following dashboards:" printf '%s\n' "${missing_dashboards[@]}" echo "Exiting..." From fa550471344a7ca4141c4d72e49710c63222cc62 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Wed, 19 Jun 2024 15:20:41 +0100 Subject: [PATCH 07/27] fixed typos + added wait for prometheus changed deployment to prometheus adapter changed deployment to blackbox-operator changed wait condition to prometheus k8s pod --- .github/workflows/verify-dashboards-alerts.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 3a2e61786..d123b1afa 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -42,7 +42,7 @@ jobs: - name: Port forward grafana and check if contains dashboards. run: | # Port forward Grafana - kubectl -n monitoring wait --for=condition=available deployment grafana --timeout=300s + kubectl -n monitoring wait --for=condition=available deployment grafana --timeout=600s kubectl -n monitoring port-forward service/grafana 3000:3000 & grafana_process_id=$! echo "Successfully port forwarded Grafana service." @@ -87,11 +87,14 @@ jobs: echo "Stoppped port forwarding Grafana." - name: Port forward Prometheus and check if contains alert rules. run: | + kubectl -n monitoring wait --for=condition=ready pod prometheus-k8s-0 --timeout=600s # Port forward Prometheus kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 & prometheus_process_id=$! echo "Successfully port forwarded Prometheus service." + sleep 1 + # Make API Call and save response to variable prometheus_api_call=$(curl http://localhost:9090/api/v1/rules) @@ -112,7 +115,7 @@ jobs: fi fi - if [ ${#missing_alerts[@]} -gt 0 ]; then + if [[ ${#missing_alerts[@]} -gt 0 ]]; then echo "Prometheus is missing the following alerts:" printf '%s\n' "${missing_alerts[@]}" echo "Exiting..." From 83bb3f3f8ebf9aed3a1dfa465df6b422cfa7471b Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Thu, 20 Jun 2024 08:55:51 +0100 Subject: [PATCH 08/27] added alias for sudo podman fix for loop syntax fixed string declaration errors and array read issues fixed pid declaration --- .github/workflows/verify-dashboards-alerts.yaml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index d123b1afa..481749183 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -26,6 +26,7 @@ jobs: kind delete clusters kuadrant-local export ISTIO_INSTALL_SAIL=true + alias podman='sudo podman' cat << 'EOF' > automate_setup.expect #!/usr/bin/expect -f @@ -100,20 +101,20 @@ jobs: # Compare the content in json file with field containing dashboard names. - prometheusrules_policies_missing_alerts=$(yq e '.spec.groups[].rules[].alert' examples/alerts/prometheusrules_policies_missing.yaml) - slo_availability_alerts=$(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-availability.yaml) - slo_latency_alerts=$(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-latency.yaml) + readarray -t prometheusrules_policies_missing_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/prometheusrules_policies_missing.yaml) + readarray -t slo_availability_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-availability.yaml) + readarray -t slo_latency_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-latency.yaml) combined_alerts=("${prometheusrules_policies_missing_alerts[@]}" "${slo_availability_alerts[@]}" "${slo_latency_alerts[@]}") declare -a missing_alerts=() for alert in "${combined_alerts[@]}"; do - if [[ "$substring" != *"$alert"* ]]; then + if [[ "$prometheus_api_call" != *"$alert"* && "$alert" != "null" ]]; then echo "Prometheus does not have $alert rule." missing_alerts+=("$alert") fi - fi + done if [[ ${#missing_alerts[@]} -gt 0 ]]; then echo "Prometheus is missing the following alerts:" @@ -125,5 +126,5 @@ jobs: echo "Prometheus has all alert rules." # Close Prometheus port-forward - kill prometheus_process_id=$! + kill $prometheus_process_id echo "Stopped port forwarding Prometheus." \ No newline at end of file From c3b9691bc965d35dab00c54014f3b17f5dc3cb02 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Thu, 20 Jun 2024 14:59:25 +0100 Subject: [PATCH 09/27] added files to .gitignore --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index eea81dd3d..1d91bfee1 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,8 @@ tmp /coverage/ # Vendor dependencies -vendor \ No newline at end of file +vendor +.terraform +terraform.* +.terraform.* +*.pem From 9796180af816ab2c0f12a52765269e24527b0b6f Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Thu, 20 Jun 2024 15:02:18 +0100 Subject: [PATCH 10/27] added terraform file for self-hosted-runner better implementation of tf script for use with github actions and secrets --- self-hosted-runner.tf | 99 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 self-hosted-runner.tf diff --git a/self-hosted-runner.tf b/self-hosted-runner.tf new file mode 100644 index 000000000..a5a3921d0 --- /dev/null +++ b/self-hosted-runner.tf @@ -0,0 +1,99 @@ +provider "aws" { + region = "eu-west-1" + access_key = var.aws_access_key + secret_key = var.aws_secret_key +} + +resource "aws_instance" "example" { + ami = "ami-0776c814353b4814d" + instance_type = "t2.xlarge" + + root_block_device { + volume_size = 16 // GB + } + + key_name = var.aws_key_name + + tags = { + Name = "kuadrant-operator-self-hosted-runner" + } + + // Security Group for SSH, HTTP, and HTTPS access + security_groups = ["ssh-http-https-access"] + + user_data = <<-EOL + #!/bin/bash + echo "Starting user_data script..." + sudo apt-get update -y + sudo apt-get install -y podman golang + curl -O https://s3.us-west-2.amazonaws.com/amazon-eks/1.30.0/2024-05-12/bin/linux/amd64/kubectl + chmod +x ./kubectl + mkdir -p /home/ubuntu/bin && cp ./kubectl /home/ubuntu/bin/kubectl + echo 'alias podman="sudo podman"' >> /home/ubuntu/.bashrc + echo export PATH=/home/ubuntu/bin:/home/ubuntu/go/pkg/mod/bin:$PATH >> /home/ubuntu/.bashrc + source /home/ubuntu/.bashrc + export GOMODCACHE=/home/ubuntu/go + export GOPATH=/home/ubuntu/go/pkg/mod + export GOCACHE=/home/ubuntu/.cache/go-build + export HOME=/home/ubuntu + go install sigs.k8s.io/kind@v0.23.0 + source /home/ubuntu/.bashrc + cd /home/ubuntu + git clone https://www.github.com/kuadrant/kuadrant-operator.git + echo 'unqualified-search-registries = ["docker.io"]' | sudo tee -a /etc/containers/registries.conf + sudo chmod 7777 kuadrant-operator/hack + echo "user_data script execution completed." + touch /tmp/user_data_done + EOL +} + +resource "aws_security_group" "ssh_http_https_access" { + name = "ssh-http-https-access" + description = "Allow SSH, HTTP, and HTTPS access" + + // Ingress rule for SSH access + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] // Allow SSH access from anywhere + } + + // Ingress rule for HTTP access (port 80) + ingress { + from_port = 80 + to_port = 80 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] // Allow HTTP access from anywhere + } + + // Ingress rule for HTTPS access (port 443) + ingress { + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] // Allow HTTPS access from anywhere + } + + // Egress rule to allow all outbound traffic + egress { + from_port = 0 + to_port = 0 + protocol = "-1" // Allow all protocols + cidr_blocks = ["0.0.0.0/0"] // Allow outbound traffic to anywhere + } +} + +resource "null_resource" "wait_for_user_data" { + provisioner "local-exec" { + command = < Date: Fri, 21 Jun 2024 09:12:21 +0100 Subject: [PATCH 11/27] added self hosted runner functionality to workflow syntax error another one fixed variable errors passed secrets as variables fixed variable declaration so many issues... added additional changes fixed syntax errors more syntax errors hopefully fixed syntax added public ip to terraform and action fixed env var declare --- .../workflows/verify-dashboards-alerts.yaml | 299 +++++++++++------- self-hosted-runner.tf | 21 ++ 2 files changed, 214 insertions(+), 106 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 481749183..4ea7ee461 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -1,6 +1,7 @@ name: Verify Dashboards and Alerts OK -on: push +on: + push: # paths: # # Dashboards # - examples/dashboards/app_developer.json @@ -12,119 +13,205 @@ on: push # - examples/alerts/slo-latency.yaml jobs: + deploy-register-self-runner: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: hashicorp/setup-terraform@v3 + + - name: Initialize Terraform Environment + run: terraform init + + - name: Create PEM file + run: | + echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem + chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem + + - name: Apply Terraform Configuration + Extract Public IP + run: | + terraform apply -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} + # Get the public IP address of the instance + echo "EC2_PUBLIC_IP=$(terraform output instance_public_ip)" >> $GITHUB_ENV + + + - name: Create PEM file + run: | + echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem + chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem + + - name: Generate Registration Token + id: reg_token + run: | + REPO_OWNER=${{ github.repository_owner }} + REPO_NAME=${{ github.event.repository.name }} + + RESPONSE=$(curl -X POST -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/registration-token) + + echo "REGISTRATION_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV + + - name: SSH into EC2 Instance and Register Runner + env: + REGISTRATION_TOKEN: ${{ env.REGISTRATION_TOKEN }} + run: | + ssh -o StrictHostKeyChecking=no -i ${{ secrets.AWS_KEY_NAME }}.pem ec2-user@${{ env.EC2_PUBLIC_IP }} << EOF + mkdir actions-runner && cd actions-runner + curl -o actions-runner-linux-x64-2.281.1.tar.gz -L https://github.com/actions/runner/releases/download/v2.281.1/actions-runner-linux-x64-2.281.1.tar.gz + tar xzf ./actions-runner-linux-x64-2.281.1.tar.gz + ./config.sh --url https://github.com/${{ github.repository }} --token $REGISTRATION_TOKEN --unattended --labels self-hosted,linux,aws + ./svc.sh install + ./svc.sh start + EOF + verify-dashboards-alerts: name: Verify Dashboards and Alerts OK + needs: deploy-register-self-runner runs-on: self-hosted defaults: run: shell: bash steps: - - uses: actions/checkout@v2 - - name: Run Quickstart - run: | - kind delete clusters kuadrant-local - kind delete clusters kuadrant-local - - export ISTIO_INSTALL_SAIL=true - alias podman='sudo podman' - - cat << 'EOF' > automate_setup.expect - #!/usr/bin/expect -f - set timeout -1 - spawn bash hack/quickstart-setup.sh - expect "Are you ready to begin? (y/n)" - send "y\r" - expect "Do you want to set up a DNS provider for use with Kuadrant's DNSPolicy API? (y/n)" - send "n\r" - expect "Thank you for using Kuadrant! If you have any questions or feedback, please reach out to our community." - EOF - chmod +x automate_setup.expect - ./automate_setup.expect - - name: Port forward grafana and check if contains dashboards. - run: | - # Port forward Grafana - kubectl -n monitoring wait --for=condition=available deployment grafana --timeout=600s - kubectl -n monitoring port-forward service/grafana 3000:3000 & - grafana_process_id=$! - echo "Successfully port forwarded Grafana service." - - sleep 1 - - # Make API Call and save response to variable. - grafana_api_call=$(curl -u admin:admin http://127.0.0.1:3000/api/search) - - # Compare the content in json file with field containing dashboard names - - app_developer=$(jq -r '.panels[1].title' examples/dashboards/app_developer.json) - business_user=$(jq -r '.panels[1].title' examples/dashboards/business_user.json) - platform_engineer=$(jq -r '.panels[1].title' examples/dashboards/platform_engineer.json) - - declare -a missing_dashboards=() - - if [[ "$grafana_api_call" != *"$app_developer"* ]]; then - echo "Grafana does not have $app_developer dashboard." - missing_dashboards+=("$app_developer") - fi - if [[ "$grafana_api_call" != *"$business_user"* ]]; then - echo "Grafana does not have $business_user dashboard." - missing_dashboards+=("$business_user") - fi - if [[ "$grafana_api_call" != *"$platform_engineer"* ]]; then - echo "Grafana does not have $platform_engineer dashboard." - missing_dashboards+=("$platform_engineer") - fi - - if [[ ${#missing_dashboards[@]} -gt 0 ]]; then - echo "Grafana is missing the following dashboards:" - printf '%s\n' "${missing_dashboards[@]}" - echo "Exiting..." - exit 1 - fi - - echo "Grafana contains dashboards $app_developer, $business_user and $platform_engineer. Continuing to Prometheus..." - - # Close Grafana port-forward. - kill $grafana_process_id - echo "Stoppped port forwarding Grafana." - - name: Port forward Prometheus and check if contains alert rules. - run: | - kubectl -n monitoring wait --for=condition=ready pod prometheus-k8s-0 --timeout=600s - # Port forward Prometheus - kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 & - prometheus_process_id=$! - echo "Successfully port forwarded Prometheus service." - - sleep 1 - - # Make API Call and save response to variable - prometheus_api_call=$(curl http://localhost:9090/api/v1/rules) - - # Compare the content in json file with field containing dashboard names. - - readarray -t prometheusrules_policies_missing_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/prometheusrules_policies_missing.yaml) - readarray -t slo_availability_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-availability.yaml) - readarray -t slo_latency_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-latency.yaml) - - combined_alerts=("${prometheusrules_policies_missing_alerts[@]}" "${slo_availability_alerts[@]}" "${slo_latency_alerts[@]}") - - declare -a missing_alerts=() - - for alert in "${combined_alerts[@]}"; do - if [[ "$prometheus_api_call" != *"$alert"* && "$alert" != "null" ]]; then - echo "Prometheus does not have $alert rule." - missing_alerts+=("$alert") + - uses: actions/checkout@v2 + + - name: Run Quickstart + run: | + kind delete clusters kuadrant-local + kind delete clusters kuadrant-local + + export ISTIO_INSTALL_SAIL=true + alias podman='sudo podman' + + cat << 'EOF' > automate_setup.expect + #!/usr/bin/expect -f + set timeout -1 + spawn bash hack/quickstart-setup.sh + expect "Are you ready to begin? (y/n)" + send "y\r" + expect "Do you want to set up a DNS provider for use with Kuadrant's DNSPolicy API? (y/n)" + send "n\r" + expect "Thank you for using Kuadrant! If you have any questions or feedback, please reach out to our community." + EOF + chmod +x automate_setup.expect + ./automate_setup.expect + + - name: Port forward grafana and check if contains dashboards. + run: | + # Port forward Grafana + kubectl -n monitoring wait --for=condition=available deployment grafana --timeout=600s + kubectl -n monitoring port-forward service/grafana 3000:3000 & + grafana_process_id=$! + echo "Successfully port forwarded Grafana service." + + sleep 1 + + # Make API Call and save response to variable. + grafana_api_call=$(curl -u admin:admin http://127.0.0.1:3000/api/search) + + # Compare the content in json file with field containing dashboard names + + app_developer=$(jq -r '.panels[1].title' examples/dashboards/app_developer.json) + business_user=$(jq -r '.panels[1].title' examples/dashboards/business_user.json) + platform_engineer=$(jq -r '.panels[1].title' examples/dashboards/platform_engineer.json) + + declare -a missing_dashboards=() + + if [[ "$grafana_api_call" != *"$app_developer"* ]]; then + echo "Grafana does not have $app_developer dashboard." + missing_dashboards+=("$app_developer") + fi + if [[ "$grafana_api_call" != *"$business_user"* ]]; then + echo "Grafana does not have $business_user dashboard." + missing_dashboards+=("$business_user") + fi + if [[ "$grafana_api_call" != *"$platform_engineer"* ]]; then + echo "Grafana does not have $platform_engineer dashboard." + missing_dashboards+=("$platform_engineer") + fi + + if [[ ${#missing_dashboards[@]} -gt 0 ]]; then + echo "Grafana is missing the following dashboards:" + printf '%s\n' "${missing_dashboards[@]}" + echo "Exiting..." + exit 1 + fi + + echo "Grafana contains dashboards $app_developer, $business_user and $platform_engineer. Continuing to Prometheus..." + + # Close Grafana port-forward. + kill $grafana_process_id + echo "Stoppped port forwarding Grafana." + + - name: Port forward Prometheus and check if contains alert rules. + run: | + kubectl -n monitoring wait --for=condition=ready pod prometheus-k8s-0 --timeout=600s + # Port forward Prometheus + kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 & + prometheus_process_id=$! + echo "Successfully port forwarded Prometheus service." + + sleep 1 + + # Make API Call and save response to variable + prometheus_api_call=$(curl http://localhost:9090/api/v1/rules) + + # Compare the content in json file with field containing dashboard names. + + readarray -t prometheusrules_policies_missing_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/prometheusrules_policies_missing.yaml) + readarray -t slo_availability_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-availability.yaml) + readarray -t slo_latency_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-latency.yaml) + + combined_alerts=("${prometheusrules_policies_missing_alerts[@]}" "${slo_availability_alerts[@]}" "${slo_latency_alerts[@]}") + + declare -a missing_alerts=() + + for alert in "${combined_alerts[@]}"; do + if [[ "$prometheus_api_call" != *"$alert"* && "$alert" != "null" ]]; then + echo "Prometheus does not have $alert rule." + missing_alerts+=("$alert") + fi + done + + if [[ ${#missing_alerts[@]} -gt 0 ]]; then + echo "Prometheus is missing the following alerts:" + printf '%s\n' "${missing_alerts[@]}" + echo "Exiting..." + exit 1 fi - done - if [[ ${#missing_alerts[@]} -gt 0 ]]; then - echo "Prometheus is missing the following alerts:" - printf '%s\n' "${missing_alerts[@]}" - echo "Exiting..." - exit 1 - fi + echo "Prometheus has all alert rules." - echo "Prometheus has all alert rules." + # Close Prometheus port-forward + kill $prometheus_process_id + echo "Stopped port forwarding Prometheus." - # Close Prometheus port-forward - kill $prometheus_process_id - echo "Stopped port forwarding Prometheus." \ No newline at end of file + deregister-runner: + runs-on: ubuntu-latest + needs: verify-dashboards-alerts + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Create PEM file + run: | + echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem + chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem + + - name: Generate Remove Token + id: remove_token + run: | + REPO_OWNER=${{ github.repository_owner }} + REPO_NAME=${{ github.event.repository.name }} + + RUNNER_ID=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners | jq '.runners[] | select(.name=="self-hosted") | .id') + + curl -X DELETE -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/$RUNNER_ID + + - name: Terraform Destroy + run: terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} diff --git a/self-hosted-runner.tf b/self-hosted-runner.tf index a5a3921d0..1809829e4 100644 --- a/self-hosted-runner.tf +++ b/self-hosted-runner.tf @@ -4,6 +4,21 @@ provider "aws" { secret_key = var.aws_secret_key } +variable "aws_access_key" { + description = "AWS Access Key" + type = string +} + +variable "aws_secret_key" { + description = "AWS Secret Key" + type = string +} + +variable "aws_key_name" { + description = "AWS Key Name" + type = string +} + resource "aws_instance" "example" { ami = "ami-0776c814353b4814d" instance_type = "t2.xlarge" @@ -47,6 +62,8 @@ resource "aws_instance" "example" { EOL } + + resource "aws_security_group" "ssh_http_https_access" { name = "ssh-http-https-access" description = "Allow SSH, HTTP, and HTTPS access" @@ -96,4 +113,8 @@ resource "null_resource" "wait_for_user_data" { } depends_on = [aws_instance.example] +} + +output "instance_public_ip" { + value = aws_instance.example.public_ip } \ No newline at end of file From 0cee245b7e457c7d2e506609babac96ca7e27fae Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Fri, 21 Jun 2024 10:58:18 +0100 Subject: [PATCH 12/27] deregister-runner no longer needs a job fix for ssh issue moved step created ssh dir and touched new file a b changed perms to 400 changed user to ubuntu added additional package to install for gh runner install libicu changed url check for what the url is another one echo repo changed repo name to static a changed auth type B added permissions for github token e ee e changed from github token to secret --- .../workflows/verify-dashboards-alerts.yaml | 31 ++++++++----------- self-hosted-runner.tf | 2 +- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 4ea7ee461..189494016 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -11,7 +11,6 @@ on: # - examples/alerts/prometheusrules_policies_missing.yaml # - examples/alerts/slo-availability.yaml # - examples/alerts/slo-latency.yaml - jobs: deploy-register-self-runner: runs-on: ubuntu-latest @@ -26,7 +25,7 @@ jobs: - name: Create PEM file run: | echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem - chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem + chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem - name: Apply Terraform Configuration + Extract Public IP run: | @@ -34,21 +33,16 @@ jobs: # Get the public IP address of the instance echo "EC2_PUBLIC_IP=$(terraform output instance_public_ip)" >> $GITHUB_ENV - - - name: Create PEM file - run: | - echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem - chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem - - name: Generate Registration Token id: reg_token run: | REPO_OWNER=${{ github.repository_owner }} REPO_NAME=${{ github.event.repository.name }} - - RESPONSE=$(curl -X POST -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ - -H "Accept: application/vnd.github.v3+json" \ + RESPONSE=$(curl -L \ + -X POST \ + -H "Authorization: Bearer ${{ secrets.REGISTRATION_PAT }}" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/registration-token) + echo $RESPONSE echo "REGISTRATION_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV @@ -56,13 +50,15 @@ jobs: env: REGISTRATION_TOKEN: ${{ env.REGISTRATION_TOKEN }} run: | - ssh -o StrictHostKeyChecking=no -i ${{ secrets.AWS_KEY_NAME }}.pem ec2-user@${{ env.EC2_PUBLIC_IP }} << EOF + sudo apt-get update -y + sudo apt-get install -y libicu-dev + ssh -o StrictHostKeyChecking=no -i ${{ secrets.AWS_KEY_NAME }}.pem ubuntu@${{ env.EC2_PUBLIC_IP }} << EOF mkdir actions-runner && cd actions-runner - curl -o actions-runner-linux-x64-2.281.1.tar.gz -L https://github.com/actions/runner/releases/download/v2.281.1/actions-runner-linux-x64-2.281.1.tar.gz - tar xzf ./actions-runner-linux-x64-2.281.1.tar.gz - ./config.sh --url https://github.com/${{ github.repository }} --token $REGISTRATION_TOKEN --unattended --labels self-hosted,linux,aws - ./svc.sh install - ./svc.sh start + curl -o actions-runner-linux-x64-2.317.0.tar.gz -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz + tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz + + ./config.sh --url https://github.com/${{ github.repository_owner }}/${{ github.event.repository.name }} --token $REGISTRATION_TOKEN --unattended --labels self-hosted,linux,aws + ./run.sh EOF verify-dashboards-alerts: @@ -189,7 +185,6 @@ jobs: deregister-runner: runs-on: ubuntu-latest needs: verify-dashboards-alerts - steps: - name: Checkout repository uses: actions/checkout@v2 diff --git a/self-hosted-runner.tf b/self-hosted-runner.tf index 1809829e4..808801c6c 100644 --- a/self-hosted-runner.tf +++ b/self-hosted-runner.tf @@ -40,7 +40,7 @@ resource "aws_instance" "example" { #!/bin/bash echo "Starting user_data script..." sudo apt-get update -y - sudo apt-get install -y podman golang + sudo apt-get install -y podman golang libicu-dev curl -O https://s3.us-west-2.amazonaws.com/amazon-eks/1.30.0/2024-05-12/bin/linux/amd64/kubectl chmod +x ./kubectl mkdir -p /home/ubuntu/bin && cp ./kubectl /home/ubuntu/bin/kubectl From fb7b5aca19ccee4959806acb358c31c2518421e9 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Fri, 21 Jun 2024 15:29:38 +0100 Subject: [PATCH 13/27] ran command in background added nohup hopefully fix :D source bashrc changed shell prefs fixed typo install expect and change perms for user in action adding chown because i am a clown :D install yq because y not remove classical flag a --- .github/workflows/verify-dashboards-alerts.yaml | 11 +++++------ self-hosted-runner.tf | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 189494016..823b05c88 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -42,7 +42,6 @@ jobs: -X POST \ -H "Authorization: Bearer ${{ secrets.REGISTRATION_PAT }}" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/registration-token) - echo $RESPONSE echo "REGISTRATION_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV @@ -58,7 +57,7 @@ jobs: tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz ./config.sh --url https://github.com/${{ github.repository_owner }}/${{ github.event.repository.name }} --token $REGISTRATION_TOKEN --unattended --labels self-hosted,linux,aws - ./run.sh + nohup ./run.sh > nohup.out 2> nohup.err < /dev/null & EOF verify-dashboards-alerts: @@ -67,17 +66,17 @@ jobs: runs-on: self-hosted defaults: run: - shell: bash + shell: bash -ieo pipefail {0} steps: - uses: actions/checkout@v2 - name: Run Quickstart run: | + sudo chown $(whoami) . kind delete clusters kuadrant-local kind delete clusters kuadrant-local export ISTIO_INSTALL_SAIL=true - alias podman='sudo podman' cat << 'EOF' > automate_setup.expect #!/usr/bin/expect -f @@ -200,11 +199,11 @@ jobs: REPO_OWNER=${{ github.repository_owner }} REPO_NAME=${{ github.event.repository.name }} - RUNNER_ID=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + RUNNER_ID=$(curl -H "Authorization: Bearer ${{ secrets.REGISTRATION_PAT }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners | jq '.runners[] | select(.name=="self-hosted") | .id') - curl -X DELETE -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + curl -X DELETE -H "Authorization: Bearer ${{ secrets.REGISTRATION_PAT }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/$RUNNER_ID diff --git a/self-hosted-runner.tf b/self-hosted-runner.tf index 808801c6c..611c5ebb4 100644 --- a/self-hosted-runner.tf +++ b/self-hosted-runner.tf @@ -40,7 +40,8 @@ resource "aws_instance" "example" { #!/bin/bash echo "Starting user_data script..." sudo apt-get update -y - sudo apt-get install -y podman golang libicu-dev + sudo apt-get install -y podman golang libicu-dev expect + sudo snap install yq curl -O https://s3.us-west-2.amazonaws.com/amazon-eks/1.30.0/2024-05-12/bin/linux/amd64/kubectl chmod +x ./kubectl mkdir -p /home/ubuntu/bin && cp ./kubectl /home/ubuntu/bin/kubectl From 37b517b4a92608331294ba0c8876ed5599306e39 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Mon, 24 Jun 2024 16:08:15 +0100 Subject: [PATCH 14/27] have deregister run if previous job fails attempt to use statefile new changes *E* a --- .../workflows/verify-dashboards-alerts.yaml | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 823b05c88..e9a80a989 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -29,9 +29,15 @@ jobs: - name: Apply Terraform Configuration + Extract Public IP run: | - terraform apply -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} + terraform apply -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} -state=statefile # Get the public IP address of the instance echo "EC2_PUBLIC_IP=$(terraform output instance_public_ip)" >> $GITHUB_ENV + + - name: Upload statefile for use further down + uses: actions/upload-artifact@v4 + with: + name: statefile + path: statefile - name: Generate Registration Token id: reg_token @@ -184,10 +190,21 @@ jobs: deregister-runner: runs-on: ubuntu-latest needs: verify-dashboards-alerts + if: always() steps: - name: Checkout repository uses: actions/checkout@v2 + - uses: hashicorp/setup-terraform@v3 + + - name: Initialize Terraform Environment + run: terraform init + + - name: Download statefile to teardown resources + uses: actions/download-artifact@v4 + with: + name: statefile + - name: Create PEM file run: | echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem @@ -208,4 +225,4 @@ jobs: https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/$RUNNER_ID - name: Terraform Destroy - run: terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} + run: terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} -state=statefile From 6a08c43808535ead6089cd43ec1aa0aa2f2c0e34 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Tue, 25 Jun 2024 10:34:08 +0100 Subject: [PATCH 15/27] terraform.tfstate o . o a a --- .github/workflows/verify-dashboards-alerts.yaml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index e9a80a989..de1d780a3 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -29,15 +29,15 @@ jobs: - name: Apply Terraform Configuration + Extract Public IP run: | - terraform apply -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} -state=statefile + terraform apply -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} # Get the public IP address of the instance echo "EC2_PUBLIC_IP=$(terraform output instance_public_ip)" >> $GITHUB_ENV - + - name: Upload statefile for use further down uses: actions/upload-artifact@v4 with: - name: statefile - path: statefile + name: terraform-tfstate + path: terraform.tfstate - name: Generate Registration Token id: reg_token @@ -198,12 +198,13 @@ jobs: - uses: hashicorp/setup-terraform@v3 - name: Initialize Terraform Environment - run: terraform init + run: | + terraform init - name: Download statefile to teardown resources uses: actions/download-artifact@v4 with: - name: statefile + name: terraform-tfstate - name: Create PEM file run: | @@ -225,4 +226,4 @@ jobs: https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/$RUNNER_ID - name: Terraform Destroy - run: terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} -state=statefile + run: terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} From ebc3d8c4372e09e586f99b6dbf968e1976cf1c64 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Tue, 25 Jun 2024 15:27:12 +0100 Subject: [PATCH 16/27] changed over to github app for token gen --- .../workflows/verify-dashboards-alerts.yaml | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index de1d780a3..13307c5a1 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -39,14 +39,22 @@ jobs: name: terraform-tfstate path: terraform.tfstate + - name: Generate a token + id: generate-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ vars.APP_ID }} + private-key: ${{ secrets.APP_PRIVATE_KEY }} + - name: Generate Registration Token - id: reg_token + env: + GH_TOKEN: ${{ steps.generate-token.outputs.token }} run: | REPO_OWNER=${{ github.repository_owner }} REPO_NAME=${{ github.event.repository.name }} RESPONSE=$(curl -L \ -X POST \ - -H "Authorization: Bearer ${{ secrets.REGISTRATION_PAT }}" \ + -H "Authorization: Bearer ${{ env.GH_TOKEN }}" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/registration-token) echo "REGISTRATION_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV @@ -211,17 +219,26 @@ jobs: echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem + - name: Generate a token + id: generate-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ vars.APP_ID }} + private-key: ${{ secrets.APP_PRIVATE_KEY }} + - name: Generate Remove Token + env: + GH_TOKEN: ${{ steps.generate-token.outputs.token }} id: remove_token run: | REPO_OWNER=${{ github.repository_owner }} REPO_NAME=${{ github.event.repository.name }} - RUNNER_ID=$(curl -H "Authorization: Bearer ${{ secrets.REGISTRATION_PAT }}" \ + RUNNER_ID=$(curl -H "Authorization: Bearer ${{ env.GH_TOKEN }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners | jq '.runners[] | select(.name=="self-hosted") | .id') - curl -X DELETE -H "Authorization: Bearer ${{ secrets.REGISTRATION_PAT }}" \ + curl -X DELETE -H "Authorization: Bearer ${{ env.GH_TOKEN }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/$RUNNER_ID From 63c2a78d393ccb10112a19629e69e1f748972f61 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Tue, 25 Jun 2024 15:49:39 +0100 Subject: [PATCH 17/27] save changes (undone) --- .../workflows/verify-dashboards-alerts.yaml | 57 +++++++++++++++---- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 13307c5a1..919326c4c 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -22,10 +22,11 @@ jobs: - name: Initialize Terraform Environment run: terraform init - - name: Create PEM file + - name: Create PEM files run: | echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem - chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem + echo "${{ secrets.APP_PRIVATE_KEY }}" > app_private.pem + chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem app_private.pem - name: Apply Terraform Configuration + Extract Public IP run: | @@ -39,16 +40,52 @@ jobs: name: terraform-tfstate path: terraform.tfstate - - name: Generate a token - id: generate-token - uses: actions/create-github-app-token@v1 - with: - app-id: ${{ vars.APP_ID }} - private-key: ${{ secrets.APP_PRIVATE_KEY }} + - name: Generate a JWT Token + run: | + #!/usr/bin/env bash + + set -o pipefail + + client_id=${{ var.APP_ID }} # Client ID as first argument + + pem=$( cat app_private.pem ) # file path of the private key as second argument + + now=$(date +%s) + iat=$((${now} - 60)) # Issues 60 seconds in the past + exp=$((${now} + 600)) # Expires 10 minutes in the future + + b64enc() { openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n'; } + + header_json='{ + "typ":"JWT", + "alg":"RS256" + }' + # Header encode + header=$( echo -n "${header_json}" | b64enc ) + + payload_json='{ + "iat":'"${iat}"', + "exp":'"${exp}"', + "iss":'"${client_id}"' + }' + # Payload encode + payload=$( echo -n "${payload_json}" | b64enc ) + + # Signature + header_payload="${header}"."${payload}" + signature=$( + openssl dgst -sha256 -sign <(echo -n "${pem}") \ + <(echo -n "${header_payload}") | b64enc + ) + + # Create JWT + JWT="${header_payload}"."${signature}" + + # echo to variable for later use + echo $JWT_TOKEN >> $GITHUB_ENV + - name: Generate Registration Token - env: - GH_TOKEN: ${{ steps.generate-token.outputs.token }} run: | REPO_OWNER=${{ github.repository_owner }} REPO_NAME=${{ github.event.repository.name }} From 67f8bd4589397376b76f84ca592e3b91e95ab4ee Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Wed, 26 Jun 2024 09:04:02 +0100 Subject: [PATCH 18/27] swapped out github action for manual api interactions instead fixed typo E e e e a a a meep meep --- .../workflows/verify-dashboards-alerts.yaml | 99 ++++++++++++++++--- 1 file changed, 84 insertions(+), 15 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 919326c4c..c9a4ca845 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -46,7 +46,7 @@ jobs: set -o pipefail - client_id=${{ var.APP_ID }} # Client ID as first argument + client_id=${{ vars.APP_ID }} # Client ID as first argument pem=$( cat app_private.pem ) # file path of the private key as second argument @@ -82,8 +82,24 @@ jobs: JWT="${header_payload}"."${signature}" # echo to variable for later use - echo $JWT_TOKEN >> $GITHUB_ENV + echo "JWT_TOKEN=$JWT" >> $GITHUB_ENV + + - name: Get the installation ID + run: | + RESPONSE=$(curl -L \ + -H "Authorization: Bearer ${{ env.JWT_TOKEN }}" \ + https://api.github.com/app/installations) + + echo "INSTALLATION_ID=$(echo $RESPONSE | jq -r .[0].id)" >> $GITHUB_ENV + + - name: Generate Installation Access Token + run: | + RESPONSE=$(curl -L \ + -X POST \ + -H "Authorization: Bearer ${{ env.JWT_TOKEN }}" \ + https://api.github.com/app/installations/${{ env.INSTALLATION_ID }}/access_tokens) + echo "ACCESS_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV - name: Generate Registration Token run: | @@ -91,7 +107,7 @@ jobs: REPO_NAME=${{ github.event.repository.name }} RESPONSE=$(curl -L \ -X POST \ - -H "Authorization: Bearer ${{ env.GH_TOKEN }}" \ + -H "Authorization: Bearer ${{ env.ACCESS_TOKEN }}" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/registration-token) echo "REGISTRATION_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV @@ -107,7 +123,7 @@ jobs: curl -o actions-runner-linux-x64-2.317.0.tar.gz -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz - ./config.sh --url https://github.com/${{ github.repository_owner }}/${{ github.event.repository.name }} --token $REGISTRATION_TOKEN --unattended --labels self-hosted,linux,aws + ./config.sh --url https://github.com/${{ github.repository_owner }}/${{ github.event.repository.name }} --token ${{ env.REGISTRATION_TOKEN }} --unattended --labels self-hosted,linux,aws nohup ./run.sh > nohup.out 2> nohup.err < /dev/null & EOF @@ -255,27 +271,80 @@ jobs: run: | echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem + echo "${{ secrets.APP_PRIVATE_KEY }}" > app_private.pem + chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem app_private.pem - - name: Generate a token - id: generate-token - uses: actions/create-github-app-token@v1 - with: - app-id: ${{ vars.APP_ID }} - private-key: ${{ secrets.APP_PRIVATE_KEY }} + - name: Generate a JWT Token + run: | + #!/usr/bin/env bash + + set -o pipefail + + client_id=${{ vars.APP_ID }} # Client ID as first argument + + pem=$( cat app_private.pem ) # file path of the private key as second argument + + now=$(date +%s) + iat=$((${now} - 60)) # Issues 60 seconds in the past + exp=$((${now} + 600)) # Expires 10 minutes in the future + + b64enc() { openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n'; } + + header_json='{ + "typ":"JWT", + "alg":"RS256" + }' + # Header encode + header=$( echo -n "${header_json}" | b64enc ) + + payload_json='{ + "iat":'"${iat}"', + "exp":'"${exp}"', + "iss":'"${client_id}"' + }' + # Payload encode + payload=$( echo -n "${payload_json}" | b64enc ) + + # Signature + header_payload="${header}"."${payload}" + signature=$( + openssl dgst -sha256 -sign <(echo -n "${pem}") \ + <(echo -n "${header_payload}") | b64enc + ) + + # Create JWT + JWT="${header_payload}"."${signature}" + + # echo to variable for later use + echo "JWT_TOKEN=$JWT" >> $GITHUB_ENV + + - name: Get the installation ID + run: | + RESPONSE=$(curl -L \ + -H "Authorization: Bearer ${{ env.JWT_TOKEN }}" \ + https://api.github.com/app/installations) + + echo "INSTALLATION_ID=$(echo $RESPONSE | jq -r .[0].id)" >> $GITHUB_ENV + + - name: Generate Installation Access Token + run: | + RESPONSE=$(curl -L \ + -X POST \ + -H "Authorization: Bearer ${{ env.JWT_TOKEN }}" \ + https://api.github.com/app/installations/${{ env.INSTALLATION_ID }}/access_tokens) + + echo "ACCESS_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV - name: Generate Remove Token - env: - GH_TOKEN: ${{ steps.generate-token.outputs.token }} - id: remove_token run: | REPO_OWNER=${{ github.repository_owner }} REPO_NAME=${{ github.event.repository.name }} - RUNNER_ID=$(curl -H "Authorization: Bearer ${{ env.GH_TOKEN }}" \ + RUNNER_ID=$(curl -H "Authorization: Bearer ${{ env.ACCESS_TOKEN }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners | jq '.runners[] | select(.name=="self-hosted") | .id') - curl -X DELETE -H "Authorization: Bearer ${{ env.GH_TOKEN }}" \ + curl -X DELETE -H "Authorization: Bearer ${{ env.ACCESS_TOKEN }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/$RUNNER_ID From 003d5f74d6063877744a509dde1e6195c33e00d2 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Wed, 26 Jun 2024 12:06:29 +0100 Subject: [PATCH 19/27] added code to remove runner through ec2 instance e a beep bop boop boop --- .../workflows/verify-dashboards-alerts.yaml | 25 +++++++++++++------ self-hosted-runner.tf | 2 +- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index c9a4ca845..eca678440 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -266,13 +266,16 @@ jobs: uses: actions/download-artifact@v4 with: name: terraform-tfstate + - - name: Create PEM file + - name: Create PEM file + Get EC2 IP run: | echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem echo "${{ secrets.APP_PRIVATE_KEY }}" > app_private.pem chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem app_private.pem + # Get the public IP address of the instance + echo "EC2_PUBLIC_IP=$(terraform output instance_public_ip)" >> $GITHUB_ENV - name: Generate a JWT Token run: | @@ -340,13 +343,21 @@ jobs: REPO_OWNER=${{ github.repository_owner }} REPO_NAME=${{ github.event.repository.name }} - RUNNER_ID=$(curl -H "Authorization: Bearer ${{ env.ACCESS_TOKEN }}" \ - -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners | jq '.runners[] | select(.name=="self-hosted") | .id') + RESPONSE=$(curl -L \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ env.ACCESS_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/remove-token) - curl -X DELETE -H "Authorization: Bearer ${{ env.ACCESS_TOKEN }}" \ - -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/$RUNNER_ID + echo "REMOVE_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV + - name: SSH into EC2 instance and Remove Self-Hosted Runner + run: | + ssh -o StrictHostKeyChecking=no -i ${{ secrets.AWS_KEY_NAME }}.pem ubuntu@${{ env.EC2_PUBLIC_IP }} << EOF + cd actions-runner + ./config.sh remove --token ${{ env.REMOVE_TOKEN }} + EOF + - name: Terraform Destroy run: terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} diff --git a/self-hosted-runner.tf b/self-hosted-runner.tf index 611c5ebb4..4b0caa004 100644 --- a/self-hosted-runner.tf +++ b/self-hosted-runner.tf @@ -118,4 +118,4 @@ resource "null_resource" "wait_for_user_data" { output "instance_public_ip" { value = aws_instance.example.public_ip -} \ No newline at end of file +} From aa3d658a8760a016c08a9df2fa90bc9d31b7c838 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Wed, 26 Jun 2024 13:55:21 +0100 Subject: [PATCH 20/27] add terraform script and workflow for create ami e e e e create ami e e final touches :) e --- .../create-self-hosted-runner-ami.yaml | 33 +++++++++++++++++++ .../workflows/verify-dashboards-alerts.yaml | 26 +++++++++------ ami-self-hosted-runner.tf | 20 +++++++++++ self-hosted-runner.tf | 16 +++++---- 4 files changed, 78 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/create-self-hosted-runner-ami.yaml create mode 100644 ami-self-hosted-runner.tf diff --git a/.github/workflows/create-self-hosted-runner-ami.yaml b/.github/workflows/create-self-hosted-runner-ami.yaml new file mode 100644 index 000000000..8d36430dc --- /dev/null +++ b/.github/workflows/create-self-hosted-runner-ami.yaml @@ -0,0 +1,33 @@ +name: Create Self Hosted Runner AMI + +on: + push: + branches: main + paths: + - self-hosted-runner.tf + +jobs: + create-self-hosted-runner-ami: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: hashicorp/setup-terraform@v3 + + - name: Initialize Terraform Environment + run: | + terraform init + + - name: Create PEM file + run: | + echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem + chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem + + - name: Apply Terraform Configuration + run: | + terraform apply -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} + + - name: Destroy Terraform Configuration (should retain AMI from config) + run: | + # Remove AMI from terraform so it does not destroy + terraform state rm aws_ami_from_instance.self_hosted_runner_ami + terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} \ No newline at end of file diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index eca678440..340c82b58 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -2,15 +2,16 @@ name: Verify Dashboards and Alerts OK on: push: - # paths: - # # Dashboards - # - examples/dashboards/app_developer.json - # - examples/dashboards/business_user.json - # - examples/dashboards/platform_engineer.json - # # Alerts - # - examples/alerts/prometheusrules_policies_missing.yaml - # - examples/alerts/slo-availability.yaml - # - examples/alerts/slo-latency.yaml + branches: main + paths: + # Dashboards + - examples/dashboards/app_developer.json + - examples/dashboards/business_user.json + - examples/dashboards/platform_engineer.json + # Alerts + - examples/alerts/prometheusrules_policies_missing.yaml + - examples/alerts/slo-availability.yaml + - examples/alerts/slo-latency.yaml jobs: deploy-register-self-runner: runs-on: ubuntu-latest @@ -20,7 +21,10 @@ jobs: - uses: hashicorp/setup-terraform@v3 - name: Initialize Terraform Environment - run: terraform init + run: | + # Remove ami-self-hosted-runner.tf to prevent constant creation of AMIs + rm ami-self-hosted-runner.tf + terraform init - name: Create PEM files run: | @@ -260,6 +264,8 @@ jobs: - name: Initialize Terraform Environment run: | + # Remove ami-self-hosted-runner.tf to prevent constant creation of AMIs + rm ami-self-hosted-runner.tf terraform init - name: Download statefile to teardown resources diff --git a/ami-self-hosted-runner.tf b/ami-self-hosted-runner.tf new file mode 100644 index 000000000..cd0cf1cd9 --- /dev/null +++ b/ami-self-hosted-runner.tf @@ -0,0 +1,20 @@ +data "aws_instance" "self_hosted_runner_instance" { + instance_id = aws_instance.self_hosted_runner.id + depends_on = [ null_resource.wait_for_user_data ] +} + +resource "aws_ami_from_instance" "self_hosted_runner_ami" { + name = "self-hosted-runner-ami" + source_instance_id = data.aws_instance.self_hosted_runner_instance.id + description = "An AMI created from an existing EC2 instance which contains the environment needed for self-hosted runner on kuadrant-operator." + + tags = { + Name = "self-hosted-runner-ami" + } + + lifecycle { + prevent_destroy = true + } + + depends_on = [ null_resource.wait_for_user_data ] +} \ No newline at end of file diff --git a/self-hosted-runner.tf b/self-hosted-runner.tf index 4b0caa004..fe603e407 100644 --- a/self-hosted-runner.tf +++ b/self-hosted-runner.tf @@ -19,8 +19,8 @@ variable "aws_key_name" { type = string } -resource "aws_instance" "example" { - ami = "ami-0776c814353b4814d" +resource "aws_instance" "self_hosted_runner" { + ami = "ami-055032149717ffb30" # change to ami-0776c814353b4814d when creating an AMI. instance_type = "t2.xlarge" root_block_device { @@ -36,7 +36,9 @@ resource "aws_instance" "example" { // Security Group for SSH, HTTP, and HTTPS access security_groups = ["ssh-http-https-access"] - user_data = <<-EOL + # Uncomment when creating an AMI . + + /* user_data = <<-EOL #!/bin/bash echo "Starting user_data script..." sudo apt-get update -y @@ -60,7 +62,7 @@ resource "aws_instance" "example" { sudo chmod 7777 kuadrant-operator/hack echo "user_data script execution completed." touch /tmp/user_data_done - EOL + EOL */ } @@ -105,7 +107,7 @@ resource "aws_security_group" "ssh_http_https_access" { resource "null_resource" "wait_for_user_data" { provisioner "local-exec" { command = < Date: Thu, 27 Jun 2024 10:10:58 +0100 Subject: [PATCH 21/27] null resource block comment --- self-hosted-runner.tf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/self-hosted-runner.tf b/self-hosted-runner.tf index fe603e407..a2bfc3f00 100644 --- a/self-hosted-runner.tf +++ b/self-hosted-runner.tf @@ -104,7 +104,9 @@ resource "aws_security_group" "ssh_http_https_access" { } } -resource "null_resource" "wait_for_user_data" { +# Uncomment when creating AMI. + +/* resource "null_resource" "wait_for_user_data" { provisioner "local-exec" { command = < Date: Thu, 27 Jun 2024 11:12:07 +0100 Subject: [PATCH 22/27] check for ssh-ability --- .github/workflows/verify-dashboards-alerts.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 340c82b58..9337b2849 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -122,6 +122,22 @@ jobs: run: | sudo apt-get update -y sudo apt-get install -y libicu-dev + + # Function to check SSH connectivity + function check_ssh { + nc -z -w 5 ${{ env.EC2_PUBLIC_IP }} 22 + } + + echo "Checking SSH connectivity to ${{ env.EC2_PUBLIC_IP }}..." + + # Loop until SSH is available + while ! check_ssh; do + echo "SSH is not yet available. Retrying in 10 seconds..." + sleep 10 + done + + echo "SSH is now available. Connecting now..." + ssh -o StrictHostKeyChecking=no -i ${{ secrets.AWS_KEY_NAME }}.pem ubuntu@${{ env.EC2_PUBLIC_IP }} << EOF mkdir actions-runner && cd actions-runner curl -o actions-runner-linux-x64-2.317.0.tar.gz -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz From b65101cf1850c040124253a140b828d377648d07 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Mon, 8 Jul 2024 10:15:42 +0100 Subject: [PATCH 23/27] cleanup + changed runner to github for verify workflow . --- .../create-self-hosted-runner-ami.yaml | 33 --- .../workflows/verify-dashboards-alerts.yaml | 262 +----------------- .gitignore | 4 - ami-self-hosted-runner.tf | 20 -- self-hosted-runner.tf | 125 --------- 5 files changed, 6 insertions(+), 438 deletions(-) delete mode 100644 .github/workflows/create-self-hosted-runner-ami.yaml delete mode 100644 ami-self-hosted-runner.tf delete mode 100644 self-hosted-runner.tf diff --git a/.github/workflows/create-self-hosted-runner-ami.yaml b/.github/workflows/create-self-hosted-runner-ami.yaml deleted file mode 100644 index 8d36430dc..000000000 --- a/.github/workflows/create-self-hosted-runner-ami.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: Create Self Hosted Runner AMI - -on: - push: - branches: main - paths: - - self-hosted-runner.tf - -jobs: - create-self-hosted-runner-ami: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: hashicorp/setup-terraform@v3 - - - name: Initialize Terraform Environment - run: | - terraform init - - - name: Create PEM file - run: | - echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem - chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem - - - name: Apply Terraform Configuration - run: | - terraform apply -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} - - - name: Destroy Terraform Configuration (should retain AMI from config) - run: | - # Remove AMI from terraform so it does not destroy - terraform state rm aws_ami_from_instance.self_hosted_runner_ami - terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} \ No newline at end of file diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 9337b2849..58fbba1a5 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -13,155 +13,21 @@ on: - examples/alerts/slo-availability.yaml - examples/alerts/slo-latency.yaml jobs: - deploy-register-self-runner: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - uses: hashicorp/setup-terraform@v3 - - - name: Initialize Terraform Environment - run: | - # Remove ami-self-hosted-runner.tf to prevent constant creation of AMIs - rm ami-self-hosted-runner.tf - terraform init - - - name: Create PEM files - run: | - echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem - echo "${{ secrets.APP_PRIVATE_KEY }}" > app_private.pem - chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem app_private.pem - - - name: Apply Terraform Configuration + Extract Public IP - run: | - terraform apply -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} - # Get the public IP address of the instance - echo "EC2_PUBLIC_IP=$(terraform output instance_public_ip)" >> $GITHUB_ENV - - - name: Upload statefile for use further down - uses: actions/upload-artifact@v4 - with: - name: terraform-tfstate - path: terraform.tfstate - - - name: Generate a JWT Token - run: | - #!/usr/bin/env bash - - set -o pipefail - - client_id=${{ vars.APP_ID }} # Client ID as first argument - - pem=$( cat app_private.pem ) # file path of the private key as second argument - - now=$(date +%s) - iat=$((${now} - 60)) # Issues 60 seconds in the past - exp=$((${now} + 600)) # Expires 10 minutes in the future - - b64enc() { openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n'; } - - header_json='{ - "typ":"JWT", - "alg":"RS256" - }' - # Header encode - header=$( echo -n "${header_json}" | b64enc ) - - payload_json='{ - "iat":'"${iat}"', - "exp":'"${exp}"', - "iss":'"${client_id}"' - }' - # Payload encode - payload=$( echo -n "${payload_json}" | b64enc ) - - # Signature - header_payload="${header}"."${payload}" - signature=$( - openssl dgst -sha256 -sign <(echo -n "${pem}") \ - <(echo -n "${header_payload}") | b64enc - ) - - # Create JWT - JWT="${header_payload}"."${signature}" - - # echo to variable for later use - echo "JWT_TOKEN=$JWT" >> $GITHUB_ENV - - - name: Get the installation ID - run: | - RESPONSE=$(curl -L \ - -H "Authorization: Bearer ${{ env.JWT_TOKEN }}" \ - https://api.github.com/app/installations) - - echo "INSTALLATION_ID=$(echo $RESPONSE | jq -r .[0].id)" >> $GITHUB_ENV - - - name: Generate Installation Access Token - run: | - RESPONSE=$(curl -L \ - -X POST \ - -H "Authorization: Bearer ${{ env.JWT_TOKEN }}" \ - https://api.github.com/app/installations/${{ env.INSTALLATION_ID }}/access_tokens) - - echo "ACCESS_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV - - - name: Generate Registration Token - run: | - REPO_OWNER=${{ github.repository_owner }} - REPO_NAME=${{ github.event.repository.name }} - RESPONSE=$(curl -L \ - -X POST \ - -H "Authorization: Bearer ${{ env.ACCESS_TOKEN }}" \ - https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/registration-token) - - echo "REGISTRATION_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV - - - name: SSH into EC2 Instance and Register Runner - env: - REGISTRATION_TOKEN: ${{ env.REGISTRATION_TOKEN }} - run: | - sudo apt-get update -y - sudo apt-get install -y libicu-dev - - # Function to check SSH connectivity - function check_ssh { - nc -z -w 5 ${{ env.EC2_PUBLIC_IP }} 22 - } - - echo "Checking SSH connectivity to ${{ env.EC2_PUBLIC_IP }}..." - - # Loop until SSH is available - while ! check_ssh; do - echo "SSH is not yet available. Retrying in 10 seconds..." - sleep 10 - done - - echo "SSH is now available. Connecting now..." - - ssh -o StrictHostKeyChecking=no -i ${{ secrets.AWS_KEY_NAME }}.pem ubuntu@${{ env.EC2_PUBLIC_IP }} << EOF - mkdir actions-runner && cd actions-runner - curl -o actions-runner-linux-x64-2.317.0.tar.gz -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz - tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz - - ./config.sh --url https://github.com/${{ github.repository_owner }}/${{ github.event.repository.name }} --token ${{ env.REGISTRATION_TOKEN }} --unattended --labels self-hosted,linux,aws - nohup ./run.sh > nohup.out 2> nohup.err < /dev/null & - EOF - verify-dashboards-alerts: name: Verify Dashboards and Alerts OK - needs: deploy-register-self-runner - runs-on: self-hosted + runs-on: ubuntu-latest defaults: run: - shell: bash -ieo pipefail {0} + shell: bash -eo pipefail {0} steps: - uses: actions/checkout@v2 - name: Run Quickstart run: | sudo chown $(whoami) . - kind delete clusters kuadrant-local - kind delete clusters kuadrant-local + + sudo apt-get update -y + sudo apt-get install -y golang expect export ISTIO_INSTALL_SAIL=true @@ -266,120 +132,4 @@ jobs: # Close Prometheus port-forward kill $prometheus_process_id - echo "Stopped port forwarding Prometheus." - - deregister-runner: - runs-on: ubuntu-latest - needs: verify-dashboards-alerts - if: always() - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - - uses: hashicorp/setup-terraform@v3 - - - name: Initialize Terraform Environment - run: | - # Remove ami-self-hosted-runner.tf to prevent constant creation of AMIs - rm ami-self-hosted-runner.tf - terraform init - - - name: Download statefile to teardown resources - uses: actions/download-artifact@v4 - with: - name: terraform-tfstate - - - - name: Create PEM file + Get EC2 IP - run: | - echo "${{ secrets.AWS_PEM_KEY }}" > ${{ secrets.AWS_KEY_NAME }}.pem - chmod 600 ${{ secrets.AWS_KEY_NAME }}.pem - echo "${{ secrets.APP_PRIVATE_KEY }}" > app_private.pem - chmod 400 ${{ secrets.AWS_KEY_NAME }}.pem app_private.pem - # Get the public IP address of the instance - echo "EC2_PUBLIC_IP=$(terraform output instance_public_ip)" >> $GITHUB_ENV - - - name: Generate a JWT Token - run: | - #!/usr/bin/env bash - - set -o pipefail - - client_id=${{ vars.APP_ID }} # Client ID as first argument - - pem=$( cat app_private.pem ) # file path of the private key as second argument - - now=$(date +%s) - iat=$((${now} - 60)) # Issues 60 seconds in the past - exp=$((${now} + 600)) # Expires 10 minutes in the future - - b64enc() { openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n'; } - - header_json='{ - "typ":"JWT", - "alg":"RS256" - }' - # Header encode - header=$( echo -n "${header_json}" | b64enc ) - - payload_json='{ - "iat":'"${iat}"', - "exp":'"${exp}"', - "iss":'"${client_id}"' - }' - # Payload encode - payload=$( echo -n "${payload_json}" | b64enc ) - - # Signature - header_payload="${header}"."${payload}" - signature=$( - openssl dgst -sha256 -sign <(echo -n "${pem}") \ - <(echo -n "${header_payload}") | b64enc - ) - - # Create JWT - JWT="${header_payload}"."${signature}" - - # echo to variable for later use - echo "JWT_TOKEN=$JWT" >> $GITHUB_ENV - - - name: Get the installation ID - run: | - RESPONSE=$(curl -L \ - -H "Authorization: Bearer ${{ env.JWT_TOKEN }}" \ - https://api.github.com/app/installations) - - echo "INSTALLATION_ID=$(echo $RESPONSE | jq -r .[0].id)" >> $GITHUB_ENV - - - name: Generate Installation Access Token - run: | - RESPONSE=$(curl -L \ - -X POST \ - -H "Authorization: Bearer ${{ env.JWT_TOKEN }}" \ - https://api.github.com/app/installations/${{ env.INSTALLATION_ID }}/access_tokens) - - echo "ACCESS_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV - - - name: Generate Remove Token - run: | - REPO_OWNER=${{ github.repository_owner }} - REPO_NAME=${{ github.event.repository.name }} - - RESPONSE=$(curl -L \ - -X POST \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer ${{ env.ACCESS_TOKEN }}" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/actions/runners/remove-token) - - echo "REMOVE_TOKEN=$(echo $RESPONSE | jq -r .token)" >> $GITHUB_ENV - - - name: SSH into EC2 instance and Remove Self-Hosted Runner - run: | - ssh -o StrictHostKeyChecking=no -i ${{ secrets.AWS_KEY_NAME }}.pem ubuntu@${{ env.EC2_PUBLIC_IP }} << EOF - cd actions-runner - ./config.sh remove --token ${{ env.REMOVE_TOKEN }} - EOF - - - name: Terraform Destroy - run: terraform destroy -auto-approve -var=aws_access_key=${{ secrets.AWS_ACCESS_KEY_ID }} -var=aws_secret_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} -var=aws_key_name=${{ secrets.AWS_KEY_NAME }} + echo "Stopped port forwarding Prometheus." \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1d91bfee1..90f2d79d5 100644 --- a/.gitignore +++ b/.gitignore @@ -33,7 +33,3 @@ tmp # Vendor dependencies vendor -.terraform -terraform.* -.terraform.* -*.pem diff --git a/ami-self-hosted-runner.tf b/ami-self-hosted-runner.tf deleted file mode 100644 index cd0cf1cd9..000000000 --- a/ami-self-hosted-runner.tf +++ /dev/null @@ -1,20 +0,0 @@ -data "aws_instance" "self_hosted_runner_instance" { - instance_id = aws_instance.self_hosted_runner.id - depends_on = [ null_resource.wait_for_user_data ] -} - -resource "aws_ami_from_instance" "self_hosted_runner_ami" { - name = "self-hosted-runner-ami" - source_instance_id = data.aws_instance.self_hosted_runner_instance.id - description = "An AMI created from an existing EC2 instance which contains the environment needed for self-hosted runner on kuadrant-operator." - - tags = { - Name = "self-hosted-runner-ami" - } - - lifecycle { - prevent_destroy = true - } - - depends_on = [ null_resource.wait_for_user_data ] -} \ No newline at end of file diff --git a/self-hosted-runner.tf b/self-hosted-runner.tf deleted file mode 100644 index a2bfc3f00..000000000 --- a/self-hosted-runner.tf +++ /dev/null @@ -1,125 +0,0 @@ -provider "aws" { - region = "eu-west-1" - access_key = var.aws_access_key - secret_key = var.aws_secret_key -} - -variable "aws_access_key" { - description = "AWS Access Key" - type = string -} - -variable "aws_secret_key" { - description = "AWS Secret Key" - type = string -} - -variable "aws_key_name" { - description = "AWS Key Name" - type = string -} - -resource "aws_instance" "self_hosted_runner" { - ami = "ami-055032149717ffb30" # change to ami-0776c814353b4814d when creating an AMI. - instance_type = "t2.xlarge" - - root_block_device { - volume_size = 16 // GB - } - - key_name = var.aws_key_name - - tags = { - Name = "kuadrant-operator-self-hosted-runner" - } - - // Security Group for SSH, HTTP, and HTTPS access - security_groups = ["ssh-http-https-access"] - - # Uncomment when creating an AMI . - - /* user_data = <<-EOL - #!/bin/bash - echo "Starting user_data script..." - sudo apt-get update -y - sudo apt-get install -y podman golang libicu-dev expect - sudo snap install yq - curl -O https://s3.us-west-2.amazonaws.com/amazon-eks/1.30.0/2024-05-12/bin/linux/amd64/kubectl - chmod +x ./kubectl - mkdir -p /home/ubuntu/bin && cp ./kubectl /home/ubuntu/bin/kubectl - echo 'alias podman="sudo podman"' >> /home/ubuntu/.bashrc - echo export PATH=/home/ubuntu/bin:/home/ubuntu/go/pkg/mod/bin:$PATH >> /home/ubuntu/.bashrc - source /home/ubuntu/.bashrc - export GOMODCACHE=/home/ubuntu/go - export GOPATH=/home/ubuntu/go/pkg/mod - export GOCACHE=/home/ubuntu/.cache/go-build - export HOME=/home/ubuntu - go install sigs.k8s.io/kind@v0.23.0 - source /home/ubuntu/.bashrc - cd /home/ubuntu - git clone https://www.github.com/kuadrant/kuadrant-operator.git - echo 'unqualified-search-registries = ["docker.io"]' | sudo tee -a /etc/containers/registries.conf - sudo chmod 7777 kuadrant-operator/hack - echo "user_data script execution completed." - touch /tmp/user_data_done - EOL */ -} - - - -resource "aws_security_group" "ssh_http_https_access" { - name = "ssh-http-https-access" - description = "Allow SSH, HTTP, and HTTPS access" - - // Ingress rule for SSH access - ingress { - from_port = 22 - to_port = 22 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] // Allow SSH access from anywhere - } - - // Ingress rule for HTTP access (port 80) - ingress { - from_port = 80 - to_port = 80 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] // Allow HTTP access from anywhere - } - - // Ingress rule for HTTPS access (port 443) - ingress { - from_port = 443 - to_port = 443 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] // Allow HTTPS access from anywhere - } - - // Egress rule to allow all outbound traffic - egress { - from_port = 0 - to_port = 0 - protocol = "-1" // Allow all protocols - cidr_blocks = ["0.0.0.0/0"] // Allow outbound traffic to anywhere - } -} - -# Uncomment when creating AMI. - -/* resource "null_resource" "wait_for_user_data" { - provisioner "local-exec" { - command = < Date: Mon, 8 Jul 2024 10:36:00 +0100 Subject: [PATCH 24/27] removed chown test test --- .../workflows/verify-dashboards-alerts.yaml | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 58fbba1a5..031333f34 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -2,16 +2,16 @@ name: Verify Dashboards and Alerts OK on: push: - branches: main - paths: - # Dashboards - - examples/dashboards/app_developer.json - - examples/dashboards/business_user.json - - examples/dashboards/platform_engineer.json - # Alerts - - examples/alerts/prometheusrules_policies_missing.yaml - - examples/alerts/slo-availability.yaml - - examples/alerts/slo-latency.yaml + # branches: main + # paths: + # # Dashboards + # - examples/dashboards/app_developer.json + # - examples/dashboards/business_user.json + # - examples/dashboards/platform_engineer.json + # # Alerts + # - examples/alerts/prometheusrules_policies_missing.yaml + # - examples/alerts/slo-availability.yaml + # - examples/alerts/slo-latency.yaml jobs: verify-dashboards-alerts: name: Verify Dashboards and Alerts OK @@ -24,8 +24,6 @@ jobs: - name: Run Quickstart run: | - sudo chown $(whoami) . - sudo apt-get update -y sudo apt-get install -y golang expect From 653bfb3fc17bd1e0b40cd6ce416e17cc73739235 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Mon, 8 Jul 2024 11:22:48 +0100 Subject: [PATCH 25/27] separated set up and tear down from tests test test 1 --- .../workflows/verify-dashboards-alerts.yaml | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 031333f34..65dcc1f8e 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -22,11 +22,13 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Run Quickstart + - name: Set up dependencies run: | sudo apt-get update -y sudo apt-get install -y golang expect + - name: Run Quickstart + run: | export ISTIO_INSTALL_SAIL=true cat << 'EOF' > automate_setup.expect @@ -42,7 +44,7 @@ jobs: chmod +x automate_setup.expect ./automate_setup.expect - - name: Port forward grafana and check if contains dashboards. + - name: Port forward grafana run: | # Port forward Grafana kubectl -n monitoring wait --for=condition=available deployment grafana --timeout=600s @@ -50,8 +52,20 @@ jobs: grafana_process_id=$! echo "Successfully port forwarded Grafana service." - sleep 1 + echo 'GRAFANA_PID=$grafana_process_id' >> $GITHUB_ENV + + - name: Port forward Prometheus. + run: | + kubectl -n monitoring wait --for=condition=ready pod prometheus-k8s-0 --timeout=600s + # Port forward Prometheus + kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 & + prometheus_process_id=$! + echo "Successfully port forwarded Prometheus service." + + echo 'PROMETHEUS_PID=$prometheus_process_id' >> $GITHUB_ENV + - name: Check if Grafana contains dashboards. + run: | # Make API Call and save response to variable. grafana_api_call=$(curl -u admin:admin http://127.0.0.1:3000/api/search) @@ -85,20 +99,8 @@ jobs: echo "Grafana contains dashboards $app_developer, $business_user and $platform_engineer. Continuing to Prometheus..." - # Close Grafana port-forward. - kill $grafana_process_id - echo "Stoppped port forwarding Grafana." - - - name: Port forward Prometheus and check if contains alert rules. + - name: Check if Prometheus contains alert rules. run: | - kubectl -n monitoring wait --for=condition=ready pod prometheus-k8s-0 --timeout=600s - # Port forward Prometheus - kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 & - prometheus_process_id=$! - echo "Successfully port forwarded Prometheus service." - - sleep 1 - # Make API Call and save response to variable prometheus_api_call=$(curl http://localhost:9090/api/v1/rules) @@ -126,8 +128,4 @@ jobs: exit 1 fi - echo "Prometheus has all alert rules." - - # Close Prometheus port-forward - kill $prometheus_process_id - echo "Stopped port forwarding Prometheus." \ No newline at end of file + echo "Prometheus has all alert rules." \ No newline at end of file From be8044533fa137f04811054b43067c0f8b404428 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Mon, 8 Jul 2024 11:33:23 +0100 Subject: [PATCH 26/27] additional clean up a --- .../workflows/verify-dashboards-alerts.yaml | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 65dcc1f8e..854606d05 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -2,16 +2,16 @@ name: Verify Dashboards and Alerts OK on: push: - # branches: main - # paths: - # # Dashboards - # - examples/dashboards/app_developer.json - # - examples/dashboards/business_user.json - # - examples/dashboards/platform_engineer.json - # # Alerts - # - examples/alerts/prometheusrules_policies_missing.yaml - # - examples/alerts/slo-availability.yaml - # - examples/alerts/slo-latency.yaml + branches: main + paths: + # Dashboards + - examples/dashboards/app_developer.json + - examples/dashboards/business_user.json + - examples/dashboards/platform_engineer.json + # Alerts + - examples/alerts/prometheusrules_policies_missing.yaml + - examples/alerts/slo-availability.yaml + - examples/alerts/slo-latency.yaml jobs: verify-dashboards-alerts: name: Verify Dashboards and Alerts OK @@ -49,21 +49,15 @@ jobs: # Port forward Grafana kubectl -n monitoring wait --for=condition=available deployment grafana --timeout=600s kubectl -n monitoring port-forward service/grafana 3000:3000 & - grafana_process_id=$! echo "Successfully port forwarded Grafana service." - echo 'GRAFANA_PID=$grafana_process_id' >> $GITHUB_ENV - - name: Port forward Prometheus. run: | kubectl -n monitoring wait --for=condition=ready pod prometheus-k8s-0 --timeout=600s # Port forward Prometheus kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 & - prometheus_process_id=$! echo "Successfully port forwarded Prometheus service." - echo 'PROMETHEUS_PID=$prometheus_process_id' >> $GITHUB_ENV - - name: Check if Grafana contains dashboards. run: | # Make API Call and save response to variable. From fc8472f7afa83727c3e53d5bb179be866c7575e0 Mon Sep 17 00:00:00 2001 From: ehearneredhat Date: Mon, 8 Jul 2024 12:14:53 +0100 Subject: [PATCH 27/27] removed quickstart dependency a a a --- .../workflows/verify-dashboards-alerts.yaml | 31 +++++++++---------- .gitignore | 2 +- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml index 854606d05..47a6b868e 100644 --- a/.github/workflows/verify-dashboards-alerts.yaml +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -22,27 +22,24 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up dependencies + - name: Set up golang run: | sudo apt-get update -y - sudo apt-get install -y golang expect + sudo apt-get install -y golang - - name: Run Quickstart + - name: Deploy observability stack (Grafana and Prometheus) run: | - export ISTIO_INSTALL_SAIL=true - - cat << 'EOF' > automate_setup.expect - #!/usr/bin/expect -f - set timeout -1 - spawn bash hack/quickstart-setup.sh - expect "Are you ready to begin? (y/n)" - send "y\r" - expect "Do you want to set up a DNS provider for use with Kuadrant's DNSPolicy API? (y/n)" - send "n\r" - expect "Thank you for using Kuadrant! If you have any questions or feedback, please reach out to our community." - EOF - chmod +x automate_setup.expect - ./automate_setup.expect + kind create cluster + # Install Istio + kubectl apply -k config/dependencies/istio/sail + kubectl -n istio-system wait --for=condition=Available deployment istio-operator --timeout=300s + kubectl apply -f config/dependencies/istio/sail/istio.yaml + + # Install Observability Stack (Grafana and Prometheus) + kubectl kustomize config/observability/ | docker run --rm -i ryane/kfilt -i kind=CustomResourceDefinition | kubectl apply --server-side -f - + kubectl kustomize config/observability/ | docker run --rm -i ryane/kfilt -x kind=CustomResourceDefinition | kubectl apply -f - + kubectl kustomize examples/dashboards/ | kubectl apply --server-side -f - + kubectl kustomize examples/alerts/ | kubectl apply --server-side -f - - name: Port forward grafana run: | diff --git a/.gitignore b/.gitignore index 90f2d79d5..eea81dd3d 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,4 @@ tmp /coverage/ # Vendor dependencies -vendor +vendor \ No newline at end of file