Skip to content
This repository has been archived by the owner on Nov 2, 2024. It is now read-only.

fix: Explicitly create SLO services when installation with ASM is launched #1064

Merged
merged 5 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions provisioning/sandboxctl
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ create_subcommand() {
### Cleanup ###
unset -v GOOGLE_TERRAFORM_USERAGENT_EXTENSION TF_APPEND_USER_AGENT
popd > /dev/null
mv "${KUSTOMIZE_FILE}.origin" "${KUSTOMIZE_FILE}"
}
delete_subcommand() {
parse_args "${@}"
Expand Down Expand Up @@ -164,7 +163,6 @@ help_subcommand() {
}

init() {
MICROSERVICE_DEMO_PATH="kustomize/online-boutique"; readonly MICROSERVICE_DEMO_PATH
PROJECT_ID=""
CLUSTER_LOCATION=""
CLUSTER_NAME=""
Expand All @@ -178,6 +176,7 @@ init() {
AGCLOUD="$(which gcloud || true)"; readonly AGCLOUD;
CLOUDOPS_SANDBOX_POOL_CFG=${CLOUDOPS_SANDBOX_POOL_CFG:-}
TF_FILE_LOCATION="$(mktemp).tfvars"; readonly TF_FILE_LOCATION
KUSTOMIZE_FILE="$(mktemp -d)/kustomization.yaml"; readonly KUSTOMIZE_FILE
}

### Support functions ###
Expand Down Expand Up @@ -471,7 +470,7 @@ configure_terraform_input_vars() {
state_bucket_name = "${TF_BUCKET_NAME}"
state_prefix = "${TERRAFORM_PREFIX}"
gcp_project_id = "${PROJECT_ID}"
filepath_manifest = "${SCRIPT_DIR}/${MICROSERVICE_DEMO_PATH}"
filepath_manifest = "${KUSTOMIZE_FILE%/*}"
EOF
)

Expand All @@ -493,8 +492,8 @@ EOF
}

configure_kustomization() {
KUSTOMIZE_FILE="${SCRIPT_DIR}/${MICROSERVICE_DEMO_PATH}/kustomization.yaml"; readonly KUSTOMIZE_FILE
cp "${KUSTOMIZE_FILE}" "${KUSTOMIZE_FILE}.origin" > /dev/null
local ORIGINAL_KUSTOMIZE_FILE="${SCRIPT_DIR}/kustomize/online-boutique/kustomization.yaml";
cp "${ORIGINAL_KUSTOMIZE_FILE}" "${KUSTOMIZE_FILE}" > /dev/null
local SED_EXPRESSION; SED_EXPRESSION=""

# uncomment 'without-loadgenerator' component if skip_loadgen is set
Expand All @@ -506,7 +505,7 @@ configure_kustomization() {
SED_EXPRESSION+=" -E '/service-mesh-istio(\?version\=v?[0-9]+\.[0-9]+\.[0-9]+)?$/s/^#//'"
fi
if [[ -n "${SED_EXPRESSION}" ]]; then
eval "sed ${SED_EXPRESSION} ${KUSTOMIZE_FILE}.origin >| ${KUSTOMIZE_FILE}"
eval "sed ${SED_EXPRESSION} ${ORIGINAL_KUSTOMIZE_FILE} >| ${KUSTOMIZE_FILE}"
fi
}

Expand Down
3 changes: 2 additions & 1 deletion provisioning/terraform/cloudops.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ module "monitoring" {
gcp_project_number = data.google_project.info.number
enable_asm = var.enable_asm
frontend_external_ip = data.kubernetes_service.frontend_external_service.status[0].load_balancer[0].ingress[0].ip
gke_cluster_name = var.gke_cluster_name
gke_cluster_name = google_container_cluster.sandbox.name
gke_cluster_location = google_container_cluster.sandbox.location
# re-use prefix to customize resources within the same project
name_suffix = length(var.state_prefix) > 0 ? "-${var.state_prefix}" : ""

Expand Down
6 changes: 5 additions & 1 deletion provisioning/terraform/monitoring/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ The following table describes all input arguments the module accepts:
| filepath_configuration | `string` | | A path to the root folder storing configuration files and templates. The relative path should be defined relative to the root terraform folder |
| frontend_external_ip | `string` | ✔️ | A valid IPv4 address of the publicly available endpoint of the frontend service |
| gcp_project_id | `string` | ✔️ | A project id of the GCP project that hosts Cloud Ops Sandbox |
| notification_channel_email | `string` || A valid email address to be used as a destination for alert notifications |
| gcp_project_number | `string` | ✔️ | A project number corresponding to the project id. Passed explicitly to save an API call because it is already known in the parent terraform. |
| gke_cluster_name | `string` || Name of the GKE cluster hosting the demo app. Defaults to `cloud-ops-sandbox`. |
| gke_cluster_location | `string` || Location of the GKE cluster hosting the demo app. Defaults to `us-central1`. |
| name_suffix | `string` || Forwarding suffix string from parent terraform to enable resource customization when multiple Sandboxes are provisioned in the same project. Defaults to empty string. |
| notification_channel_email | `string` || A valid email address to be used as a destination for alert notifications. Defaults to `[email protected]`. |

## What's included

Expand Down
4 changes: 2 additions & 2 deletions provisioning/terraform/monitoring/alerts.tf
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ resource "google_monitoring_alert_policy" "availability_slo_burn_alert" {

# This filter alerts on burn rate over the past 60 minutes
# The service is defined by the unique Istio string that is automatically created
filter = "select_slo_burn_rate(\"${module.slo_service[count.index].qualified_name}/serviceLevelObjectives/${google_monitoring_slo.service_availability[count.index].slo_id}\", 60m)"
filter = "select_slo_burn_rate(\"${google_monitoring_service.slo_service[count.index].name}/serviceLevelObjectives/${google_monitoring_slo.service_availability[count.index].slo_id}\", 60m)"
threshold_value = local.burn_rate
comparison = "COMPARISON_GT"
duration = "60s"
Expand All @@ -78,7 +78,7 @@ resource "google_monitoring_alert_policy" "latency_slo_burn_alert" {
conditions {
display_name = "SLO burn rate alert for latency SLO with a threshold of ${local.burn_rate}"
condition_threshold {
filter = "select_slo_burn_rate(\"${module.slo_service[count.index].qualified_name}/serviceLevelObjectives/${google_monitoring_slo.service_latency[count.index].slo_id}\", 60m)"
filter = "select_slo_burn_rate(\"${google_monitoring_service.slo_service[count.index].name}/serviceLevelObjectives/${google_monitoring_slo.service_latency[count.index].slo_id}\", 60m)"
threshold_value = local.burn_rate
comparison = "COMPARISON_GT"
duration = "60s"
Expand Down
64 changes: 0 additions & 64 deletions provisioning/terraform/monitoring/slo_service/main.tf

This file was deleted.

59 changes: 24 additions & 35 deletions provisioning/terraform/monitoring/slo_services.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

locals {
namespace = "default"
burn_rate = 2 # (2x factor)
latency_threshold = 1000 # (ms)
slo_services = [
Expand Down Expand Up @@ -60,9 +61,30 @@ locals {
slo_goal = 0.9 # (common goal of 90%)
}

# Explicitly declares one Cloud Monitoring service per entry in
# local.slo_services. Instances are created only when var.enable_asm is true;
# otherwise count evaluates to 0 and nothing is provisioned.
resource "google_monitoring_service" "slo_service" {
count = var.enable_asm ? length(local.slo_services) : 0

# var.name_suffix is appended to the id so that resources of several
# Sandboxes provisioned in the same project do not collide.
service_id = "${local.slo_services[count.index].id}${var.name_suffix}"
display_name = local.slo_services[count.index].title

user_labels = {
creator = "Cloud Ops Sandbox"
}

# Binds the monitoring service to an Istio service in the cluster.
# Note that service_name uses the plain (unsuffixed) id from local.slo_services,
# while the cluster location/name come from module input variables and the
# namespace from local.namespace.
basic_service {
service_type = "CLUSTER_ISTIO"
service_labels = {
location : var.gke_cluster_location,
cluster_name : var.gke_cluster_name,
service_namespace : local.namespace,
service_name : local.slo_services[count.index].id
}
}
}

resource "google_monitoring_slo" "service_availability" {
count = var.enable_asm ? length(local.slo_services) : 0
service = module.slo_service[count.index].id
service = google_monitoring_service.slo_service[count.index].service_id
slo_id = "${local.slo_services[count.index].id}-availability-slo${var.name_suffix}"
display_name = "${local.slo_goal * 100}% - Availability - Rolling 30 Days - ${local.slo_services[count.index].id}"

Expand All @@ -75,15 +97,11 @@ resource "google_monitoring_slo" "service_availability" {
enabled = "true"
}
}

depends_on = [
null_resource.wait_monitored_services,
]
}

resource "google_monitoring_slo" "service_latency" {
count = var.enable_asm ? length(local.slo_services) : 0
service = module.slo_service[count.index].id
service = google_monitoring_service.slo_service[count.index].service_id
slo_id = "${local.slo_services[count.index].id}-latency-slo${var.name_suffix}"
display_name = "${local.slo_goal * 100}% - Latency - Rolling 30 days - ${local.slo_services[count.index].id}"
goal = local.slo_goal
Expand All @@ -106,33 +124,4 @@ resource "google_monitoring_slo" "service_latency" {
}
}
}

depends_on = [
null_resource.wait_monitored_services,
]
}

module "slo_service" {
count = length(local.slo_services)
source = "./slo_service"

project_number = var.gcp_project_number
name = local.slo_services[count.index].id
}

# wait until all monitored services are provisioned
resource "null_resource" "wait_monitored_services" {
count = var.enable_asm ? length(local.slo_services) : 0
provisioner "local-exec" {
interpreter = ["bash", "-exc"]
command = <<EOF
while [[ $code != "200" ]]; do \
code=$(curl -s -o /dev/null -w "%%{http_code}" \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json; charset=utf-8" \
${module.slo_service[count.index].url}); \
sleep 1; \
done 2> /dev/null
EOF
}
}
8 changes: 7 additions & 1 deletion provisioning/terraform/monitoring/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,16 @@ variable "filepath_configuration" {

variable "gke_cluster_name" {
type = string
description = "Name given to the new GKE cluster"
description = "Name of the GKE cluster hosting the app"
default = "cloud-ops-sandbox"
}

# Location of the GKE cluster; it is used as the `location` label of the
# monitored Istio services defined in this module. The default applies only
# when the caller does not pass a value.
variable "gke_cluster_location" {
type = string
description = "Location of the GKE cluster hosting the app"
default = "us-central1"
}

variable "notification_channel_email" {
type = string
description = "Email address to use for alert notification channel."
Expand Down
2 changes: 1 addition & 1 deletion provisioning/terraform/online-boutique.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

locals {
service_name = var.enable_asm ? "istio-gateway" : "frontend-external"
service_name = var.enable_asm ? "istio-gateway-istio" : "frontend-external"
namespace_name = "default"
}

Expand Down
2 changes: 1 addition & 1 deletion provisioning/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9.3
0.9.4
Loading