Skip to content

Commit

Permalink
make dlq alerts route to teams (#706)
Browse files (browse the repository at this point in the history)
Signed-off-by: Kenny Leung <[email protected]>
  • Loading branch information
k4leung4 authored Jan 30, 2025
1 parent 49ccf2d commit 2e8e585
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions modules/alerting/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ locals {
}

locals {
squad_log_filter = var.squad == "" ? "" : "labels.squad=\"${var.squad}\""
name = var.squad == "" ? "global" : var.squad
metric_filter = var.squad == "" ? "" : "metric.labels.team=\"${var.squad}\""
squad_log_filter = var.squad == "" ? "" : "labels.squad=\"${var.squad}\""
name = var.squad == "" ? "global" : var.squad
squad_metric_filter = var.squad == "" ? "" : "metric.labels.team=\"${var.squad}\""
squad_metric_user_label_filter = var.squad == "" ? "" : "metadata.user_labels.\"team\"=\"${var.squad}\""
}

locals {
Expand Down Expand Up @@ -621,7 +622,7 @@ resource "google_monitoring_alert_policy" "cloud-run-failed-req" {
}

resource "google_monitoring_alert_policy" "pubsub_dead_letter_queue_messages" {
count = var.squad == "" ? 1 : 0
count = var.global_only_alerts ? 0 : 1

alert_strategy {
auto_close = "3600s" // 1 hour
Expand All @@ -643,6 +644,7 @@ resource "google_monitoring_alert_policy" "pubsub_dead_letter_queue_messages" {
resource.type="pubsub_topic"
metadata.system_labels."name"=monitoring.regex.full_match(".*-dlq-.*")
${var.dlq_filter}
${local.squad_metric_user_label_filter}
EOT

trigger {
Expand Down Expand Up @@ -980,7 +982,7 @@ resource "google_monitoring_alert_policy" "http_error_rate" {
metric.labels.service_name != monitoring.regex.full_match(".*-registry")
metric.labels.service_name != monitoring.regex.full_match("prb-.*")
metric.labels.code != monitoring.regex.full_match("[23]..")
${local.metric_filter}
${local.squad_metric_filter}
EOT

evaluation_missing_data = "EVALUATION_MISSING_DATA_INACTIVE"
Expand Down Expand Up @@ -1030,7 +1032,7 @@ resource "google_monitoring_alert_policy" "grpc_error_rate" {
resource.type = "prometheus_target"
metric.type = "prometheus.googleapis.com/grpc_server_handled_total/counter"
metric.labels.grpc_code != monitoring.regex.full_match("OK|AlreadyExists")
${local.metric_filter}
${local.squad_metric_filter}
EOT

evaluation_missing_data = "EVALUATION_MISSING_DATA_INACTIVE"
Expand Down

0 comments on commit 2e8e585

Please sign in to comment.