Skip to content

Commit

Permalink
feat: add slack template
Browse files Browse the repository at this point in the history
  • Loading branch information
swibrow committed Sep 23, 2024
1 parent 184827c commit 7e7faf4
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 90 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ as described in the `.pre-commit-config.yaml` file
| <a name="input_grafana"></a> [grafana](#input\_grafana) | Grafana configurations, used to override default configurations | `any` | `{}` | no |
| <a name="input_ingress_nginx"></a> [ingress\_nginx](#input\_ingress\_nginx) | Ingress Nginx configurations | `any` | `{}` | no |
| <a name="input_karpenter"></a> [karpenter](#input\_karpenter) | Karpenter configurations | `any` | `{}` | no |
| <a name="input_metadata"></a> [metadata](#input\_metadata) | Metadata for the platform | <pre>object({<br> environment = optional(string, "")<br> team = optional(string, "")<br> })</pre> | `{}` | no |
| <a name="input_metrics_server"></a> [metrics\_server](#input\_metrics\_server) | Metrics Server configurations | `any` | `{}` | no |
| <a name="input_name"></a> [name](#input\_name) | The name of the platform, a timestamp will be appended to this name to make the stack\_name. If not provided, the name of the directory will be used. | `string` | `""` | no |
| <a name="input_okta"></a> [okta](#input\_okta) | Okta configurations | <pre>object({<br> base_url = optional(string, "")<br> secrets_manager_secret_name = optional(string, "")<br> kubernetes_secret_name = optional(string, "okta")<br> })</pre> | `{}` | no |
Expand Down
8 changes: 7 additions & 1 deletion addons.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,17 @@ module "addons" {
# This just means annotations are needed for the service to use the aws load balancer controller
set = [{
name = "enableServiceMutatorWebhook"
value = "false"
value = "true"
}, {
name = "serviceMutatorWebhookConfig"
value = "Ignore"
}, {
name = "replicaCount"
value = 2
}, {
name = "enableServiceMonitor"
value = var.enable_prometheus_stack
}, {
name = "clusterSecretsPermissions.allowAllSecrets"
value = "true" # enables Okta integration by reading client id and secret from K8s secrets
}]
Expand Down
28 changes: 0 additions & 28 deletions examples/complete/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -104,34 +104,6 @@ module "k8s_platform" {

enable_downscaler = true

enable_pagerduty = true
pagerduty = {
secrets_manager_secret_name = "dai/platform/pagerduty"
}

enable_okta = true
okta = {
base_url = "https://login.tx.group"
secrets_manager_secret_name = "dai/platform/okta"
}

base_domain = "dai.tx.group"

enable_acm_certificate = true
acm_certificate = {
subject_alternative_names = [
"prometheus",
"alertmanager",
"grafana",
]
wildcard_certificates = true
}

fluent_log_annotation = {
name = ""
value = ""
}

enable_amp = true

}
30 changes: 0 additions & 30 deletions files/helm/prometheus/alertmanager-template.yaml

This file was deleted.

99 changes: 99 additions & 0 deletions files/helm/prometheus/alertmanager-templates.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
alertmanager:
templateFiles:
description.tmpl: |-
{{/*
common.description: one-line summary of an alert group.
Emits the literal prefix "CLUSTER: [", the upper-cased group status (followed by
":<number of firing alerts>" while the status is "firing") and "]", then the
group label values joined by single spaces. When the group has more common
labels than group labels, the values of the remaining common labels are
appended in parentheses.
NOTE(review): "CLUSTER" is emitted verbatim rather than replaced by a cluster
name, and no receiver visible here references this template - confirm intent.
*/}}
{{ define "common.description" }}
CLUSTER: [
{{- .Status | toUpper }}
{{- if eq .Status "firing" }}
:{{ .Alerts.Firing | len }}
{{- end }}
]
{{- .GroupLabels.SortedPairs.Values | join " " }}
{{- if gt (len .CommonLabels) (len .GroupLabels) }}
({{ with .CommonLabels.Remove .GroupLabels.Names }}
{{ .Values | join " " }}
{{- end }})
{{- end }}
{{ end }}
slack.tmpl: |-
{{/*
Alertmanager Silence link.
Builds <ExternalURL>/#/silences/new?filter=... where the filter is a
percent-encoded matcher list: %7B and %7D are the braces, %3D is "=" and
%2C%20 is ", ". Every common label except alertname is written first as
name="value" in sorted order, and alertname is always written last so the list
never ends with a dangling separator.
NOTE(review): label values are inserted as-is (no urlquery escaping); values
containing quotes or reserved URL characters may produce a broken link.
*/}}
{{ define "__alert_silence_link" -}}
{{ .ExternalURL }}/#/silences/new?filter=%7B
{{- range .CommonLabels.SortedPairs -}}
{{- if ne .Name "alertname" -}}
{{- .Name }}%3D"{{- .Value -}}"%2C%20
{{- end -}}
{{- end -}}
alertname%3D"{{- .CommonLabels.alertname -}}"%7D
{{- end }}
{{/*
Severity of the alert.
Renders "*Severity:*" followed by a backtick-quoted, capitalised name for the
known values of the common "severity" label (critical, warning, info). Any
other value, including a missing label, falls through to the last branch and is
shown as ":question:" followed by the raw label value.
*/}}
{{ define "__alert_severity" -}}
{{- if eq .CommonLabels.severity "critical" -}}
*Severity:* `Critical`
{{- else if eq .CommonLabels.severity "warning" -}}
*Severity:* `Warning`
{{- else if eq .CommonLabels.severity "info" -}}
*Severity:* `Info`
{{- else -}}
*Severity:* :question: {{ .CommonLabels.severity }}
{{- end }}
{{- end }}
{{/*
Title of the Slack alert.
Renders "[STATUS] alertname" with the status upper-cased; while the group is
firing, ":<number of firing alerts>" is added inside the brackets. The alert
name is taken from the common "alertname" label.
*/}}
{{ define "slack.title" -}}
[{{ .Status | toUpper -}}
{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
] {{ .CommonLabels.alertname }}
{{- end }}
{{/*
Color of the Slack attachment (appears as the line next to the alert).
While the group is firing the value depends on the common "severity" label:
"warning" for warning, "danger" for critical, and the hex color #439FE0 for
anything else. When the group is not firing the value is "good".
*/}}
{{ define "slack.color" -}}
{{ if eq .Status "firing" -}}
{{ if eq .CommonLabels.severity "warning" -}}
warning
{{- else if eq .CommonLabels.severity "critical" -}}
danger
{{- else -}}
#439FE0
{{- end -}}
{{ else -}}
good
{{- end }}
{{- end }}
{{/*
The text to display in the alert.
Renders, in order: the severity line, optional Environment / Team / Cluster
lines (each only when the corresponding common label is non-empty), the summary
annotation of the first alert when present, and then every alert's description
and message annotations, each on its own line.
Labels must be read with the leading dot (.CommonLabels.<name>): a bare
"CommonLabels" is parsed as a function call, which makes the whole template
file fail to parse.
*/}}
{{ define "slack.text" -}}
{{ template "__alert_severity" . }}
{{- if .CommonLabels.environment }}
{{- "\n" -}}
*Environment:* {{ .CommonLabels.environment }}
{{- end }}
{{- if .CommonLabels.team }}
{{- "\n" -}}
*Team:* {{ .CommonLabels.team }}
{{- end }}
{{- if .CommonLabels.cluster }}
{{- "\n" -}}
*Cluster:* {{ .CommonLabels.cluster }}
{{- end }}
{{- if (index .Alerts 0).Annotations.summary }}
{{- "\n" -}}
*Summary:* {{ (index .Alerts 0).Annotations.summary }}
{{- end }}
{{- range .Alerts }}
{{- if .Annotations.description }}
{{- "\n" -}}
{{ .Annotations.description }}
{{- "\n" -}}
{{- end }}
{{- if .Annotations.message }}
{{- "\n" -}}
{{ .Annotations.message }}
{{- "\n" -}}
{{- end }}
{{- end }}
{{- end }}
85 changes: 55 additions & 30 deletions monitoring.tf
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,24 @@ module "prometheus_stack" {
# https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml
values = [
file("${path.module}/files/helm/prometheus/common.yaml"),
file("${path.module}/files/helm/prometheus/alertmanager-template.yaml"),
file("${path.module}/files/helm/prometheus/alertmanager-templates.yaml"),
<<-EOT
defaultRules:
create: true
rules:
# Disable EKS managed services
etcd: false
kubeApiserverAvailability: false
kubeApiserverBurnrate: false
kubeApiserverHistogram: false
kubeApiserverSlos: false
kubeControllerManager: false
# We don't support Windows
windows: false
labels:
cluster: ${local.stack_name}
environment: ${var.metadata.environment}
team: ${var.metadata.team}
prometheus:
serviceAccount:
annotations:
Expand Down Expand Up @@ -290,10 +306,6 @@ module "prometheus_stack" {
capacity: 2500
%{endif}
alertmanager:
defaultRules:
labels:
cluster: ${local.stack_name}
# environment: foo
ingress:
enabled: ${var.enable_okta}
ingressClassName: alb
Expand All @@ -320,6 +332,8 @@ module "prometheus_stack" {
- ${var.slack.kubernetes_secret_name}
%{endif}
config:
global:
slack_api_url: https://slack.com/api/chat.postMessage
route:
receiver: "null"
group_by: [...]
Expand All @@ -332,6 +346,12 @@ module "prometheus_stack" {
matchers:
- alertname="Watchdog"
continue: false
%{if var.enable_slack}
- receiver: it-pts-dai-monitoring
matchers:
- severity=~"info|warning|critical"
continue: true
%{endif}
%{if var.enable_pagerduty}
- receiver: pagerduty-critical
matchers:
Expand All @@ -346,14 +366,35 @@ module "prometheus_stack" {
- severity="info"
continue: false
%{endif}
%{if var.enable_slack}
- receiver: it-pts-dai-monitoring
matchers:
- severity=~"info|warning|critical"
continue: false
%{endif}
receivers:
- name: "null"
%{if var.enable_slack}
- name: it-pts-dai-monitoring
slack_configs:
- send_resolved: true
api_url_file: /etc/alertmanager/secrets/${var.slack.kubernetes_secret_name}/it_pts_dai_monitoring
http_config:
follow_redirects: true
enable_http2: true
color: '{{ template "slack.color" . }}'
title: '{{ template "slack.title" . }}'
text: '{{ template "slack.text" . }}'

channel: '#it_pts_dai_monitoring'
actions:
- type: button
text: 'Runbook :green_book:'
url: '{{ (index .Alerts 0).Annotations.runbook_url }}'
- type: button
text: 'Query :mag:'
url: '{{ (index .Alerts 0).GeneratorURL }}'
- type: button
text: 'Dashboard :chart_with_upwards_trend:'
url: '{{ (index .Alerts 0).Annotations.dashboard_url }}'
- type: button
text: 'Silence :no_bell:'
url: '{{ template "__alert_silence_link" . }}'
%{endif}
%{if var.enable_pagerduty}
- name: pagerduty-critical
pagerduty_configs:
Expand All @@ -365,7 +406,7 @@ module "prometheus_stack" {
url: https://events.pagerduty.com/v2/enqueue
client: '{{ template "pagerduty.default.client" . }}'
client_url: '{{ template "pagerduty.default.clientURL" . }}'
description: '{{ template "pagerduty.default.description" .}}'
description: '{{ template "pagerduty.default.description" . }}'
details:
alertname: '{{ .CommonLabels.alertname }}'
description: '{{ .CommonAnnotations.description }}'
Expand All @@ -387,7 +428,7 @@ module "prometheus_stack" {
url: https://events.pagerduty.com/v2/enqueue
client: '{{ template "pagerduty.default.client" . }}'
client_url: '{{ template "pagerduty.default.clientURL" . }}'
description: '{{ template "pagerduty.default.description" .}}'
description: '{{ template "pagerduty.default.description" . }}'
details:
alertname: '{{ .CommonLabels.alertname }}'
description: '{{ .CommonAnnotations.description }}'
Expand All @@ -409,7 +450,7 @@ module "prometheus_stack" {
url: https://events.pagerduty.com/v2/enqueue
client: '{{ template "pagerduty.default.client" . }}'
client_url: '{{ template "pagerduty.default.clientURL" . }}'
description: '{{ template "pagerduty.default.description" .}}'
description: '{{ template "pagerduty.default.description" . }}'
details:
alertname: '{{ .CommonLabels.alertname }}'
description: '{{ .CommonAnnotations.description }}'
Expand All @@ -422,22 +463,6 @@ module "prometheus_stack" {
source: '{{ template "pagerduty.default.client" . }}'
severity: info
%{endif}
%{if var.enable_slack}
- name: it-pts-dai-monitoring
slack_configs:
- send_resolved: true
api_url_file: /etc/alertmanager/secrets/${var.slack.kubernetes_secret_name}/it_pts_dai_monitoring
http_config:
follow_redirects: true
enable_http2: true
channel: '#it-pts-dai-monitoring'
title: '{{ template "slack.default.title" . }}'
text: '{{ template "slack.default.text" . }}'
footer: '{{ template "slack.default.footer" . }}'
icon_url: '{{ template "slack.default.iconURL" . }}'
username: '{{ template "slack.default.username" . }}'
color: '{{ template "slack.default.color" . }}'
%{endif}
EOT
]

Expand Down
7 changes: 6 additions & 1 deletion tests/main/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ module "k8s_platform" {
}
}

metadata = {
environment = "sandbox"
team = "dai"
}

tags = {
Environment = "sandbox"
GithubRepo = "terraform-aws-kubernetes-platform"
Expand Down Expand Up @@ -132,7 +137,7 @@ module "k8s_platform" {

enable_downscaler = true

enable_pagerduty = true
enable_pagerduty = false
pagerduty = {
secrets_manager_secret_name = "dai/platform/pagerduty"
}
Expand Down
9 changes: 9 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# Free-form platform metadata. Both attributes are optional strings that
# default to "" when omitted, so callers may pass an empty object.
variable "metadata" {
  type = object({
    environment = optional(string, "")
    team        = optional(string, "")
  })
  default     = {}
  description = "Metadata for the platform"
}

variable "create_addons" {
description = "Create the platform addons. if set to false, no addons will be created"
type = bool
Expand Down

0 comments on commit 7e7faf4

Please sign in to comment.