diff --git a/README.md b/README.md index 73a6198..536ad37 100644 --- a/README.md +++ b/README.md @@ -209,6 +209,7 @@ as described in the `.pre-commit-config.yaml` file | [grafana](#input\_grafana) | Grafana configurations, used to override default configurations | `any` | `{}` | no | | [ingress\_nginx](#input\_ingress\_nginx) | Ingress Nginx configurations | `any` | `{}` | no | | [karpenter](#input\_karpenter) | Karpenter configurations | `any` | `{}` | no | +| [metadata](#input\_metadata) | Metadata for the platform |
object({| `{}` | no | | [metrics\_server](#input\_metrics\_server) | Metrics Server configurations | `any` | `{}` | no | | [name](#input\_name) | The name of the platform, a timestamp will be appended to this name to make the stack\_name. If not provided, the name of the directory will be used. | `string` | `""` | no | | [okta](#input\_okta) | Okta configurations |
environment = optional(string, "")
team = optional(string, "")
})
object({| `{}` | no | diff --git a/addons.tf b/addons.tf index e78c0da..1b4e6cd 100644 --- a/addons.tf +++ b/addons.tf @@ -81,11 +81,17 @@ module "addons" { # This just means annotations are needed for the service to use the aws load balancer controller set = [{ name = "enableServiceMutatorWebhook" - value = "false" + value = "true" + }, { + name = "serviceMutatorWebhookConfig.failurePolicy" + value = "Ignore" }, { name = "replicaCount" value = 2 }, { + name = "serviceMonitor.enabled" + value = var.enable_prometheus_stack + }, { name = "clusterSecretsPermissions.allowAllSecrets" value = "true" # enables Okta integration by reading client id and secret from K8s secrets }] diff --git a/examples/complete/main.tf b/examples/complete/main.tf index e662f81..10e3c23 100644 --- a/examples/complete/main.tf +++ b/examples/complete/main.tf @@ -104,34 +104,6 @@ module "k8s_platform" { enable_downscaler = true - enable_pagerduty = true - pagerduty = { - secrets_manager_secret_name = "dai/platform/pagerduty" - } - - enable_okta = true - okta = { - base_url = "https://login.tx.group" - secrets_manager_secret_name = "dai/platform/okta" - } - - base_domain = "dai.tx.group" - - enable_acm_certificate = true - acm_certificate = { - subject_alternative_names = [ - "prometheus", - "alertmanager", - "grafana", - ] - wildcard_certificates = true - } - - fluent_log_annotation = { - name = "" - value = "" - } - enable_amp = true } diff --git a/files/helm/prometheus/alertmanager-template.yaml b/files/helm/prometheus/alertmanager-template.yaml deleted file mode 100644 index 3602765..0000000 --- a/files/helm/prometheus/alertmanager-template.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# alertmanager: -# templateFiles: -# pagerduty.tmpl: |- -# {{ define "pagerduty.summary" }} -# {{- $root := . 
-}} -# {{ $routingKey := (readFile "/path/to/routing_key.txt") | trimSpace }} -# {{ range .Alerts }} -# { -# "routing_key": "{{ $routingKey }}", -# "event_action": "trigger", -# "payload": { -# "summary": "{{ .Annotations.summary }}", -# "severity": "{{ .Labels.severity }}", -# "source": "{{ .ExternalURL }}", -# "component": "{{ .Labels.component }}", -# "group": "{{ template "cluster" $root }}", -# "class": "{{ .Labels.alertname }}", -# "custom_details": { -# "description": "{{ .Annotations.description }}", -# "runbook": "{{ .Annotations.runbook }}", -# "graph_link": "{{ .GeneratorURL }}" -# } -# } -# } -# {{ end }} -# {{ end }} - -# {{ define "cluster" }} -# {{ .ExternalURL | reReplaceAll ".*alertmanager\\.(.*)" "$1" }} -# {{ end }} diff --git a/files/helm/prometheus/alertmanager-templates.yaml b/files/helm/prometheus/alertmanager-templates.yaml new file mode 100644 index 0000000..8ec838f --- /dev/null +++ b/files/helm/prometheus/alertmanager-templates.yaml @@ -0,0 +1,99 @@ +alertmanager: + templateFiles: + description.tmpl: |- + {{ define "common.description" }} + CLUSTER: [ + {{- .Status | toUpper }} + {{- if eq .Status "firing" }} + :{{ .Alerts.Firing | len }} + {{- end }} + ] + + {{- .GroupLabels.SortedPairs.Values | join " " }} + + {{- if gt (len .CommonLabels) (len .GroupLabels) }} + ({{ with .CommonLabels.Remove .GroupLabels.Names }} + {{ .Values | join " " }} + {{- end }}) + {{- end }} + {{ end }} + slack.tmpl: |- + {{/* Alertmanager Silence link */}} + {{ define "__alert_silence_link" -}} + {{ .ExternalURL }}/#/silences/new?filter=%7B + {{- range .CommonLabels.SortedPairs -}} + {{- if ne .Name "alertname" -}} + {{- .Name }}%3D"{{- .Value -}}"%2C%20 + {{- end -}} + {{- end -}} + alertname%3D"{{- .CommonLabels.alertname -}}"%7D + {{- end }} + + {{/* Severity of the alert */}} + {{ define "__alert_severity" -}} + {{- if eq .CommonLabels.severity "critical" -}} + *Severity:* `Critical` + {{- else if eq .CommonLabels.severity "warning" -}} + *Severity:* 
`Warning` + {{- else if eq .CommonLabels.severity "info" -}} + *Severity:* `Info` + {{- else -}} + *Severity:* :question: {{ .CommonLabels.severity }} + {{- end }} + {{- end }} + + {{/* Title of the Slack alert */}} + {{ define "slack.title" -}} + [{{ .Status | toUpper -}} + {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}} + ] {{ .CommonLabels.alertname }} + {{- end }} + + {{/* Color of Slack attachment (appears as line next to alert )*/}} + {{ define "slack.color" -}} + {{ if eq .Status "firing" -}} + {{ if eq .CommonLabels.severity "warning" -}} + warning + {{- else if eq .CommonLabels.severity "critical" -}} + danger + {{- else -}} + #439FE0 + {{- end -}} + {{ else -}} + good + {{- end }} + {{- end }} + + {{/* The text to display in the alert */}} + # Should use commonLabels here + {{ define "slack.text" -}} + {{ template "__alert_severity" . }} + {{- if .CommonLabels.environment }} + {{- "\n" -}} + *Environment:* {{ .CommonLabels.environment }} + {{- end }} + {{- if .CommonLabels.team }} + {{- "\n" -}} + *Team:* {{ .CommonLabels.team }} + {{- end }} + {{- if .CommonLabels.cluster }} + {{- "\n" -}} + *Cluster:* {{ .CommonLabels.cluster }} + {{- end }} + {{- if (index .Alerts 0).Annotations.summary }} + {{- "\n" -}} + *Summary:* {{ (index .Alerts 0).Annotations.summary }} + {{- end }} + {{- range .Alerts }} + {{- if .Annotations.description }} + {{- "\n" -}} + {{ .Annotations.description }} + {{- "\n" -}} + {{- end }} + {{- if .Annotations.message }} + {{- "\n" -}} + {{ .Annotations.message }} + {{- "\n" -}} + {{- end }} + {{- end }} + {{- end }} diff --git a/monitoring.tf b/monitoring.tf index 8e8af02..e24e81a 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -253,8 +253,24 @@ module "prometheus_stack" { # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml values = [ file("${path.module}/files/helm/prometheus/common.yaml"), - file("${path.module}/files/helm/prometheus/alertmanager-template.yaml"), + 
file("${path.module}/files/helm/prometheus/alertmanager-templates.yaml"), <<-EOT + defaultRules: + create: true + rules: + # Disable EKS managed services + etcd: false + kubeApiserverAvailability: false + kubeApiserverBurnrate: false + kubeApiserverHistogram: false + kubeApiserverSlos: false + kubeControllerManager: false + # We dont support windows + windows: false + labels: + cluster: ${local.stack_name} + environment: ${var.metadata.environment} + team: ${var.metadata.team} prometheus: serviceAccount: annotations: @@ -290,10 +306,6 @@ module "prometheus_stack" { capacity: 2500 %{endif} alertmanager: - defaultRules: - labels: - cluster: ${local.stack_name} - # environment: foo ingress: enabled: ${var.enable_okta} ingressClassName: alb @@ -320,6 +332,8 @@ module "prometheus_stack" { - ${var.slack.kubernetes_secret_name} %{endif} config: + global: + slack_api_url: https://slack.com/api/chat.postMessage route: receiver: "null" group_by: [...] @@ -332,6 +346,12 @@ module "prometheus_stack" { matchers: - alertname="Watchdog" continue: false + %{if var.enable_slack} + - receiver: it-pts-dai-monitoring + matchers: + - severity=~"info|warning|critical" + continue: true + %{endif} %{if var.enable_pagerduty} - receiver: pagerduty-critical matchers: @@ -346,14 +366,35 @@ module "prometheus_stack" { - severity="info" continue: false %{endif} - %{if var.enable_slack} - - receiver: it-pts-dai-monitoring - matchers: - - severity=~"info|warning|critical" - continue: false - %{endif} receivers: - name: "null" + %{if var.enable_slack} + - name: it-pts-dai-monitoring + slack_configs: + - send_resolved: true + api_url_file: /etc/alertmanager/secrets/${var.slack.kubernetes_secret_name}/it_pts_dai_monitoring + http_config: + follow_redirects: true + enable_http2: true + color: '{{ template "slack.color" . }}' + title: '{{ template "slack.title" . }}' + text: '{{ template "slack.text" . 
}}' + + channel: '#it_pts_dai_monitoring' + actions: + - type: button + text: 'Runbook :green_book:' + url: '{{ (index .Alerts 0).Annotations.runbook_url }}' + - type: button + text: 'Query :mag:' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: 'Dashboard :chart_with_upwards_trend:' + url: '{{ (index .Alerts 0).Annotations.dashboard_url }}' + - type: button + text: 'Silence :no_bell:' + url: '{{ template "__alert_silence_link" . }}' + %{endif} %{if var.enable_pagerduty} - name: pagerduty-critical pagerduty_configs: @@ -365,7 +406,7 @@ module "prometheus_stack" { url: https://events.pagerduty.com/v2/enqueue client: '{{ template "pagerduty.default.client" . }}' client_url: '{{ template "pagerduty.default.clientURL" . }}' - description: '{{ template "pagerduty.default.description" .}}' + description: '{{ template "pagerduty.default.description" . }}' details: alertname: '{{ .CommonLabels.alertname }}' description: '{{ .CommonAnnotations.description }}' @@ -387,7 +428,7 @@ module "prometheus_stack" { url: https://events.pagerduty.com/v2/enqueue client: '{{ template "pagerduty.default.client" . }}' client_url: '{{ template "pagerduty.default.clientURL" . }}' - description: '{{ template "pagerduty.default.description" .}}' + description: '{{ template "pagerduty.default.description" . }}' details: alertname: '{{ .CommonLabels.alertname }}' description: '{{ .CommonAnnotations.description }}' @@ -409,7 +450,7 @@ module "prometheus_stack" { url: https://events.pagerduty.com/v2/enqueue client: '{{ template "pagerduty.default.client" . }}' client_url: '{{ template "pagerduty.default.clientURL" . }}' - description: '{{ template "pagerduty.default.description" .}}' + description: '{{ template "pagerduty.default.description" . }}' details: alertname: '{{ .CommonLabels.alertname }}' description: '{{ .CommonAnnotations.description }}' @@ -422,22 +463,6 @@ module "prometheus_stack" { source: '{{ template "pagerduty.default.client" . 
}}' severity: info %{endif} - %{if var.enable_slack} - - name: it-pts-dai-monitoring - slack_configs: - - send_resolved: true - api_url_file: /etc/alertmanager/secrets/${var.slack.kubernetes_secret_name}/it_pts_dai_monitoring - http_config: - follow_redirects: true - enable_http2: true - channel: '#it-pts-dai-monitoring' - title: '{{ template "slack.default.title" . }}' - text: '{{ template "slack.default.text" . }}' - footer: '{{ template "slack.default.footer" . }}' - icon_url: '{{ template "slack.default.iconURL" . }}' - username: '{{ template "slack.default.username" . }}' - color: '{{ template "slack.default.color" . }}' - %{endif} EOT ] diff --git a/tests/main/main.tf b/tests/main/main.tf index 999e898..39bf2b4 100644 --- a/tests/main/main.tf +++ b/tests/main/main.tf @@ -85,6 +85,11 @@ module "k8s_platform" { } } + metadata = { + environment = "sandbox" + team = "dai" + } + tags = { Environment = "sandbox" GithubRepo = "terraform-aws-kubernetes-platform" @@ -132,7 +137,7 @@ module "k8s_platform" { enable_downscaler = true - enable_pagerduty = true + enable_pagerduty = false pagerduty = { secrets_manager_secret_name = "dai/platform/pagerduty" } diff --git a/variables.tf b/variables.tf index 4847d8d..2c51f76 100644 --- a/variables.tf +++ b/variables.tf @@ -1,3 +1,12 @@ +variable "metadata" { + description = "Metadata for the platform" + type = object({ + environment = optional(string, "") + team = optional(string, "") + }) + default = {} +} + variable "create_addons" { description = "Create the platform addons. if set to false, no addons will be created" type = bool
base_url = optional(string, "")
secrets_manager_secret_name = optional(string, "")
kubernetes_secret_name = optional(string, "okta")
})