Merge branch 'main' into managed-prometheus-collector

aws-observability · Mar 19, 2024 · 27060a5 · 27060a5
2 parents 235a22f + 3c02872
commit 27060a5
Show file tree

Hide file tree

Showing 9 changed files with 108 additions and 129 deletions.
diff --git a/docs/contributors.md b/docs/contributors.md
@@ -16,6 +16,7 @@ The core team include the following people:
 * Michael Hausenblas
 * Rodrigue Koffi
 * Toshal Dudhwala
+* Vikram Venkataraman
 
 We welcome the wider open source community and thank [those who contribute](https://github.com/aws-observability/terraform-aws-observability-accelerator/graphs/contributors)
 to this project.

diff --git a/docs/ecs/ecs-monitoring-on-ec2.md b/docs/ecs/ecs-monitoring-on-ec2.md
@@ -4,6 +4,7 @@ This example demonstrates how to monitor your Amazon Elastic Container Service o
 (Amazon ECS) cluster with the Observability Accelerator's ECS monitoring module
 
 The module collects Prometheus metrics from tasks running on ECS and sends it to Prometheus using AWS Distro for OpenTelemetry Collector (ADOT).
+
 You can either run the collector as a sidecar or deploy the collector as its own ECS service for entire cluster.
 ECS tasks with Prometheus endpoints are discovered using extension
 [ecsobserver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/observer/ecsobserver/README.md).
@@ -26,7 +27,7 @@ Make sure to update your exisitng Application Task Definitions based on the work
 
 ## Setup
 
-#### 1. Add the ECS Monitoring Module to your exisitng ECS CLuster
+#### 1. Add the ECS Monitoring Module to your existing ECS Cluster
 
 ```
 module "ecs_monitoring" {

diff --git a/modules/eks-monitoring/README.md b/modules/eks-monitoring/README.md
diff --git a/modules/eks-monitoring/locals.tf b/modules/eks-monitoring/locals.tf
@@ -62,7 +62,7 @@ locals {
     grafana_dashboard_url = "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/java/default.json"
   }
 
-  nginx_pattern_config = {
+  nginx_pattern_config_defaults = {
     # disabled if options from module are disabled, by default
     # can be overriden by providing a config
     enable_alerting_rules  = var.enable_alerting_rules
@@ -83,6 +83,11 @@ locals {
     grafana_dashboard_url = "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/nginx/nginx.json"
   }
 
+  nginx_pattern_config = {
+    # Overlay var.nginx_config on the defaults; any attribute left null falls back to its default
+    for k, v in merge(local.nginx_pattern_config_defaults, var.nginx_config) : k => v != null ? v : local.nginx_pattern_config_defaults[k]
+  }
+
   istio_pattern_config = {
     # disabled if options from module are disabled, by default
     # can be overriden by providing a config

diff --git a/modules/eks-monitoring/main.tf b/modules/eks-monitoring/main.tf
@@ -138,6 +138,10 @@ module "helm_addon" {
       name  = "adotLoglevel"
       value = var.adot_loglevel
     },
+    {
+      name  = "adotServiceTelemetryLoglevel"
+      value = var.adot_service_telemetry_loglevel
+    },
     {
       name  = "accountId"
       value = local.context.aws_caller_identity_account_id
@@ -192,11 +196,11 @@ module "helm_addon" {
     },
     {
       name  = "nginxScrapeSampleLimit"
-      value = try(var.nginx_config.scrape_sample_limit, local.nginx_pattern_config.scrape_sample_limit)
+      value = local.nginx_pattern_config.scrape_sample_limit
     },
     {
       name  = "nginxPrometheusMetricsEndpoint"
-      value = try(var.nginx_config.prometheus_metrics_endpoint, local.nginx_pattern_config.prometheus_metrics_endpoint)
+      value = local.nginx_pattern_config.prometheus_metrics_endpoint
     },
     {
       name  = "enableIstio"
@@ -257,7 +261,7 @@ module "nginx_monitoring" {
   source = "./patterns/nginx"
   count  = var.enable_nginx ? 1 : 0
 
-  pattern_config = coalesce(var.nginx_config, local.nginx_pattern_config)
+  pattern_config = local.nginx_pattern_config
 }
 
 module "istio_monitoring" {

diff --git a/modules/eks-monitoring/otel-config/templates/opentelemetrycollector.yaml b/modules/eks-monitoring/otel-config/templates/opentelemetrycollector.yaml
@@ -261,9 +261,9 @@ spec:
           exporters: [logging, prometheusremotewrite]
         {{ if .Values.enableAdotcollectorMetrics }}
         metrics/1:
-         receivers: [prometheus/1]
-         processors: []
-         exporters: [prometheusremotewrite]
+          receivers: [prometheus/1]
+          processors: []
+          exporters: [prometheusremotewrite]
         {{ end }}
         {{ if .Values.enableTracing }}
         traces:
@@ -276,4 +276,6 @@ spec:
         metrics:
           address: 0.0.0.0:8888
           level: basic
+        logs:
+          level: {{ .Values.adotServiceTelemetryLoglevel }}
       {{ end }}
diff --git a/modules/eks-monitoring/otel-config/values.yaml b/modules/eks-monitoring/otel-config/values.yaml
@@ -32,6 +32,7 @@ istioScrapeSampleLimit: ${istio_scrape_sample_limit}
 istioPrometheusMetricsEndpoint: ${istio_prometheus_metrics_endpoint}
 
 adotLoglevel: ${adot_loglevel}
+adotServiceTelemetryLoglevel: ${adot_service_telemetry_loglevel}
 
 enableAdotcollectorMetrics: ${enable_adotcollector_metrics}
 

diff --git a/modules/eks-monitoring/variables.tf b/modules/eks-monitoring/variables.tf
@@ -58,11 +58,23 @@ variable "irsa_iam_additional_policies" {
 }
 
 variable "adot_loglevel" {
-  description = "Verbosity level for ADOT collector logs. This accepts (detailed|normal|basic), see https://aws-otel.github.io/docs/components/misc-exporters for mor infos."
+  description = "Verbosity level for ADOT collector logs. This accepts (detailed|normal|basic), see https://aws-otel.github.io/docs/components/misc-exporters for more info."
   type        = string
   default     = "normal"
 }
 
+variable "adot_service_telemetry_loglevel" {
+  description = "Verbosity level for ADOT service telemetry logs. See https://opentelemetry.io/docs/collector/configuration/#telemetry for more info."
+  type        = string
+  default     = "INFO"
+}
+
+variable "managed_prometheus_workspace_endpoint" {
+  description = "Amazon Managed Prometheus Workspace Endpoint"
+  type        = string
+  default     = ""
+}
+
 variable "managed_prometheus_workspace_id" {
   description = "Amazon Managed Prometheus Workspace ID"
   type        = string
@@ -138,32 +150,20 @@ variable "enable_kube_state_metrics" {
 variable "ksm_config" {
   description = "Kube State metrics configuration"
   type = object({
-    create_namespace   = bool
-    k8s_namespace      = string
-    helm_chart_name    = string
-    helm_chart_version = string
-    helm_release_name  = string
-    helm_repo_url      = string
-    helm_settings      = map(string)
-    helm_values        = map(any)
-
-    scrape_interval = string
-    scrape_timeout  = string
+    create_namespace   = optional(bool, true)
+    k8s_namespace      = optional(string, "kube-system")
+    helm_chart_name    = optional(string, "kube-state-metrics")
+    helm_chart_version = optional(string, "5.15.2")
+    helm_release_name  = optional(string, "kube-state-metrics")
+    helm_repo_url      = optional(string, "https://prometheus-community.github.io/helm-charts")
+    helm_settings      = optional(map(string), {})
+    helm_values        = optional(map(any), {})
+
+    scrape_interval = optional(string, "60s")
+    scrape_timeout  = optional(string, "15s")
   })
 
-  default = {
-    create_namespace   = true
-    helm_chart_name    = "kube-state-metrics"
-    helm_chart_version = "5.15.2"
-    helm_release_name  = "kube-state-metrics"
-    helm_repo_url      = "https://prometheus-community.github.io/helm-charts"
-    helm_settings      = {}
-    helm_values        = {}
-    k8s_namespace      = "kube-system"
-
-    scrape_interval = "60s"
-    scrape_timeout  = "15s"
-  }
+  default  = {}
   nullable = false
 }
 
@@ -176,32 +176,20 @@ variable "enable_node_exporter" {
 variable "ne_config" {
   description = "Node exporter configuration"
   type = object({
-    create_namespace   = bool
-    k8s_namespace      = string
-    helm_chart_name    = string
-    helm_chart_version = string
-    helm_release_name  = string
-    helm_repo_url      = string
-    helm_settings      = map(string)
-    helm_values        = map(any)
-
-    scrape_interval = string
-    scrape_timeout  = string
+    create_namespace   = optional(bool, true)
+    k8s_namespace      = optional(string, "prometheus-node-exporter")
+    helm_chart_name    = optional(string, "prometheus-node-exporter")
+    helm_chart_version = optional(string, "4.24.0")
+    helm_release_name  = optional(string, "prometheus-node-exporter")
+    helm_repo_url      = optional(string, "https://prometheus-community.github.io/helm-charts")
+    helm_settings      = optional(map(string), {})
+    helm_values        = optional(map(any), {})
+
+    scrape_interval = optional(string, "60s")
+    scrape_timeout  = optional(string, "60s")
   })
 
-  default = {
-    create_namespace   = true
-    helm_chart_name    = "prometheus-node-exporter"
-    helm_chart_version = "4.24.0"
-    helm_release_name  = "prometheus-node-exporter"
-    helm_repo_url      = "https://prometheus-community.github.io/helm-charts"
-    helm_settings      = {}
-    helm_values        = {}
-    k8s_namespace      = "prometheus-node-exporter"
-
-    scrape_interval = "60s"
-    scrape_timeout  = "60s"
-  }
+  default  = {}
   nullable = false
 }
 
@@ -214,14 +202,11 @@ variable "tags" {
 variable "prometheus_config" {
   description = "Controls default values such as scrape interval, timeouts and ports globally"
   type = object({
-    global_scrape_interval = string
-    global_scrape_timeout  = string
+    global_scrape_interval = optional(string, "120s")
+    global_scrape_timeout  = optional(string, "15s")
   })
 
-  default = {
-    global_scrape_interval = "120s"
-    global_scrape_timeout  = "15s"
-  }
+  default  = {}
   nullable = false
 }
 
@@ -260,18 +245,14 @@ variable "enable_tracing" {
 variable "tracing_config" {
   description = "Configuration object for traces collection to AWS X-Ray"
   type = object({
-    otlp_grpc_endpoint = string
-    otlp_http_endpoint = string
-    send_batch_size    = number
-    timeout            = string
+    otlp_grpc_endpoint = optional(string, "0.0.0.0:4317")
+    otlp_http_endpoint = optional(string, "0.0.0.0:4318")
+    send_batch_size    = optional(number, 50)
+    timeout            = optional(string, "30s")
   })
 
-  default = {
-    otlp_grpc_endpoint = "0.0.0.0:4317"
-    otlp_http_endpoint = "0.0.0.0:4318"
-    send_batch_size    = 50
-    timeout            = "30s"
-  }
+  default  = {}
+  nullable = false
 }
 
 variable "enable_custom_metrics" {
@@ -330,28 +311,27 @@ variable "enable_nginx" {
   default     = false
 }
 
-
 variable "nginx_config" {
   description = "Configuration object for NGINX monitoring"
   type = object({
-    enable_alerting_rules  = bool
-    enable_recording_rules = bool
-    enable_dashboards      = bool
-    scrape_sample_limit    = number
+    enable_alerting_rules  = optional(bool)
+    enable_recording_rules = optional(bool)
+    enable_dashboards      = optional(bool)
+    scrape_sample_limit    = optional(number)
 
-    flux_gitrepository_name   = string
-    flux_gitrepository_url    = string
-    flux_gitrepository_branch = string
-    flux_kustomization_name   = string
-    flux_kustomization_path   = string
+    flux_gitrepository_name   = optional(string)
+    flux_gitrepository_url    = optional(string)
+    flux_gitrepository_branch = optional(string)
+    flux_kustomization_name   = optional(string)
+    flux_kustomization_path   = optional(string)
 
-    grafana_dashboard_url = string
+    grafana_dashboard_url = optional(string)
 
-    prometheus_metrics_endpoint = string
+    prometheus_metrics_endpoint = optional(string)
   })
 
-  # defaults are pre-computed in locals.tf, provide a full definition to override
-  default = null
+  # attributes left unset (null) fall back to the defaults pre-computed in locals.tf
+  default = {}
 }
 
 variable "enable_istio" {
@@ -417,26 +397,17 @@ variable "enable_fluxcd" {
 variable "flux_config" {
   description = "FluxCD configuration"
   type = object({
-    create_namespace   = bool
-    k8s_namespace      = string
-    helm_chart_name    = string
-    helm_chart_version = string
-    helm_release_name  = string
-    helm_repo_url      = string
-    helm_settings      = map(string)
-    helm_values        = map(any)
+    create_namespace   = optional(bool, true)
+    k8s_namespace      = optional(string, "flux-system")
+    helm_chart_name    = optional(string, "flux2")
+    helm_chart_version = optional(string, "2.12.2")
+    helm_release_name  = optional(string, "observability-fluxcd-addon")
+    helm_repo_url      = optional(string, "https://fluxcd-community.github.io/helm-charts")
+    helm_settings      = optional(map(string), {})
+    helm_values        = optional(map(any), {})
   })
 
-  default = {
-    create_namespace   = true
-    helm_chart_name    = "flux2"
-    helm_chart_version = "2.12.2"
-    helm_release_name  = "observability-fluxcd-addon"
-    helm_repo_url      = "https://fluxcd-community.github.io/helm-charts"
-    helm_settings      = {}
-    helm_values        = {}
-    k8s_namespace      = "flux-system"
-  }
+  default  = {}
   nullable = false
 }
 
@@ -449,22 +420,15 @@ variable "enable_grafana_operator" {
 variable "go_config" {
   description = "Grafana Operator configuration"
   type = object({
-    create_namespace   = bool
-    helm_chart         = string
-    helm_name          = string
-    k8s_namespace      = string
-    helm_release_name  = string
-    helm_chart_version = string
+    create_namespace   = optional(bool, true)
+    helm_chart         = optional(string, "oci://ghcr.io/grafana-operator/helm-charts/grafana-operator")
+    helm_name          = optional(string, "grafana-operator")
+    k8s_namespace      = optional(string, "grafana-operator")
+    helm_release_name  = optional(string, "grafana-operator")
+    helm_chart_version = optional(string, "v5.5.2")
   })
 
-  default = {
-    create_namespace   = true
-    helm_chart         = "oci://ghcr.io/grafana-operator/helm-charts/grafana-operator"
-    helm_name          = "grafana-operator"
-    k8s_namespace      = "grafana-operator"
-    helm_release_name  = "grafana-operator"
-    helm_chart_version = "v5.5.2"
-  }
+  default  = {}
   nullable = false
 }
 

diff --git a/modules/eks-monitoring/versions.tf b/modules/eks-monitoring/versions.tf
@@ -1,5 +1,5 @@
 terraform {
-  required_version = ">= 1.1.0"
+  required_version = ">= 1.3.0"
 
   required_providers {
     aws = {