Skip to content

Commit

Permalink
Merge branch 'main' into managed-prometheus-collector
Browse files Browse the repository at this point in the history
  • Loading branch information
bonclay7 authored Mar 19, 2024
2 parents 235a22f + 3c02872 commit 27060a5
Show file tree
Hide file tree
Showing 9 changed files with 108 additions and 129 deletions.
1 change: 1 addition & 0 deletions docs/contributors.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The core team include the following people:
* Michael Hausenblas
* Rodrigue Koffi
* Toshal Dudhwala
* Vikram Venkataraman

We welcome the wider open source community and thank [those who contribute](https://github.com/aws-observability/terraform-aws-observability-accelerator/graphs/contributors)
to this project.
Expand Down
3 changes: 2 additions & 1 deletion docs/ecs/ecs-monitoring-on-ec2.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ This example demonstrates how to monitor your Amazon Elastic Container Service o
(Amazon ECS) cluster with the Observability Accelerator's ECS monitoring module

The module collects Prometheus metrics from tasks running on ECS and sends them to Prometheus using AWS Distro for OpenTelemetry Collector (ADOT).

You can either run the collector as a sidecar or deploy the collector as its own ECS service for the entire cluster.
ECS tasks with Prometheus endpoints are discovered using extension
[ecsobserver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/observer/ecsobserver/README.md).
Expand All @@ -26,7 +27,7 @@ Make sure to update your exisitng Application Task Definitions based on the work

## Setup

#### 1. Add the ECS Monitoring Module to your exisitng ECS CLuster
#### 1. Add the ECS Monitoring Module to your existing ECS Cluster

```
module "ecs_monitoring" {
Expand Down
19 changes: 10 additions & 9 deletions modules/eks-monitoring/README.md

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion modules/eks-monitoring/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ locals {
grafana_dashboard_url = "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/java/default.json"
}

nginx_pattern_config = {
nginx_pattern_config_defaults = {
# disabled if options from module are disabled, by default
# can be overridden by providing a config
enable_alerting_rules = var.enable_alerting_rules
Expand All @@ -83,6 +83,11 @@ locals {
grafana_dashboard_url = "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/nginx/nginx.json"
}

nginx_pattern_config = {
# Merge input variable with defaults and rebuild with non-null values
for k, v in merge(local.nginx_pattern_config_defaults, var.nginx_config) : k => v != null ? v : local.nginx_pattern_config_defaults[k]
}

istio_pattern_config = {
# disabled if options from module are disabled, by default
# can be overridden by providing a config
Expand Down
10 changes: 7 additions & 3 deletions modules/eks-monitoring/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ module "helm_addon" {
name = "adotLoglevel"
value = var.adot_loglevel
},
{
name = "adotServiceTelemetryLoglevel"
value = var.adot_service_telemetry_loglevel
},
{
name = "accountId"
value = local.context.aws_caller_identity_account_id
Expand Down Expand Up @@ -192,11 +196,11 @@ module "helm_addon" {
},
{
name = "nginxScrapeSampleLimit"
value = try(var.nginx_config.scrape_sample_limit, local.nginx_pattern_config.scrape_sample_limit)
value = local.nginx_pattern_config.scrape_sample_limit
},
{
name = "nginxPrometheusMetricsEndpoint"
value = try(var.nginx_config.prometheus_metrics_endpoint, local.nginx_pattern_config.prometheus_metrics_endpoint)
value = local.nginx_pattern_config.prometheus_metrics_endpoint
},
{
name = "enableIstio"
Expand Down Expand Up @@ -257,7 +261,7 @@ module "nginx_monitoring" {
source = "./patterns/nginx"
count = var.enable_nginx ? 1 : 0

pattern_config = coalesce(var.nginx_config, local.nginx_pattern_config)
pattern_config = local.nginx_pattern_config
}

module "istio_monitoring" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,9 @@ spec:
exporters: [logging, prometheusremotewrite]
{{ if .Values.enableAdotcollectorMetrics }}
metrics/1:
receivers: [prometheus/1]
processors: []
exporters: [prometheusremotewrite]
receivers: [prometheus/1]
processors: []
exporters: [prometheusremotewrite]
{{ end }}
{{ if .Values.enableTracing }}
traces:
Expand All @@ -276,4 +276,6 @@ spec:
metrics:
address: 0.0.0.0:8888
level: basic
logs:
level: {{ .Values.adotServiceTelemetryLoglevel }}
{{ end }}
1 change: 1 addition & 0 deletions modules/eks-monitoring/otel-config/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ istioScrapeSampleLimit: ${istio_scrape_sample_limit}
istioPrometheusMetricsEndpoint: ${istio_prometheus_metrics_endpoint}

adotLoglevel: ${adot_loglevel}
adotServiceTelemetryLoglevel: ${adot_service_telemetry_loglevel}

enableAdotcollectorMetrics: ${enable_adotcollector_metrics}

Expand Down
186 changes: 75 additions & 111 deletions modules/eks-monitoring/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,23 @@ variable "irsa_iam_additional_policies" {
}

variable "adot_loglevel" {
description = "Verbosity level for ADOT collector logs. This accepts (detailed|normal|basic), see https://aws-otel.github.io/docs/components/misc-exporters for mor infos."
description = "Verbosity level for ADOT collector logs. This accepts (detailed|normal|basic), see https://aws-otel.github.io/docs/components/misc-exporters for more info."
type = string
default = "normal"
}

variable "adot_service_telemetry_loglevel" {
description = "Verbosity level for ADOT service telemetry logs. See https://opentelemetry.io/docs/collector/configuration/#telemetry for more info."
type = string
default = "INFO"
}

variable "managed_prometheus_workspace_endpoint" {
description = "Amazon Managed Prometheus Workspace Endpoint"
type = string
default = ""
}

variable "managed_prometheus_workspace_id" {
description = "Amazon Managed Prometheus Workspace ID"
type = string
Expand Down Expand Up @@ -138,32 +150,20 @@ variable "enable_kube_state_metrics" {
variable "ksm_config" {
description = "Kube State metrics configuration"
type = object({
create_namespace = bool
k8s_namespace = string
helm_chart_name = string
helm_chart_version = string
helm_release_name = string
helm_repo_url = string
helm_settings = map(string)
helm_values = map(any)

scrape_interval = string
scrape_timeout = string
create_namespace = optional(bool, true)
k8s_namespace = optional(string, "kube-system")
helm_chart_name = optional(string, "kube-state-metrics")
helm_chart_version = optional(string, "5.15.2")
helm_release_name = optional(string, "kube-state-metrics")
helm_repo_url = optional(string, "https://prometheus-community.github.io/helm-charts")
helm_settings = optional(map(string), {})
helm_values = optional(map(any), {})

scrape_interval = optional(string, "60s")
scrape_timeout = optional(string, "15s")
})

default = {
create_namespace = true
helm_chart_name = "kube-state-metrics"
helm_chart_version = "5.15.2"
helm_release_name = "kube-state-metrics"
helm_repo_url = "https://prometheus-community.github.io/helm-charts"
helm_settings = {}
helm_values = {}
k8s_namespace = "kube-system"

scrape_interval = "60s"
scrape_timeout = "15s"
}
default = {}
nullable = false
}

Expand All @@ -176,32 +176,20 @@ variable "enable_node_exporter" {
variable "ne_config" {
description = "Node exporter configuration"
type = object({
create_namespace = bool
k8s_namespace = string
helm_chart_name = string
helm_chart_version = string
helm_release_name = string
helm_repo_url = string
helm_settings = map(string)
helm_values = map(any)

scrape_interval = string
scrape_timeout = string
create_namespace = optional(bool, true)
k8s_namespace = optional(string, "prometheus-node-exporter")
helm_chart_name = optional(string, "prometheus-node-exporter")
helm_chart_version = optional(string, "4.24.0")
helm_release_name = optional(string, "prometheus-node-exporter")
helm_repo_url = optional(string, "https://prometheus-community.github.io/helm-charts")
helm_settings = optional(map(string), {})
helm_values = optional(map(any), {})

scrape_interval = optional(string, "60s")
scrape_timeout = optional(string, "60s")
})

default = {
create_namespace = true
helm_chart_name = "prometheus-node-exporter"
helm_chart_version = "4.24.0"
helm_release_name = "prometheus-node-exporter"
helm_repo_url = "https://prometheus-community.github.io/helm-charts"
helm_settings = {}
helm_values = {}
k8s_namespace = "prometheus-node-exporter"

scrape_interval = "60s"
scrape_timeout = "60s"
}
default = {}
nullable = false
}

Expand All @@ -214,14 +202,11 @@ variable "tags" {
variable "prometheus_config" {
description = "Controls default values such as scrape interval, timeouts and ports globally"
type = object({
global_scrape_interval = string
global_scrape_timeout = string
global_scrape_interval = optional(string, "120s")
global_scrape_timeout = optional(string, "15s")
})

default = {
global_scrape_interval = "120s"
global_scrape_timeout = "15s"
}
default = {}
nullable = false
}

Expand Down Expand Up @@ -260,18 +245,14 @@ variable "enable_tracing" {
variable "tracing_config" {
description = "Configuration object for traces collection to AWS X-Ray"
type = object({
otlp_grpc_endpoint = string
otlp_http_endpoint = string
send_batch_size = number
timeout = string
otlp_grpc_endpoint = optional(string, "0.0.0.0:4317")
otlp_http_endpoint = optional(string, "0.0.0.0:4318")
send_batch_size = optional(number, 50)
timeout = optional(string, "30s")
})

default = {
otlp_grpc_endpoint = "0.0.0.0:4317"
otlp_http_endpoint = "0.0.0.0:4318"
send_batch_size = 50
timeout = "30s"
}
default = {}
nullable = false
}

variable "enable_custom_metrics" {
Expand Down Expand Up @@ -330,28 +311,27 @@ variable "enable_nginx" {
default = false
}


variable "nginx_config" {
description = "Configuration object for NGINX monitoring"
type = object({
enable_alerting_rules = bool
enable_recording_rules = bool
enable_dashboards = bool
scrape_sample_limit = number
enable_alerting_rules = optional(bool)
enable_recording_rules = optional(bool)
enable_dashboards = optional(bool)
scrape_sample_limit = optional(number)

flux_gitrepository_name = string
flux_gitrepository_url = string
flux_gitrepository_branch = string
flux_kustomization_name = string
flux_kustomization_path = string
flux_gitrepository_name = optional(string)
flux_gitrepository_url = optional(string)
flux_gitrepository_branch = optional(string)
flux_kustomization_name = optional(string)
flux_kustomization_path = optional(string)

grafana_dashboard_url = string
grafana_dashboard_url = optional(string)

prometheus_metrics_endpoint = string
prometheus_metrics_endpoint = optional(string)
})

# defaults are pre-computed in locals.tf, provide a full definition to override
default = null
# defaults are pre-computed in locals.tf
default = {}
}

variable "enable_istio" {
Expand Down Expand Up @@ -417,26 +397,17 @@ variable "enable_fluxcd" {
variable "flux_config" {
description = "FluxCD configuration"
type = object({
create_namespace = bool
k8s_namespace = string
helm_chart_name = string
helm_chart_version = string
helm_release_name = string
helm_repo_url = string
helm_settings = map(string)
helm_values = map(any)
create_namespace = optional(bool, true)
k8s_namespace = optional(string, "flux-system")
helm_chart_name = optional(string, "flux2")
helm_chart_version = optional(string, "2.12.2")
helm_release_name = optional(string, "observability-fluxcd-addon")
helm_repo_url = optional(string, "https://fluxcd-community.github.io/helm-charts")
helm_settings = optional(map(string), {})
helm_values = optional(map(any), {})
})

default = {
create_namespace = true
helm_chart_name = "flux2"
helm_chart_version = "2.12.2"
helm_release_name = "observability-fluxcd-addon"
helm_repo_url = "https://fluxcd-community.github.io/helm-charts"
helm_settings = {}
helm_values = {}
k8s_namespace = "flux-system"
}
default = {}
nullable = false
}

Expand All @@ -449,22 +420,15 @@ variable "enable_grafana_operator" {
variable "go_config" {
description = "Grafana Operator configuration"
type = object({
create_namespace = bool
helm_chart = string
helm_name = string
k8s_namespace = string
helm_release_name = string
helm_chart_version = string
create_namespace = optional(bool, true)
helm_chart = optional(string, "oci://ghcr.io/grafana-operator/helm-charts/grafana-operator")
helm_name = optional(string, "grafana-operator")
k8s_namespace = optional(string, "grafana-operator")
helm_release_name = optional(string, "grafana-operator")
helm_chart_version = optional(string, "v5.5.2")
})

default = {
create_namespace = true
helm_chart = "oci://ghcr.io/grafana-operator/helm-charts/grafana-operator"
helm_name = "grafana-operator"
k8s_namespace = "grafana-operator"
helm_release_name = "grafana-operator"
helm_chart_version = "v5.5.2"
}
default = {}
nullable = false
}

Expand Down
2 changes: 1 addition & 1 deletion modules/eks-monitoring/versions.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
terraform {
required_version = ">= 1.1.0"
required_version = ">= 1.3.0"

required_providers {
aws = {
Expand Down

0 comments on commit 27060a5

Please sign in to comment.