aws-observability · RAMathews · Jan 18, 2024 · Jan 29, 2024 · Jan 29, 2024 · Jan 31, 2024
diff --git a/examples/eks-cross-account-with-central-amp/main.tf b/examples/eks-cross-account-with-central-amp/main.tf
@@ -43,7 +43,6 @@ module "eks_monitoring_one" {
   enable_managed_prometheus = false
 
   managed_prometheus_workspace_id       = module.managed_service_prometheus.workspace_id
-  managed_prometheus_workspace_endpoint = module.managed_service_prometheus.workspace_prometheus_endpoint
   managed_prometheus_workspace_region   = var.cluster_one.region
   managed_prometheus_cross_account_role = aws_iam_role.cross_account_amp_role.arn
   irsa_iam_additional_policies          = [aws_iam_policy.irsa_assume_role_policy_one.arn]
@@ -96,9 +95,8 @@ module "eks_monitoring_two" {
   # prevents the module to create a workspace
   enable_managed_prometheus = false
 
-  managed_prometheus_workspace_id       = module.managed_service_prometheus.workspace_id
-  managed_prometheus_workspace_endpoint = module.managed_service_prometheus.workspace_prometheus_endpoint
-  managed_prometheus_workspace_region   = var.cluster_two.region
+  managed_prometheus_workspace_id     = module.managed_service_prometheus.workspace_id
+  managed_prometheus_workspace_region = var.cluster_two.region
 
   managed_prometheus_cross_account_role = aws_iam_role.cross_account_amp_role.arn
   irsa_iam_additional_policies          = [aws_iam_policy.irsa_assume_role_policy_two.arn]

diff --git a/examples/eks-istio/README.md b/examples/eks-istio/README.md
@@ -54,4 +54,5 @@ View the full documentation for this example [here](https://aws-observability.gi
 | <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
 | <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
 | <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region |
+| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
 <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
diff --git a/examples/eks-istio/outputs.tf b/examples/eks-istio/outputs.tf
@@ -22,3 +22,8 @@ output "eks_cluster_id" {
   description = "EKS Cluster Id"
   value       = module.eks_monitoring.eks_cluster_id
 }
+
+output "scraper_aws_auth" {
+  description = "Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use"
+  value       = module.eks_monitoring.scraper_aws_auth # passthrough of the eks_monitoring module output of the same name
+}
diff --git a/examples/existing-cluster-java/README.md b/examples/existing-cluster-java/README.md
@@ -237,4 +237,5 @@ terraform destroy -var-file=terraform.tfvars
 | <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
 | <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
 | <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region |
+| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
 <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
diff --git a/examples/existing-cluster-java/outputs.tf b/examples/existing-cluster-java/outputs.tf
@@ -22,3 +22,8 @@ output "eks_cluster_id" {
   description = "EKS Cluster Id"
   value       = module.eks_monitoring.eks_cluster_id
 }
+
+output "scraper_aws_auth" {
+  description = "Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use"
+  value       = module.eks_monitoring.scraper_aws_auth # passthrough of the eks_monitoring module output of the same name
+}
diff --git a/examples/existing-cluster-nginx/README.md b/examples/existing-cluster-nginx/README.md
@@ -248,4 +248,5 @@ add this `managed_prometheus_region=xxx` and `managed_prometheus_workspace_id=ws
 | <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
 | <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
 | <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region |
+| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
 <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
diff --git a/examples/existing-cluster-nginx/outputs.tf b/examples/existing-cluster-nginx/outputs.tf
@@ -22,3 +22,8 @@ output "eks_cluster_id" {
   description = "EKS Cluster Id"
   value       = module.eks_monitoring.eks_cluster_id
 }
+
+output "scraper_aws_auth" {
+  description = "Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use"
+  value       = module.eks_monitoring.scraper_aws_auth # passthrough of the eks_monitoring module output of the same name
+}
diff --git a/examples/existing-cluster-with-base-and-infra/README.md b/examples/existing-cluster-with-base-and-infra/README.md
@@ -65,4 +65,5 @@ View the full documentation for this example [here](https://aws-observability.gi
 | <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
 | <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
 | <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region |
+| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
 <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
diff --git a/examples/existing-cluster-with-base-and-infra/outputs.tf b/examples/existing-cluster-with-base-and-infra/outputs.tf
@@ -22,3 +22,8 @@ output "eks_cluster_id" {
   description = "EKS Cluster Id"
   value       = module.eks_monitoring.eks_cluster_id
 }
+
+output "scraper_aws_auth" {
+  description = "Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use"
+  value       = module.eks_monitoring.scraper_aws_auth # passthrough of the eks_monitoring module output of the same name
+}
diff --git a/modules/eks-monitoring/README.md b/modules/eks-monitoring/README.md
@@ -49,12 +49,15 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
 
 | Name | Type |
 |------|------|
+| [aws_prometheus_alert_manager_definition.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_alert_manager_definition) | resource |
 | [aws_prometheus_rule_group_namespace.alerting_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource |
 | [aws_prometheus_rule_group_namespace.recording_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource |
+| [aws_prometheus_scraper.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_scraper) | resource |
 | [aws_prometheus_workspace.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_workspace) | resource |
 | [helm_release.fluxcd](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
 | [helm_release.grafana_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
 | [helm_release.kube_state_metrics](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
+| [helm_release.managed_prometheus_role](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
 | [helm_release.prometheus_node_exporter](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
 | [kubectl_manifest.adothealth_monitoring_dashboards](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource |
 | [kubectl_manifest.api_server_dashboards](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource |
@@ -66,6 +69,7 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
 | [aws_eks_cluster.eks_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster) | data source |
 | [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source |
 | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source |
+| [aws_subnet.helper](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnet) | data source |
 
 ## Inputs
 
@@ -108,7 +112,6 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
 | <a name="input_grafana_api_key"></a> [grafana\_api\_key](#input\_grafana\_api\_key) | Grafana API key for the Amazon Managed Grafana workspace. Required if `enable_external_secrets = true` | `string` | `""` | no |
 | <a name="input_grafana_cluster_dashboard_url"></a> [grafana\_cluster\_dashboard\_url](#input\_grafana\_cluster\_dashboard\_url) | Dashboard URL for Cluster Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/cluster.json"` | no |
 | <a name="input_grafana_kubelet_dashboard_url"></a> [grafana\_kubelet\_dashboard\_url](#input\_grafana\_kubelet\_dashboard\_url) | Dashboard URL for Kubelet Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/kubelet.json"` | no |
-| <a name="input_grafana_kubeproxy_dashboard_url"></a> [grafana\_kubeproxy\_dashboard\_url](#input\_grafana\_kubeproxy\_dashboard\_url) | Dashboard URL for kube-proxy Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/kube-proxy/kube-proxy.json"` | no |
 | <a name="input_grafana_namespace_workloads_dashboard_url"></a> [grafana\_namespace\_workloads\_dashboard\_url](#input\_grafana\_namespace\_workloads\_dashboard\_url) | Dashboard URL for Namespace Workloads Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/namespace-workloads.json"` | no |
 | <a name="input_grafana_node_exporter_dashboard_url"></a> [grafana\_node\_exporter\_dashboard\_url](#input\_grafana\_node\_exporter\_dashboard\_url) | Dashboard URL for Node Exporter Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/nodeexporter-nodes.json"` | no |
 | <a name="input_grafana_nodes_dashboard_url"></a> [grafana\_nodes\_dashboard\_url](#input\_grafana\_nodes\_dashboard\_url) | Dashboard URL for Nodes Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/nodes.json"` | no |
@@ -125,7 +128,6 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
 | <a name="input_kubeproxy_monitoring_config"></a> [kubeproxy\_monitoring\_config](#input\_kubeproxy\_monitoring\_config) | Config object for kube-proxy monitoring | <pre>object({<br>    flux_gitrepository_name   = string<br>    flux_gitrepository_url    = string<br>    flux_gitrepository_branch = string<br>    flux_kustomization_name   = string<br>    flux_kustomization_path   = string<br><br>    dashboards = object({<br>      default = string<br>    })<br>  })</pre> | `null` | no |
 | <a name="input_logs_config"></a> [logs\_config](#input\_logs\_config) | Configuration object for logs collection | <pre>object({<br>    cw_log_retention_days = number<br>  })</pre> | <pre>{<br>  "cw_log_retention_days": 90<br>}</pre> | no |
 | <a name="input_managed_prometheus_cross_account_role"></a> [managed\_prometheus\_cross\_account\_role](#input\_managed\_prometheus\_cross\_account\_role) | Amazon Managed Prometheus Workspace's Account Role Arn | `string` | `""` | no |
-| <a name="input_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#input\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus Workspace Endpoint | `string` | `""` | no |
 | <a name="input_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus Workspace ID | `string` | `null` | no |
 | <a name="input_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#input\_managed\_prometheus\_workspace\_region) | Amazon Managed Prometheus Workspace's Region | `string` | `null` | no |
 | <a name="input_ne_config"></a> [ne\_config](#input\_ne\_config) | Node exporter configuration | <pre>object({<br>    create_namespace   = optional(bool, true)<br>    k8s_namespace      = optional(string, "prometheus-node-exporter")<br>    helm_chart_name    = optional(string, "prometheus-node-exporter")<br>    helm_chart_version = optional(string, "4.24.0")<br>    helm_release_name  = optional(string, "prometheus-node-exporter")<br>    helm_repo_url      = optional(string, "https://prometheus-community.github.io/helm-charts")<br>    helm_settings      = optional(map(string), {})<br>    helm_values        = optional(map(any), {})<br><br>    scrape_interval = optional(string, "60s")<br>    scrape_timeout  = optional(string, "60s")<br>  })</pre> | `{}` | no |
@@ -147,4 +149,5 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
 | <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
 | <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
 | <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | Amazon Managed Prometheus workspace region |
+| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
 <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
diff --git a/modules/eks-monitoring/locals.tf b/modules/eks-monitoring/locals.tf
@@ -13,15 +13,20 @@ locals {
   managed_prometheus_workspace_id       = var.enable_managed_prometheus ? aws_prometheus_workspace.this[0].id : var.managed_prometheus_workspace_id
   managed_prometheus_workspace_region   = coalesce(var.managed_prometheus_workspace_region, data.aws_region.current.name)
   managed_prometheus_workspace_endpoint = "https://aps-workspaces.${local.managed_prometheus_workspace_region}.amazonaws.com/workspaces/${local.managed_prometheus_workspace_id}/"
+  managed_prometheus_workspace_arn      = "arn:${data.aws_partition.current.partition}:aps:${local.managed_prometheus_workspace_region}:${data.aws_caller_identity.current.account_id}:workspace/${local.managed_prometheus_workspace_id}" # partition-aware (aws, aws-cn, aws-us-gov); NOTE(review): built with the caller's account ID - confirm for cross-account workspaces
 
-  name                      = "adot-collector-kubeprometheus"
+  name                      = "adot-collector"
   kube_service_account_name = try(var.helm_config.service_account, local.name)
   namespace                 = try(var.helm_config.namespace, local.name)
 
   eks_oidc_issuer_url  = replace(data.aws_eks_cluster.eks_cluster.identity[0].oidc[0].issuer, "https://", "")
   eks_cluster_endpoint = data.aws_eks_cluster.eks_cluster.endpoint
   eks_cluster_version  = data.aws_eks_cluster.eks_cluster.version
 
+  tags = merge(var.tags, {
+    Source = "AWS Observability Accelerator"
+  })
+
   context = {
     aws_caller_identity_account_id = data.aws_caller_identity.current.account_id
     aws_caller_identity_arn        = data.aws_caller_identity.current.arn
@@ -31,7 +36,7 @@ locals {
     eks_cluster_id                 = var.eks_cluster_id
     eks_oidc_issuer_url            = local.eks_oidc_issuer_url
     eks_oidc_provider_arn          = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${local.eks_oidc_issuer_url}"
-    tags                           = var.tags
+    tags                           = local.tags
     irsa_iam_role_path             = var.irsa_iam_role_path
     irsa_iam_permissions_boundary  = var.irsa_iam_permissions_boundary
   }

diff --git a/modules/eks-monitoring/main.tf b/modules/eks-monitoring/main.tf
@@ -2,7 +2,25 @@ resource "aws_prometheus_workspace" "this" {
   count = var.enable_managed_prometheus ? 1 : 0
 
   alias = local.name
-  tags  = var.tags
+
+  # Agentless scraping requires this tag on the workspace
+  tags = merge(local.tags, {
+    AMPAgentlessScraper = ""
+  })
+}
+
+resource "aws_prometheus_alert_manager_definition" "this" {
+  count = var.enable_alertmanager ? 1 : 0 # created only when var.enable_alertmanager is true
+
+  workspace_id = local.managed_prometheus_workspace_id # module-created or caller-supplied workspace ID
+  # Minimal Alertmanager config: the root route sends everything to a single 'default' receiver that has no integrations defined.
+  definition = <<EOF
+alertmanager_config: |
+    route:
+      receiver: 'default'
+    receivers:
+      - name: 'default'
+EOF
 }
 
 module "operator" {
@@ -273,3 +291,48 @@ module "external_secrets" {
 
   depends_on = [resource.helm_release.grafana_operator]
 }
+
+resource "helm_release" "managed_prometheus_role" {
+  name  = "managed-prometheus-role"                          # NOTE(review): no count/for_each guard - installed unconditionally
+  chart = "${path.module}/managed-prometheus-scraper-config" # local chart directory shipped inside this module
+}
+
+# These helpers work around the ValidationException thrown by the scraper when
+# the EKS cluster subnets are not in unique availability zones.
+data "aws_subnet" "helper" {
+  for_each = toset(data.aws_eks_cluster.eks_cluster.vpc_config[0].subnet_ids) # one lookup per subnet attached to the cluster
+  id       = each.key
+}
+
+locals {
+  eks_availability_zone_subnets = { # availability zone => list of the cluster subnet IDs found in that zone
+    for _, subnet_info in data.aws_subnet.helper : subnet_info.availability_zone => subnet_info.id...
+  }
+}
+
+resource "aws_prometheus_scraper" "this" {
+  alias = "managed-prometheus-scraper"
+  source {
+    eks {
+      cluster_arn = data.aws_eks_cluster.eks_cluster.arn
+      subnet_ids  = [for az_subnets in local.eks_availability_zone_subnets : az_subnets[0]] # first subnet of each availability zone
+    }
+  }
+  # Scrape configuration rendered from the prom_config.yaml template in this module.
+  scrape_configuration = templatefile("${path.module}/prom_config.yaml", {
+    global_scrape_interval = var.prometheus_config.global_scrape_interval
+    global_scrape_timeout  = var.prometheus_config.global_scrape_timeout
+    enableAPIserver        = var.enable_apiserver_monitoring
+    eks_cluster_id         = local.context.eks_cluster_id
+    region                 = local.managed_prometheus_workspace_region
+    accountID              = local.context.aws_caller_identity_account_id
+  })
+  # Scraped metrics are delivered to the workspace identified by the locally built ARN.
+  destination {
+    amp {
+      workspace_arn = local.managed_prometheus_workspace_arn
+    }
+  }
+
+  tags = local.tags
+}
diff --git a/modules/eks-monitoring/managed-prometheus-scraper-config/.helmignore b/modules/eks-monitoring/managed-prometheus-scraper-config/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/