Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement AWS managed agentless collector for Prometheus metrics #260

Closed
6 changes: 2 additions & 4 deletions examples/eks-cross-account-with-central-amp/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ module "eks_monitoring_one" {
enable_managed_prometheus = false

managed_prometheus_workspace_id = module.managed_service_prometheus.workspace_id
managed_prometheus_workspace_endpoint = module.managed_service_prometheus.workspace_prometheus_endpoint
managed_prometheus_workspace_region = var.cluster_one.region
managed_prometheus_cross_account_role = aws_iam_role.cross_account_amp_role.arn
irsa_iam_additional_policies = [aws_iam_policy.irsa_assume_role_policy_one.arn]
Expand Down Expand Up @@ -96,9 +95,8 @@ module "eks_monitoring_two" {
# prevents the module to create a workspace
enable_managed_prometheus = false

managed_prometheus_workspace_id = module.managed_service_prometheus.workspace_id
managed_prometheus_workspace_endpoint = module.managed_service_prometheus.workspace_prometheus_endpoint
managed_prometheus_workspace_region = var.cluster_two.region
managed_prometheus_workspace_id = module.managed_service_prometheus.workspace_id
managed_prometheus_workspace_region = var.cluster_two.region

managed_prometheus_cross_account_role = aws_iam_role.cross_account_amp_role.arn
irsa_iam_additional_policies = [aws_iam_policy.irsa_assume_role_policy_two.arn]
Expand Down
1 change: 1 addition & 0 deletions examples/eks-istio/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,5 @@ View the full documentation for this example [here](https://aws-observability.gi
| <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
| <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
| <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region |
| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
5 changes: 5 additions & 0 deletions examples/eks-istio/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ output "eks_cluster_id" {
description = "EKS Cluster Id"
value = module.eks_monitoring.eks_cluster_id
}

# Re-exports the eks_monitoring module's `scraper_aws_auth` output so the
# command it contains is shown after `terraform apply` of this example.
output "scraper_aws_auth" {
  description = "Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use"
  value       = module.eks_monitoring.scraper_aws_auth
}
1 change: 1 addition & 0 deletions examples/existing-cluster-java/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,4 +237,5 @@ terraform destroy -var-file=terraform.tfvars
| <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
| <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
| <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region |
| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
5 changes: 5 additions & 0 deletions examples/existing-cluster-java/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ output "eks_cluster_id" {
description = "EKS Cluster Id"
value = module.eks_monitoring.eks_cluster_id
}

# Re-exports the eks_monitoring module's `scraper_aws_auth` output so the
# command it contains is shown after `terraform apply` of this example.
output "scraper_aws_auth" {
  description = "Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use"
  value       = module.eks_monitoring.scraper_aws_auth
}
1 change: 1 addition & 0 deletions examples/existing-cluster-nginx/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,4 +248,5 @@ add this `managed_prometheus_region=xxx` and `managed_prometheus_workspace_id=ws
| <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
| <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
| <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region |
| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
5 changes: 5 additions & 0 deletions examples/existing-cluster-nginx/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ output "eks_cluster_id" {
description = "EKS Cluster Id"
value = module.eks_monitoring.eks_cluster_id
}

# Re-exports the eks_monitoring module's `scraper_aws_auth` output so the
# command it contains is shown after `terraform apply` of this example.
output "scraper_aws_auth" {
  description = "Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use"
  value       = module.eks_monitoring.scraper_aws_auth
}
1 change: 1 addition & 0 deletions examples/existing-cluster-with-base-and-infra/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,5 @@ View the full documentation for this example [here](https://aws-observability.gi
| <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
| <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
| <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region |
| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
5 changes: 5 additions & 0 deletions examples/existing-cluster-with-base-and-infra/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ output "eks_cluster_id" {
description = "EKS Cluster Id"
value = module.eks_monitoring.eks_cluster_id
}

# Re-exports the eks_monitoring module's `scraper_aws_auth` output so the
# command it contains is shown after `terraform apply` of this example.
output "scraper_aws_auth" {
  description = "Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use"
  value       = module.eks_monitoring.scraper_aws_auth
}
7 changes: 5 additions & 2 deletions modules/eks-monitoring/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,15 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this

| Name | Type |
|------|------|
| [aws_prometheus_alert_manager_definition.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_alert_manager_definition) | resource |
| [aws_prometheus_rule_group_namespace.alerting_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource |
| [aws_prometheus_rule_group_namespace.recording_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource |
| [aws_prometheus_scraper.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_scraper) | resource |
| [aws_prometheus_workspace.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_workspace) | resource |
| [helm_release.fluxcd](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.grafana_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.kube_state_metrics](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.managed_prometheus_role](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.prometheus_node_exporter](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [kubectl_manifest.adothealth_monitoring_dashboards](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource |
| [kubectl_manifest.api_server_dashboards](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource |
Expand All @@ -66,6 +69,7 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
| [aws_eks_cluster.eks_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster) | data source |
| [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source |
| [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source |
| [aws_subnet.helper](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnet) | data source |

## Inputs

Expand Down Expand Up @@ -108,7 +112,6 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
| <a name="input_grafana_api_key"></a> [grafana\_api\_key](#input\_grafana\_api\_key) | Grafana API key for the Amazon Managed Grafana workspace. Required if `enable_external_secrets = true` | `string` | `""` | no |
| <a name="input_grafana_cluster_dashboard_url"></a> [grafana\_cluster\_dashboard\_url](#input\_grafana\_cluster\_dashboard\_url) | Dashboard URL for Cluster Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/cluster.json"` | no |
| <a name="input_grafana_kubelet_dashboard_url"></a> [grafana\_kubelet\_dashboard\_url](#input\_grafana\_kubelet\_dashboard\_url) | Dashboard URL for Kubelet Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/kubelet.json"` | no |
| <a name="input_grafana_kubeproxy_dashboard_url"></a> [grafana\_kubeproxy\_dashboard\_url](#input\_grafana\_kubeproxy\_dashboard\_url) | Dashboard URL for kube-proxy Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/kube-proxy/kube-proxy.json"` | no |
| <a name="input_grafana_namespace_workloads_dashboard_url"></a> [grafana\_namespace\_workloads\_dashboard\_url](#input\_grafana\_namespace\_workloads\_dashboard\_url) | Dashboard URL for Namespace Workloads Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/namespace-workloads.json"` | no |
| <a name="input_grafana_node_exporter_dashboard_url"></a> [grafana\_node\_exporter\_dashboard\_url](#input\_grafana\_node\_exporter\_dashboard\_url) | Dashboard URL for Node Exporter Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/nodeexporter-nodes.json"` | no |
| <a name="input_grafana_nodes_dashboard_url"></a> [grafana\_nodes\_dashboard\_url](#input\_grafana\_nodes\_dashboard\_url) | Dashboard URL for Nodes Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/v0.2.0/artifacts/grafana-dashboards/eks/infrastructure/nodes.json"` | no |
Expand All @@ -125,7 +128,6 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
| <a name="input_kubeproxy_monitoring_config"></a> [kubeproxy\_monitoring\_config](#input\_kubeproxy\_monitoring\_config) | Config object for kube-proxy monitoring | <pre>object({<br> flux_gitrepository_name = string<br> flux_gitrepository_url = string<br> flux_gitrepository_branch = string<br> flux_kustomization_name = string<br> flux_kustomization_path = string<br><br> dashboards = object({<br> default = string<br> })<br> })</pre> | `null` | no |
| <a name="input_logs_config"></a> [logs\_config](#input\_logs\_config) | Configuration object for logs collection | <pre>object({<br> cw_log_retention_days = number<br> })</pre> | <pre>{<br> "cw_log_retention_days": 90<br>}</pre> | no |
| <a name="input_managed_prometheus_cross_account_role"></a> [managed\_prometheus\_cross\_account\_role](#input\_managed\_prometheus\_cross\_account\_role) | Amazon Managed Prometheus Workspace's Account Role Arn | `string` | `""` | no |
| <a name="input_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#input\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus Workspace Endpoint | `string` | `""` | no |
| <a name="input_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus Workspace ID | `string` | `null` | no |
| <a name="input_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#input\_managed\_prometheus\_workspace\_region) | Amazon Managed Prometheus Workspace's Region | `string` | `null` | no |
| <a name="input_ne_config"></a> [ne\_config](#input\_ne\_config) | Node exporter configuration | <pre>object({<br> create_namespace = optional(bool, true)<br> k8s_namespace = optional(string, "prometheus-node-exporter")<br> helm_chart_name = optional(string, "prometheus-node-exporter")<br> helm_chart_version = optional(string, "4.24.0")<br> helm_release_name = optional(string, "prometheus-node-exporter")<br> helm_repo_url = optional(string, "https://prometheus-community.github.io/helm-charts")<br> helm_settings = optional(map(string), {})<br> helm_values = optional(map(any), {})<br><br> scrape_interval = optional(string, "60s")<br> scrape_timeout = optional(string, "60s")<br> })</pre> | `{}` | no |
Expand All @@ -147,4 +149,5 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
| <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint |
| <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID |
| <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | Amazon Managed Prometheus workspace region |
| <a name="output_scraper_aws_auth"></a> [scraper\_aws\_auth](#output\_scraper\_aws\_auth) | Execute this command to grant access to the managed scrapers to gain permissions on your cluster. Mandatory for the first use |
<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
9 changes: 7 additions & 2 deletions modules/eks-monitoring/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,20 @@ locals {
managed_prometheus_workspace_id = var.enable_managed_prometheus ? aws_prometheus_workspace.this[0].id : var.managed_prometheus_workspace_id
managed_prometheus_workspace_region = coalesce(var.managed_prometheus_workspace_region, data.aws_region.current.name)
managed_prometheus_workspace_endpoint = "https://aps-workspaces.${local.managed_prometheus_workspace_region}.amazonaws.com/workspaces/${local.managed_prometheus_workspace_id}/"
managed_prometheus_workspace_arn = "arn:aws:aps:${local.managed_prometheus_workspace_region}:${data.aws_caller_identity.current.account_id}:workspace/${local.managed_prometheus_workspace_id}"

name = "adot-collector-kubeprometheus"
name = "adot-collector"
kube_service_account_name = try(var.helm_config.service_account, local.name)
namespace = try(var.helm_config.namespace, local.name)

eks_oidc_issuer_url = replace(data.aws_eks_cluster.eks_cluster.identity[0].oidc[0].issuer, "https://", "")
eks_cluster_endpoint = data.aws_eks_cluster.eks_cluster.endpoint
eks_cluster_version = data.aws_eks_cluster.eks_cluster.version

tags = merge(var.tags, {
Source = "AWS Observability Accelerator"
})

context = {
aws_caller_identity_account_id = data.aws_caller_identity.current.account_id
aws_caller_identity_arn = data.aws_caller_identity.current.arn
Expand All @@ -31,7 +36,7 @@ locals {
eks_cluster_id = var.eks_cluster_id
eks_oidc_issuer_url = local.eks_oidc_issuer_url
eks_oidc_provider_arn = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${local.eks_oidc_issuer_url}"
tags = var.tags
tags = local.tags
irsa_iam_role_path = var.irsa_iam_role_path
irsa_iam_permissions_boundary = var.irsa_iam_permissions_boundary
}
Expand Down
65 changes: 64 additions & 1 deletion modules/eks-monitoring/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,25 @@ resource "aws_prometheus_workspace" "this" {
count = var.enable_managed_prometheus ? 1 : 0

alias = local.name
tags = var.tags

# Agentless scraping requires this tag on the workspace
tags = merge(local.tags, {
AMPAgentlessScraper = ""
})
}

# Alert manager definition for the workspace; created only when
# var.enable_alertmanager is true.
resource "aws_prometheus_alert_manager_definition" "this" {
  count = var.enable_alertmanager ? 1 : 0

  workspace_id = local.managed_prometheus_workspace_id

  # The heredoc is YAML: everything under `alertmanager_config: |` must be
  # indented deeper than the key, otherwise the block scalar is empty and the
  # document does not parse. Routes every alert to a single 'default' receiver.
  definition = <<EOF
alertmanager_config: |
  route:
    receiver: 'default'
  receivers:
    - name: 'default'
EOF
}

module "operator" {
Expand Down Expand Up @@ -273,3 +291,48 @@ module "external_secrets" {

depends_on = [resource.helm_release.grafana_operator]
}

# Helm release installed from the chart directory shipped inside this module
# (managed-prometheus-scraper-config).
resource "helm_release" "managed_prometheus_role" {
  chart = "${path.module}/managed-prometheus-scraper-config"
  name  = "managed-prometheus-role"
}

# Helper lookup: the scraper throws a ValidationException when the EKS subnets
# handed to it are not in unique availability zones. Reading every cluster
# subnet here exposes its availability zone so the subnets can be grouped.
data "aws_subnet" "helper" {
  # One lookup per subnet id attached to the cluster's VPC config.
  for_each = toset(data.aws_eks_cluster.eks_cluster.vpc_config[0].subnet_ids)

  # For a set, each.value is the same string as each.key (the subnet id).
  id = each.value
}

locals {
  # Map of availability zone => list of subnet ids located in that zone.
  # The trailing `...` enables grouping mode, so several subnets sharing an
  # availability zone are collected into one list instead of raising a
  # duplicate-key error.
  eks_availability_zone_subnets = {
    for az_subnet in data.aws_subnet.helper :
    az_subnet.availability_zone => az_subnet.id...
  }
}

# Managed Prometheus scraper: reads metrics from the EKS cluster (source) and
# writes them to the Amazon Managed Prometheus workspace (destination).
resource "aws_prometheus_scraper" "this" {
  alias = "managed-prometheus-scraper"
  tags  = local.tags

  source {
    eks {
      cluster_arn = data.aws_eks_cluster.eks_cluster.arn

      # Exactly one subnet per availability zone: take the first id of each
      # per-zone list built in local.eks_availability_zone_subnets.
      subnet_ids = [for zone_subnet_ids in local.eks_availability_zone_subnets : zone_subnet_ids[0]]
    }
  }

  destination {
    amp {
      workspace_arn = local.managed_prometheus_workspace_arn
    }
  }

  # Scrape configuration rendered from the module's prom_config.yaml template.
  scrape_configuration = templatefile("${path.module}/prom_config.yaml", {
    global_scrape_interval = var.prometheus_config.global_scrape_interval
    global_scrape_timeout  = var.prometheus_config.global_scrape_timeout
    enableAPIserver        = var.enable_apiserver_monitoring
    eks_cluster_id         = local.context.eks_cluster_id
    region                 = local.managed_prometheus_workspace_region
    accountID              = local.context.aws_caller_identity_account_id
  })
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
Loading
Loading