Skip to content

Commit

Permalink
Metrics: Agent add cAdvisor integration
Browse files Browse the repository at this point in the history
Signed-off-by: Weifeng Wang <[email protected]>
  • Loading branch information
qclaogui committed Mar 23, 2024
1 parent c675d52 commit bde0d73
Show file tree
Hide file tree
Showing 40 changed files with 1,720 additions and 1,420 deletions.
5 changes: 4 additions & 1 deletion docker-compose/common/compose-include/agent-collect-all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ services:
image: ${AGENT_IMAGE:-docker.io/grafana/agent:latest}
volumes:
- ../config/agent-flow:/etc/agent-config
- /var/run/docker.sock:/var/run/docker.sock
- /var/run/docker.sock:/var/run/docker.sock:ro
- /:/rootfs:ro
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
entrypoint:
- /bin/grafana-agent
- run
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ services:
image: ${AGENT_IMAGE:-docker.io/grafana/agent:latest}
volumes:
- ../config/agent-flow:/etc/agent-config
- /var/run/docker.sock:/var/run/docker.sock
- /var/run/docker.sock:/var/run/docker.sock:ro
- /:/rootfs:ro
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
entrypoint:
- /bin/grafana-agent
- run
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ services:
image: ${AGENT_IMAGE:-docker.io/grafana/agent:latest}
volumes:
- ../config/agent-flow:/etc/agent-config
- /var/run/docker.sock:/var/run/docker.sock
- /var/run/docker.sock:/var/run/docker.sock:ro
- /:/rootfs:ro
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
entrypoint:
- /bin/grafana-agent
- run
Expand All @@ -27,7 +30,7 @@ services:
timeout: 5s
retries: 10
ports:
- "12345"
- "12345:12345"
# scale up support
deploy:
replicas: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ services:
image: ${AGENT_IMAGE:-docker.io/grafana/agent:latest}
volumes:
- ../config/agent-flow:/etc/agent-config
- /var/run/docker.sock:/var/run/docker.sock
- /var/run/docker.sock:/var/run/docker.sock:ro
- /:/rootfs:ro
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
entrypoint:
- /bin/grafana-agent
- run
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ services:
image: ${AGENT_IMAGE:-docker.io/grafana/agent:latest}
volumes:
- ../config/agent-flow:/etc/agent-config
- /var/run/docker.sock:/var/run/docker.sock
- /var/run/docker.sock:/var/run/docker.sock:ro
- /:/rootfs:ro
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
entrypoint:
- /bin/grafana-agent
- run
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ argument "keep_labels" {
"cluster",
"component",
"container",
"container_name",
"deployment",
"env",
"filename",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,20 @@ discovery.relabel "dr_docker_logs" {
target_label = "pod"
}

rule {
action = "replace"
source_labels = [
"__meta_docker_container_label_com_docker_compose_service",
]
regex = "^(?:;*)?([^;]+).*$"
replacement = "$1"
target_label = "container"
}

rule {
source_labels = ["__meta_docker_container_name"]
regex = "/(.*)"
target_label = "container"
target_label = "container_name"
}

rule {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,17 @@ module.file "mf_job_minio_scrape" {
}
}

module.file "mf_job_integration_scrape" {
filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker/metrics/jobs/integrations.river"
module.file "mf_job_integration_cadvisor" {
filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker/metrics/jobs/integration_cadvisor.river"

arguments {
forward_to = argument.forward_to.value
scrape_interval = "15s"
}
}

module.file "mf_job_integration_node_exporter" {
filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker/metrics/jobs/integration_node_exporter.river"

arguments {
forward_to = argument.forward_to.value
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
Module: integration-job
Description: Runs the embedded cAdvisor exporter against the local Docker daemon,
             scrapes it, and relabels the container metrics before forwarding them
*/
// Required: receivers that every metric produced by this module is sent to.
// It is consumed by the post-scrape relabel component (forward_to = argument.forward_to.value).
argument "forward_to" {
	comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to"
}

// Value stamped onto the "cluster" label of every series.
// Optional: the relabel stage substitutes "docker-compose" when it is not set.
argument "cluster" {
	optional = true
}

// Value stamped onto the "namespace" label and used as the prefix of the default job label.
// Optional: the relabel stage substitutes "monitoring-system" when it is not set.
argument "namespace" {
	optional = true
}

// Optional override for the scrape frequency; the scrape component applies the 60s fallback.
argument "scrape_interval" {
	optional = true
	comment  = "How often to scrape metrics from the targets (default: 60s)"
}

// Optional override for the per-scrape timeout; the scrape component applies the 10s fallback.
argument "scrape_timeout" {
	optional = true
	comment  = "How long before a scrape times out (default: 10s)"
}

/********************************************
* Integrations cAdvisor
********************************************/
// Embedded cAdvisor exporter, pointed at the Docker daemon through its local unix socket.
prometheus.exporter.cadvisor "pec_cadvisor" {
	docker_host = "unix:///var/run/docker.sock"

	// NOTE(review): presumably limits collection to Docker containers - confirm against the exporter docs.
	docker_only     = true
	enabled_metrics = ["cpu", "diskIO", "memory", "network"]

	// The allowlist below only takes effect while store_container_labels is false.
	// https://github.com/google/cadvisor/blob/master/docs/runtime_options.md#cadvisor-runtime-options
	store_container_labels       = false
	allowlisted_container_labels = [
		"com.docker.compose.project",
		"com.docker.compose.service",
		"metrics.agent.grafana.com/job",
		"prometheus.io/job",
	]
}

/********************************************
* Prometheus Scrape Integrations Targets
********************************************/
// Scrapes the cAdvisor exporter's targets and hands the samples to the relabel stage.
prometheus.scrape "ps_cadvisor" {
	targets    = concat(prometheus.exporter.cadvisor.pec_cadvisor.targets)
	forward_to = [prometheus.relabel.pr_cadvisor.receiver]

	// Caller-supplied values take precedence; otherwise fall back to 60s / 10s.
	scrape_interval = coalesce(argument.scrape_interval.value, "60s")
	scrape_timeout  = coalesce(argument.scrape_timeout.value, "10s")

	enable_protobuf_negotiation = true
	scrape_classic_histograms   = true

	clustering {
		enabled = true
	}
}

/********************************************
* Prometheus Metric Relabelings (post-scrape)
********************************************/
// Post-scrape relabeling for cAdvisor series. Rules run top to bottom, so the
// final "job" rule can override the default "job" written earlier.
prometheus.relabel "pr_cadvisor" {
	forward_to = argument.forward_to.value

	// Remove the "id" label from every series.
	rule {
		action = "labeldrop"
		regex  = "id"
	}

	// cluster: caller-supplied value, or "docker-compose" when none is given.
	rule {
		action       = "replace"
		target_label = "cluster"
		replacement  = coalesce(argument.cluster.value, "docker-compose")
	}

	// namespace: caller-supplied value, or "monitoring-system" when none is given.
	rule {
		action       = "replace"
		target_label = "namespace"
		replacement  = coalesce(argument.namespace.value, "monitoring-system")
	}

	// Default job label: "<namespace>/<compose service name>".
	rule {
		action        = "replace"
		source_labels = ["container_label_com_docker_compose_service"]
		regex         = "^(?:;*)?([^;]+).*$"
		target_label  = "job"
		replacement   = coalesce(argument.namespace.value, "monitoring-system") + "/$1"
	}

	// pod: the compose service name.
	rule {
		action        = "replace"
		source_labels = ["container_label_com_docker_compose_service"]
		regex         = "^(?:;*)?([^;]+).*$"
		target_label  = "pod"
		replacement   = "$1"
	}

	// container: the compose service name.
	rule {
		action        = "replace"
		source_labels = ["container_label_com_docker_compose_service"]
		regex         = "^(?:;*)?([^;]+).*$"
		target_label  = "container"
		replacement   = "$1"
	}

	// Containers may declare their own job label value through the
	// "metrics.agent.grafana.com/job" or "prometheus.io/job" container label.
	// The two values are joined with ";" and the first non-empty one wins; when
	// both are empty the regex does not match and the default job is kept.
	rule {
		action        = "replace"
		source_labels = [
			"container_label_metrics_agent_grafana_com_job",
			"container_label_prometheus_io_job",
		]
		separator    = ";"
		regex        = "^(?:;*)?([^;]+).*$"
		target_label = "job"
		replacement  = "$1"
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ argument "forward_to" {
comment = "Must be a list(MetricssReceiver) where collected metrics should be forwarded to"
}

// Value stamped onto the "cluster" label.
// Optional: the relabel stage substitutes "docker-compose" when it is not set.
argument "cluster" {
	optional = true
}

// Value stamped onto the "namespace" label.
// Optional: the relabel stage substitutes "monitoring-system" when it is not set.
argument "namespace" {
	optional = true
}

// Regex of metric names to keep. Optional: when it is not set, the relabel
// stage falls back to its built-in allowlist of node/process metrics.
argument "keep_metrics" {
	optional = true
}

argument "scrape_interval" {
comment = "How often to scrape metrics from the targets (default: 60s)"
optional = true
Expand All @@ -19,15 +31,12 @@ argument "scrape_timeout" {
/********************************************
* Integrations Node Exporter
********************************************/
prometheus.exporter.unix "peu_unix" {
set_collectors = ["cpu"]
disable_collectors = ["diskstats", "mdadm", "textfile", "hwmon"]
}
prometheus.exporter.unix "peu_unix" { }

/********************************************
* Prometheus Scrape Integrations Targets
********************************************/
prometheus.scrape "ps_integrations" {
prometheus.scrape "ps_node_exporter" {
targets = concat(
prometheus.exporter.unix.peu_unix.targets,
)
Expand All @@ -42,26 +51,41 @@ prometheus.scrape "ps_integrations" {
enabled = true
}

forward_to = [prometheus.relabel.pr_integrations.receiver]
forward_to = [prometheus.relabel.pr_node_exporter.receiver]
}

/********************************************
* Prometheus Metric Relabelings (post-scrape)
********************************************/
prometheus.relabel "pr_integrations" {
prometheus.relabel "pr_node_exporter" {
forward_to = argument.forward_to.value

// keep only metrics that match the keep_metrics regex
rule {
source_labels = ["__name__"]
regex = coalesce(argument.keep_metrics.value, "(up|node_exporter_build_info|node_cpu.*|node_memory.*|node_disk.*|node_filesystem.*|process_cpu_seconds_total|process_resident_memory_bytes)")
action = "keep"
}

// Drop node_filesystem* metrics reported for tmpfs mounts.
// __name__ and fstype are joined with "@" so that one regex can match on both.
// The filesystem type is spelled "tmpfs"; the previous "tempfs" never matched,
// which left this rule a no-op.
rule {
	source_labels = ["__name__", "fstype"]
	separator     = "@"
	regex         = "node_filesystem.*@(tmpfs)"
	action        = "drop"
}

// set the cluster label
rule {
source_labels = ["job"]
regex = "integrations/(.*)"
target_label = "pod"
replacement = "${1}"
action = "replace"
replacement = coalesce(argument.cluster.value, "docker-compose")
target_label = "cluster"
}

// set the namespace label
rule {
source_labels = ["job"]
regex = "integrations/(.*)"
target_label = "container"
replacement = "${1}"
action = "replace"
replacement = coalesce(argument.namespace.value, "monitoring-system")
target_label = "namespace"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -306,10 +306,20 @@ discovery.relabel "dr_label_metrics" {
target_label = "pod"
}

rule {
action = "replace"
source_labels = [
"__meta_docker_container_label_com_docker_compose_service",
]
regex = "^(?:;*)?([^;]+).*$"
replacement = "$1"
target_label = "container"
}

rule {
source_labels = ["__meta_docker_container_name"]
regex = "/(.*)"
target_label = "container"
target_label = "container_name"
}

rule {
Expand Down
Loading

0 comments on commit bde0d73

Please sign in to comment.