From 45dcbf4ca062eb63bccff96076f859113ee5e8fa Mon Sep 17 00:00:00 2001 From: Weifeng Wang <qclaogui@gmail.com> Date: Thu, 14 Mar 2024 12:07:36 +0800 Subject: [PATCH] Docker Compose(metrics): Metric Ingestion via Labels Signed-off-by: Weifeng Wang <qclaogui@gmail.com> add metrics-labels-auto-scrape.river Signed-off-by: Weifeng Wang <qclaogui@gmail.com> fix Signed-off-by: Weifeng Wang <qclaogui@gmail.com> update docs Signed-off-by: Weifeng Wang <qclaogui@gmail.com> code clean Signed-off-by: Weifeng Wang <qclaogui@gmail.com> --- .../common/compose-include/minio.yaml | 7 + .../common/config/agent-flow/logs.river | 4 +- .../common/config/agent-flow/metrics.river | 118 +----- .../agent-flow/modules/docker/README.md | 19 +- .../modules/docker/metrics/all.river | 67 +++ .../docker/metrics/metrics-auto-scrape.river | 381 ++++++++++++++++++ .../agent-flow/monolithic-mode-all.river | 80 +--- .../common/config/agent-flow/profiles.river | 8 +- .../common/config/agent-flow/traces.river | 82 +--- .../common/config/nginx/nginx.conf.template | 2 +- .../metrics/docker-compose.yaml | 3 + .../metrics/docker-compose.yaml | 3 + .../metrics/docker-compose.yaml | 3 + 13 files changed, 520 insertions(+), 257 deletions(-) create mode 100644 docker-compose/common/config/agent-flow/modules/docker/metrics/all.river create mode 100644 docker-compose/common/config/agent-flow/modules/docker/metrics/metrics-auto-scrape.river diff --git a/docker-compose/common/compose-include/minio.yaml b/docker-compose/common/compose-include/minio.yaml index 18667d6b..59f3499f 100644 --- a/docker-compose/common/compose-include/minio.yaml +++ b/docker-compose/common/compose-include/minio.yaml @@ -1,8 +1,15 @@ services: minio: + # https://github.com/qclaogui/codelab-monitoring/blob/main/docker-compose/common/config/agent-flow/modules/docker/README.md labels: - logs.agent.grafana.com/scrape=false + - metrics.agent.grafana.com/scrape=true + - metrics.agent.grafana.com/job=minio-job + - 
metrics.agent.grafana.com/path=/minio/v2/metrics/cluster + - metrics.agent.grafana.com/port=9000 + - metrics.agent.grafana.com/interval=15s + - metrics.agent.grafana.com/timeout=10s image: ${MINIO_IMAGE:-docker.io/minio/minio:latest} entrypoint: - sh diff --git a/docker-compose/common/config/agent-flow/logs.river b/docker-compose/common/config/agent-flow/logs.river index ffa6a59c..7b8b1b4d 100644 --- a/docker-compose/common/config/agent-flow/logs.river +++ b/docker-compose/common/config/agent-flow/logs.river @@ -1,7 +1,7 @@ // https://github.com/grafana/agent-configurator logging { - level = "info" + level = coalesce(env("AGENT_LOG_LEVEL"), "info") format = "logfmt" } @@ -10,7 +10,7 @@ logging { ********************************************/ module.file "docker_compose" { - filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker_compose.river" + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker_compose.river" arguments { logs_endpoint = "http://gateway:3100" diff --git a/docker-compose/common/config/agent-flow/metrics.river b/docker-compose/common/config/agent-flow/metrics.river index 59d44c62..311df346 100644 --- a/docker-compose/common/config/agent-flow/metrics.river +++ b/docker-compose/common/config/agent-flow/metrics.river @@ -1,10 +1,14 @@ // https://github.com/grafana/agent-configurator logging { - level = "warn" + level = coalesce(env("AGENT_LOG_LEVEL"), "info") format = "logfmt" } +/******************************************** + * LGTMP Receiver provider + ********************************************/ + module.file "docker_compose" { filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker_compose.river" @@ -13,117 +17,15 @@ module.file "docker_compose" { } } -discovery.relabel "containers" { - targets = module.file.docker_compose.exports.relabelings_common.output -} - /******************************************** * Metrics ********************************************/ -prometheus.exporter.unix "containers" { - 
set_collectors = ["cpu"] - disable_collectors = ["diskstats", "mdadm", "textfile", "hwmon"] -} - -prometheus.scrape "integrations" { - targets = concat( - prometheus.exporter.unix.containers.targets, - ) - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - scrape_interval = "15s" - - clustering { - enabled = true - } - - forward_to = [prometheus.relabel.integrations.receiver] -} +module.file "metrics_primary" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker/metrics/all.river" -prometheus.scrape "containers" { - targets = discovery.relabel.containers.output - scrape_interval = "15s" - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - clustering { - enabled = true - } - - forward_to = [module.file.docker_compose.exports.metrics_receiver] -} - -prometheus.scrape "minio" { - targets = [{"__address__" = "minio:9000", "job" = "minio-job"}] - - scrape_interval = "15s" - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - clustering { - enabled = true - } - metrics_path = "/minio/v2/metrics/cluster" - - forward_to = [prometheus.relabel.integrations.receiver] -} - -prometheus.relabel "integrations" { - rule { - source_labels = ["job"] - regex = "(integrations|monitoring-system)/(.*)" - target_label = "pod" - replacement = "${2}" - } - - rule { - source_labels = ["job"] - regex = "(integrations|monitoring-system)/(.*)" - target_label = "container" - replacement = "${2}" - } - - forward_to = [module.file.docker_compose.exports.metrics_receiver] -} - -/******************************************** - * Otelcol for metrics - ********************************************/ - -otelcol.receiver.otlp "containers" { - grpc { - endpoint = "0.0.0.0:4317" - } - - http { - endpoint = "0.0.0.0:4318" - } - - output { - metrics = [otelcol.processor.batch.containers.input] - } -} - -otelcol.processor.batch "containers" { - output { - metrics = 
[otelcol.processor.memory_limiter.containers.input] - } -} - -otelcol.processor.memory_limiter "containers" { - check_interval = "1s" - limit = "256MiB" - - output { - metrics = [otelcol.exporter.prometheus.containers.input] + arguments { + forward_to = [module.file.docker_compose.exports.metrics_receiver] + clustering = true } } - -otelcol.exporter.prometheus "containers" { - forward_to = [module.file.docker_compose.exports.metrics_receiver] -} diff --git a/docker-compose/common/config/agent-flow/modules/docker/README.md b/docker-compose/common/config/agent-flow/modules/docker/README.md index 60af20e5..a1d0e6fe 100644 --- a/docker-compose/common/config/agent-flow/modules/docker/README.md +++ b/docker-compose/common/config/agent-flow/modules/docker/README.md @@ -6,7 +6,7 @@ The following service labels are supported: | Label | Description | | :--------------- | :-----------| -| `logs.agent.grafana.com/scrape` | Allow a service to declare it's logs should be dropped. | +| `logs.agent.grafana.com/scrape` | Allow a service to declare it's logs should be ingested (default is `true`). | | `logs.agent.grafana.com/tenant` | Allow a service to override the tenant for its logs. | | `logs.agent.grafana.com/log-format` | If specified additional processing is performed to extract details based on the specified format. This value can be a comma-delimited list, in the instances a pod may have multiple containers. The following formats are currently supported: <ul><li>common-log<li>donet<li>istio<li>json<li>klog<li>log4j-json<li>logfmt<li>otel<li>postgres<li>python<li>spring-boot<li>syslog<li>zerolog</ul> | | `logs.agent.grafana.com/scrub-level` | Boolean whether or not the level should be dropped from the log message (as it is a label). 
| @@ -22,3 +22,20 @@ The following service labels are supported: | `logs.agent.grafana.com/mask-ipv4` | Boolean whether or not to mask IPv4 addresses in the log line, if true the data will be masked as`*ipv4*salt*` | | `logs.agent.grafana.com/mask-ipv6` | Boolean whether or not to mask IPv6 addresses in the log line, if true the data will be masked as `*ipv6*salt*` | | `logs.agent.grafana.com/mask-phone` | Boolean whether or not to mask phone numbers in the log line, if true the data will be masked as `*phone*salt*` | + +--- + +## Metrics + +The following service labels are supported for gathering metrics from docker compose services: + +| Label | Description | +| :--------------- | :-----------| +| `metrics.agent.grafana.com/scrape` <br>or<br> `prometheus.io/scrape` | Boolean whether or not to scrape the service for metrics (default is `true`).| +| `metrics.agent.grafana.com/scheme` <br>or<br> `prometheus.io/scheme` | The default scraping scheme is `http`; only `http` is supported for now. | +| `metrics.agent.grafana.com/path` <br>or<br> `prometheus.io/path` | The default path to scrape is `/metrics`; this can be specified as a single value, which overrides the scrape path used for all ports attached to the target. | +| `metrics.agent.grafana.com/port` <br>or<br> `prometheus.io/port` | The default `port` to scrape is the target port; this can be specified as a single value, which overrides the scrape port used for all ports attached to the target. Note that even if a target had multiple targets, the relabel_config targets are deduped before scraping. | +| `metrics.agent.grafana.com/tenant` | The tenant their metrics should be sent to. This does not necessarily have to be the actual tenantId; it can be a friendly name as well that is simply used to determine if the metrics should be gathered for the current tenant. | +| `metrics.agent.grafana.com/job` <br>or<br> `prometheus.io/job` | The job label value to use when collecting their metrics. 
However, it is common to use an integration or community project where rules / dashboards are provided for you. Oftentimes, these provided assets use hard-coded values for a job label, i.e. `...{job="integrations/kubernetes/cadvisor"...}` or `...{job="minio-job"...}`; setting this label to that value will allow the provided asset to work out of the box. | +| `metrics.agent.grafana.com/interval` <br>or<br> `prometheus.io/interval` | The default interval to scrape is `15s`, this can be overridden. | +| `metrics.agent.grafana.com/timeout` <br>or<br> `prometheus.io/timeout` | The default timeout for scraping is `10s`, this can be overridden. | diff --git a/docker-compose/common/config/agent-flow/modules/docker/metrics/all.river b/docker-compose/common/config/agent-flow/modules/docker/metrics/all.river new file mode 100644 index 00000000..408070a3 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/metrics/all.river @@ -0,0 +1,67 @@ +/* +Module: metrics-all +Description: Wrapper module to include all Docker containers metric modules +*/ +argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" +} + +argument "tenant" { + comment = "The tenant to filter metrics to. This does not have to be the tenantId, this is the value to look for in the metrics.agent.grafana.com/tenant label, and this can be a regex." 
+ optional = true +} + +argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: true)" +} + +module.file "mf_metrics_auto_scrape" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker/metrics/metrics-auto-scrape.river" + + arguments { + forward_to = argument.forward_to.value + tenant = coalesce(argument.tenant.value, ".*") + clustering = coalesce(argument.clustering.value, true) // bool fallback: the inner module feeds this into clustering.enabled + } +} + +prometheus.exporter.unix "peu_containers" { + set_collectors = ["cpu"] + disable_collectors = ["diskstats", "mdadm", "textfile", "hwmon"] +} + +prometheus.scrape "pc_integrations" { + targets = concat( + prometheus.exporter.unix.peu_containers.targets, + ) + + enable_protobuf_negotiation = true + scrape_classic_histograms = true + + scrape_interval = "15s" + + clustering { + enabled = coalesce(argument.clustering.value, true) // honour the module argument, same default as before + } + + forward_to = [prometheus.relabel.pr_integrations.receiver] +} + +prometheus.relabel "pr_integrations" { + rule { + source_labels = ["job"] + regex = "integrations/(.*)" + target_label = "pod" + replacement = "${1}" // the regex has a single capture group: the part after "integrations/" + } + + rule { + source_labels = ["job"] + regex = "integrations/(.*)" + target_label = "container" + replacement = "${1}" // the regex has a single capture group: the part after "integrations/" + } + + forward_to = argument.forward_to.value +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/metrics/metrics-auto-scrape.river b/docker-compose/common/config/agent-flow/modules/docker/metrics/metrics-auto-scrape.river new file mode 100644 index 00000000..8f325810 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/metrics/metrics-auto-scrape.river @@ -0,0 +1,381 @@ +/* 
+Module(metrics): Docker Containers Auto-Scraping +Description: Scrapes targets for metrics based on Docker Containers labels + +Note: Every argument except for "forward_to" is optional, and does have a defined default value. 
However, the values for these + arguments are not defined using the default = " ... " argument syntax, but rather using the coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax will + not override the value passed in, whereas coalesce() will return the first non-null value. + + +The following labels are available: + + metrics.agent.grafana.com/scrape: true + +the default scraping scheme is http, only http is supported for now. + + metrics.agent.grafana.com/scheme: http +or + prometheus.io/scheme: http + +the default path to scrape is /metrics, this can be specified as a single value which would override the scrape path being used +for all ports attached to the target: + + metrics.agent.grafana.com/path: /metrics/some_path + +the default port to scrape is the target port, this can be specified as a single value which would override the scrape port being +used for all ports attached to the target, note that even if a target had multiple targets, the relabel_config targets are +deduped before scraping: + + metrics.agent.grafana.com/port: 8080 +or + prometheus.io/port: 8080 + +the default interval to scrape is 15s, this can be specified as a single value which would override the scrape interval being used +for all ports attached to the target: + + metrics.agent.grafana.com/interval: 15s +or + prometheus.io/interval: 15s + + +the default timeout for scraping is 10s, this can be specified as a single value which would override the scrape timeout being +used for all ports attached to the target: + + metrics.agent.grafana.com/timeout: 10s +or + prometheus.io/timeout: 10s + +the default job is namespace/{{ service name }}; a different job name may be required because of a set of dashboards, rules, +etc. 
to support this there is a job annotation which will override the default value: + + metrics.agent.grafana.com/job: integrations/kubernetes/kube-state-metrics +or + prometheus.io/job: integrations/kubernetes/kube-state-metrics +*/ +argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" +} + +argument "cluster" { + optional = true // value of the "cluster" label set on every target (default: "docker-compose") +} + +argument "namespace" { + optional = true // value of the "namespace" label and prefix of the default job (default: "monitoring-system") +} + +argument "tenant" { + comment = "The tenant to write metrics to. This does not have to be the tenantId, this is the value to look for in the metrics.agent.grafana.com/tenant label, and this can be a regex. (default: (.*))" + optional = true +} + +argument "keep_metrics" { + comment = "A regex of metrics to keep (default: (.+))" + optional = true +} + +argument "drop_metrics" { + comment = "A regex of metrics to drop (default: \"\")" + optional = true +} + +argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 15s)" + optional = true +} + +argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true +} + +argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true +} + +// get the available containers. 
+discovery.docker "dd_metrics" { + host = "unix:///var/run/docker.sock" + + filter { + name = "status" + values = ["running"] + } +} + +discovery.relabel "dr_docker_metrics" { + targets = discovery.docker.dd_metrics.targets + + /**************************************************************************************************************** + * Handle Discovers From Docker Engine Containers Targets to Keep or Drop + * https://grafana.com/docs/agent/latest/flow/reference/components/discovery.docker/#exported-fields + ****************************************************************************************************************/ + // allow resources to declare their metrics scraped or not + // Example Annotation: + // metrics.agent.grafana.com/scrape: false + // + // the label prometheus.io/service-monitor: "false" is a common label for headless services, when performing endpoint + // service discovery, if there is both a load-balanced service and headless service, this can result in duplicate + // scrapes if the name of the service is attached as a label. 
any targets with this label or annotation set should be dropped + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_metrics_agent_grafana_com_scrape", + "__meta_docker_container_label_prometheus_io_scrape", + ] + separator = ";" + regex = "^(?:;*)?(true|false).*$" + replacement = "$1" + target_label = "__tmp_scrape" + } + + // drop any targets that have scrape: false + rule { + action = "drop" + source_labels = ["__tmp_scrape"] + regex = "false" + } + + // allow resources to declare the protocol to use when collecting metrics, the default value is "http", + // Example Annotation: + // metrics.agent.grafana.com/scheme: http + rule { + action = "replace" + replacement = "http" + target_label = "__scheme__" + } + + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_metrics_agent_grafana_com_scheme", + "__meta_docker_container_label_prometheus_io_scheme", + ] + separator = ";" + regex = "^(?:;*)?(https?).*$" + replacement = "$1" + target_label = "__scheme__" + } + + // allow resources to declare the tenant their metrics should be sent to, + // Example Annotation: + // metrics.agent.grafana.com/tenant: primary + // + // Note: This does not necessarily have to be the actual tenantId, it can be a friendly name as well that is simply used + // to determine if the metrics should be gathered for the current tenant + rule { + action = "keep" + source_labels = [ + "__meta_docker_container_label_metrics_agent_grafana_com_tenant", + "__meta_docker_container_label_prometheus_io_tenant", + ] + regex = "^(?:;*)?(" + coalesce(argument.tenant.value, ".*") + ")(?:;.*)?$" // the two label values are joined with ";", so tolerate the separator around the tenant + } + + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_metrics_agent_grafana_com_port", + "__meta_docker_container_label_prometheus_io_port", + ] + separator = ";" + regex = "^(?:;*)?(\\d+).*$" + replacement = "$1" + target_label = 
"__tmp_metrics_port" + } + + // allow resources to declare the port to use when collecting 
metrics, the default value is the discovered port from + // Example Annotation: + // metrics.agent.grafana.com/port: 9090 + rule { + action = "replace" + source_labels = [ + "__address__", + "__tmp_metrics_port", + ] + separator = ";" + regex = "^([^:]+)(?::\\d+)?;(\\d+)$" + replacement = "$1:$2" + target_label = "__address__" + } + + // allow resources to declare the path to use when collecting their metrics, the default value is "/metrics", + // Example Annotation: + // metrics.agent.grafana.com/path: /metrics/foo + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_metrics_agent_grafana_com_path", + "__meta_docker_container_label_prometheus_io_path", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "__metrics_path__" + } + + // allow resources to declare how often their metrics should be collected, the default value is 15s, + // the following duration formats are supported (ms|s|m|h|d): + // Example Annotation: + // metrics.agent.grafana.com/interval: 15s + rule { + action = "replace" + replacement = coalesce(argument.scrape_interval.value, "15s") + target_label = "__scrape_interval__" + } + + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_metrics_agent_grafana_com_interval", + "__meta_docker_container_label_prometheus_io_interval", + ] + separator = ";" + regex = "^(?:;*)?(\\d+(ms|s|m|h|d)).*$" // "ms" must come before "m", otherwise 500ms is captured as 500m + replacement = "$1" + target_label = "__scrape_interval__" + } + + // allow resources to declare the timeout of the scrape request, the default value is 10s, + // the following duration formats are supported (ms|s|m|h|d): + // Example Annotation: + // metrics.agent.grafana.com/timeout: 10s + rule { + action = "replace" + replacement = coalesce(argument.scrape_timeout.value, "10s") + target_label = "__scrape_timeout__" + } + + rule { + action = "replace" + source_labels = [ + 
"__meta_docker_container_label_metrics_agent_grafana_com_timeout", 
+ "__meta_docker_container_label_prometheus_io_timeout", + ] + separator = ";" + regex = "^(?:;*)?(\\d+(ms|s|m|h|d)).*$" // "ms" must come before "m", otherwise 500ms is captured as 500m + replacement = "$1" + target_label = "__scrape_timeout__" + } + + /**************************************************************************************************************** + * Handle Setting Common Labels + ****************************************************************************************************************/ + // set the cluster label + rule { + action = "replace" + replacement = coalesce(argument.cluster.value, "docker-compose") + target_label = "cluster" + } + + // set the namespace label + rule { + action = "replace" + replacement = coalesce(argument.namespace.value, "monitoring-system") + target_label = "namespace" + } + + // set a default job label to be the namespace/service_name + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_com_docker_compose_service", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = coalesce(argument.namespace.value, "monitoring-system") + "/$1" + target_label = "job" + } + + // allow resources to declare the job label value to use when collecting their metrics, the default value is namespace/service_name, + // Example Annotation: + // metrics.agent.grafana.com/job: integrations/kubernetes/cadvisor + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_metrics_agent_grafana_com_job", + "__meta_docker_container_label_prometheus_io_job", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "job" + } + + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_com_docker_compose_service", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "pod" + } + + rule { + source_labels = ["__meta_docker_container_name"] + regex = "/(.*)" + target_label = "container" + } + + rule { + action = "replace" + 
source_labels = [ + "__meta_docker_container_label_com_docker_compose_service", 
+ "__meta_docker_container_label_app", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } +} + +// only keep http targets +discovery.relabel "dr_keep_http_targets" { + targets = discovery.relabel.dr_docker_metrics.output + + rule { + action = "keep" + source_labels = ["__scheme__"] + regex = "http" + } +} + +// scrape http only targets +prometheus.scrape "pc_docker_metrics" { + job_name = "label-metrics-http" + targets = discovery.relabel.dr_keep_http_targets.output + scheme = "http" + scrape_interval = coalesce(argument.scrape_interval.value, "15s") // same default as the scrape_interval argument comment and the __scrape_interval__ relabel rule + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + enable_protobuf_negotiation = true + scrape_classic_histograms = true + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + + forward_to = [prometheus.relabel.pr_docker_metrics.receiver] +} + +// perform generic relabeling using keep_metrics and drop_metrics +prometheus.relabel "pr_docker_metrics" { + forward_to = argument.forward_to.value + + // keep only metrics that match the keep_metrics regex + rule { + action = "keep" + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + } + + // drop metrics that match the drop_metrics regex + rule { + action = "drop" + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "") + } +} diff --git a/docker-compose/common/config/agent-flow/monolithic-mode-all.river b/docker-compose/common/config/agent-flow/monolithic-mode-all.river index 0233d9b6..c1c319f7 100644 --- a/docker-compose/common/config/agent-flow/monolithic-mode-all.river +++ b/docker-compose/common/config/agent-flow/monolithic-mode-all.river @@ -1,7 +1,7 @@ // https://github.com/grafana/agent-configurator logging { - level = "info" + level = coalesce(env("AGENT_LOG_LEVEL"), "info") format = "logfmt" } @@ -15,84 +15,20 @@ tracing { 
********************************************/ module.file "docker_compose" { - filename = 
env("AGENT_CONFIG_FOLDER") + "/modules/docker_compose.river" -} - -discovery.relabel "containers" { - targets = module.file.docker_compose.exports.relabelings_common.output + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker_compose.river" } /******************************************** * Metrics ********************************************/ -prometheus.exporter.unix "containers" { - set_collectors = ["cpu"] - disable_collectors = ["diskstats", "mdadm", "textfile", "hwmon"] -} - -prometheus.scrape "integrations" { - targets = concat( - prometheus.exporter.unix.containers.targets, - ) - scrape_interval = "15s" - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - clustering { - enabled = true - } - - forward_to = [prometheus.relabel.integrations.receiver] -} - -prometheus.scrape "containers" { - targets = discovery.relabel.containers.output - scrape_interval = "15s" - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - clustering { - enabled = true - } - - forward_to = [module.file.docker_compose.exports.metrics_receiver] -} - -prometheus.scrape "minio" { - targets = [{"__address__" = "minio:9000", "job" = "minio-job"}] - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - clustering { - enabled = true - } +module.file "metrics_primary" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker/metrics/all.river" - scrape_interval = "15s" - metrics_path = "/minio/v2/metrics/cluster" - - forward_to = [prometheus.relabel.integrations.receiver] -} - -prometheus.relabel "integrations" { - rule { - source_labels = ["job"] - regex = "(integrations|monitoring-system)/(.*)" - target_label = "pod" - replacement = "${2}" - } - - rule { - source_labels = ["job"] - regex = "(integrations|monitoring-system)/(.*)" - target_label = "container" - replacement = "${2}" + arguments { + forward_to = 
[module.file.docker_compose.exports.metrics_receiver] + clustering = true } - - forward_to = [module.file.docker_compose.exports.metrics_receiver] } /******************************************** @@ -100,7 +36,7 @@ prometheus.relabel "integrations" { ********************************************/ module.file "logs_primary" { - filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/all.river" + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker/logs/all.river" arguments { forward_to = [module.file.docker_compose.exports.logs_receiver] diff --git a/docker-compose/common/config/agent-flow/profiles.river b/docker-compose/common/config/agent-flow/profiles.river index 038fa70d..c0d87c5c 100644 --- a/docker-compose/common/config/agent-flow/profiles.river +++ b/docker-compose/common/config/agent-flow/profiles.river @@ -1,12 +1,16 @@ // https://github.com/grafana/agent-configurator logging { - level = "warn" + level = coalesce(env("AGENT_LOG_LEVEL"), "info") format = "logfmt" } +/******************************************** + * LGTMP Receiver provider + ********************************************/ + module.file "docker_compose" { - filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker_compose.river" + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker_compose.river" arguments { profiles_endpoint = "http://gateway:4040" diff --git a/docker-compose/common/config/agent-flow/traces.river b/docker-compose/common/config/agent-flow/traces.river index cb8a7600..56fefb42 100644 --- a/docker-compose/common/config/agent-flow/traces.river +++ b/docker-compose/common/config/agent-flow/traces.river @@ -1,7 +1,7 @@ // https://github.com/grafana/agent-configurator logging { - level = "warn" + level = coalesce(env("AGENT_LOG_LEVEL"), "info") format = "logfmt" } @@ -10,8 +10,12 @@ tracing { write_to = [otelcol.processor.batch.containers.input] } +/******************************************** + * LGTMP Receiver 
provider + ********************************************/ + module.file "docker_compose" { - filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker_compose.river" + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker_compose.river" arguments { metrics_endpoint = "http://gateway:8080" @@ -19,81 +23,17 @@ module.file "docker_compose" { } } -discovery.relabel "containers" { - targets = module.file.docker_compose.exports.relabelings_common.output -} - /******************************************** * Metrics ********************************************/ -prometheus.exporter.unix "containers" { - set_collectors = ["cpu"] - disable_collectors = ["diskstats", "mdadm", "textfile", "hwmon"] -} - -prometheus.scrape "integrations" { - targets = concat( - prometheus.exporter.unix.containers.targets, - ) - scrape_interval = "15s" - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - clustering { - enabled = true - } - - forward_to = [prometheus.relabel.integrations.receiver] -} - -prometheus.scrape "containers" { - targets = discovery.relabel.containers.output - scrape_interval = "15s" - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - clustering { - enabled = true - } +module.file "metrics_primary" { + filename = coalesce(env("AGENT_CONFIG_FOLDER"), "/etc/agent-config") + "/modules/docker/metrics/all.river" - forward_to = [module.file.docker_compose.exports.metrics_receiver] -} - -prometheus.scrape "minio" { - targets = [{"__address__" = "minio:9000", "job" = "minio-job"}] - - scrape_interval = "15s" - - enable_protobuf_negotiation = true - scrape_classic_histograms = true - - clustering { - enabled = true - } - metrics_path = "/minio/v2/metrics/cluster" - - forward_to = [prometheus.relabel.integrations.receiver] -} - -prometheus.relabel "integrations" { - rule { - source_labels = ["job"] - regex = "(integrations|monitoring-system)/(.*)" - target_label = "pod" - replacement = "${2}" - } - - 
rule { - source_labels = ["job"] - regex = "(integrations|monitoring-system)/(.*)" - target_label = "container" - replacement = "${2}" + arguments { + forward_to = [module.file.docker_compose.exports.metrics_receiver] + clustering = true } - - forward_to = [module.file.docker_compose.exports.metrics_receiver] } /******************************************** diff --git a/docker-compose/common/config/nginx/nginx.conf.template b/docker-compose/common/config/nginx/nginx.conf.template index 5f6863c0..27da32e1 100644 --- a/docker-compose/common/config/nginx/nginx.conf.template +++ b/docker-compose/common/config/nginx/nginx.conf.template @@ -28,7 +28,7 @@ http { '"$http_user_agent" "$http_x_forwarded_for"'; map $status $loggable { - ~^[3] 0; + ~^[23] 0; default 1; } diff --git a/docker-compose/microservices-mode/metrics/docker-compose.yaml b/docker-compose/microservices-mode/metrics/docker-compose.yaml index ec39ab6d..81f7b3b7 100644 --- a/docker-compose/microservices-mode/metrics/docker-compose.yaml +++ b/docker-compose/microservices-mode/metrics/docker-compose.yaml @@ -13,6 +13,9 @@ include: services: gateway: + # https://github.com/qclaogui/codelab-monitoring/blob/main/docker-compose/common/config/agent-flow/modules/docker/README.md + labels: + - metrics.agent.grafana.com/scrape=false depends_on: distributor: condition: service_healthy diff --git a/docker-compose/monolithic-mode/metrics/docker-compose.yaml b/docker-compose/monolithic-mode/metrics/docker-compose.yaml index 9f58e8f3..12dd0a31 100644 --- a/docker-compose/monolithic-mode/metrics/docker-compose.yaml +++ b/docker-compose/monolithic-mode/metrics/docker-compose.yaml @@ -13,6 +13,9 @@ include: services: gateway: + # https://github.com/qclaogui/codelab-monitoring/blob/main/docker-compose/common/config/agent-flow/modules/docker/README.md + labels: + - metrics.agent.grafana.com/scrape=false depends_on: mimir: condition: service_healthy diff --git a/docker-compose/read-write-mode/metrics/docker-compose.yaml 
b/docker-compose/read-write-mode/metrics/docker-compose.yaml index 413b2209..8cda706b 100644 --- a/docker-compose/read-write-mode/metrics/docker-compose.yaml +++ b/docker-compose/read-write-mode/metrics/docker-compose.yaml @@ -13,6 +13,9 @@ include: services: gateway: + # https://github.com/qclaogui/codelab-monitoring/blob/main/docker-compose/common/config/agent-flow/modules/docker/README.md + labels: + - metrics.agent.grafana.com/scrape=false depends_on: mimir-write: condition: service_healthy