From 91365491f74b02cf2dba2d988b641e694d759681 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Fri, 13 Sep 2024 16:20:17 +0530 Subject: [PATCH] [8.x](backport #40411) {,x-pack/}metricbeat/module/prometheus/{collector,remote_write}: Add `metrics_count` to Prometheus module if `metrics_count` is `true` (#40784) * {,x-pack/}metricbeat/module/prometheus/{collector,remote_write}: Add `metrics_count` to Prometheus module if `metrics_count` is `true` (#40411) Co-authored-by: Ishleen Kaur <102962586+ishleenk17@users.noreply.github.com> Co-authored-by: Aman <38116245+devamanv@users.noreply.github.com> (cherry picked from commit 3c9563c813886d11c26f0f4a0de625f8a63d3912) * Update CHANGELOG.next.asciidoc --------- Co-authored-by: subham sarkar --- CHANGELOG.next.asciidoc | 1 + metricbeat/docs/fields.asciidoc | 10 + metricbeat/docs/modules/prometheus.asciidoc | 6 + metricbeat/metricbeat.reference.yml | 6 + .../module/prometheus/_meta/config.epr.yml | 6 + metricbeat/module/prometheus/_meta/config.yml | 6 + metricbeat/module/prometheus/_meta/fields.yml | 4 + .../prometheus/collector/_meta/data.json | 9 +- .../collector/_meta/testdata/config.yml | 2 + .../_meta/testdata/docs.plain-expected.json | 18 +- .../duplicate-metrics.plain-expected.json | 23 +- .../etcd-3.3.10-partial.plain-expected.json | 168 ++++---- .../metrics-with-naninf.plain-expected.json | 83 ++-- ...ometheus-2.6.0-partial.plain-expected.json | 161 ++++---- .../module/prometheus/collector/collector.go | 57 ++- .../prometheus/collector/collector_test.go | 141 +++++++ .../module/prometheus/collector/config.go | 1 + metricbeat/module/prometheus/fields.go | 2 +- .../module/prometheus/remote_write/config.go | 7 +- .../module/prometheus/remote_write/data.go | 47 ++- .../prometheus/remote_write/remote_write.go | 7 +- .../remote_write/remote_write_test.go | 187 ++++++++- metricbeat/modules.d/prometheus.yml.disabled | 6 + x-pack/metricbeat/metricbeat.reference.yml | 12 + 
.../module/prometheus/_meta/config.yml | 6 + .../prometheus/collector/_meta/data.json | 3 +- .../collector/_meta/testdata/config.yml | 1 + .../_meta/testdata/docs.plain-expected.json | 56 +-- .../metrics-with-naninf.plain-expected.json | 90 +++-- ...ometheus-2.6.0-partial.plain-expected.json | 365 +++++++++--------- .../prometheus/collector/collector_test.go | 125 +++++- .../module/prometheus/remote_write/config.go | 1 + .../module/prometheus/remote_write/data.go | 38 +- .../prometheus/remote_write/remote_write.go | 4 +- .../remote_write/remote_write_test.go | 243 ++++++++++++ .../modules.d/prometheus.yml.disabled | 6 + 36 files changed, 1422 insertions(+), 486 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 7bfe91c618f..f09b4fd0a24 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -328,6 +328,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Add new metricset network for the vSphere module. {pull}40559[40559] - Add new metricset resourcepool for the vSphere module. {pull}40456[40456] - Log the total time taken for GCP `ListTimeSeries` and `AggregatedList` requests {pull}40661[40661] +- Add `metrics_count` to Prometheus module if `metrics_count: true` is set. {pull}40411[40411] *Metricbeat* diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 8f3ccce6fff..7748bf87d44 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -58258,6 +58258,16 @@ Stats scraped from a Prometheus endpoint. +*`metrics_count`*:: ++ +-- +Number of metrics per Elasticsearch document. 
+ + +type: long + +-- + *`prometheus.labels.*`*:: + diff --git a/metricbeat/docs/modules/prometheus.asciidoc b/metricbeat/docs/modules/prometheus.asciidoc index 5072a66426f..ca05a6222e4 100644 --- a/metricbeat/docs/modules/prometheus.asciidoc +++ b/metricbeat/docs/modules/prometheus.asciidoc @@ -50,6 +50,9 @@ metricbeat.modules: #username: "user" #password: "secret" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # This can be used for service account based authorization: #bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token #ssl.certificate_authorities: @@ -62,6 +65,9 @@ metricbeat.modules: # host: "localhost" # port: "9201" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Secure settings for the server using TLS/SSL: #ssl.certificate: "/etc/pki/server/cert.pem" #ssl.key: "/etc/pki/server/cert.key" diff --git a/metricbeat/metricbeat.reference.yml b/metricbeat/metricbeat.reference.yml index 890031ddb4c..288ad96d29c 100644 --- a/metricbeat/metricbeat.reference.yml +++ b/metricbeat/metricbeat.reference.yml @@ -888,6 +888,9 @@ metricbeat.modules: #username: "user" #password: "secret" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # This can be used for service account based authorization: #bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token #ssl.certificate_authorities: @@ -900,6 +903,9 @@ metricbeat.modules: # host: "localhost" # port: "9201" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Secure settings for the server using TLS/SSL: #ssl.certificate: "/etc/pki/server/cert.pem" #ssl.key: "/etc/pki/server/cert.key" diff --git a/metricbeat/module/prometheus/_meta/config.epr.yml b/metricbeat/module/prometheus/_meta/config.epr.yml index a2b348cfd70..ae67ed11efe 100644 --- 
a/metricbeat/module/prometheus/_meta/config.epr.yml +++ b/metricbeat/module/prometheus/_meta/config.epr.yml @@ -10,6 +10,9 @@ username: "user" password: "secret" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # This can be used for service account based authorization: bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token ssl.certificate_authorities: @@ -28,6 +31,9 @@ host: "localhost" port: "9201" + # Count number of metrics present in Elasticsearch document (default: false) + metrics_count: false + # Secure settings for the server using TLS/SSL: ssl.enabled: false ssl.certificate: "/etc/pki/server/cert.pem" diff --git a/metricbeat/module/prometheus/_meta/config.yml b/metricbeat/module/prometheus/_meta/config.yml index 01b87776e4c..d66c424373b 100644 --- a/metricbeat/module/prometheus/_meta/config.yml +++ b/metricbeat/module/prometheus/_meta/config.yml @@ -10,6 +10,9 @@ #username: "user" #password: "secret" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # This can be used for service account based authorization: #bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token #ssl.certificate_authorities: @@ -22,6 +25,9 @@ # host: "localhost" # port: "9201" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Secure settings for the server using TLS/SSL: #ssl.certificate: "/etc/pki/server/cert.pem" #ssl.key: "/etc/pki/server/cert.key" diff --git a/metricbeat/module/prometheus/_meta/fields.yml b/metricbeat/module/prometheus/_meta/fields.yml index eaeb1070386..70233701b46 100644 --- a/metricbeat/module/prometheus/_meta/fields.yml +++ b/metricbeat/module/prometheus/_meta/fields.yml @@ -6,6 +6,10 @@ release: ga settings: ["ssl", "http"] fields: + - name: metrics_count + type: long + description: > + Number of metrics per Elasticsearch document. 
- name: prometheus type: group fields: diff --git a/metricbeat/module/prometheus/collector/_meta/data.json b/metricbeat/module/prometheus/collector/_meta/data.json index fffa17833ec..70aa9fb12ed 100644 --- a/metricbeat/module/prometheus/collector/_meta/data.json +++ b/metricbeat/module/prometheus/collector/_meta/data.json @@ -5,20 +5,23 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 2, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "job": "prometheus" + "job": "prometheus", + "listener_name": "http" }, "metrics": { - "up": 1 + "net_conntrack_listener_conn_accepted_total": 3, + "net_conntrack_listener_conn_closed_total": 0 } }, "service": { "address": "127.0.0.1:55555", "type": "prometheus" } -} +} \ No newline at end of file diff --git a/metricbeat/module/prometheus/collector/_meta/testdata/config.yml b/metricbeat/module/prometheus/collector/_meta/testdata/config.yml index 0301667e940..8bd984afe21 100644 --- a/metricbeat/module/prometheus/collector/_meta/testdata/config.yml +++ b/metricbeat/module/prometheus/collector/_meta/testdata/config.yml @@ -2,3 +2,5 @@ type: http url: "/metrics" suffix: plain remove_fields_from_comparison: ["prometheus.labels.instance"] +module: + metrics_count: true diff --git a/metricbeat/module/prometheus/collector/_meta/testdata/docs.plain-expected.json b/metricbeat/module/prometheus/collector/_meta/testdata/docs.plain-expected.json index bcf2489c878..de0207ba219 100644 --- a/metricbeat/module/prometheus/collector/_meta/testdata/docs.plain-expected.json +++ b/metricbeat/module/prometheus/collector/_meta/testdata/docs.plain-expected.json @@ -5,17 +5,20 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 2, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:41103", - "job": "prometheus" + "instance": "127.0.0.1:61483", + "job": "prometheus", + "listener_name": "http" }, "metrics": { - "up": 1 + 
"net_conntrack_listener_conn_accepted_total": 3, + "net_conntrack_listener_conn_closed_total": 0 } }, "service": { @@ -29,19 +32,18 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:41103", - "job": "prometheus", - "listener_name": "http" + "instance": "127.0.0.1:61483", + "job": "prometheus" }, "metrics": { - "net_conntrack_listener_conn_accepted_total": 3, - "net_conntrack_listener_conn_closed_total": 0 + "up": 1 } }, "service": { diff --git a/metricbeat/module/prometheus/collector/_meta/testdata/duplicate-metrics.plain-expected.json b/metricbeat/module/prometheus/collector/_meta/testdata/duplicate-metrics.plain-expected.json index 4380af72d6f..f999c5175f3 100644 --- a/metricbeat/module/prometheus/collector/_meta/testdata/duplicate-metrics.plain-expected.json +++ b/metricbeat/module/prometheus/collector/_meta/testdata/duplicate-metrics.plain-expected.json @@ -5,18 +5,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:44633", + "instance": "127.0.0.1:61485", "job": "prometheus", - "name": "PS Scavenge" + "name": "PS MarkSweep" }, "metrics": { - "base_gc_total_total": 34 + "base_gc_total_total": 4 } }, "service": { @@ -30,18 +31,18 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:44633", - "job": "prometheus", - "name": "PS MarkSweep" + "instance": "127.0.0.1:61485", + "job": "prometheus" }, "metrics": { - "base_gc_total_total": 4 + "up": 1 } }, "service": { @@ -55,17 +56,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:44633", - "job": "prometheus" + 
"instance": "127.0.0.1:61485", + "job": "prometheus", + "name": "PS Scavenge" }, "metrics": { - "up": 1 + "base_gc_total_total": 34 } }, "service": { diff --git a/metricbeat/module/prometheus/collector/_meta/testdata/etcd-3.3.10-partial.plain-expected.json b/metricbeat/module/prometheus/collector/_meta/testdata/etcd-3.3.10-partial.plain-expected.json index a811dc351ac..c224cf8eb63 100644 --- a/metricbeat/module/prometheus/collector/_meta/testdata/etcd-3.3.10-partial.plain-expected.json +++ b/metricbeat/module/prometheus/collector/_meta/testdata/etcd-3.3.10-partial.plain-expected.json @@ -5,13 +5,14 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:37991", + "instance": "127.0.0.1:61487", "job": "prometheus", "server_go_version": "go1.10.4" }, @@ -30,31 +31,7 @@ "duration": 115000, "module": "prometheus" }, - "metricset": { - "name": "collector", - "period": 10000 - }, - "prometheus": { - "labels": { - "instance": "127.0.0.1:37991", - "job": "prometheus", - "server_version": "3.3.10" - }, - "metrics": { - "etcd_server_version": 1 - } - }, - "service": { - "address": "127.0.0.1:55555", - "type": "prometheus" - } - }, - { - "event": { - "dataset": "prometheus.collector", - "duration": 115000, - "module": "prometheus" - }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 @@ -62,7 +39,7 @@ "prometheus": { "labels": { "action": "create", - "instance": "127.0.0.1:37991", + "instance": "127.0.0.1:61487", "job": "prometheus" }, "metrics": { @@ -80,13 +57,14 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:37991", + "instance": "127.0.0.1:61487", "job": "prometheus", "server_id": "8e9e05c52164694d" }, @@ -105,63 +83,14 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 66, 
"metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:37991", - "job": "prometheus", - "version": "go1.10.4" - }, - "metrics": { - "go_info": 1 - } - }, - "service": { - "address": "127.0.0.1:55555", - "type": "prometheus" - } - }, - { - "event": { - "dataset": "prometheus.collector", - "duration": 115000, - "module": "prometheus" - }, - "metricset": { - "name": "collector", - "period": 10000 - }, - "prometheus": { - "labels": { - "action": "getRecursive", - "instance": "127.0.0.1:37991", - "job": "prometheus" - }, - "metrics": { - "etcd_debugging_store_reads_total": 1 - } - }, - "service": { - "address": "127.0.0.1:55555", - "type": "prometheus" - } - }, - { - "event": { - "dataset": "prometheus.collector", - "duration": 115000, - "module": "prometheus" - }, - "metricset": { - "name": "collector", - "period": 10000 - }, - "prometheus": { - "labels": { - "instance": "127.0.0.1:37991", + "instance": "127.0.0.1:61487", "job": "prometheus" }, "metrics": { @@ -244,6 +173,7 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 @@ -251,7 +181,7 @@ "prometheus": { "labels": { "action": "set", - "instance": "127.0.0.1:37991", + "instance": "127.0.0.1:61487", "job": "prometheus" }, "metrics": { @@ -262,5 +192,83 @@ "address": "127.0.0.1:55555", "type": "prometheus" } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metrics_count": 1, + "metricset": { + "name": "collector", + "period": 10000 + }, + "prometheus": { + "labels": { + "action": "getRecursive", + "instance": "127.0.0.1:61487", + "job": "prometheus" + }, + "metrics": { + "etcd_debugging_store_reads_total": 1 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metrics_count": 1, + 
"metricset": { + "name": "collector", + "period": 10000 + }, + "prometheus": { + "labels": { + "instance": "127.0.0.1:61487", + "job": "prometheus", + "version": "go1.10.4" + }, + "metrics": { + "go_info": 1 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metrics_count": 1, + "metricset": { + "name": "collector", + "period": 10000 + }, + "prometheus": { + "labels": { + "instance": "127.0.0.1:61487", + "job": "prometheus", + "server_version": "3.3.10" + }, + "metrics": { + "etcd_server_version": 1 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } } ] \ No newline at end of file diff --git a/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json b/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json index 9b65de2c6a1..039b2d134a3 100644 --- a/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json +++ b/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json @@ -5,18 +5,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus", - "le": "3" + "quantile": "0.75" }, "metrics": { - "http_request_duration_seconds_bucket": 3 + "go_gc_duration_seconds": 0.000098154 } }, "service": { @@ -30,18 +31,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus", - "le": "+Inf" + "listener_name": "http" }, "metrics": { - "http_request_duration_seconds_bucket": 3 + 
"net_conntrack_listener_conn_accepted_total": 1568652315554 } }, "service": { @@ -55,18 +57,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus", - "quantile": "0.75" + "le": "1" }, "metrics": { - "go_gc_duration_seconds": 0.000098154 + "http_request_duration_seconds_bucket": 1 } }, "service": { @@ -80,18 +83,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus", - "le": "5" + "quantile": "1" }, "metrics": { - "http_request_duration_seconds_bucket": 3 + "go_gc_duration_seconds": 0.011689149 } }, "service": { @@ -105,18 +109,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus", - "le": "2" + "le": "3" }, "metrics": { - "http_request_duration_seconds_bucket": 2 + "http_request_duration_seconds_bucket": 3 } }, "service": { @@ -130,18 +135,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", - "job": "prometheus", - "quantile": "1" + "client_id": "consumer4", + "instance": "127.0.0.1:61489", + "job": "prometheus" }, "metrics": { - "go_gc_duration_seconds": 0.011689149 + "kafka_consumer_records_lag_records": 5 } }, "service": { @@ -155,21 +161,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", - "job": "prometheus" + "instance": 
"127.0.0.1:61489", + "job": "prometheus", + "le": "+Inf" }, "metrics": { - "go_gc_duration_seconds_count": 13118, - "go_gc_duration_seconds_sum": 3.451780079, - "http_request_duration_seconds_count": 3, - "http_request_duration_seconds_sum": 6, - "up": 1 + "http_request_duration_seconds_bucket": 3 } }, "service": { @@ -183,18 +187,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus", - "listener_name": "http" + "le": "5" }, "metrics": { - "net_conntrack_listener_conn_accepted_total": 1568652315554 + "http_request_duration_seconds_bucket": 3 } }, "service": { @@ -208,18 +213,22 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 5, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "client_id": "consumer4", - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus" }, "metrics": { - "kafka_consumer_records_lag_records": 5 + "go_gc_duration_seconds_count": 13118, + "go_gc_duration_seconds_sum": 3.451780079, + "http_request_duration_seconds_count": 3, + "http_request_duration_seconds_sum": 6, + "up": 1 } }, "service": { @@ -233,18 +242,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus", - "le": "1" + "le": "2" }, "metrics": { - "http_request_duration_seconds_bucket": 1 + "http_request_duration_seconds_bucket": 2 } }, "service": { @@ -258,13 +268,14 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:43755", + "instance": "127.0.0.1:61489", "job": "prometheus", "method": "GET" }, diff --git 
a/metricbeat/module/prometheus/collector/_meta/testdata/prometheus-2.6.0-partial.plain-expected.json b/metricbeat/module/prometheus/collector/_meta/testdata/prometheus-2.6.0-partial.plain-expected.json index 30ff0fee745..b093d6a4378 100644 --- a/metricbeat/module/prometheus/collector/_meta/testdata/prometheus-2.6.0-partial.plain-expected.json +++ b/metricbeat/module/prometheus/collector/_meta/testdata/prometheus-2.6.0-partial.plain-expected.json @@ -5,18 +5,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:35567", + "instance": "127.0.0.1:61491", "job": "prometheus", - "quantile": "1" + "quantile": "0.5" }, "metrics": { - "go_gc_duration_seconds": 0.004392391 + "go_gc_duration_seconds": 0.000060618 } }, "service": { @@ -30,20 +31,54 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 37, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "dialer_name": "default", - "instance": "127.0.0.1:35567", + "instance": "127.0.0.1:61491", "job": "prometheus" }, "metrics": { - "net_conntrack_dialer_conn_attempted_total": 0, - "net_conntrack_dialer_conn_closed_total": 0, - "net_conntrack_dialer_conn_established_total": 0 + "go_gc_duration_seconds_count": 4, + "go_gc_duration_seconds_sum": 0.004534198, + "go_goroutines": 35, + "go_memstats_alloc_bytes": 10558112, + "go_memstats_alloc_bytes_total": 14087760, + "go_memstats_buck_hash_sys_bytes": 1447018, + "go_memstats_frees_total": 15673, + "go_memstats_gc_cpu_fraction": 0.0008429952574435172, + "go_memstats_gc_sys_bytes": 2379776, + "go_memstats_heap_alloc_bytes": 10558112, + "go_memstats_heap_idle_bytes": 54042624, + "go_memstats_heap_inuse_bytes": 12214272, + "go_memstats_heap_objects": 61771, + "go_memstats_heap_released_bytes": 0, + "go_memstats_heap_sys_bytes": 66256896, + "go_memstats_last_gc_time_seconds": 1553430316.1488917, + 
"go_memstats_lookups_total": 0, + "go_memstats_mallocs_total": 77444, + "go_memstats_mcache_inuse_bytes": 6912, + "go_memstats_mcache_sys_bytes": 16384, + "go_memstats_mspan_inuse_bytes": 127984, + "go_memstats_mspan_sys_bytes": 131072, + "go_memstats_next_gc_bytes": 18390112, + "go_memstats_other_sys_bytes": 1201294, + "go_memstats_stack_inuse_bytes": 851968, + "go_memstats_stack_sys_bytes": 851968, + "go_memstats_sys_bytes": 72284408, + "go_threads": 14, + "process_cpu_seconds_total": 0.14, + "process_max_fds": 1048576, + "process_open_fds": 13, + "process_resident_memory_bytes": 35934208, + "process_start_time_seconds": 1553430305.4, + "process_virtual_memory_bytes": 150646784, + "process_virtual_memory_max_bytes": -1, + "prometheus_api_remote_read_queries": 0, + "up": 1 } }, "service": { @@ -57,19 +92,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:35567", + "instance": "127.0.0.1:61491", "job": "prometheus", - "listener_name": "http" + "quantile": "1" }, "metrics": { - "net_conntrack_listener_conn_accepted_total": 3, - "net_conntrack_listener_conn_closed_total": 0 + "go_gc_duration_seconds": 0.004392391 } }, "service": { @@ -83,18 +118,20 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 2, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:35567", + "instance": "127.0.0.1:61491", "job": "prometheus", - "quantile": "0.75" + "listener_name": "http" }, "metrics": { - "go_gc_duration_seconds": 0.004392391 + "net_conntrack_listener_conn_accepted_total": 3, + "net_conntrack_listener_conn_closed_total": 0 } }, "service": { @@ -108,53 +145,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:35567", - "job": "prometheus" + "instance": 
"127.0.0.1:61491", + "job": "prometheus", + "quantile": "0" }, "metrics": { - "go_gc_duration_seconds_count": 4, - "go_gc_duration_seconds_sum": 0.004534198, - "go_goroutines": 35, - "go_memstats_alloc_bytes": 10558112, - "go_memstats_alloc_bytes_total": 14087760, - "go_memstats_buck_hash_sys_bytes": 1447018, - "go_memstats_frees_total": 15673, - "go_memstats_gc_cpu_fraction": 0.0008429952574435172, - "go_memstats_gc_sys_bytes": 2379776, - "go_memstats_heap_alloc_bytes": 10558112, - "go_memstats_heap_idle_bytes": 54042624, - "go_memstats_heap_inuse_bytes": 12214272, - "go_memstats_heap_objects": 61771, - "go_memstats_heap_released_bytes": 0, - "go_memstats_heap_sys_bytes": 66256896, - "go_memstats_last_gc_time_seconds": 1553430316.1488917, - "go_memstats_lookups_total": 0, - "go_memstats_mallocs_total": 77444, - "go_memstats_mcache_inuse_bytes": 6912, - "go_memstats_mcache_sys_bytes": 16384, - "go_memstats_mspan_inuse_bytes": 127984, - "go_memstats_mspan_sys_bytes": 131072, - "go_memstats_next_gc_bytes": 18390112, - "go_memstats_other_sys_bytes": 1201294, - "go_memstats_stack_inuse_bytes": 851968, - "go_memstats_stack_sys_bytes": 851968, - "go_memstats_sys_bytes": 72284408, - "go_threads": 14, - "process_cpu_seconds_total": 0.14, - "process_max_fds": 1048576, - "process_open_fds": 13, - "process_resident_memory_bytes": 35934208, - "process_start_time_seconds": 1553430305.4, - "process_virtual_memory_bytes": 150646784, - "process_virtual_memory_max_bytes": -1, - "prometheus_api_remote_read_queries": 0, - "up": 1 + "go_gc_duration_seconds": 0.000038386 } }, "service": { @@ -168,18 +171,21 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 3, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:35567", - "job": "prometheus", - "quantile": "0.5" + "dialer_name": "alertmanager", + "instance": "127.0.0.1:61491", + "job": "prometheus" }, "metrics": { - "go_gc_duration_seconds": 0.000060618 + 
"net_conntrack_dialer_conn_attempted_total": 0, + "net_conntrack_dialer_conn_closed_total": 0, + "net_conntrack_dialer_conn_established_total": 0 } }, "service": { @@ -193,18 +199,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:35567", + "instance": "127.0.0.1:61491", "job": "prometheus", - "version": "go1.11.3" + "quantile": "0.75" }, "metrics": { - "go_info": 1 + "go_gc_duration_seconds": 0.004392391 } }, "service": { @@ -218,18 +225,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:35567", + "instance": "127.0.0.1:61491", "job": "prometheus", - "quantile": "0" + "quantile": "0.25" }, "metrics": { - "go_gc_duration_seconds": 0.000038386 + "go_gc_duration_seconds": 0.000042803 } }, "service": { @@ -243,20 +251,21 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 3, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "dialer_name": "prometheus", - "instance": "127.0.0.1:35567", + "dialer_name": "default", + "instance": "127.0.0.1:61491", "job": "prometheus" }, "metrics": { - "net_conntrack_dialer_conn_attempted_total": 1, + "net_conntrack_dialer_conn_attempted_total": 0, "net_conntrack_dialer_conn_closed_total": 0, - "net_conntrack_dialer_conn_established_total": 1 + "net_conntrack_dialer_conn_established_total": 0 } }, "service": { @@ -270,20 +279,21 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 3, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "dialer_name": "alertmanager", - "instance": "127.0.0.1:35567", + "dialer_name": "prometheus", + "instance": "127.0.0.1:61491", "job": "prometheus" }, "metrics": { - "net_conntrack_dialer_conn_attempted_total": 0, + 
"net_conntrack_dialer_conn_attempted_total": 1, "net_conntrack_dialer_conn_closed_total": 0, - "net_conntrack_dialer_conn_established_total": 0 + "net_conntrack_dialer_conn_established_total": 1 } }, "service": { @@ -297,18 +307,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:35567", + "instance": "127.0.0.1:61491", "job": "prometheus", - "quantile": "0.25" + "version": "go1.11.3" }, "metrics": { - "go_gc_duration_seconds": 0.000042803 + "go_info": 1 } }, "service": { diff --git a/metricbeat/module/prometheus/collector/collector.go b/metricbeat/module/prometheus/collector/collector.go index 62ba9796f69..8d3456aca63 100644 --- a/metricbeat/module/prometheus/collector/collector.go +++ b/metricbeat/module/prometheus/collector/collector.go @@ -83,6 +83,8 @@ type MetricSet struct { promEventsGen PromEventsGenerator host string eventGenStarted bool + metricsCount bool + xPack bool } // MetricSetBuilder returns a builder function for a new Prometheus metricset using @@ -103,13 +105,23 @@ func MetricSetBuilder(namespace string, genFactory PromEventsGeneratorFactory) f return nil, err } + // NOTE: We need to know if the generator is of type *promEventGenerator + // to know if it is xpack or not. If promEventsGen is of type *promEventGenerator + // then it is not xpack. Else, it is xpack. + // This is required because of how data is nested in x-pack and non-xpack if + // use_types is used in the former.
+ + _, nonXPack := promEventsGen.(*promEventGenerator) + ms := &MetricSet{ BaseMetricSet: base, prometheus: prometheus, namespace: namespace, promEventsGen: promEventsGen, eventGenStarted: false, + metricsCount: config.MetricsCount, + xPack: !nonXPack, } + // store host here to use it as a pointer when building `up` metric ms.host = ms.Host() ms.excludeMetrics, err = p.CompilePatternList(config.MetricsFilters.ExcludeMetrics) @@ -177,9 +189,48 @@ func (m *MetricSet) Fetch(reporter mb.ReporterV2) error { // Report events for _, e := range eventList { - isOpen := reporter.Event(mb.Event{ - RootFields: mapstr.M{m.namespace: e}, - }) + event := mb.Event{RootFields: mapstr.M{m.namespace: e}} + + if m.metricsCount { + // In x-pack prometheus module, the metrics are nested under the "prometheus" key directly, + // whereas in non-x-pack prometheus module, the metrics are nested under the "prometheus.metrics" key. + // Also, it is important that we do not just increment by 1 for each metric because histograms and summaries are special. + // For example, if you notice histogram's implementation in data.go, then you'd notice a single PromEvent holds 2 metrics. Here: + // + // PromEvent{ + // Data: mapstr.M{ + // "metrics": mapstr.M{ + // name + "_sum": histogram.GetSampleSum(), + // name + "_count": histogram.GetSampleCount(), + // }, + // }, + // Labels: labels, + // } + // + // Here, name + "_sum" and name + "_count" are the 2 metrics. + // + // So, len(v) will be 2 in the above example. + // Similarly, it will happen for x-pack prometheus module too. Please see + // the unit tests for the same. + switch m.xPack { + case true: + // As metrics are nested under the "prometheus" key in case of x-pack, + // labels are also nested under the "prometheus" key. So, we need to + // make sure we subtract 1 in case e["labels"] + // also exists.
+ if _, hasLabels := e["labels"].(mapstr.M); hasLabels { + event.RootFields.Put("metrics_count", len(e)-1) + } else { + event.RootFields.Put("metrics_count", len(e)) + } + default: + if v, ok := e["metrics"].(mapstr.M); ok { + event.RootFields.Put("metrics_count", len(v)) + } + } + } + + isOpen := reporter.Event(event) if !isOpen { break } diff --git a/metricbeat/module/prometheus/collector/collector_test.go b/metricbeat/module/prometheus/collector/collector_test.go index b682dad7104..2a0e04b777a 100644 --- a/metricbeat/module/prometheus/collector/collector_test.go +++ b/metricbeat/module/prometheus/collector/collector_test.go @@ -20,6 +20,10 @@ package collector import ( + "net/http" + "net/http/httptest" + "sort" + "strings" "testing" "github.com/elastic/beats/v7/metricbeat/mb" @@ -380,3 +384,140 @@ func TestSkipMetricFamily(t *testing.T) { func TestData(t *testing.T) { mbtest.TestDataFiles(t, "prometheus", "collector") } + +func sortPromEvents(events []mb.Event) { + sort.Slice(events, func(i, j int) bool { + return events[i].RootFields["prometheus"].(mapstr.M)["labels"].(mapstr.M).String() < events[j].RootFields["prometheus"].(mapstr.M)["labels"].(mapstr.M).String() + }) +} + +// TestFetchEventForCountingMetrics tests the functionality of fetching events for counting metrics in the Prometheus collector. +// NOTE: For the remote_write metricset, the test will be similar. So, we will only test this for the collector metricset. 
+func TestFetchEventForCountingMetrics(t *testing.T) { + metricsPath := "/metrics" + server := initServer(metricsPath) + defer server.Close() + + host := strings.TrimPrefix(server.URL, "http://") + + config := map[string]interface{}{ + "module": "prometheus", + "metricsets": []string{"collector"}, + "hosts": []string{server.URL}, + "metrics_path": metricsPath, + "metrics_count": true, + } + + expectedEvents := 11 + + testCases := []struct { + name string + expectedLabel mapstr.M + expectedMetricCount int + }{ + {"Prod API Inf", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "le": "+Inf", "service": "api"}, 1}, + {"Prod API 0.5", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "le": "0.5", "service": "api"}, 1}, + {"Prod API 1", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "le": "1", "service": "api"}, 1}, + {"Prod API Quantile 0.5", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "quantile": "0.5", "service": "api"}, 1}, + {"Prod API Quantile 0.9", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "quantile": "0.9", "service": "api"}, 1}, + {"Prod API Quantile 0.99", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "quantile": "0.99", "service": "api"}, 1}, + {"Prod API", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "service": "api"}, 6}, + {"Prod DB", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "service": "db"}, 2}, + {"Staging API", mapstr.M{"environment": "staging", "instance": host, "job": "prometheus", "service": "api"}, 2}, + {"Staging DB", mapstr.M{"environment": "staging", "instance": host, "job": "prometheus", "service": "db"}, 2}, + {"Default", mapstr.M{"instance": host, "job": "prometheus"}, 1}, + } + + f := mbtest.NewReportingMetricSetV2Error(t, config) + events, errs := mbtest.ReportingFetchV2Error(f) + + for _, err := range errs { + t.Errorf("Unexpected error: 
%v", err) + } + + assert.Equal(t, expectedEvents, len(events), "Number of events does not match expected") + + sortPromEvents(events) + + for i := range expectedEvents { + t.Run(testCases[i].name, func(t *testing.T) { + validateEvent(t, events[i], testCases[i].expectedLabel, testCases[i].expectedMetricCount) + }) + } +} +func validateEvent(t *testing.T, event mb.Event, expectedLabels mapstr.M, expectedMetricsCount int) { + t.Helper() + + metricsCount, err := event.RootFields.GetValue("metrics_count") + assert.NoError(t, err, "Failed to get metrics_count") + + labels, ok := event.RootFields["prometheus"].(mapstr.M)["labels"].(mapstr.M) + assert.True(t, ok, "Failed to get labels") + + assert.Equal(t, expectedLabels, labels, "Labels do not match expected") + assert.Equal(t, expectedMetricsCount, metricsCount, "Metrics count does not match expected") +} + +// NOTE(shmsr): If "test_histogram_bucket{environment="prod",service="api",le="0.1"} 0" +// is added to: +// # HELP test_histogram A test histogram metric +// # TYPE test_histogram histogram +// Then a bug occurs for arm64 i.e., uint64(math.NaN()) evaluates to 0 whereas in amd64 it evaluates to 9223372036854775808. +// So in data.go: +// +// for _, bucket := range histogram.GetBucket() { +// if bucket.GetCumulativeCount() == uint64(math.NaN()) || bucket.GetCumulativeCount() == uint64(math.Inf(0)) { +// +// and some other similar areas have this issue. +// So, currently "test_histogram_bucket{environment="prod",service="api",le="0.1"} 0" is being ignored when I run my tests +// on darwin/arm64 but works fine on linux/amd64. +// +// Related: https://github.com/elastic/beats/issues/34235 +// Also see: https://github.com/golang/go/issues/67756#issuecomment-2142850931 +// +// TODO(shmsr): We have to handle this properly. +// +// For now, I am skipping the test case so that the tests pass on both darwin/arm64 and linux/amd64. 
+func initServer(endpoint string) *httptest.Server { + data := []byte(`# HELP test_gauge A test gauge metric +# TYPE test_gauge gauge +test_gauge{environment="prod",service="api"} 10.5 +test_gauge{environment="staging",service="api"} 8.2 +test_gauge{environment="prod",service="db"} 20.7 +test_gauge{environment="staging",service="db"} 15.1 + +# HELP test_counter A test counter metric +# TYPE test_counter counter +test_counter{environment="prod",service="api"} 42 +test_counter{environment="staging",service="api"} 444 +test_counter{environment="prod",service="db"} 123 +test_counter{environment="staging",service="db"} 98 + +# HELP test_histogram A test histogram metric +# TYPE test_histogram histogram +test_histogram_bucket{environment="prod",service="api",le="0.5"} 1 +test_histogram_bucket{environment="prod",service="api",le="1.0"} 2 +test_histogram_bucket{environment="prod",service="api",le="+Inf"} 3 +test_histogram_sum{environment="prod",service="api"} 2.7 +test_histogram_count{environment="prod",service="api"} 3 + +# HELP test_summary A test summary metric +# TYPE test_summary summary +test_summary{environment="prod",service="api",quantile="0.5"} 0.2 +test_summary{environment="prod",service="api",quantile="0.9"} 0.7 +test_summary{environment="prod",service="api",quantile="0.99"} 1.2 +test_summary_sum{environment="prod",service="api"} 1234.5 +test_summary_count{environment="prod",service="api"} 1000`) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == endpoint { + // https://github.com/prometheus/client_golang/blob/dbf72fc1a20e87bea6e15281eda7ef4d139a01ec/prometheus/registry_test.go#L364 + w.Header().Set("Content-Type", "text/plain; version=0.0.4") + w.WriteHeader(http.StatusOK) + w.Write(data) + } else { + w.WriteHeader(http.StatusNotFound) + } + })) + return server +} diff --git a/metricbeat/module/prometheus/collector/config.go b/metricbeat/module/prometheus/collector/config.go index 
1a2c5688177..9fc8a5f6a7c 100644 --- a/metricbeat/module/prometheus/collector/config.go +++ b/metricbeat/module/prometheus/collector/config.go @@ -18,6 +18,7 @@ package collector type metricsetConfig struct { + MetricsCount bool `config:"metrics_count"` MetricsFilters MetricFilters `config:"metrics_filters" yaml:"metrics_filters,omitempty"` } diff --git a/metricbeat/module/prometheus/fields.go b/metricbeat/module/prometheus/fields.go index 43b8e84da07..9aafe6079ff 100644 --- a/metricbeat/module/prometheus/fields.go +++ b/metricbeat/module/prometheus/fields.go @@ -32,5 +32,5 @@ func init() { // AssetPrometheus returns asset data. // This is the base64 encoded zlib format compressed contents of module/prometheus. func AssetPrometheus() string { - return "eJzMkk1u20AMhfc6xYO6C5wcQIueoEBbdFkUxlh6sqaZv5JUDN++kGU5im2gf5tyyTckP77hI555bFAkR9rAUSvAvAU2qD9dknUFdNRWfDGfU4P3FQB8MWcKbcUVduglRzi8VoGpK9kne6oAHbLYts2p9/sGvQvKChAGOmWDvZve0MynvTb4WquGeoN6MCv1twroPUOnzWnuI5KLvKKewo5l6iV5LOfMumyKd/goHQVe4WPJYi4ZBgo3CG7HoDj4EBCdtQN6L2ob2EAI1eCE6PK4C7z0W1Dm4qeHi7DA5N13trZKz4ntrD7zeMjSreQ7Ni+xcjbSxLfnqTcws/rnNFe7vVG30ZXi0/78tH6o/xL6hvbHSDn+b6wvLoynXx+DLbc9yZ8/rPjfXu+drW52Wp/mL2BODZav5NqHe2NvL30BEcZs3B7EG/+FZ+6DU58F69WXs21KeaH8NuvPAAAA///rUkpn" + return "eJzMk7mO20AMhns9xQ+lW3j3AVSkSxfkQMogEMYjSprsXCGpNfz2gU7LsYNcTabkP0N+/Dl8xDOdK2ROgbSnQQpAnXqqUH7YgmUBNCSWXVaXYoXXBQB8UqMCsWwyNWg5BRhcXoFik5OL+lQA0ifW2qbYuq5Ca7xQATB5MkIVOjPeIVUXO6nwuRTx5QFlr5rLLwXQOvKNVFPdR0QTqEIgZWeltmmIOimAnjNV8Cl2S+AO93jeDeFIjNSuWZCJ8cYbUWeFDNseTbJDoJn/UvXKq0vJjtOQl8gedjyv8J4bYjiBCzmxmqjoiekAb47kBSfnPYJR26N1LHqA9gQmURgmNGk4etryrSjz46eHTVhh0vErWd2F50A9q890PiVudvJPTBrPbp6zU0vVG5jFxz+m+aG3K7UOJmcXu+Vq+VD+JfQN7beB+Py/sb4YP0xTH7yuGzXKH9/u+K935k5XNz3tv+YvYKYE6yhp78O9src/fQVhCkmpPrFT+heeOQ+mPNumbr4stgnxC/Fvs34PAAD//zFqb0I=" } diff --git a/metricbeat/module/prometheus/remote_write/config.go b/metricbeat/module/prometheus/remote_write/config.go index d7fbb7dfb23..fe2d54003a2 100644 --- 
a/metricbeat/module/prometheus/remote_write/config.go +++ b/metricbeat/module/prometheus/remote_write/config.go @@ -20,9 +20,10 @@ package remote_write import "github.com/elastic/elastic-agent-libs/transport/tlscommon" type Config struct { - Host string `config:"host"` - Port int `config:"port"` - TLS *tlscommon.ServerConfig `config:"ssl"` + MetricsCount bool `config:"metrics_count"` + Host string `config:"host"` + Port int `config:"port"` + TLS *tlscommon.ServerConfig `config:"ssl"` } func defaultConfig() Config { diff --git a/metricbeat/module/prometheus/remote_write/data.go b/metricbeat/module/prometheus/remote_write/data.go index 2063c881aa7..7b6758a32e9 100644 --- a/metricbeat/module/prometheus/remote_write/data.go +++ b/metricbeat/module/prometheus/remote_write/data.go @@ -26,17 +26,32 @@ import ( "github.com/elastic/elastic-agent-libs/mapstr" ) +type RemoteWriteEventsGeneratorOption func(r *RemoteWriteEventGenerator) + +func WithCountMetrics(countMetrics bool) RemoteWriteEventsGeneratorOption { + return func(r *RemoteWriteEventGenerator) { + r.metricsCount = countMetrics + } +} + // DefaultRemoteWriteEventsGeneratorFactory returns the default prometheus events generator -func DefaultRemoteWriteEventsGeneratorFactory(ms mb.BaseMetricSet) (RemoteWriteEventsGenerator, error) { - return &remoteWriteEventGenerator{}, nil +func DefaultRemoteWriteEventsGeneratorFactory(ms mb.BaseMetricSet, opts ...RemoteWriteEventsGeneratorOption) (RemoteWriteEventsGenerator, error) { + generator := &RemoteWriteEventGenerator{} + for _, opt := range opts { + opt(generator) + } + + return generator, nil } -type remoteWriteEventGenerator struct{} +type RemoteWriteEventGenerator struct { + metricsCount bool +} -func (p *remoteWriteEventGenerator) Start() {} -func (p *remoteWriteEventGenerator) Stop() {} +func (p *RemoteWriteEventGenerator) Start() {} +func (p *RemoteWriteEventGenerator) Stop() {} -func (p *remoteWriteEventGenerator) GenerateEvents(metrics model.Samples) 
map[string]mb.Event { +func (p *RemoteWriteEventGenerator) GenerateEvents(metrics model.Samples) map[string]mb.Event { eventList := map[string]mb.Event{} for _, metric := range metrics { @@ -50,6 +65,7 @@ func (p *remoteWriteEventGenerator) GenerateEvents(metrics model.Samples) map[st continue } + //nolint:typecheck,nolintlint // 'name' is being used as a key in mapstr.M below name := string(metric.Metric["__name__"]) delete(metric.Metric, "__name__") @@ -61,6 +77,7 @@ func (p *remoteWriteEventGenerator) GenerateEvents(metrics model.Samples) map[st labelsHash := labels.String() + metric.Timestamp.Time().String() if _, ok := eventList[labelsHash]; !ok { eventList[labelsHash] = mb.Event{ + RootFields: mapstr.M{}, ModuleFields: mapstr.M{ "metrics": mapstr.M{}, }, @@ -75,11 +92,23 @@ func (p *remoteWriteEventGenerator) GenerateEvents(metrics model.Samples) map[st // Not checking anything here because we create these maps some lines before e := eventList[labelsHash] - data := mapstr.M{ - name: val, - } + + data := mapstr.M{name: val} e.ModuleFields["metrics"].(mapstr.M).Update(data) } + if p.metricsCount { + for _, e := range eventList { + // In x-pack prometheus module, the metrics are nested under the "prometheus" key directly, + // whereas in non-x-pack prometheus module, the metrics are nested under the "prometheus.metrics" key. + // Also, it is important that we do not just increment by 1 for each event, because e.ModuleFields["metrics"] may have more than 1 metric. + // See unit tests for the same. 
+ v, ok := e.ModuleFields["metrics"].(mapstr.M) + if ok { + e.RootFields["metrics_count"] = len(v) + } + } + } + return eventList } diff --git a/metricbeat/module/prometheus/remote_write/remote_write.go b/metricbeat/module/prometheus/remote_write/remote_write.go index 5ab789aed46..95048f05263 100644 --- a/metricbeat/module/prometheus/remote_write/remote_write.go +++ b/metricbeat/module/prometheus/remote_write/remote_write.go @@ -52,7 +52,7 @@ type RemoteWriteEventsGenerator interface { } // RemoteWriteEventsGeneratorFactory creates a RemoteWriteEventsGenerator when instanciating a metricset -type RemoteWriteEventsGeneratorFactory func(ms mb.BaseMetricSet) (RemoteWriteEventsGenerator, error) +type RemoteWriteEventsGeneratorFactory func(ms mb.BaseMetricSet, opts ...RemoteWriteEventsGeneratorOption) (RemoteWriteEventsGenerator, error) type MetricSet struct { mb.BaseMetricSet @@ -69,7 +69,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { return nil, err } - promEventsGen, err := DefaultRemoteWriteEventsGeneratorFactory(base) + promEventsGen, err := DefaultRemoteWriteEventsGeneratorFactory(base, WithCountMetrics(config.MetricsCount)) if err != nil { return nil, err } @@ -99,7 +99,7 @@ func MetricSetBuilder(genFactory RemoteWriteEventsGeneratorFactory) func(base mb return nil, err } - promEventsGen, err := genFactory(base) + promEventsGen, err := genFactory(base, WithCountMetrics(config.MetricsCount)) if err != nil { return nil, err } @@ -110,6 +110,7 @@ func MetricSetBuilder(genFactory RemoteWriteEventsGeneratorFactory) func(base mb promEventsGen: promEventsGen, eventGenStarted: false, } + svc, err := httpserver.NewHttpServerWithHandler(base, m.handleFunc) if err != nil { return nil, err diff --git a/metricbeat/module/prometheus/remote_write/remote_write_test.go b/metricbeat/module/prometheus/remote_write/remote_write_test.go index 183ba7e7220..c295fdf88d6 100644 --- a/metricbeat/module/prometheus/remote_write/remote_write_test.go +++ 
b/metricbeat/module/prometheus/remote_write/remote_write_test.go @@ -28,7 +28,7 @@ import ( // TestGenerateEventsCounter tests counter simple cases func TestGenerateEventsCounter(t *testing.T) { - g := remoteWriteEventGenerator{} + g := RemoteWriteEventGenerator{} timestamp := model.Time(424242) timestamp1 := model.Time(424243) @@ -78,3 +78,188 @@ func TestGenerateEventsCounter(t *testing.T) { assert.EqualValues(t, e.ModuleFields, expected1) assert.EqualValues(t, e.Timestamp, timestamp1.Time()) } + +func TestMetricsCount(t *testing.T) { + tests := []struct { + name string + samples model.Samples + expected map[string]int + }{ + { + name: "HTTP requests counter with multiple dimensions", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "http_requests_total", "method": "GET", "status": "200", "path": "/api/v1/users"}, + Value: 100, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_requests_total", "method": "POST", "status": "201", "path": "/api/v1/users"}, + Value: 50, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_requests_total", "method": "GET", "status": "404", "path": "/api/v1/products"}, + Value: 10, + }, + }, + expected: map[string]int{ + `{"method":"GET","path":"/api/v1/users","status":"200"}`: 1, + `{"method":"POST","path":"/api/v1/users","status":"201"}`: 1, + `{"method":"GET","path":"/api/v1/products","status":"404"}`: 1, + }, + }, + { + name: "CPU and memory usage gauges", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "node_cpu_usage_percent", "cpu": "0", "mode": "user"}, + Value: 25.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "node_cpu_usage_percent", "cpu": "0", "mode": "system"}, + Value: 10.2, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "node_memory_usage_bytes", "type": "used"}, + Value: 4294967296, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "node_memory_usage_bytes", "type": "free"}, + Value: 8589934592, + }, + }, + 
expected: map[string]int{ + `{"cpu":"0","mode":"user"}`: 1, + `{"cpu":"0","mode":"system"}`: 1, + `{"type":"used"}`: 1, + `{"type":"free"}`: 1, + }, + }, + { + name: "Request duration histogram", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_bucket", "le": "0.1", "handler": "/home"}, + Value: 200, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_bucket", "le": "0.5", "handler": "/home"}, + Value: 400, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_bucket", "le": "+Inf", "handler": "/home"}, + Value: 500, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_sum", "handler": "/home"}, + Value: 120.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_count", "handler": "/home"}, + Value: 500, + }, + }, + expected: map[string]int{ + `{"handler":"/home","le":"+Inf"}`: 1, + `{"handler":"/home"}`: 2, + `{"handler":"/home","le":"0.1"}`: 1, + `{"handler":"/home","le":"0.5"}`: 1, + }, + }, + { + name: "Mix of counter, gauge, and histogram", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "http_requests_total", "method": "GET", "status": "200"}, + Value: 100, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "cpu_usage", "core": "0"}, + Value: 45.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "request_duration_seconds_bucket", "le": "0.1"}, + Value: 30, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "request_duration_seconds_bucket", "le": "0.5"}, + Value: 50, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "request_duration_seconds_sum"}, + Value: 75.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "request_duration_seconds_count"}, + Value: 60, + }, + }, + expected: map[string]int{ + `{"le":"0.1"}`: 1, + `{"le":"0.5"}`: 1, + `{"method":"GET","status":"200"}`: 1, + `{"core":"0"}`: 1, 
+ `{}`: 2, + }, + }, + { + name: "Duplicate labels and distinct labels", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "api_calls", "endpoint": "/users", "method": "GET"}, + Value: 50, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "api_calls", "endpoint": "/users", "method": "POST"}, + Value: 30, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "api_calls", "endpoint": "/products", "method": "GET"}, + Value: 40, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "system_load", "host": "server1"}, + Value: 1.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "system_load", "host": "server2"}, + Value: 2.0, + }, + }, + expected: map[string]int{ + `{"endpoint":"/users","method":"GET"}`: 1, + `{"endpoint":"/users","method":"POST"}`: 1, + `{"endpoint":"/products","method":"GET"}`: 1, + `{"host":"server1"}`: 1, + `{"host":"server2"}`: 1, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + generator := RemoteWriteEventGenerator{ + metricsCount: true, + } + + events := generator.GenerateEvents(tt.samples) + + assert.Equal(t, len(tt.expected), len(events), "Number of generated events should match expected") + + for _, event := range events { + count, ok := event.RootFields["metrics_count"] + assert.True(t, ok, "metrics_count should be present for test: %s", tt.name) + + labels, ok := event.ModuleFields["labels"].(mapstr.M) + if !ok { + labels = mapstr.M{} // If no labels, create an empty map so that we can handle metrics with no labels + } + labelsHash := labels.String() + + expectedCount, ok := tt.expected[labelsHash] + assert.True(t, ok, "should have an expected count for these labels") + assert.Equal(t, expectedCount, count, "metrics_count should match expected value for labels %v", labels) + + } + }) + } +} diff --git a/metricbeat/modules.d/prometheus.yml.disabled b/metricbeat/modules.d/prometheus.yml.disabled index f829e3d89da..b62206a2aa6 100644 --- 
a/metricbeat/modules.d/prometheus.yml.disabled +++ b/metricbeat/modules.d/prometheus.yml.disabled @@ -13,6 +13,9 @@ #username: "user" #password: "secret" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # This can be used for service account based authorization: #bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token #ssl.certificate_authorities: @@ -25,6 +28,9 @@ # host: "localhost" # port: "9201" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Secure settings for the server using TLS/SSL: #ssl.certificate: "/etc/pki/server/cert.pem" #ssl.key: "/etc/pki/server/cert.key" diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index 738d7ef2830..48b8b97ca60 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -1325,6 +1325,9 @@ metricbeat.modules: #ssl.certificate_authorities: # - /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Use Elasticsearch histogram type to store histograms (beta, default: false) # This will change the default layout and put metric type in the field name #use_types: true @@ -1342,6 +1345,9 @@ metricbeat.modules: #ssl.certificate: "/etc/pki/server/cert.pem" #ssl.key: "/etc/pki/server/cert.key" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Use Elasticsearch histogram type to store histograms (beta, default: false) # This will change the default layout and put metric type in the field name #use_types: true @@ -1393,6 +1399,9 @@ metricbeat.modules: #username: "user" #password: "secret" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # This can be used for service account based 
authorization: #bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token #ssl.certificate_authorities: @@ -1405,6 +1414,9 @@ metricbeat.modules: # host: "localhost" # port: "9201" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Secure settings for the server using TLS/SSL: #ssl.certificate: "/etc/pki/server/cert.pem" #ssl.key: "/etc/pki/server/cert.key" diff --git a/x-pack/metricbeat/module/prometheus/_meta/config.yml b/x-pack/metricbeat/module/prometheus/_meta/config.yml index 789e7937252..3f4cc7bd755 100644 --- a/x-pack/metricbeat/module/prometheus/_meta/config.yml +++ b/x-pack/metricbeat/module/prometheus/_meta/config.yml @@ -13,6 +13,9 @@ #ssl.certificate_authorities: # - /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Use Elasticsearch histogram type to store histograms (beta, default: false) # This will change the default layout and put metric type in the field name #use_types: true @@ -30,6 +33,9 @@ #ssl.certificate: "/etc/pki/server/cert.pem" #ssl.key: "/etc/pki/server/cert.key" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Use Elasticsearch histogram type to store histograms (beta, default: false) # This will change the default layout and put metric type in the field name #use_types: true diff --git a/x-pack/metricbeat/module/prometheus/collector/_meta/data.json b/x-pack/metricbeat/module/prometheus/collector/_meta/data.json index a9f1a53d10c..8cd81d4b38d 100644 --- a/x-pack/metricbeat/module/prometheus/collector/_meta/data.json +++ b/x-pack/metricbeat/module/prometheus/collector/_meta/data.json @@ -5,13 +5,14 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-4e623477470e", + "device": 
"br-0cb306323b90", "job": "prometheus" }, "node_network_carrier": { diff --git a/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/config.yml b/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/config.yml index 380627ba09f..fc6afb916e1 100644 --- a/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/config.yml +++ b/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/config.yml @@ -5,3 +5,4 @@ remove_fields_from_comparison: ["prometheus.labels.instance"] module: use_types: true rate_counters: true + metrics_count: true diff --git a/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/docs.plain-expected.json b/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/docs.plain-expected.json index 2b1a2f55d70..735df7e6d76 100644 --- a/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/docs.plain-expected.json +++ b/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/docs.plain-expected.json @@ -5,14 +5,15 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-4e623477470e", - "instance": "127.0.0.1:64033", + "device": "br-425cb4c454a6", + "instance": "127.0.0.1:65340", "job": "prometheus" }, "node_network_carrier": { @@ -30,14 +31,15 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-210476dc4ef8", - "instance": "127.0.0.1:64033", + "device": "br-38feb0aad6ab", + "instance": "127.0.0.1:65340", "job": "prometheus" }, "node_network_carrier": { @@ -55,17 +57,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:64033", + "device": "br-10229e3512d9", + "instance": "127.0.0.1:65340", "job": "prometheus" }, - "up": { - "value": 1 + 
"node_network_carrier": { + "value": 0 } }, "service": { @@ -79,14 +83,15 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-33d819d5f834", - "instance": "127.0.0.1:64033", + "device": "br-4e623477470e", + "instance": "127.0.0.1:65340", "job": "prometheus" }, "node_network_carrier": { @@ -104,14 +109,15 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-10229e3512d9", - "instance": "127.0.0.1:64033", + "device": "br-38425a39f36b", + "instance": "127.0.0.1:65340", "job": "prometheus" }, "node_network_carrier": { @@ -129,14 +135,15 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-38feb0aad6ab", - "instance": "127.0.0.1:64033", + "device": "br-3a285aa5e58c", + "instance": "127.0.0.1:65340", "job": "prometheus" }, "node_network_carrier": { @@ -154,14 +161,15 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-0cb306323b90", - "instance": "127.0.0.1:64033", + "device": "br-210476dc4ef8", + "instance": "127.0.0.1:65340", "job": "prometheus" }, "node_network_carrier": { @@ -179,18 +187,18 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-3a285aa5e58c", - "instance": "127.0.0.1:64033", + "instance": "127.0.0.1:65340", "job": "prometheus" }, - "node_network_carrier": { - "value": 0 + "up": { + "value": 1 } }, "service": { @@ -204,14 +212,15 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { 
"labels": { - "device": "br-425cb4c454a6", - "instance": "127.0.0.1:64033", + "device": "br-33d819d5f834", + "instance": "127.0.0.1:65340", "job": "prometheus" }, "node_network_carrier": { @@ -229,14 +238,15 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "device": "br-38425a39f36b", - "instance": "127.0.0.1:64033", + "device": "br-0cb306323b90", + "instance": "127.0.0.1:65340", "job": "prometheus" }, "node_network_carrier": { diff --git a/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json b/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json index b2c36b40bfc..94abed134b3 100644 --- a/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json +++ b/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json @@ -5,6 +5,33 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, + "metricset": { + "name": "collector", + "period": 10000 + }, + "prometheus": { + "go_gc_duration_seconds": { + "value": 0.000098154 + }, + "labels": { + "instance": "127.0.0.1:65342", + "job": "prometheus", + "quantile": "0.75" + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metrics_count": 4, "metricset": { "name": "collector", "period": 10000 @@ -37,7 +64,7 @@ } }, "labels": { - "instance": "127.0.0.1:64035", + "instance": "127.0.0.1:65342", "job": "prometheus" }, "up": { @@ -55,18 +82,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { - "go_gc_duration_seconds": { - "value": 0.011689149 - }, "labels": { - "instance": "127.0.0.1:64035", + "instance": 
"127.0.0.1:65342", "job": "prometheus", - "quantile": "1" + "listener_name": "http" + }, + "net_conntrack_listener_conn_accepted_total": { + "value": 1568652315554 } }, "service": { @@ -80,18 +108,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { - "kafka_consumer_records_lag_records": { - "value": 5 + "go_gc_duration_seconds": { + "value": 0.011689149 }, "labels": { - "client_id": "consumer4", - "instance": "127.0.0.1:64035", - "job": "prometheus" + "instance": "127.0.0.1:65342", + "job": "prometheus", + "quantile": "1" } }, "service": { @@ -105,18 +134,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { - "go_gc_duration_seconds": { - "value": 0.000098154 + "kafka_consumer_records_lag_records": { + "value": 5 }, "labels": { - "instance": "127.0.0.1:64035", - "job": "prometheus", - "quantile": "0.75" + "client_id": "consumer4", + "instance": "127.0.0.1:65342", + "job": "prometheus" } }, "service": { @@ -130,6 +160,7 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 @@ -140,7 +171,7 @@ "rate": 0 }, "labels": { - "instance": "127.0.0.1:64035", + "instance": "127.0.0.1:65342", "job": "prometheus", "method": "GET" } @@ -149,30 +180,5 @@ "address": "127.0.0.1:55555", "type": "prometheus" } - }, - { - "event": { - "dataset": "prometheus.collector", - "duration": 115000, - "module": "prometheus" - }, - "metricset": { - "name": "collector", - "period": 10000 - }, - "prometheus": { - "labels": { - "instance": "127.0.0.1:64035", - "job": "prometheus", - "listener_name": "http" - }, - "net_conntrack_listener_conn_accepted_total": { - "value": 1568652315554 - } - }, - "service": { - "address": "127.0.0.1:55555", - "type": "prometheus" - } } ] \ No newline at end of file diff --git 
a/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/prometheus-2.6.0-partial.plain-expected.json b/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/prometheus-2.6.0-partial.plain-expected.json index c6c4f34bba5..ab748dfbe73 100644 --- a/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/prometheus-2.6.0-partial.plain-expected.json +++ b/x-pack/metricbeat/module/prometheus/collector/_meta/testdata/prometheus-2.6.0-partial.plain-expected.json @@ -5,27 +5,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { - "labels": { - "dialer_name": "alertmanager", - "instance": "127.0.0.1:64037", - "job": "prometheus" - }, - "net_conntrack_dialer_conn_attempted_total": { - "counter": 0, - "rate": 0 - }, - "net_conntrack_dialer_conn_closed_total": { - "counter": 0, - "rate": 0 + "go_gc_duration_seconds": { + "value": 0.000042803 }, - "net_conntrack_dialer_conn_established_total": { - "counter": 0, - "rate": 0 + "labels": { + "instance": "127.0.0.1:65344", + "job": "prometheus", + "quantile": "0.25" } }, "service": { @@ -39,18 +31,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "go_gc_duration_seconds": { - "value": 0.000042803 + "value": 0.000060618 }, "labels": { - "instance": "127.0.0.1:64037", + "instance": "127.0.0.1:65344", "job": "prometheus", - "quantile": "0.25" + "quantile": "0.5" } }, "service": { @@ -64,21 +57,26 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 3, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "instance": "127.0.0.1:64037", - "job": "prometheus", - "listener_name": "http" + "dialer_name": "alertmanager", + "instance": "127.0.0.1:65344", + "job": "prometheus" }, - "net_conntrack_listener_conn_accepted_total": { - "counter": 3, + "net_conntrack_dialer_conn_attempted_total": { + 
"counter": 0, "rate": 0 }, - "net_conntrack_listener_conn_closed_total": { + "net_conntrack_dialer_conn_closed_total": { + "counter": 0, + "rate": 0 + }, + "net_conntrack_dialer_conn_established_total": { "counter": 0, "rate": 0 } @@ -94,18 +92,133 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 37, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { - "go_gc_duration_seconds": { - "value": 0.004392391 + "go_gc_duration_seconds_count": { + "counter": 4, + "rate": 0 + }, + "go_gc_duration_seconds_sum": { + "counter": 0.004534198, + "rate": 0 + }, + "go_goroutines": { + "value": 35 + }, + "go_memstats_alloc_bytes": { + "value": 10558112 + }, + "go_memstats_alloc_bytes_total": { + "counter": 14087760, + "rate": 0 + }, + "go_memstats_buck_hash_sys_bytes": { + "value": 1447018 + }, + "go_memstats_frees_total": { + "counter": 15673, + "rate": 0 + }, + "go_memstats_gc_cpu_fraction": { + "value": 0.0008429952574435172 + }, + "go_memstats_gc_sys_bytes": { + "value": 2379776 + }, + "go_memstats_heap_alloc_bytes": { + "value": 10558112 + }, + "go_memstats_heap_idle_bytes": { + "value": 54042624 + }, + "go_memstats_heap_inuse_bytes": { + "value": 12214272 + }, + "go_memstats_heap_objects": { + "value": 61771 + }, + "go_memstats_heap_released_bytes": { + "value": 0 + }, + "go_memstats_heap_sys_bytes": { + "value": 66256896 + }, + "go_memstats_last_gc_time_seconds": { + "value": 1553430316.1488917 + }, + "go_memstats_lookups_total": { + "counter": 0, + "rate": 0 + }, + "go_memstats_mallocs_total": { + "counter": 77444, + "rate": 0 + }, + "go_memstats_mcache_inuse_bytes": { + "value": 6912 + }, + "go_memstats_mcache_sys_bytes": { + "value": 16384 + }, + "go_memstats_mspan_inuse_bytes": { + "value": 127984 + }, + "go_memstats_mspan_sys_bytes": { + "value": 131072 + }, + "go_memstats_next_gc_bytes": { + "value": 18390112 + }, + "go_memstats_other_sys_bytes": { + "value": 1201294 + }, + "go_memstats_stack_inuse_bytes": { + "value": 851968 + }, 
+ "go_memstats_stack_sys_bytes": { + "value": 851968 + }, + "go_memstats_sys_bytes": { + "value": 72284408 + }, + "go_threads": { + "value": 14 }, "labels": { - "instance": "127.0.0.1:64037", - "job": "prometheus", - "quantile": "0.75" + "instance": "127.0.0.1:65344", + "job": "prometheus" + }, + "process_cpu_seconds_total": { + "counter": 0.14, + "rate": 0 + }, + "process_max_fds": { + "value": 1048576 + }, + "process_open_fds": { + "value": 13 + }, + "process_resident_memory_bytes": { + "value": 35934208 + }, + "process_start_time_seconds": { + "value": 1553430305.4 + }, + "process_virtual_memory_bytes": { + "value": 150646784 + }, + "process_virtual_memory_max_bytes": { + "value": -1 + }, + "prometheus_api_remote_read_queries": { + "value": 0 + }, + "up": { + "value": 1 } }, "service": { @@ -119,27 +232,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { - "labels": { - "dialer_name": "prometheus", - "instance": "127.0.0.1:64037", - "job": "prometheus" - }, - "net_conntrack_dialer_conn_attempted_total": { - "counter": 1, - "rate": 0 - }, - "net_conntrack_dialer_conn_closed_total": { - "counter": 0, - "rate": 0 + "go_gc_duration_seconds": { + "value": 0.004392391 }, - "net_conntrack_dialer_conn_established_total": { - "counter": 1, - "rate": 0 + "labels": { + "instance": "127.0.0.1:65344", + "job": "prometheus", + "quantile": "0.75" } }, "service": { @@ -153,18 +258,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "go_gc_duration_seconds": { - "value": 0.000060618 + "value": 0.004392391 }, "labels": { - "instance": "127.0.0.1:64037", + "instance": "127.0.0.1:65344", "job": "prometheus", - "quantile": "0.5" + "quantile": "1" } }, "service": { @@ -178,18 +284,28 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 3, "metricset": { "name": "collector", "period": 
10000 }, "prometheus": { - "go_gc_duration_seconds": { - "value": 0.000038386 - }, "labels": { - "instance": "127.0.0.1:64037", - "job": "prometheus", - "quantile": "0" + "dialer_name": "default", + "instance": "127.0.0.1:65344", + "job": "prometheus" + }, + "net_conntrack_dialer_conn_attempted_total": { + "counter": 0, + "rate": 0 + }, + "net_conntrack_dialer_conn_closed_total": { + "counter": 0, + "rate": 0 + }, + "net_conntrack_dialer_conn_established_total": { + "counter": 0, + "rate": 0 } }, "service": { @@ -203,18 +319,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "go_gc_duration_seconds": { - "value": 0.004392391 + "value": 0.000038386 }, "labels": { - "instance": "127.0.0.1:64037", + "instance": "127.0.0.1:65344", "job": "prometheus", - "quantile": "1" + "quantile": "0" } }, "service": { @@ -228,18 +345,19 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 3, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { "labels": { - "dialer_name": "default", - "instance": "127.0.0.1:64037", + "dialer_name": "prometheus", + "instance": "127.0.0.1:65344", "job": "prometheus" }, "net_conntrack_dialer_conn_attempted_total": { - "counter": 0, + "counter": 1, "rate": 0 }, "net_conntrack_dialer_conn_closed_total": { @@ -247,7 +365,7 @@ "rate": 0 }, "net_conntrack_dialer_conn_established_total": { - "counter": 0, + "counter": 1, "rate": 0 } }, @@ -262,132 +380,24 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 2, "metricset": { "name": "collector", "period": 10000 }, "prometheus": { - "go_gc_duration_seconds_count": { - "counter": 4, - "rate": 0 - }, - "go_gc_duration_seconds_sum": { - "counter": 0.004534198, - "rate": 0 - }, - "go_goroutines": { - "value": 35 - }, - "go_memstats_alloc_bytes": { - "value": 10558112 - }, - "go_memstats_alloc_bytes_total": { - "counter": 14087760, - "rate": 0 - }, - 
"go_memstats_buck_hash_sys_bytes": { - "value": 1447018 + "labels": { + "instance": "127.0.0.1:65344", + "job": "prometheus", + "listener_name": "http" }, - "go_memstats_frees_total": { - "counter": 15673, + "net_conntrack_listener_conn_accepted_total": { + "counter": 3, "rate": 0 }, - "go_memstats_gc_cpu_fraction": { - "value": 0.0008429952574435172 - }, - "go_memstats_gc_sys_bytes": { - "value": 2379776 - }, - "go_memstats_heap_alloc_bytes": { - "value": 10558112 - }, - "go_memstats_heap_idle_bytes": { - "value": 54042624 - }, - "go_memstats_heap_inuse_bytes": { - "value": 12214272 - }, - "go_memstats_heap_objects": { - "value": 61771 - }, - "go_memstats_heap_released_bytes": { - "value": 0 - }, - "go_memstats_heap_sys_bytes": { - "value": 66256896 - }, - "go_memstats_last_gc_time_seconds": { - "value": 1553430316.1488917 - }, - "go_memstats_lookups_total": { + "net_conntrack_listener_conn_closed_total": { "counter": 0, "rate": 0 - }, - "go_memstats_mallocs_total": { - "counter": 77444, - "rate": 0 - }, - "go_memstats_mcache_inuse_bytes": { - "value": 6912 - }, - "go_memstats_mcache_sys_bytes": { - "value": 16384 - }, - "go_memstats_mspan_inuse_bytes": { - "value": 127984 - }, - "go_memstats_mspan_sys_bytes": { - "value": 131072 - }, - "go_memstats_next_gc_bytes": { - "value": 18390112 - }, - "go_memstats_other_sys_bytes": { - "value": 1201294 - }, - "go_memstats_stack_inuse_bytes": { - "value": 851968 - }, - "go_memstats_stack_sys_bytes": { - "value": 851968 - }, - "go_memstats_sys_bytes": { - "value": 72284408 - }, - "go_threads": { - "value": 14 - }, - "labels": { - "instance": "127.0.0.1:64037", - "job": "prometheus" - }, - "process_cpu_seconds_total": { - "counter": 0.14, - "rate": 0 - }, - "process_max_fds": { - "value": 1048576 - }, - "process_open_fds": { - "value": 13 - }, - "process_resident_memory_bytes": { - "value": 35934208 - }, - "process_start_time_seconds": { - "value": 1553430305.4 - }, - "process_virtual_memory_bytes": { - "value": 150646784 - 
}, - "process_virtual_memory_max_bytes": { - "value": -1 - }, - "prometheus_api_remote_read_queries": { - "value": 0 - }, - "up": { - "value": 1 } }, "service": { @@ -401,6 +411,7 @@ "duration": 115000, "module": "prometheus" }, + "metrics_count": 1, "metricset": { "name": "collector", "period": 10000 @@ -410,7 +421,7 @@ "value": 1 }, "labels": { - "instance": "127.0.0.1:64037", + "instance": "127.0.0.1:65344", "job": "prometheus", "version": "go1.11.3" } diff --git a/x-pack/metricbeat/module/prometheus/collector/collector_test.go b/x-pack/metricbeat/module/prometheus/collector/collector_test.go index 66f8778d340..1a245973878 100644 --- a/x-pack/metricbeat/module/prometheus/collector/collector_test.go +++ b/x-pack/metricbeat/module/prometheus/collector/collector_test.go @@ -7,16 +7,139 @@ package collector import ( + "net/http" + "net/http/httptest" + "sort" + "strings" "testing" - mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" + "github.com/stretchr/testify/assert" + "github.com/elastic/beats/v7/metricbeat/mb" + mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/prometheus" // Import common fields for validation _ "github.com/elastic/beats/v7/metricbeat/module/prometheus" + "github.com/elastic/elastic-agent-libs/mapstr" ) func TestData(t *testing.T) { mbtest.TestDataFiles(t, "prometheus", "collector") } + +func sortPromEvents(events []mb.Event) { + sort.Slice(events, func(i, j int) bool { + return events[i].RootFields["prometheus"].(mapstr.M)["labels"].(mapstr.M).String() < events[j].RootFields["prometheus"].(mapstr.M)["labels"].(mapstr.M).String() + }) +} + +// TestFetchEventForCountingMetrics tests the functionality of fetching events for counting metrics in the Prometheus collector. +// NOTE: For the remote_write metricset, the test will be similar. So, we will only test this for the collector metricset. 
+func TestFetchEventForCountingMetrics(t *testing.T) { + metricsPath := "/metrics" + server := initServer(metricsPath) + defer server.Close() + + host := strings.TrimPrefix(server.URL, "http://") + + config := map[string]interface{}{ + "module": "prometheus", + "metricsets": []string{"collector"}, + "hosts": []string{server.URL}, + "metrics_path": metricsPath, + "metrics_count": true, + "use_types": true, + "rate_counters": true, + } + + expectedEvents := 8 + + testCases := []struct { + name string + expectedLabel mapstr.M + expectedMetricCount int + }{ + {"ProdAPIWithQuantile50", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "quantile": "0.5", "service": "api"}, 1}, + {"ProdAPIWithQuantile90", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "quantile": "0.9", "service": "api"}, 1}, + {"ProdAPIWithQuantile99", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "quantile": "0.99", "service": "api"}, 1}, + {"ProdAPIWithoutQuantile", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "service": "api"}, 5}, + {"ProdDBWithoutQuantile", mapstr.M{"environment": "prod", "instance": host, "job": "prometheus", "service": "db"}, 2}, + {"StagingAPIWithoutQuantile", mapstr.M{"environment": "staging", "instance": host, "job": "prometheus", "service": "api"}, 2}, + {"StagingDBWithoutQuantile", mapstr.M{"environment": "staging", "instance": host, "job": "prometheus", "service": "db"}, 2}, + {"PrometheusJobOnly", mapstr.M{"instance": host, "job": "prometheus"}, 1}, + } + f := mbtest.NewReportingMetricSetV2Error(t, config) + events, errs := mbtest.ReportingFetchV2Error(f) + + for _, err := range errs { + t.Errorf("Unexpected error: %v", err) + } + + assert.Equal(t, expectedEvents, len(events), "Number of events does not match expected") + + sortPromEvents(events) + + for i := range expectedEvents { + t.Run(testCases[i].name, func(t *testing.T) { + validateEvent(t, events[i], 
testCases[i].expectedLabel, testCases[i].expectedMetricCount) + }) + } +} +func validateEvent(t *testing.T, event mb.Event, expectedLabels mapstr.M, expectedMetricsCount int) { + t.Helper() + + metricsCount, err := event.RootFields.GetValue("metrics_count") + assert.NoError(t, err, "Failed to get metrics_count") + + labels, ok := event.RootFields["prometheus"].(mapstr.M)["labels"].(mapstr.M) + assert.True(t, ok, "Failed to get labels") + + assert.Equal(t, expectedLabels, labels, "Labels do not match expected") + assert.Equal(t, expectedMetricsCount, metricsCount, "Metrics count does not match expected") +} + +func initServer(endpoint string) *httptest.Server { + data := []byte(`# HELP test_gauge A test gauge metric +# TYPE test_gauge gauge +test_gauge{environment="prod",service="api"} 10.5 +test_gauge{environment="staging",service="api"} 8.2 +test_gauge{environment="prod",service="db"} 20.7 +test_gauge{environment="staging",service="db"} 15.1 + +# HELP test_counter A test counter metric +# TYPE test_counter counter +test_counter{environment="prod",service="api"} 42 +test_counter{environment="staging",service="api"} 444 +test_counter{environment="prod",service="db"} 123 +test_counter{environment="staging",service="db"} 98 + +# HELP test_histogram A test histogram metric +# TYPE test_histogram histogram +test_histogram_bucket{environment="prod",service="api",le="0.1"} 0 +test_histogram_bucket{environment="prod",service="api",le="0.5"} 1 +test_histogram_bucket{environment="prod",service="api",le="1.0"} 2 +test_histogram_bucket{environment="prod",service="api",le="+Inf"} 3 +test_histogram_sum{environment="prod",service="api"} 2.7 +test_histogram_count{environment="prod",service="api"} 3 + +# HELP test_summary A test summary metric +# TYPE test_summary summary +test_summary{environment="prod",service="api",quantile="0.5"} 0.2 +test_summary{environment="prod",service="api",quantile="0.9"} 0.7 +test_summary{environment="prod",service="api",quantile="0.99"} 1.2 
+test_summary_sum{environment="prod",service="api"} 1234.5 +test_summary_count{environment="prod",service="api"} 1000`) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == endpoint { + // https://github.com/prometheus/client_golang/blob/dbf72fc1a20e87bea6e15281eda7ef4d139a01ec/prometheus/registry_test.go#L364 + w.Header().Set("Content-Type", "text/plain; version=0.0.4") + w.WriteHeader(http.StatusOK) + w.Write(data) + } else { + w.WriteHeader(http.StatusNotFound) + } + })) + return server +} diff --git a/x-pack/metricbeat/module/prometheus/remote_write/config.go b/x-pack/metricbeat/module/prometheus/remote_write/config.go index 9e86facb1d0..0f0d91ead7d 100644 --- a/x-pack/metricbeat/module/prometheus/remote_write/config.go +++ b/x-pack/metricbeat/module/prometheus/remote_write/config.go @@ -10,6 +10,7 @@ import ( ) type config struct { + MetricsCount bool `config:"metrics_count"` UseTypes bool `config:"use_types"` RateCounters bool `config:"rate_counters"` TypesPatterns TypesPatterns `config:"types_patterns" yaml:"types_patterns,omitempty"` diff --git a/x-pack/metricbeat/module/prometheus/remote_write/data.go b/x-pack/metricbeat/module/prometheus/remote_write/data.go index 5161d785fb0..47c3d4af7a2 100644 --- a/x-pack/metricbeat/module/prometheus/remote_write/data.go +++ b/x-pack/metricbeat/module/prometheus/remote_write/data.go @@ -17,7 +17,7 @@ import ( "github.com/elastic/beats/v7/libbeat/common/cfgwarn" p "github.com/elastic/beats/v7/metricbeat/helper/prometheus" "github.com/elastic/beats/v7/metricbeat/mb" - "github.com/elastic/beats/v7/metricbeat/module/prometheus/remote_write" + rw "github.com/elastic/beats/v7/metricbeat/module/prometheus/remote_write" "github.com/elastic/beats/v7/x-pack/metricbeat/module/prometheus/collector" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" @@ -36,10 +36,9 @@ type histogram struct { metricName string } -func 
remoteWriteEventsGeneratorFactory(base mb.BaseMetricSet) (remote_write.RemoteWriteEventsGenerator, error) { - var err error +func remoteWriteEventsGeneratorFactory(base mb.BaseMetricSet, opts ...rw.RemoteWriteEventsGeneratorOption) (rw.RemoteWriteEventsGenerator, error) { config := defaultConfig - if err = base.Module().UnpackConfig(&config); err != nil { + if err := base.Module().UnpackConfig(&config); err != nil { return nil, err } @@ -52,8 +51,10 @@ func remoteWriteEventsGeneratorFactory(base mb.BaseMetricSet) (remote_write.Remo g := remoteWriteTypedGenerator{ counterCache: counters, rateCounters: config.RateCounters, + metricsCount: config.MetricsCount, } + var err error g.counterPatterns, err = p.CompilePatternList(config.TypesPatterns.CounterPatterns) if err != nil { return nil, fmt.Errorf("unable to compile counter patterns: %w", err) @@ -66,10 +67,11 @@ func remoteWriteEventsGeneratorFactory(base mb.BaseMetricSet) (remote_write.Remo return &g, nil } - return remote_write.DefaultRemoteWriteEventsGeneratorFactory(base) + return rw.DefaultRemoteWriteEventsGeneratorFactory(base, opts...) 
} type remoteWriteTypedGenerator struct { + metricsCount bool counterCache collector.CounterCache rateCounters bool counterPatterns []*regexp.Regexp @@ -102,11 +104,11 @@ func (g remoteWriteTypedGenerator) GenerateEvents(metrics model.Samples) map[str eventList := map[string]mb.Event{} for _, metric := range metrics { - labels := mapstr.M{} - if metric == nil { continue } + + labels := mapstr.M{} val := float64(metric.Value) if math.IsNaN(val) || math.IsInf(val, 0) { continue @@ -130,6 +132,7 @@ func (g remoteWriteTypedGenerator) GenerateEvents(metrics model.Samples) map[str // join metrics with same labels in a single event if _, ok := eventList[labelsHash]; !ok { eventList[labelsHash] = mb.Event{ + RootFields: mapstr.M{}, ModuleFields: mapstr.M{}, Timestamp: metric.Timestamp.Time(), } @@ -145,6 +148,7 @@ func (g remoteWriteTypedGenerator) GenerateEvents(metrics model.Samples) map[str } e := eventList[labelsHash] + switch promType { case counterType: data = mapstr.M{ @@ -182,12 +186,30 @@ func (g remoteWriteTypedGenerator) GenerateEvents(metrics model.Samples) map[str histograms[histKey] = hist continue } - e.ModuleFields.Update(data) + e.ModuleFields.Update(data) } // process histograms together g.processPromHistograms(eventList, histograms) + + if g.metricsCount { + for _, e := range eventList { + // In x-pack prometheus module, the metrics are nested under the "prometheus" key directly. + // whereas in non-x-pack prometheus module, the metrics are nested under the "prometheus.metrics" key. + // Also, it is important that we do not just increment by 1 for each e.ModuleFields["metrics"] may have more than 1 metric. + // As, metrics are nested under the "prometheus" key, labels is also nested under the "prometheus" key. So, we need to make sure + // we subtract 1 in case the e.ModuleFields["labels"] also exists. + // + // See unit tests for the same. 
+ if _, hasLabels := e.ModuleFields["labels"]; hasLabels { + e.RootFields["metrics_count"] = len(e.ModuleFields) - 1 + } else { + e.RootFields["metrics_count"] = len(e.ModuleFields) + } + } + } + return eventList } diff --git a/x-pack/metricbeat/module/prometheus/remote_write/remote_write.go b/x-pack/metricbeat/module/prometheus/remote_write/remote_write.go index 74eadff6d7b..32940c12098 100644 --- a/x-pack/metricbeat/module/prometheus/remote_write/remote_write.go +++ b/x-pack/metricbeat/module/prometheus/remote_write/remote_write.go @@ -7,12 +7,12 @@ package remote_write import ( "github.com/elastic/beats/v7/metricbeat/mb" "github.com/elastic/beats/v7/metricbeat/mb/parse" - "github.com/elastic/beats/v7/metricbeat/module/prometheus/remote_write" + rw "github.com/elastic/beats/v7/metricbeat/module/prometheus/remote_write" ) func init() { mb.Registry.MustAddMetricSet("prometheus", "remote_write", - remote_write.MetricSetBuilder(remoteWriteEventsGeneratorFactory), + rw.MetricSetBuilder(remoteWriteEventsGeneratorFactory), mb.WithHostParser(parse.EmptyHostParser), // must replace ensures that we are replacing the oss implementation with this one diff --git a/x-pack/metricbeat/module/prometheus/remote_write/remote_write_test.go b/x-pack/metricbeat/module/prometheus/remote_write/remote_write_test.go index 76b591c44eb..f959bde42c6 100644 --- a/x-pack/metricbeat/module/prometheus/remote_write/remote_write_test.go +++ b/x-pack/metricbeat/module/prometheus/remote_write/remote_write_test.go @@ -18,6 +18,70 @@ import ( "github.com/elastic/elastic-agent-libs/mapstr" ) +func BenchmarkGenerateEvents(b *testing.B) { + // Create a sample set of metrics + metrics := createSampleMetrics() + + // Create an instance of remoteWriteTypedGenerator + generator := remoteWriteTypedGenerator{ + // Initialize with appropriate values + metricsCount: true, + // Add other necessary fields + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + generator.GenerateEvents(metrics) + } +} + +func 
createSampleMetrics() model.Samples { + now := model.TimeFromUnix(time.Now().Unix()) + return model.Samples{ + &model.Sample{ + Metric: model.Metric{ + "__name__": "http_requests_total", + "method": "GET", + "status": "200", + }, + Value: 1234, + Timestamp: now, + }, + &model.Sample{ + Metric: model.Metric{ + "__name__": "http_request_duration_seconds", + "method": "POST", + "path": "/api/v1/users", + }, + Value: 0.543, + Timestamp: now, + }, + &model.Sample{ + Metric: model.Metric{ + "__name__": "node_cpu_seconds_total", + "cpu": "0", + "mode": "idle", + }, + Value: 3600.5, + Timestamp: now, + }, + &model.Sample{ + Metric: model.Metric{ + "__name__": "go_goroutines", + }, + Value: 42, + Timestamp: now, + }, + &model.Sample{ + Metric: model.Metric{ + "__name__": "process_resident_memory_bytes", + }, + Value: 2.5e+7, + Timestamp: now, + }, + } +} + // TestGenerateEventsCounter tests counter simple cases func TestGenerateEventsCounter(t *testing.T) { @@ -1207,3 +1271,182 @@ func TestGenerateEventsHistogramWithDefinedPattern(t *testing.T) { assert.EqualValues(t, e.ModuleFields, expected) } + +func TestMetricsCount(t *testing.T) { + tests := []struct { + name string + samples model.Samples + expected map[string]int + }{ + { + name: "HTTP requests counter with multiple dimensions", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "http_requests_total", "method": "GET", "status": "200", "path": "/api/v1/users"}, + Value: 100, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_requests_total", "method": "POST", "status": "201", "path": "/api/v1/users"}, + Value: 50, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_requests_total", "method": "GET", "status": "404", "path": "/api/v1/products"}, + Value: 10, + }, + }, + expected: map[string]int{ + `{"method":"GET","path":"/api/v1/users","status":"200"}`: 1, + `{"method":"POST","path":"/api/v1/users","status":"201"}`: 1, + 
`{"method":"GET","path":"/api/v1/products","status":"404"}`: 1, + }, + }, + { + name: "CPU and memory usage gauges", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "node_cpu_usage_percent", "cpu": "0", "mode": "user"}, + Value: 25.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "node_cpu_usage_percent", "cpu": "0", "mode": "system"}, + Value: 10.2, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "node_memory_usage_bytes", "type": "used"}, + Value: 4294967296, // 4GB + }, + &model.Sample{ + Metric: model.Metric{"__name__": "node_memory_usage_bytes", "type": "free"}, + Value: 8589934592, // 8GB + }, + }, + expected: map[string]int{ + `{"cpu":"0","mode":"user"}`: 1, + `{"cpu":"0","mode":"system"}`: 1, + `{"type":"used"}`: 1, + `{"type":"free"}`: 1, + }, + }, + { + name: "Request duration histogram", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_bucket", "le": "0.1", "handler": "/home"}, + Value: 200, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_bucket", "le": "0.5", "handler": "/home"}, + Value: 400, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_bucket", "le": "+Inf", "handler": "/home"}, + Value: 500, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_sum", "handler": "/home"}, + Value: 120.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "http_request_duration_seconds_count", "handler": "/home"}, + Value: 500, + }, + }, + expected: map[string]int{ + `{"handler":"/home"}`: 3, + }, + }, + { + name: "Mix of counter, gauge, and histogram", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "http_requests_total", "method": "GET", "status": "200"}, + Value: 100, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "cpu_usage", "core": "0"}, + Value: 45.5, + }, + &model.Sample{ + Metric: 
model.Metric{"__name__": "request_duration_seconds_bucket", "le": "0.1"}, + Value: 30, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "request_duration_seconds_bucket", "le": "0.5"}, + Value: 50, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "request_duration_seconds_sum"}, + Value: 75.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "request_duration_seconds_count"}, + Value: 60, + }, + }, + expected: map[string]int{ + `{"method":"GET","status":"200"}`: 1, + `{"core":"0"}`: 1, + `{}`: 3, + }, + }, + { + name: "Duplicate labels and distinct labels", + samples: model.Samples{ + &model.Sample{ + Metric: model.Metric{"__name__": "api_calls", "endpoint": "/users", "method": "GET"}, + Value: 50, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "api_calls", "endpoint": "/users", "method": "POST"}, + Value: 30, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "api_calls", "endpoint": "/products", "method": "GET"}, + Value: 40, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "system_load", "host": "server1"}, + Value: 1.5, + }, + &model.Sample{ + Metric: model.Metric{"__name__": "system_load", "host": "server2"}, + Value: 2.0, + }, + }, + expected: map[string]int{ + `{"endpoint":"/users","method":"GET"}`: 1, + `{"endpoint":"/users","method":"POST"}`: 1, + `{"endpoint":"/products","method":"GET"}`: 1, + `{"host":"server1"}`: 1, + `{"host":"server2"}`: 1, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + generator := remoteWriteTypedGenerator{ + metricsCount: true, + counterCache: xcollector.NewCounterCache(time.Minute), + } + + events := generator.GenerateEvents(tt.samples) + + for _, event := range events { + count, ok := event.RootFields["metrics_count"] + assert.True(t, ok, "metrics_count should be present") + + labels, ok := event.ModuleFields["labels"].(mapstr.M) + if !ok { + labels = mapstr.M{} // If no labels, create an empty map so that we can handle metrics with no labels + 
} + + labelsHash := labels.String() + + expectedCount, ok := tt.expected[labelsHash] + assert.True(t, ok, "should have an expected count for these labels") + assert.Equal(t, expectedCount, count, "metrics_count should match expected value for labels %v", labels) + } + }) + } +} diff --git a/x-pack/metricbeat/modules.d/prometheus.yml.disabled b/x-pack/metricbeat/modules.d/prometheus.yml.disabled index 11cc449ba47..0783b3a8cb0 100644 --- a/x-pack/metricbeat/modules.d/prometheus.yml.disabled +++ b/x-pack/metricbeat/modules.d/prometheus.yml.disabled @@ -16,6 +16,9 @@ #ssl.certificate_authorities: # - /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Use Elasticsearch histogram type to store histograms (beta, default: false) # This will change the default layout and put metric type in the field name #use_types: true @@ -33,6 +36,9 @@ #ssl.certificate: "/etc/pki/server/cert.pem" #ssl.key: "/etc/pki/server/cert.key" + # Count number of metrics present in Elasticsearch document (default: false) + #metrics_count: false + # Use Elasticsearch histogram type to store histograms (beta, default: false) # This will change the default layout and put metric type in the field name #use_types: true