From 8b5ccc459a90dbf6ac892fc925f1771d083db9f9 Mon Sep 17 00:00:00 2001 From: Hisar Balik Date: Sat, 7 Dec 2024 18:24:31 +0100 Subject: [PATCH 1/3] fix prometheus discovery job, optimize discovery for resources on same node as agent running --- .../config/metric/agent/config.go | 8 ++- .../metric/agent/prometheus_receiver.go | 61 ++++++++++++++----- .../agent/testdata/config_istio_enabled.yaml | 12 ++++ .../testdata/config_istio_not_enabled.yaml | 6 ++ 4 files changed, 71 insertions(+), 16 deletions(-) diff --git a/internal/otelcollector/config/metric/agent/config.go b/internal/otelcollector/config/metric/agent/config.go index 9718c5415..a369491fc 100644 --- a/internal/otelcollector/config/metric/agent/config.go +++ b/internal/otelcollector/config/metric/agent/config.go @@ -187,11 +187,17 @@ type StaticDiscoveryConfig struct { } type KubernetesDiscoveryConfig struct { - Role Role `yaml:"role"` + Role Role `yaml:"role"` + Selectors []K8SDiscoverySelector `yaml:"selectors,omitempty"` } type Role string +type K8SDiscoverySelector struct { + Role Role `yaml:"role"` + Field string `yaml:"field"` +} + const ( RoleEndpoints Role = "endpoints" RolePod Role = "pod" diff --git a/internal/otelcollector/config/metric/agent/prometheus_receiver.go b/internal/otelcollector/config/metric/agent/prometheus_receiver.go index 63edb3c20..1212d1a93 100644 --- a/internal/otelcollector/config/metric/agent/prometheus_receiver.go +++ b/internal/otelcollector/config/metric/agent/prometheus_receiver.go @@ -18,8 +18,9 @@ const ( type AnnotatedResource string const ( - AnnotatedPod AnnotatedResource = "pod" - AnnotatedService AnnotatedResource = "service" + AnnotatedPod AnnotatedResource = "pod" + AnnotatedService AnnotatedResource = "service" + PodNodeSelectorFieldExpression string = "spec.nodeName=${MY_NODE_NAME}" ) const ( @@ -35,11 +36,21 @@ func makePrometheusConfigForPods() *PrometheusReceiver { var config PrometheusReceiver scrapeConfig := ScrapeConfig{ - ScrapeInterval: scrapeInterval, - SampleLimit: sampleLimit, - KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{{Role: RolePod}}, - JobName: appPodsJobName, - RelabelConfigs: makePrometheusPodsRelabelConfigs(), + ScrapeInterval: scrapeInterval, + SampleLimit: sampleLimit, + KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{ + { + Role: RolePod, + Selectors: []K8SDiscoverySelector{ + { + Role: RolePod, + Field: PodNodeSelectorFieldExpression, + }, + }, + }, + }, + JobName: appPodsJobName, + RelabelConfigs: makePrometheusPodsRelabelConfigs(), } config.Config.ScrapeConfigs = append(config.Config.ScrapeConfigs, scrapeConfig) @@ -55,9 +66,19 @@ func makePrometheusConfigForServices(opts BuildOptions) *PrometheusReceiver { var config PrometheusReceiver baseScrapeConfig := ScrapeConfig{ - ScrapeInterval: scrapeInterval, - SampleLimit: sampleLimit, - KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{{Role: RoleEndpoints}}, + ScrapeInterval: scrapeInterval, + SampleLimit: sampleLimit, + KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{ + { + Role: RoleEndpoints, + Selectors: []K8SDiscoverySelector{ + { + Role: RolePod, + Field: PodNodeSelectorFieldExpression, + }, + }, + }, + }, } httpScrapeConfig := baseScrapeConfig @@ -142,11 +163,21 @@ func makePrometheusIstioConfig() *PrometheusReceiver { Config: PrometheusConfig{ ScrapeConfigs: []ScrapeConfig{ { - JobName: "istio-proxy", - SampleLimit: sampleLimit, - MetricsPath: "/stats/prometheus", - ScrapeInterval: scrapeInterval, - KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{{Role: RolePod}}, + JobName: "istio-proxy", + SampleLimit: sampleLimit, + MetricsPath: "/stats/prometheus", + ScrapeInterval: scrapeInterval, + KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{ + { + Role: RolePod, + Selectors: []K8SDiscoverySelector{ + { + Role: RolePod, + Field: PodNodeSelectorFieldExpression, + }, + }, + }, + }, RelabelConfigs: []RelabelConfig{ keepIfRunningOnSameNode(NodeAffiliatedPod), keepIfIstioProxy(), diff --git a/internal/otelcollector/config/metric/agent/testdata/config_istio_enabled.yaml b/internal/otelcollector/config/metric/agent/testdata/config_istio_enabled.yaml index f5795386b..b7e2f30c9 100644 --- a/internal/otelcollector/config/metric/agent/testdata/config_istio_enabled.yaml +++ b/internal/otelcollector/config/metric/agent/testdata/config_istio_enabled.yaml @@ -175,6 +175,9 @@ receivers: action: replace kubernetes_sd_configs: - role: pod + selectors: + - role: pod + field: spec.nodeName=${MY_NODE_NAME} prometheus/app-services: config: scrape_configs: @@ -223,6 +226,9 @@ receivers: action: replace kubernetes_sd_configs: - role: endpoints + selectors: + - role: pod + field: spec.nodeName=${MY_NODE_NAME} - job_name: app-services-secure sample_limit: 50000 scrape_interval: 30s @@ -268,6 +274,9 @@ receivers: action: replace kubernetes_sd_configs: - role: endpoints + selectors: + - role: pod + field: spec.nodeName=${MY_NODE_NAME} tls_config: ca_file: /etc/istio-output-certs/root-cert.pem cert_file: /etc/istio-output-certs/cert-chain.pem @@ -299,6 +308,9 @@ receivers: action: keep kubernetes_sd_configs: - role: pod + selectors: + - role: pod + field: spec.nodeName=${MY_NODE_NAME} processors: batch: send_batch_size: 1024 diff --git a/internal/otelcollector/config/metric/agent/testdata/config_istio_not_enabled.yaml b/internal/otelcollector/config/metric/agent/testdata/config_istio_not_enabled.yaml index d79f32157..46c5d73c3 100644 --- a/internal/otelcollector/config/metric/agent/testdata/config_istio_not_enabled.yaml +++ b/internal/otelcollector/config/metric/agent/testdata/config_istio_not_enabled.yaml @@ -164,6 +164,9 @@ receivers: action: replace kubernetes_sd_configs: - role: pod + selectors: + - role: pod + field: spec.nodeName=${MY_NODE_NAME} prometheus/app-services: config: scrape_configs: @@ -212,6 +215,9 @@ receivers: action: replace kubernetes_sd_configs: - role: endpoints + selectors: + - role: pod + field: spec.nodeName=${MY_NODE_NAME} processors: batch: send_batch_size: 1024 From 60d3751722fc2f7855ba60a9d794ec689762a505 Mon Sep 17 00:00:00 2001 From: Hisar Balik Date: Sat, 7 Dec 2024 18:29:28 +0100 Subject: [PATCH 2/3] fix pod affinity for metric agent load test setup --- hack/load-tests/metric-agent-test-setup.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/load-tests/metric-agent-test-setup.yaml b/hack/load-tests/metric-agent-test-setup.yaml index 4feb43db8..639a6a27f 100644 --- a/hack/load-tests/metric-agent-test-setup.yaml +++ b/hack/load-tests/metric-agent-test-setup.yaml @@ -50,7 +50,7 @@ spec: - podAffinityTerm: labelSelector: matchExpressions: - - key: app + - key: app.kubernetes.io/name operator: In values: - metric-agent-load-generator From 3da7eaf4b68976613001bbc6146e14292238a0df Mon Sep 17 00:00:00 2001 From: Hisar Balik Date: Tue, 10 Dec 2024 08:21:59 +0100 Subject: [PATCH 3/3] move discovery config creation to dedicated function --- .../metric/agent/prometheus_receiver.go | 70 +++++++------------ 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/internal/otelcollector/config/metric/agent/prometheus_receiver.go b/internal/otelcollector/config/metric/agent/prometheus_receiver.go index 1212d1a93..a7ef14efe 100644 --- a/internal/otelcollector/config/metric/agent/prometheus_receiver.go +++ b/internal/otelcollector/config/metric/agent/prometheus_receiver.go @@ -36,21 +36,11 @@ func makePrometheusConfigForPods() *PrometheusReceiver { var config PrometheusReceiver scrapeConfig := ScrapeConfig{ - ScrapeInterval: scrapeInterval, - SampleLimit: sampleLimit, - KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{ - { - Role: RolePod, - Selectors: []K8SDiscoverySelector{ - { - Role: RolePod, - Field: PodNodeSelectorFieldExpression, - }, - }, - }, - }, - JobName: appPodsJobName, - RelabelConfigs: makePrometheusPodsRelabelConfigs(), + ScrapeInterval: scrapeInterval, + SampleLimit: sampleLimit, + KubernetesDiscoveryConfigs: makeDiscoveryConfigWithNodeSelector(RolePod), + JobName: appPodsJobName, + RelabelConfigs: makePrometheusPodsRelabelConfigs(), } config.Config.ScrapeConfigs = append(config.Config.ScrapeConfigs, scrapeConfig) @@ -66,19 +56,9 @@ func makePrometheusConfigForServices(opts BuildOptions) *PrometheusReceiver { var config PrometheusReceiver baseScrapeConfig := ScrapeConfig{ - ScrapeInterval: scrapeInterval, - SampleLimit: sampleLimit, - KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{ - { - Role: RoleEndpoints, - Selectors: []K8SDiscoverySelector{ - { - Role: RolePod, - Field: PodNodeSelectorFieldExpression, - }, - }, - }, - }, + ScrapeInterval: scrapeInterval, + SampleLimit: sampleLimit, + KubernetesDiscoveryConfigs: makeDiscoveryConfigWithNodeSelector(RoleEndpoints), } httpScrapeConfig := baseScrapeConfig @@ -163,21 +143,11 @@ func makePrometheusIstioConfig() *PrometheusReceiver { Config: PrometheusConfig{ ScrapeConfigs: []ScrapeConfig{ { - JobName: "istio-proxy", - SampleLimit: sampleLimit, - MetricsPath: "/stats/prometheus", - ScrapeInterval: scrapeInterval, - KubernetesDiscoveryConfigs: []KubernetesDiscoveryConfig{ - { - Role: RolePod, - Selectors: []K8SDiscoverySelector{ - { - Role: RolePod, - Field: PodNodeSelectorFieldExpression, - }, - }, - }, - }, + JobName: "istio-proxy", + SampleLimit: sampleLimit, + MetricsPath: "/stats/prometheus", + ScrapeInterval: scrapeInterval, + KubernetesDiscoveryConfigs: makeDiscoveryConfigWithNodeSelector(RolePod), RelabelConfigs: []RelabelConfig{ keepIfRunningOnSameNode(NodeAffiliatedPod), keepIfIstioProxy(), @@ -322,3 +292,17 @@ func dropIfSchemeHTTPS() RelabelConfig { Regex: "(https)", } } + +func makeDiscoveryConfigWithNodeSelector(role Role) []KubernetesDiscoveryConfig { + return []KubernetesDiscoveryConfig{ + { + Role: role, + Selectors: []K8SDiscoverySelector{ + { + Role: RolePod, + Field: PodNodeSelectorFieldExpression, + }, + }, + }, + } +}