Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kubernetes jobs Metrics Scrape #119

Merged
merged 1 commit into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,12 @@ manifests-common: $(KUSTOMIZE)
@$(KUSTOMIZE) build --enable-helm kubernetes/common/gateway > kubernetes/common/gateway/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana > kubernetes/common/grafana/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/kube-prometheus-stack > kubernetes/common/kube-prometheus-stack/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/kube-state-metrics > kubernetes/common/kube-state-metrics/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/memcached > kubernetes/common/memcached/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-operator > kubernetes/common/minio-operator/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant > kubernetes/common/minio-tenant/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/mysql > kubernetes/common/mysql/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/prometheus-node-exporter > kubernetes/common/prometheus-node-exporter/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/prometheus-operator-crds > kubernetes/common/prometheus-operator-crds/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/rancher-pushprox > kubernetes/common/rancher-pushprox/manifests/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/common/redis > kubernetes/common/redis/manifests/k8s-all-in-one.yaml
Expand Down
6 changes: 3 additions & 3 deletions alloy-modules/kubernetes/integrations/mysql.alloy
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ declare "mysql_metrics_scrape" {
}

argument "namespace" {
comment = "kubernetes secret name (default: monitoring-system)"
comment = "kubernetes secret namespace (default: monitoring-system)"
optional = true
}

Expand All @@ -27,8 +27,8 @@ declare "mysql_metrics_scrape" {
}

argument "keep_metrics" {
comment = "A regex of metrics to keep (default: see below)"
optional = true
default = "(up|instance:mysql_heartbeat_lag_seconds|instance:mysql_slave_lag_seconds|mysql_global_status_aborted_clients|mysql_global_status_aborted_connects|mysql_global_status_buffer_pool_pages|mysql_global_status_bytes_received|mysql_global_status_bytes_sent|mysql_global_status_commands_total|mysql_global_status_created_tmp_disk_tables|mysql_global_status_created_tmp_files|mysql_global_status_created_tmp_tables|mysql_global_status_handlers_total|mysql_global_status_innodb_log_waits|mysql_global_status_innodb_mem_adaptive_hash|mysql_global_status_innodb_mem_dictionary|mysql_global_status_innodb_num_open_files|mysql_global_status_innodb_page_size|mysql_global_status_max_used_connections|mysql_global_status_open_files|mysql_global_status_open_table_definitions|mysql_global_status_open_tables|mysql_global_status_opened_files|mysql_global_status_opened_table_definitions|mysql_global_status_opened_tables|mysql_global_status_qcache_free_memory|mysql_global_status_qcache_hits|mysql_global_status_qcache_inserts|mysql_global_status_qcache_lowmem_prunes|mysql_global_status_qcache_not_cached|mysql_global_status_qcache_queries_in_cache|mysql_global_status_queries|mysql_global_status_questions|mysql_global_status_select_full_join|mysql_global_status_select_full_range_join|mysql_global_status_select_range|mysql_global_status_select_range_check|mysql_global_status_select_scan|mysql_global_status_slow_queries|mysql_global_status_sort_merge_passes|mysql_global_status_sort_range|mysql_global_status_sort_rows|mysql_global_status_sort_scan|mysql_global_status_table_locks_immediate|mysql_global_status_table_locks_waited|mysql_global_status_table_open_cache_hits|mysql_global_status_table_open_cache_misses|mysql_global_status_table_open_cache_overflows|mysql_global_status_threads_cached|mysql_global_status_threads_connected|mysql_global_status_threads_created|mysql_global_status_threads_running|mysql_global_status_uptime|mysql_global_status_wsrep_local_recv_queue|mysql_global_status_wsr
ep_local_state|mysql_global_status_wsrep_ready|mysql_global_variables_innodb_additional_mem_pool_size|mysql_global_variables_innodb_buffer_pool_size|mysql_global_variables_innodb_log_buffer_size|mysql_global_variables_key_buffer_size|mysql_global_variables_max_connections|mysql_global_variables_open_files_limit|mysql_global_variables_query_cache_size|mysql_global_variables_table_definition_cache|mysql_global_variables_table_open_cache|mysql_global_variables_thread_cache_size|mysql_global_variables_tokudb_cache_size|mysql_global_variables_wsrep_desync|mysql_heartbeat_now_timestamp_seconds|mysql_heartbeat_stored_timestamp_seconds|mysql_info_schema_processlist_threads|mysql_slave_status_seconds_behind_master|mysql_slave_status_slave_io_running|mysql_slave_status_slave_sql_running|mysql_slave_status_sql_delay|mysql_up)"
}

argument "scrape_interval" {
Expand Down Expand Up @@ -100,7 +100,7 @@ declare "mysql_metrics_scrape" {
// keep only metrics that match the keep_metrics regex
rule {
source_labels = ["__name__"]
regex = argument.keep_metrics.value
regex = coalesce(argument.keep_metrics.value, "(up|instance:mysql_heartbeat_lag_seconds|instance:mysql_slave_lag_seconds|mysql_global_status_aborted_clients|mysql_global_status_aborted_connects|mysql_global_status_buffer_pool_pages|mysql_global_status_bytes_received|mysql_global_status_bytes_sent|mysql_global_status_commands_total|mysql_global_status_created_tmp_disk_tables|mysql_global_status_created_tmp_files|mysql_global_status_created_tmp_tables|mysql_global_status_handlers_total|mysql_global_status_innodb_log_waits|mysql_global_status_innodb_mem_adaptive_hash|mysql_global_status_innodb_mem_dictionary|mysql_global_status_innodb_num_open_files|mysql_global_status_innodb_page_size|mysql_global_status_max_used_connections|mysql_global_status_open_files|mysql_global_status_open_table_definitions|mysql_global_status_open_tables|mysql_global_status_opened_files|mysql_global_status_opened_table_definitions|mysql_global_status_opened_tables|mysql_global_status_qcache_free_memory|mysql_global_status_qcache_hits|mysql_global_status_qcache_inserts|mysql_global_status_qcache_lowmem_prunes|mysql_global_status_qcache_not_cached|mysql_global_status_qcache_queries_in_cache|mysql_global_status_queries|mysql_global_status_questions|mysql_global_status_select_full_join|mysql_global_status_select_full_range_join|mysql_global_status_select_range|mysql_global_status_select_range_check|mysql_global_status_select_scan|mysql_global_status_slow_queries|mysql_global_status_sort_merge_passes|mysql_global_status_sort_range|mysql_global_status_sort_rows|mysql_global_status_sort_scan|mysql_global_status_table_locks_immediate|mysql_global_status_table_locks_waited|mysql_global_status_table_open_cache_hits|mysql_global_status_table_open_cache_misses|mysql_global_status_table_open_cache_overflows|mysql_global_status_threads_cached|mysql_global_status_threads_connected|mysql_global_status_threads_created|mysql_global_status_threads_running|mysql_global_status_uptime|mysql_global_status_wsrep_loca
l_recv_queue|mysql_global_status_wsrep_local_state|mysql_global_status_wsrep_ready|mysql_global_variables_innodb_additional_mem_pool_size|mysql_global_variables_innodb_buffer_pool_size|mysql_global_variables_innodb_log_buffer_size|mysql_global_variables_key_buffer_size|mysql_global_variables_max_connections|mysql_global_variables_open_files_limit|mysql_global_variables_query_cache_size|mysql_global_variables_table_definition_cache|mysql_global_variables_table_open_cache|mysql_global_variables_thread_cache_size|mysql_global_variables_tokudb_cache_size|mysql_global_variables_wsrep_desync|mysql_heartbeat_now_timestamp_seconds|mysql_heartbeat_stored_timestamp_seconds|mysql_info_schema_processlist_threads|mysql_slave_status_seconds_behind_master|mysql_slave_status_slave_io_running|mysql_slave_status_slave_sql_running|mysql_slave_status_sql_delay|mysql_up)")
action = "keep"
}
}
Expand Down
17 changes: 17 additions & 0 deletions alloy-modules/kubernetes/jobs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Module Components

Kubernetes Jobs Module Components

## Components

- [`apiserver`](#apiserver_metrics_scrape)

### `apiserver_metrics_scrape`

Scrapes metrics from the Kubernetes API server (`kube-apiserver`).

***Arguments***

***Exports***

***Example***
206 changes: 206 additions & 0 deletions alloy-modules/kubernetes/jobs/apiserver.alloy
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/*
Module Components: apiserver
Description: Kubernetes API server (kube-apiserver) metrics scrape

*/

// apiserver_metrics_scrape discovers the Kubernetes API server service,
// scrapes it over HTTPS using the pod's service-account credentials, applies
// drop/keep metric relabeling, and forwards the result to `forward_to`.
declare "apiserver_metrics_scrape" {

  /********************************************
  * ARGUMENTS
  ********************************************/
  argument "forward_to" {
    comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to"
  }

  // required: used verbatim as the value of the `cluster` target label
  argument "cluster" {
    comment = "The value to set for the cluster label on all discovered targets"
  }

  argument "namespaces" {
    comment  = "The namespaces to look for targets in (default: [\"default\"])"
    optional = true
  }

  argument "field_selectors" {
    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
    comment  = "The field selectors to use to find matching targets (default: [\"metadata.name=kubernetes\"])"
    optional = true
  }

  argument "label_selectors" {
    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
    comment  = "The label selectors to use to find matching targets (default: [])"
    optional = true
  }

  argument "port_name" {
    comment  = "The name of the service port to keep targets for (default: https)"
    optional = true
  }

  argument "job_label" {
    comment  = "The job label to add for all kube-apiserver metrics (default: integrations/kubernetes/apiserver)"
    optional = true
  }

  argument "keep_metrics" {
    comment  = "A regular expression of metrics to keep (default: (.+), i.e. keep everything not dropped)"
    optional = true
  }

  // the default drop_metrics and drop_les expressions are taken from kube-prometheus
  // https://github.com/prometheus-operator/kube-prometheus/blob/main/manifests/kubernetesControlPlane-serviceMonitorApiserver.yaml
  argument "drop_metrics" {
    comment  = "A regular expression of metrics to drop (default: see below)"
    optional = true
  }

  argument "drop_les" {
    comment  = "A regular expression matched against \"<metric name>;<le label value>\" of series to drop (default: see below)"
    optional = true
  }

  argument "scrape_interval" {
    comment  = "How often to scrape metrics from the targets (default: 60s)"
    optional = true
  }

  argument "scrape_timeout" {
    comment  = "How long before a scrape times out (default: 10s)"
    optional = true
  }

  argument "max_cache_size" {
    comment  = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate."
    optional = true
  }

  /*****************************************************************
  * Targets From Kubernetes Discovery
  *****************************************************************/
  discovery.kubernetes "apiserver" {
    role = "service"

    // the selector lists are joined into the comma-separated form expected by the selectors block
    selectors {
      role  = "service"
      field = join(coalesce(argument.field_selectors.value, ["metadata.name=kubernetes"]), ",")
      label = join(coalesce(argument.label_selectors.value, []), ",")
    }

    namespaces {
      names = coalesce(argument.namespaces.value, ["default"])
    }
  }

  /*****************************************************************
  * Discovery Relabelings (pre-scrape)
  *****************************************************************/
  discovery.relabel "apiserver" {
    targets = discovery.kubernetes.apiserver.targets

    // only keep targets with a matching port name
    rule {
      source_labels = ["__meta_kubernetes_service_port_name"]
      regex         = coalesce(argument.port_name.value, "https")
      action        = "keep"
    }

    // set the namespace
    rule {
      action        = "replace"
      source_labels = ["__meta_kubernetes_namespace"]
      target_label  = "namespace"
    }

    // set the service_name
    rule {
      action        = "replace"
      source_labels = ["__meta_kubernetes_service_name"]
      target_label  = "service"
    }

    // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:"
    // the three labels are joined with ";" and the first non-empty segment is captured
    rule {
      action        = "replace"
      source_labels = [
        "__meta_kubernetes_service_label_app_kubernetes_io_name",
        "__meta_kubernetes_service_label_k8s_app",
        "__meta_kubernetes_service_label_app",
      ]
      separator    = ";"
      regex        = "^(?:;*)?([^;]+).*$"
      replacement  = "$1"
      target_label = "app"
    }

    // set the cluster label
    rule {
      action       = "replace"
      replacement  = argument.cluster.value
      target_label = "cluster"
    }

    // set a source label
    rule {
      action       = "replace"
      replacement  = "kubernetes"
      target_label = "source"
    }
  }

  /*****************************************************************
  * Prometheus Scrape Targets
  *****************************************************************/
  prometheus.scrape "apiserver" {
    targets = discovery.relabel.apiserver.output

    job_name          = coalesce(argument.job_label.value, "integrations/kubernetes/apiserver")
    scheme            = "https"
    scrape_interval   = coalesce(argument.scrape_interval.value, "60s")
    scrape_timeout    = coalesce(argument.scrape_timeout.value, "10s")
    // authenticate with the service-account token mounted into the pod
    bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"

    // verify the server certificate against the mounted cluster CA
    tls_config {
      ca_file              = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
      insecure_skip_verify = false
      server_name          = "kubernetes"
    }

    clustering {
      enabled = true
    }

    forward_to = [prometheus.relabel.apiserver.receiver]
  }

  /********************************************
  * Prometheus Metric Relabelings (post-scrape)
  ********************************************/
  prometheus.relabel "apiserver" {
    forward_to     = argument.forward_to.value
    max_cache_size = coalesce(argument.max_cache_size.value, 100000)

    // drop metrics that match the drop_metrics regex
    rule {
      source_labels = ["__name__"]
      regex         = coalesce(argument.drop_metrics.value, "(((go|process)_.+)|kubelet_node_name|kubelet_(pod_(worker|start)_latency_microseconds|cgroup_manager_latency_microseconds|pleg_relist_(latency|interval)_microseconds|runtime_operations(_latency_microseconds|_errors)?|eviction_stats_age_microseconds|device_plugin_(registration_count|alloc_latency_microseconds)|network_plugin_operations_latency_microseconds)|scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_(predicate|priority|preemption)_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)|apiserver_(request_(count|latencies(_summary)?)|dropped_requests|storage_(data_key_generation|transformation_(failures_total|latencies_microseconds))|proxy_tunnel_sync_latency_secs|longrunning_gauge|registered_watchers)|kubelet_docker_(operations(_latency_microseconds|_errors|_timeout)?)|reflector_(items_per_(list|watch)|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)|etcd_(helper_(cache_(hit|miss)_count|cache_entry_count|object_counts)|request_(cache_(get|add)_latencies_summary|latencies_summary)|debugging.*|disk.*|server.*)|transformation_(latencies_microseconds|failures_total)|(admission_quota_controller|APIServiceOpenAPIAggregationControllerQueue1|APIServiceRegistrationController|autoregister|AvailableConditionController|crd_(autoregistration_controller|Establishing|finalizer|naming_condition_controller|openapi_controller)|DiscoveryController|non_structural_schema_condition_controller|kubeproxy_sync_proxy_rules|rest_client_request_latency|storage_operation_(errors_total|status_count))(_.*)|apiserver_admission_(controller_admission|step_admission)_latencies_seconds_.*)")
      action        = "drop"
    }

    // drop metrics whose name and le label match the drop_les regex
    // (the two source labels are joined with the default ";" separator before matching)
    rule {
      source_labels = [
        "__name__",
        "le",
      ]
      regex  = coalesce(argument.drop_les.value, "apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)")
      action = "drop"
    }

    // keep only metrics that match the keep_metrics regex
    rule {
      source_labels = ["__name__"]
      regex         = coalesce(argument.keep_metrics.value, "(.+)")
      action        = "keep"
    }
  }
}
Loading
Loading