-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #119 from qclaogui:k8s-jobs
kubernetes jobs Metrics Scrape
- Loading branch information
Showing
97 changed files
with
106,425 additions
and
51,515 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Module Components | ||
|
||
Kubernetes Jobs Module Components | ||
|
||
## Components | ||
|
||
- [`apiserver`](#apiserver_metrics_scrape) | ||
|
||
### `apiserver_metrics_scrape` | ||
|
||
kubernetes Apiserver Metrics Scrape | ||
|
||
***Arguments*** | ||
|
||
***Exports*** | ||
|
||
***Example*** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
/* | ||
Module Components: apiserver | ||
Description: kubernetes Apiserver Metrics Scrape | ||
|
||
*/ | ||
|
||
declare "apiserver_metrics_scrape" { | ||
|
||
/******************************************** | ||
* ARGUMENTS | ||
********************************************/ | ||
argument "forward_to" { | ||
comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" | ||
} | ||
|
||
argument "cluster" { } | ||
|
||
argument "namespaces" { | ||
comment = "The namespaces to look for targets in (default: default)" | ||
optional = true | ||
} | ||
|
||
argument "field_selectors" { | ||
// Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ | ||
comment = "The label selectors to use to find matching targets (default: [\"metadata.name=kubernetes\"])" | ||
optional = true | ||
} | ||
|
||
argument "label_selectors" { | ||
// Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ | ||
comment = "The label selectors to use to find matching targets (default: [])" | ||
optional = true | ||
} | ||
|
||
argument "port_name" { | ||
comment = "The value of the label for the selector (default: https)" | ||
optional = true | ||
} | ||
|
||
argument "job_label" { | ||
comment = "The job label to add for all kube-apiserver metrics (default: integrations/kubernetes/apiserver)" | ||
optional = true | ||
} | ||
|
||
argument "keep_metrics" { | ||
comment = "A regex of metrics to keep (default: see below)" | ||
optional = true | ||
} | ||
|
||
// drop metrics and les from kube-prometheus | ||
// https://github.com/prometheus-operator/kube-prometheus/blob/main/manifests/kubernetesControlPlane-serviceMonitorApiserver.yaml | ||
argument "drop_metrics" { | ||
comment = "A regular expression of metrics to drop (default: see below)" | ||
optional = true | ||
} | ||
|
||
argument "drop_les" { | ||
comment = "Regular expression of metric les label values to drop (default: see below)" | ||
optional = true | ||
} | ||
|
||
argument "scrape_interval" { | ||
comment = "How often to scrape metrics from the targets (default: 60s)" | ||
optional = true | ||
} | ||
|
||
argument "scrape_timeout" { | ||
comment = "How long before a scrape times out (default: 10s)" | ||
optional = true | ||
} | ||
|
||
argument "max_cache_size" { | ||
comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." | ||
optional = true | ||
} | ||
|
||
/***************************************************************** | ||
* Targets From Docker Discovery | ||
*****************************************************************/ | ||
discovery.kubernetes "apiserver" { | ||
role = "service" | ||
|
||
selectors { | ||
role = "service" | ||
field = join(coalesce(argument.field_selectors.value, ["metadata.name=kubernetes"]), ",") | ||
label = join(coalesce(argument.label_selectors.value, []), ",") | ||
} | ||
|
||
namespaces { | ||
names = coalesce(argument.namespaces.value, ["default"]) | ||
} | ||
} | ||
|
||
/***************************************************************** | ||
* Discovery Relabelings (pre-scrape) | ||
*****************************************************************/ | ||
discovery.relabel "apiserver" { | ||
targets = discovery.kubernetes.apiserver.targets | ||
|
||
// only keep targets with a matching port name | ||
rule { | ||
source_labels = ["__meta_kubernetes_service_port_name"] | ||
regex = coalesce(argument.port_name.value, "https") | ||
action = "keep" | ||
} | ||
|
||
// set the namespace | ||
rule { | ||
action = "replace" | ||
source_labels = ["__meta_kubernetes_namespace"] | ||
target_label = "namespace" | ||
} | ||
|
||
// set the service_name | ||
rule { | ||
action = "replace" | ||
source_labels = ["__meta_kubernetes_service_name"] | ||
target_label = "service" | ||
} | ||
|
||
// set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" | ||
rule { | ||
action = "replace" | ||
source_labels = [ | ||
"__meta_kubernetes_service_label_app_kubernetes_io_name", | ||
"__meta_kubernetes_service_label_k8s_app", | ||
"__meta_kubernetes_service_label_app", | ||
] | ||
separator = ";" | ||
regex = "^(?:;*)?([^;]+).*$" | ||
replacement = "$1" | ||
target_label = "app" | ||
} | ||
|
||
// set the cluster label | ||
rule { | ||
action = "replace" | ||
replacement = argument.cluster.value | ||
target_label = "cluster" | ||
} | ||
|
||
// set a source label | ||
rule { | ||
action = "replace" | ||
replacement = "kubernetes" | ||
target_label = "source" | ||
} | ||
} | ||
|
||
/***************************************************************** | ||
* Prometheus Scrape Labels Targets | ||
*****************************************************************/ | ||
prometheus.scrape "apiserver" { | ||
targets = discovery.relabel.apiserver.output | ||
|
||
job_name = coalesce(argument.job_label.value, "integrations/kubernetes/apiserver") | ||
scheme = "https" | ||
scrape_interval = coalesce(argument.scrape_interval.value, "60s") | ||
scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") | ||
bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" | ||
|
||
tls_config { | ||
ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" | ||
insecure_skip_verify = false | ||
server_name = "kubernetes" | ||
} | ||
|
||
clustering { | ||
enabled = true | ||
} | ||
|
||
forward_to = [prometheus.relabel.apiserver.receiver] | ||
} | ||
|
||
/******************************************** | ||
* Prometheus Metric Relabelings (post-scrape) | ||
********************************************/ | ||
prometheus.relabel "apiserver" { | ||
forward_to = argument.forward_to.value | ||
max_cache_size = coalesce(argument.max_cache_size.value, 100000) | ||
|
||
// drop metrics that match the drop_metrics regex | ||
rule { | ||
source_labels = ["__name__"] | ||
regex = coalesce(argument.drop_metrics.value, "(((go|process)_.+)|kubelet_node_name|kubelet_(pod_(worker|start)_latency_microseconds|cgroup_manager_latency_microseconds|pleg_relist_(latency|interval)_microseconds|runtime_operations(_latency_microseconds|_errors)?|eviction_stats_age_microseconds|device_plugin_(registration_count|alloc_latency_microseconds)|network_plugin_operations_latency_microseconds)|scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_(predicate|priority|preemption)_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)|apiserver_(request_(count|latencies(_summary)?)|dropped_requests|storage_(data_key_generation|transformation_(failures_total|latencies_microseconds))|proxy_tunnel_sync_latency_secs|longrunning_gauge|registered_watchers)|kubelet_docker_(operations(_latency_microseconds|_errors|_timeout)?)|reflector_(items_per_(list|watch)|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)|etcd_(helper_(cache_(hit|miss)_count|cache_entry_count|object_counts)|request_(cache_(get|add)_latencies_summary|latencies_summary)|debugging.*|disk.*|server.*)|transformation_(latencies_microseconds|failures_total)|(admission_quota_controller|APIServiceOpenAPIAggregationControllerQueue1|APIServiceRegistrationController|autoregister|AvailableConditionController|crd_(autoregistration_controller|Establishing|finalizer|naming_condition_controller|openapi_controller)|DiscoveryController|non_structural_schema_condition_controller|kubeproxy_sync_proxy_rules|rest_client_request_latency|storage_operation_(errors_total|status_count))(_.*)|apiserver_admission_(controller_admission|step_admission)_latencies_seconds_.*)") | ||
action = "drop" | ||
} | ||
|
||
// drop metrics whose name and le label match the drop_les regex | ||
rule { | ||
source_labels = [ | ||
"__name__", | ||
"le", | ||
] | ||
regex = coalesce(argument.drop_les.value, "apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)") | ||
action = "drop" | ||
} | ||
|
||
// keep only metrics that match the keep_metrics regex | ||
rule { | ||
source_labels = ["__name__"] | ||
regex = coalesce(argument.keep_metrics.value, "(.+)") | ||
action = "keep" | ||
} | ||
} | ||
} |
Oops, something went wrong.