Skip to content

Commit

Permalink
Add an option to enable KSM experimental metrics and add some new met…
Browse files Browse the repository at this point in the history
…rics from KSM 1.9
  • Loading branch information
vboulineau committed Jan 13, 2020
1 parent ade080d commit 41303a6
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections import Counter, defaultdict
from copy import deepcopy

from six import iteritems
from six import iteritems, iterkeys

from datadog_checks.checks.openmetrics import OpenMetricsBaseCheck
from datadog_checks.config import is_affirmative
Expand Down Expand Up @@ -228,6 +228,7 @@ def _create_kubernetes_state_prometheus_instance(self, instance):
'kube_pod_container_resource_limits_nvidia_gpu_devices': 'container.gpu.limit',
'kube_pod_status_ready': 'pod.ready',
'kube_pod_status_scheduled': 'pod.scheduled',
'kube_pod_status_unschedulable': 'pod.unschedulable',
'kube_poddisruptionbudget_status_current_healthy': 'pdb.pods_healthy',
'kube_poddisruptionbudget_status_desired_healthy': 'pdb.pods_desired',
'kube_poddisruptionbudget_status_pod_disruptions_allowed': 'pdb.disruptions_allowed',
Expand Down Expand Up @@ -342,6 +343,23 @@ def _create_kubernetes_state_prometheus_instance(self, instance):
}
)

# kube-state-metrics metrics treated as experimental: they are mapped to
# Datadog metric names only when the instance sets `experimental_metrics`.
experimental_metrics_mapping = {
    'kube_hpa_spec_target_metric': 'hpa.spec_target_metric',
    'kube_hpa_status_current_metrics_average_utilization': 'hpa.current_metrics_average_utilization',
    'kube_hpa_status_current_metrics_average_value': 'hpa.current_metrics_average_value',
    'kube_verticalpodautoscaler_spec_resourcepolicy_container_policies_minallowed': (
        'vpa.spec_container_minallowed'
    ),
    'kube_verticalpodautoscaler_spec_resourcepolicy_container_policies_maxallowed': (
        'vpa.spec_container_maxallowed'
    ),
}
experimental_metrics = is_affirmative(ksm_instance.get('experimental_metrics', False))
if experimental_metrics:
    # The whole mapping is added as a single entry of the `metrics` list.
    ksm_instance['metrics'].append(experimental_metrics_mapping)
else:
    # Add each metric name individually. `append` would push one iterator
    # object into the list instead of the names themselves.
    # NOTE(review): assumes `ignore_metrics` is a flat list of metric names - confirm.
    ksm_instance['ignore_metrics'].extend(iterkeys(experimental_metrics_mapping))

ksm_instance['prometheus_url'] = endpoint
ksm_instance['label_joins'].update(extra_labels)
if hostname_override:
Expand Down
1 change: 1 addition & 0 deletions kubernetes_state/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ kubernetes_state.pdb.pods_healthy,gauge,,,,Current number of healthy pods,1,kube
kubernetes_state.pdb.pods_total,gauge,,,,Total number of pods counted by this disruption budget,0,kubernetes,k8s_state.pdb.pods_total
kubernetes_state.pod.ready,gauge,,,,"In association with the `condition` tag, whether the pod is ready to serve requests, e.g. `condition:true` keeps the pods that are in a ready state",1,kubernetes,k8s_state.pod.ready
kubernetes_state.pod.scheduled,gauge,,,,Reports the status of the scheduling process for the pod with its tags,0,kubernetes,k8s_state.pod.scheduled
kubernetes_state.pod.unschedulable,gauge,,,,Reports pods that the Kubernetes scheduler cannot schedule on any node,0,kubernetes,k8s_state.pod.unschedulable
kubernetes_state.pod.status_phase,gauge,,,,"To sum by `phase` to get number of pods in a given phase, and `namespace` to break this down by namespace",0,kubernetes,k8s_state.pod.status_phase
kubernetes_state.replicaset.replicas,gauge,,,,The number of replicas per ReplicaSet,0,kubernetes,k8s_state.rs.replicas
kubernetes_state.replicaset.fully_labeled_replicas,gauge,,,,The number of fully labeled replicas per ReplicaSet,0,kubernetes,k8s_state.rs.fully_labeled
Expand Down
6 changes: 6 additions & 0 deletions kubernetes_state/tests/fixtures/prometheus.txt
Original file line number Diff line number Diff line change
Expand Up @@ -894,3 +894,9 @@ kube_verticalpodautoscaler_status_recommendation_containerrecommendations_target
# HELP kube_verticalpodautoscaler_status_recommendation_containerrecommendations_uncappedtarget Target resources the VPA recommends for the container ignoring bounds.
# TYPE kube_verticalpodautoscaler_status_recommendation_containerrecommendations_uncappedtarget gauge
kube_verticalpodautoscaler_status_recommendation_containerrecommendations_uncappedtarget{container="container1",namespace="default",resource="cpu",target_api_version="extensions/v1beta1",target_kind="Deployment",target_name="deployment1",unit="core",verticalpodautoscaler="vpa1"} 6
# HELP kube_hpa_status_current_metrics_average_value Average metric value observed by the autoscaler.
# TYPE kube_hpa_status_current_metrics_average_value gauge
kube_hpa_status_current_metrics_average_value{namespace="default",hpa="dummy-nginx-ingress-controller"} 0.002
# HELP kube_hpa_status_current_metrics_average_utilization Average metric utilization observed by the autoscaler.
# TYPE kube_hpa_status_current_metrics_average_utilization gauge
kube_hpa_status_current_metrics_average_utilization{namespace="default",hpa="dummy-nginx-ingress-controller"} 0
51 changes: 36 additions & 15 deletions kubernetes_state/tests/test_kubernetes_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,39 @@ def test_job_counts(aggregator, instance):
)


def test_keep_ksm_labels_desactivated(aggregator, instance):
    """Run the check with `keep_ksm_labels` disabled and verify `pod.status_phase`.

    The metric is expected with exactly the listed tags and a value of 3.
    """
    instance['keep_ksm_labels'] = False

    ksm_check = KubernetesState(CHECK_NAME, {}, {}, [instance])
    mocked_payload = MockResponse(mock_from_file("prometheus.txt"), 'text/plain')
    ksm_check.poll = mock.MagicMock(return_value=mocked_payload)

    # Three consecutive runs against the same mocked payload.
    for _ in range(3):
        ksm_check.check(instance)

    expected_tags = ['kube_namespace:default', 'pod_phase:running', 'optional:tag1']
    aggregator.assert_metric(NAMESPACE + '.pod.status_phase', tags=expected_tags, value=3)


def test_experimental_labels(aggregator, instance):
    """Verify an experimental metric is only reported when `experimental_metrics` is set.

    The check is run against the mocked payload first with the option unset,
    then again after setting `instance['experimental_metrics'] = True`.
    """

    def run_check_twice():
        # Build a fresh check on the mocked payload and execute it two times.
        ksm_check = KubernetesState(CHECK_NAME, {}, {}, [instance])
        mocked_payload = MockResponse(mock_from_file("prometheus.txt"), 'text/plain')
        ksm_check.poll = mock.MagicMock(return_value=mocked_payload)
        for _ in range(2):
            ksm_check.check(instance)

    metric_name = NAMESPACE + '.hpa.current_metrics_average_value'

    # Option unset: nothing is expected for the experimental metric.
    run_check_twice()
    assert aggregator.metrics(metric_name) == []

    # Option set: the metric is expected with these tags and value.
    instance['experimental_metrics'] = True
    run_check_twice()
    aggregator.assert_metric(
        metric_name,
        tags=['hpa:dummy-nginx-ingress-controller', 'kube_namespace:default', 'namespace:default', 'optional:tag1'],
        value=0.002,
    )


def test_telemetry(aggregator, instance):
instance['telemetry'] = True

Expand All @@ -558,9 +591,9 @@ def test_telemetry(aggregator, instance):

for _ in range(2):
check.check(instance)
aggregator.assert_metric(NAMESPACE + '.telemetry.payload.size', tags=['optional:tag1'], value=90397.0)
aggregator.assert_metric(NAMESPACE + '.telemetry.metrics.processed.count', tags=['optional:tag1'], value=912.0)
aggregator.assert_metric(NAMESPACE + '.telemetry.metrics.input.count', tags=['optional:tag1'], value=1286.0)
aggregator.assert_metric(NAMESPACE + '.telemetry.payload.size', tags=['optional:tag1'], value=90958.0)
aggregator.assert_metric(NAMESPACE + '.telemetry.metrics.processed.count', tags=['optional:tag1'], value=916.0)
aggregator.assert_metric(NAMESPACE + '.telemetry.metrics.input.count', tags=['optional:tag1'], value=1290.0)
aggregator.assert_metric(NAMESPACE + '.telemetry.metrics.blacklist.count', tags=['optional:tag1'], value=24.0)
aggregator.assert_metric(NAMESPACE + '.telemetry.metrics.ignored.count', tags=['optional:tag1'], value=374.0)
aggregator.assert_metric(
Expand All @@ -573,15 +606,3 @@ def test_telemetry(aggregator, instance):
tags=['resource_name:hpa', 'resource_namespace:ns1', 'optional:tag1'],
value=8.0,
)


def test_keep_ksm_labels_desactivated(aggregator, instance):
    """Run the check with `keep_ksm_labels` disabled and verify `pod.status_phase`.

    The metric is expected with exactly the listed tags and a value of 3.
    """
    instance['keep_ksm_labels'] = False

    ksm_check = KubernetesState(CHECK_NAME, {}, {}, [instance])
    mocked_payload = MockResponse(mock_from_file("prometheus.txt"), 'text/plain')
    ksm_check.poll = mock.MagicMock(return_value=mocked_payload)

    # Three consecutive runs against the same mocked payload.
    for _ in range(3):
        ksm_check.check(instance)

    expected_tags = ['kube_namespace:default', 'pod_phase:running', 'optional:tag1']
    aggregator.assert_metric(NAMESPACE + '.pod.status_phase', tags=expected_tags, value=3)

0 comments on commit 41303a6

Please sign in to comment.