Skip to content

Commit

Permalink
Kubernetes: Add CPU and memory capacity reporting (DataDog#2935)
Browse files Browse the repository at this point in the history
* Add patch from DataDog#2908 to better handle units.

* Port change from DataDog/dd-agent DataDog#2766

* Move machine info URL management into kubeutil

* Update kubernetes tests for capacity data.
  • Loading branch information
markine authored and efx-jjohnson committed Oct 24, 2016
1 parent 5a2be69 commit 88fd8ca
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 4 deletions.
12 changes: 12 additions & 0 deletions checks.d/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,18 @@ def parse_quantity(s):
self.log.debug("Container object for {}: {}".format(c_name, container))

self._update_pods_metrics(instance, pods_list)
self._update_node(instance)

def _update_node(self, instance):
    """
    Publish the node's CPU and memory capacity as gauges.

    Machine info is read through self.kubeutil.retrieve_machine_info().
    A missing 'num_cores' or 'memory_capacity' key is reported as 0.
    Both gauges carry the instance's 'tags' (an empty list when unset).
    """
    info = self.kubeutil.retrieve_machine_info()
    capacities = (
        ('.cpu.capacity', info.get('num_cores', 0)),
        ('.memory.capacity', info.get('memory_capacity', 0)),
    )

    node_tags = instance.get('tags', [])
    for metric_suffix, raw_value in capacities:
        # NOTE(review): publish_gauge is handed `self` explicitly, so it is
        # presumably stored as an unbound function -- confirm where it is assigned.
        self.publish_gauge(self, NAMESPACE + metric_suffix, float(raw_value), node_tags)
    # TODO(markine): Report 'allocatable' which is capacity minus capacity
    # reserved for system/Kubernetes.

def _update_pods_metrics(self, instance, pods):
supported_kinds = [
Expand Down
1 change: 1 addition & 0 deletions tests/checks/fixtures/kubernetes/machine_info_1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"num_cores":2,"cpu_frequency_khz":2400080,"memory_capacity":8391204864,"machine_id":"abcabcabcabcabcabcabcabcabcabcab","system_uuid":"ABCABCAB-ABCA-ABCA-ABCA-ABCABCABCABC","boot_id":"abcabcab-abca-abca-abca-abcabcabcabc","filesystems":[{"device":"/dev/xvda1","capacity":338067161088,"type":"vfs","inodes":20971520}],"disk_map":{"202:0":{"name":"xvda","major":202,"minor":0,"size":343597383680,"scheduler":"cfq"}},"network_devices":[{"name":"eth0","mac_address":"00:11:22:33:44:55","speed":10000,"mtu":9001}],"topology":[{"node_id":0,"memory":8391204864,"cores":[{"core_id":0,"thread_ids":[0,1],"caches":[{"size":32768,"type":"Data","level":1},{"size":32768,"type":"Instruction","level":1},{"size":262144,"type":"Unified","level":2}]}],"caches":[{"size":31457280,"type":"Unified","level":3}]}],"cloud_provider":"AWS","instance_type":"m4.large","instance_id":"i-abcabcab"}
30 changes: 26 additions & 4 deletions tests/checks/mock/test_kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
PODS = "pods"
LIM = "limits"
REQ = "requests"
CAP = "capacity"

METRICS = [
('kubernetes.memory.usage', MEM),
Expand All @@ -40,8 +41,10 @@
('kubernetes.pods.running', PODS),
('kubernetes.cpu.limits', LIM),
('kubernetes.cpu.requests', REQ),
('kubernetes.cpu.capacity', CAP),
('kubernetes.memory.limits', LIM),
('kubernetes.memory.requests', REQ),
('kubernetes.memory.capacity', CAP),
]


Expand All @@ -50,6 +53,7 @@ class TestKubernetes(AgentCheckTest):
CHECK_NAME = 'kubernetes'

@mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_machine_info')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics',
side_effect=lambda: json.loads(Fixtures.read_file("metrics_1.1.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list',
Expand All @@ -65,6 +69,7 @@ def test_fail_1_1(self, *args):
self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL, tags=None, count=1)

@mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_machine_info')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics',
side_effect=lambda: json.loads(Fixtures.read_file("metrics_1.1.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list',
Expand Down Expand Up @@ -112,7 +117,7 @@ def test_metrics_1_1(self, *args):
(['kube_replication_controller:redis-slave'], [PODS]),
(['kube_replication_controller:frontend'], [PODS]),
(['kube_replication_controller:heapster-v11'], [PODS]),
([], [LIM, REQ]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor
([], [LIM, REQ, CAP]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor
]
for m, _type in METRICS:
for tags, types in expected_tags:
Expand All @@ -122,6 +127,7 @@ def test_metrics_1_1(self, *args):
self.coverage_report()

@mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_machine_info')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics',
side_effect=lambda: json.loads(Fixtures.read_file("metrics_1.1.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list',
Expand Down Expand Up @@ -159,7 +165,7 @@ def test_historate_1_1(self, *args):
(['kube_replication_controller:redis-slave'], [PODS]),
(['kube_replication_controller:frontend'], [PODS]),
(['kube_replication_controller:heapster-v11'], [PODS]),
([], [LIM, REQ]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor
([], [LIM, REQ, CAP]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor
]

for m, _type in METRICS:
Expand All @@ -171,6 +177,8 @@ def test_historate_1_1(self, *args):
self.coverage_report()

@mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_machine_info',
side_effect=lambda: json.loads(Fixtures.read_file("machine_info_1.2.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics',
side_effect=lambda: json.loads(Fixtures.read_file("metrics_1.2.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list',
Expand All @@ -186,6 +194,8 @@ def test_fail_1_2(self, *args):
self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL)

@mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_machine_info',
side_effect=lambda: json.loads(Fixtures.read_file("machine_info_1.2.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics',
side_effect=lambda: json.loads(Fixtures.read_file("metrics_1.2.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list',
Expand Down Expand Up @@ -216,17 +226,23 @@ def test_metrics_1_2(self, *args):
'pod_name:default/dd-agent-1rxlh', 'kube_namespace:default', 'kube_app:dd-agent', 'kube_foo:bar',
'kube_bar:baz', 'kube_replication_controller:dd-agent'], [LIM, REQ, MEM, CPU, NET, DISK, DISK_USAGE]),
(['kube_replication_controller:dd-agent'], [PODS]),
([], [LIM, REQ]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor
([], [LIM, REQ, CAP]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor
]

for m, _type in METRICS:
for tags, types in expected_tags:
if _type in types:
self.assertMetric(m, count=1, tags=tags)

# Verify exact capacity values read from machine_info_1.2.json fixture.
self.assertMetric('kubernetes.cpu.capacity', value=2)
self.assertMetric('kubernetes.memory.capacity', value=8391204864)

self.coverage_report()

@mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_machine_info',
side_effect=lambda: json.loads(Fixtures.read_file("machine_info_1.2.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics',
side_effect=lambda: json.loads(Fixtures.read_file("metrics_1.2.json")))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list',
Expand Down Expand Up @@ -257,7 +273,7 @@ def test_historate_1_2(self, *args):
'kube_replication_controller:dd-agent'], [MEM, CPU, NET, DISK, NET_ERRORS, DISK_USAGE, LIM, REQ]),
(['pod_name:no_pod'], [MEM, CPU, FS, NET, NET_ERRORS, DISK]),
(['kube_replication_controller:dd-agent'], [PODS]),
([], [LIM, REQ]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor
([], [LIM, REQ, CAP]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor
]

for m, _type in METRICS:
Expand All @@ -274,6 +290,7 @@ def test_historate_1_2(self, *args):
side_effect=lambda x, y: x)
@mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth',
side_effect=lambda x,y: json.loads(Fixtures.read_file("events.json", string_escape=False)))
@mock.patch('utils.kubeutil.KubeUtil.retrieve_machine_info')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics')
@mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list',
side_effect=lambda: json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False)))
Expand Down Expand Up @@ -351,6 +368,11 @@ def test_retrieve_pods_list(self, retrieve_json):
self.kubeutil.retrieve_pods_list()
retrieve_json.assert_called_once_with(self.kubeutil.pods_list_url)

@mock.patch('utils.kubeutil.retrieve_json')
def test_retrieve_machine_info(self, retrieve_json):
    """
    retrieve_machine_info() must call retrieve_json exactly once, with machine_info_url.
    """
    kube = self.kubeutil
    kube.retrieve_machine_info()
    retrieve_json.assert_called_once_with(kube.machine_info_url)

@mock.patch('utils.kubeutil.retrieve_json')
def test_retrieve_metrics(self, retrieve_json):
self.kubeutil.retrieve_metrics()
Expand Down
8 changes: 8 additions & 0 deletions utils/kubeutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class KubeUtil:
__metaclass__ = Singleton

DEFAULT_METHOD = 'http'
MACHINE_INFO_PATH = '/api/v1.3/machine/'
METRICS_PATH = '/api/v1.3/subcontainers/'
PODS_LIST_PATH = '/pods/'
DEFAULT_CADVISOR_PORT = 4194
Expand Down Expand Up @@ -66,6 +67,7 @@ def __init__(self, instance=None):
self.kubernetes_api_url = 'https://%s/api/v1' % (os.environ.get('KUBERNETES_SERVICE_HOST') or self.DEFAULT_MASTER_NAME)

self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH)
self.machine_info_url = urljoin(self.cadvisor_url, KubeUtil.MACHINE_INFO_PATH)
self.pods_list_url = urljoin(self.kubelet_api_url, KubeUtil.PODS_LIST_PATH)
self.kube_health_url = urljoin(self.kubelet_api_url, 'healthz')

Expand Down Expand Up @@ -124,6 +126,12 @@ def retrieve_pods_list(self):
"""
return retrieve_json(self.pods_list_url)

def retrieve_machine_info(self):
    """
    Fetch machine info from Cadvisor.

    Returns whatever retrieve_json yields for self.machine_info_url.
    """
    machine_info = retrieve_json(self.machine_info_url)
    return machine_info

def retrieve_metrics(self):
"""
Retrieve metrics from Cadvisor.
Expand Down

0 comments on commit 88fd8ca

Please sign in to comment.