Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker check adjustment #964

Merged
merged 4 commits into from
Jun 2, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 21 additions & 12 deletions checks.d/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import os
import re
import time
from urlparse import urlsplit, urljoin
from util import json, headers
from urlparse import urlsplit
from util import json
try:
from collections import defaultdict
except ImportError:
Expand Down Expand Up @@ -55,8 +55,8 @@
"cgroup": "cpuacct",
"file": "%s/%s/cpuacct.stat",
"metrics": {
"user": ("docker.cpu.user", "gauge"),
"system": ("docker.cpu.system", "gauge"),
"user": ("docker.cpu.user", "rate"),
"system": ("docker.cpu.system", "rate"),
},
},
]
Expand Down Expand Up @@ -109,18 +109,18 @@ def unix_open(self, req):
class Docker(AgentCheck):
def __init__(self, *args, **kwargs):
super(Docker, self).__init__(*args, **kwargs)
self._mounpoints = {}
self.cgroup_path_prefix = None # Depending on the version
self._mountpoints = {}
self.cgroup_path_prefix = None # Depending on the version
for metric in LXC_METRICS:
self._mounpoints[metric["cgroup"]] = self._find_cgroup(metric["cgroup"])
self._mountpoints[metric["cgroup"]] = self._find_cgroup(metric["cgroup"])
self._path_prefix = None
self._last_event_collection_ts = defaultdict(lambda: None)

@property
def path_prefix(self):
if self._path_prefix is None:
metric = LXC_METRICS[0]
mountpoint = self._mounpoints[metric["cgroup"]]
mountpoint = self._mountpoints[metric["cgroup"]]
stat_file_lxc = os.path.join(mountpoint, "lxc")
stat_file_docker = os.path.join(mountpoint, "docker")

Expand All @@ -136,9 +136,16 @@ def check(self, instance):
urllib2.install_opener(urllib2.build_opener(UnixSocketHandler())) # We need to reinstall the opener every time as it gets uninstalled
tags = instance.get("tags") or []

self._process_events(self._get_events(instance))
try:
self._process_events(self._get_events(instance))
except socket.timeout:
self.warning('Timeout during socket connection. Events will be missing.')

try:
containers = self._get_containers(instance)
except socket.timeout:
raise Exception('Cannot get containers list: timeout during socket connection. Try to refine the containers to collect by editing the configuration file.')
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you know it's a timeout ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah my last commit was just incomplete, this should catch only socket.timeout. Fixing it.


containers = self._get_containers(instance)
if not containers:
self.gauge("docker.containers.running", 0)
raise Exception("No containers are running.")
Expand Down Expand Up @@ -173,10 +180,12 @@ def check(self, instance):
if key in container:
getattr(self, metric_type)(dd_key, int(container[key]), tags=container_tags)
for metric in LXC_METRICS:
mountpoint = self._mounpoints[metric["cgroup"]]
mountpoint = self._mountpoints[metric["cgroup"]]
stat_file = os.path.join(mountpoint, metric["file"] % (self.path_prefix, container["Id"]))
stats = self._parse_cgroup_file(stat_file)
for key, (dd_key, metric_type) in metric["metrics"].items():
if key.startswith("total_") and not instance.get("collect_total"):
continue
if key in stats:
getattr(self, metric_type)(dd_key, int(stats[key]), tags=container_tags)

Expand All @@ -187,7 +196,7 @@ def _process_events(self, events):
'timestamp': ev['time'],
'host': self.hostname,
'event_type': EVENT_TYPE,
'msg_title': "%s %s on %s" % (ev['from'], ev['status'], self.hostname),
'msg_title': "%s %s on %s" % (ev['from'], ev['status'], self.hostname),
'source_type_name': EVENT_TYPE,
'event_object': ev['from'],
})
Expand Down
12 changes: 11 additions & 1 deletion conf.d/docker.yaml.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Warning
# Warning
# The user running the Datadog Agent (usually "dd-agent") must be part of the "docker" group

init_config:
Expand Down Expand Up @@ -30,3 +30,13 @@ instances:
# exclude:
# - "image:ubuntu"
# - "image:debian"

# Sub-cgroups metrics
#
# If you want to include sub-cgroups metrics, turn the collect_total option on.
# It is useless for a normal usage, as total_ metrics will be the same as normal ones.
#
# Example:
# instances:
# - url: "unix://var/run/docker.sock"
# collect_total: true