diff --git a/agent.py b/agent.py
index 499a3ccfa0..f985120449 100755
--- a/agent.py
+++ b/agent.py
@@ -91,7 +91,7 @@ def __init__(self, pidfile, autorestart, start_event=True, in_developer_mode=Fal
         self._checksd = []
         self.collector_profile_interval = DEFAULT_COLLECTOR_PROFILE_INTERVAL
         self.check_frequency = None
-        # this flag can be set to True, False, or a list of checks (for partial reload)
+        # this flag can be set to True, False, or a set of checks (for partial reload)
         self.reload_configs_flag = False
         self.sd_backend = None
         self.supervisor_proxy = None
@@ -161,7 +161,7 @@ def reload_configs(self, checks_to_reload=set()):
             log.info("No checksd configs found")
 
     def refresh_specific_checks(self, hostname, checksd, checks):
-        """take a list of checks and for each of them:
+        """take a set of checks and for each of them:
            - remove it from the init_failed_checks if it was there
            - load a fresh config for it
            - replace its old config with the new one in initialized_checks if there was one
@@ -245,7 +245,7 @@ def run(self, config=None):
         if self._agentConfig.get('service_discovery'):
             self.sd_backend = get_sd_backend(self._agentConfig)
 
-        if _is_affirmative(self._agentConfig.get('sd_jmx_enable')):
+        if _is_affirmative(self._agentConfig.get('sd_jmx_enable', False)):
             pipe_path = get_jmx_pipe_path()
             if Platform.is_windows():
                 pipe_name = pipe_path.format(pipename=SD_PIPE_NAME)
diff --git a/checks.d/docker_daemon.py b/checks.d/docker_daemon.py
index 5f08ff890d..7fcf5029bb 100644
--- a/checks.d/docker_daemon.py
+++ b/checks.d/docker_daemon.py
@@ -855,7 +855,7 @@ def _parse_cgroup_file(self, stat_file):
         except IOError:
             # It is possible that the container got stopped between the API call and now.
             # Some files can also be missing (like cpu.stat) and that's fine.
-            self.log.info("Can't open %s. Some metrics for this container may be missing." % stat_file)
+            self.log.debug("Can't open %s. Its metrics will be missing." % stat_file)
 
     def _parse_blkio_metrics(self, stats):
         """Parse the blkio metrics."""
diff --git a/config.py b/config.py
index 7cb0ed2ba5..19b844462c 100644
--- a/config.py
+++ b/config.py
@@ -1108,7 +1108,7 @@ def load_check(agentConfig, hostname, checkname):
 
             # try to load the check and return the result
             load_success, load_failure = load_check_from_places(check_config, check_name, checks_places, agentConfig)
-            return load_success.values()[0] or load_failure
+            return load_success.values()[0] if load_success else load_failure
 
     return None
diff --git a/tests/core/test_service_discovery.py b/tests/core/test_service_discovery.py
index 05cc8c604f..6a1c6e2d4d 100644
--- a/tests/core/test_service_discovery.py
+++ b/tests/core/test_service_discovery.py
@@ -2,6 +2,7 @@
 import copy
 import mock
 import unittest
+from collections import defaultdict
 
 # 3p
 from nose.plugins.attrib import attr
@@ -11,7 +12,8 @@
 from utils.service_discovery.config_stores import get_config_store
 from utils.service_discovery.consul_config_store import ConsulStore
 from utils.service_discovery.etcd_config_store import EtcdStore
-from utils.service_discovery.abstract_config_store import AbstractConfigStore, CONFIG_FROM_KUBE
+from utils.service_discovery.abstract_config_store import AbstractConfigStore, \
+    _TemplateCache, CONFIG_FROM_KUBE, CONFIG_FROM_TEMPLATE, CONFIG_FROM_AUTOCONF
 from utils.service_discovery.sd_backend import get_sd_backend
 from utils.service_discovery.sd_docker_backend import SDDockerBackend, _SDDockerBackendConfigFetchState
@@ -66,11 +68,11 @@ def client_read(path, **kwargs):
     if 'all' in kwargs:
         return {}
     else:
-        return TestServiceDiscovery.mock_tpls.get(image)[0][config_parts.index(config_part)]
+        return TestServiceDiscovery.mock_raw_templates.get(image)[0][config_parts.index(config_part)]
 
 
 def issue_read(identifier):
-    return TestServiceDiscovery.mock_tpls.get(identifier)
+    return TestServiceDiscovery.mock_raw_templates.get(identifier)
 
 
 @attr('unix')
 class TestServiceDiscovery(unittest.TestCase):
@@ -122,7 +124,7 @@ class TestServiceDiscovery(unittest.TestCase):
     }
 
     # raw templates coming straight from the config store
-    mock_tpls = {
+    mock_raw_templates = {
         # image_name: ('[check_name]', '[init_tpl]', '[instance_tpl]', expected_python_tpl_list)
         'image_0': (
             ('["check_0"]', '[{}]', '[{"host": "%%host%%"}]'),
@@ -509,12 +511,12 @@ def test_fill_tpl(self, *args):
     def test_get_auto_config(self):
         """Test _get_auto_config"""
         expected_tpl = {
-            'redis': ('redisdb', None, {"host": "%%host%%", "port": "%%port%%"}),
-            'consul': ('consul', None, {
-                "url": "http://%%host%%:%%port%%", "catalog_checks": True, "new_leader_checks": True
-            }),
-            'redis:v1': ('redisdb', None, {"host": "%%host%%", "port": "%%port%%"}),
-            'foobar': None
+            'redis': [('redisdb', None, {"host": "%%host%%", "port": "%%port%%"})],
+            'consul': [('consul', None, {
+                "url": "http://%%host%%:%%port%%", "catalog_checks": True, "new_leader_checks": True
+            })],
+            'redis:v1': [('redisdb', None, {"host": "%%host%%", "port": "%%port%%"})],
+            'foobar': []
         }
 
         config_store = get_config_store(self.auto_conf_agentConfig)
@@ -529,10 +531,10 @@ def test_get_check_tpls(self, mock_client_read):
         invalid_config = ['bad_image_0', 'bad_image_1']
         config_store = get_config_store(self.auto_conf_agentConfig)
         for image in valid_config:
-            tpl = self.mock_tpls.get(image)[1]
+            tpl = self.mock_raw_templates.get(image)[1]
             self.assertEquals(tpl, config_store.get_check_tpls(image))
         for image in invalid_config:
-            tpl = self.mock_tpls.get(image)[1]
+            tpl = self.mock_raw_templates.get(image)[1]
             self.assertEquals(tpl, config_store.get_check_tpls(image))
 
     @mock.patch.object(AbstractConfigStore, 'client_read', side_effect=client_read)
@@ -542,7 +544,7 @@ def test_get_check_tpls_kube(self, mock_client_read):
         invalid_config = ['bad_image_0']
         config_store = get_config_store(self.auto_conf_agentConfig)
         for image in valid_config + invalid_config:
-            tpl = self.mock_tpls.get(image)[1]
+            tpl = self.mock_raw_templates.get(image)[1]
             tpl = [(CONFIG_FROM_KUBE, t[1]) for t in tpl]
             if tpl:
                 self.assertNotEquals(
@@ -558,7 +560,7 @@ def test_get_check_tpls_kube(self, mock_client_read):
                         ['service-discovery.datadoghq.com/foo.check_names',
                          'service-discovery.datadoghq.com/foo.init_configs',
                          'service-discovery.datadoghq.com/foo.instances'],
-                        self.mock_tpls[image][0]))))
+                        self.mock_raw_templates[image][0]))))
 
     def test_get_config_id(self):
         """Test get_config_id"""
@@ -570,8 +572,8 @@ def test_get_config_id(self):
                               expected_ident)
             clear_singletons(self.auto_conf_agentConfig)
 
-    @mock.patch.object(AbstractConfigStore, '_issue_read', side_effect=issue_read)
-    def test_read_config_from_store(self, issue_read):
+    @mock.patch.object(_TemplateCache, '_issue_read', side_effect=issue_read)
+    def test_read_config_from_store(self, args):
         """Test read_config_from_store"""
         valid_idents = [('nginx', 'nginx'), ('nginx:latest', 'nginx:latest'),
                         ('custom-nginx', 'custom-nginx'), ('custom-nginx:latest', 'custom-nginx'),
@@ -582,7 +584,13 @@ def test_read_config_from_store(self, issue_read):
         for ident, expected_key in valid_idents:
             tpl = config_store.read_config_from_store(ident)
             # source is added after reading from the store
-            self.assertEquals(tpl, ('template',) + self.mock_tpls.get(expected_key))
+            self.assertEquals(
+                tpl,
+                {
+                    CONFIG_FROM_AUTOCONF: None,
+                    CONFIG_FROM_TEMPLATE: self.mock_raw_templates.get(expected_key)
+                }
+            )
 
         for ident in invalid_idents:
             self.assertEquals(config_store.read_config_from_store(ident), [])
@@ -600,3 +608,72 @@ def test_read_jmx_config_from_store(self, *args):
             for check in self.jmx_sd_configs:
                 key = '{}_0'.format(check)
                 self.assertEquals(jmx_configs[key], valid_configs[key])
+
+    # Template cache
+    @mock.patch('utils.service_discovery.abstract_config_store.get_auto_conf_images')
+    def test_populate_auto_conf(self, mock_get_auto_conf_images):
+        """test _populate_auto_conf"""
+        auto_tpls = {
+            'foo': [['check0', 'check1'], [{}, {}], [{}, {}]],
+            'bar': [['check2', 'check3', 'check3'], [{}, {}, {}], [{}, {'foo': 'bar'}, {'bar': 'foo'}]],
+        }
+        cache = _TemplateCache(issue_read, '')
+        cache.auto_conf_templates = defaultdict(lambda: [[]] * 3)
+        mock_get_auto_conf_images.return_value = auto_tpls
+
+        cache._populate_auto_conf()
+        self.assertEquals(cache.auto_conf_templates['foo'], auto_tpls['foo'])
+        self.assertEquals(cache.auto_conf_templates['bar'],
+                          [['check2', 'check3'], [{}, {}], [{}, {'foo': 'bar'}]])
+
+    @mock.patch.object(_TemplateCache, '_issue_read', return_value=None)
+    def test_get_templates(self, args):
+        """test get_templates"""
+        kv_tpls = {
+            'foo': [['check0', 'check1'], [{}, {}], [{}, {}]],
+            'bar': [['check2', 'check3'], [{}, {}], [{}, {}]],
+        }
+        auto_tpls = {
+            'foo': [['check3', 'check5'], [{}, {}], [{}, {}]],
+            'bar': [['check2', 'check6'], [{}, {}], [{}, {}]],
+            'foobar': [['check4'], [{}], [{}]],
+        }
+        cache = _TemplateCache(issue_read, '')
+        cache.kv_templates = kv_tpls
+        cache.auto_conf_templates = auto_tpls
+        self.assertEquals(cache.get_templates('foo'),
+                          {CONFIG_FROM_TEMPLATE: [['check0', 'check1'], [{}, {}], [{}, {}]],
+                           CONFIG_FROM_AUTOCONF: [['check3', 'check5'], [{}, {}], [{}, {}]]}
+                          )
+
+        self.assertEquals(cache.get_templates('bar'),
+                          # check3 must come from template not autoconf
+                          {CONFIG_FROM_TEMPLATE: [['check2', 'check3'], [{}, {}], [{}, {}]],
+                           CONFIG_FROM_AUTOCONF: [['check6'], [{}], [{}]]}
+                          )
+
+        self.assertEquals(cache.get_templates('foobar'),
+                          {CONFIG_FROM_TEMPLATE: None,
+                           CONFIG_FROM_AUTOCONF: [['check4'], [{}], [{}]]}
+                          )
+
+        self.assertEquals(cache.get_templates('baz'), None)
+
+    def test_get_check_names(self):
+        """Test get_check_names"""
+        kv_tpls = {
+            'foo': [['check0', 'check1'], [{}, {}], [{}, {}]],
+            'bar': [['check2', 'check3'], [{}, {}], [{}, {}]],
+        }
+        auto_tpls = {
+            'foo': [['check4', 'check5'], [{}, {}], [{}, {}]],
+            'bar': [['check2', 'check6'], [{}, {}], [{}, {}]],
+            'foobar': None,
+        }
+        cache = _TemplateCache(issue_read, '')
+        cache.kv_templates = kv_tpls
+        cache.auto_conf_templates = auto_tpls
+        self.assertEquals(cache.get_check_names('foo'), set(['check0', 'check1', 'check4', 'check5']))
+        self.assertEquals(cache.get_check_names('bar'), set(['check2', 'check3', 'check6']))
+        self.assertEquals(cache.get_check_names('foobar'), set())
+        self.assertEquals(cache.get_check_names('baz'), set())
diff --git a/utils/checkfiles.py b/utils/checkfiles.py
index 1468a6f242..75a363c4af 100644
--- a/utils/checkfiles.py
+++ b/utils/checkfiles.py
@@ -50,7 +50,7 @@ def get_check_class(agentConfig, check_name):
     return None
 
 
-def get_auto_conf(agentConfig, check_name):
+def get_auto_conf(check_name):
     """Return the yaml auto_config dict for a check name (None if it doesn't exist)."""
     from config import PathNotFound, get_auto_confd_path
 
@@ -75,11 +75,11 @@ def get_auto_conf(agentConfig, check_name):
     return auto_conf
 
 
-def get_auto_conf_images(agentConfig):
+def get_auto_conf_images(full_tpl=False):
     """Walk through the auto_config folder and build a dict of auto-configurable images."""
     from config import PathNotFound, get_auto_confd_path
 
     auto_conf_images = {
-        # image_name: check_name
+        # image_name: [check_names] or [[check_names], [init_tpls], [instance_tpls]]
     }
 
     try:
@@ -100,5 +100,18 @@ def get_auto_conf_images(agentConfig):
         # extract the supported image list
         images = auto_conf.get('docker_images', [])
         for image in images:
-            auto_conf_images[image] = check_name
+            if full_tpl:
+                init_tpl = auto_conf.get('init_config') or {}
+                instance_tpl = auto_conf.get('instances', [])
+                if image not in auto_conf_images:
+                    auto_conf_images[image] = [[check_name], [init_tpl], [instance_tpl]]
+                else:
+                    for idx, item in enumerate([check_name, init_tpl, instance_tpl]):
+                        auto_conf_images[image][idx].append(item)
+            else:
+                if image in auto_conf_images:
+                    auto_conf_images[image].append(check_name)
+                else:
+                    auto_conf_images[image] = [check_name]
+
     return auto_conf_images
diff --git a/utils/configcheck.py b/utils/configcheck.py
index d8ebef7b22..a42ae93e61 100644
--- a/utils/configcheck.py
+++ b/utils/configcheck.py
@@ -94,10 +94,10 @@ def print_templates(agentConfig):
     except Exception as ex:
         print("Failed to extract configuration templates from the backend:\n%s" % str(ex))
 
-    for img, tpl in templates.iteritems():
+    for ident, tpl in templates.iteritems():
         print(
-            "- Image %s:\n\tcheck names: %s\n\tinit_configs: %s\n\tinstances: %s" % (
-                img,
+            "- Identifier %s:\n\tcheck names: %s\n\tinit_configs: %s\n\tinstances: %s" % (
+                ident,
                 tpl.get('check_names'),
                 tpl.get('init_configs'),
                 tpl.get('instances'),
diff --git a/utils/service_discovery/abstract_config_store.py b/utils/service_discovery/abstract_config_store.py
index 6a351ba4fe..50975c8f69 100644
--- a/utils/service_discovery/abstract_config_store.py
+++ b/utils/service_discovery/abstract_config_store.py
@@ -3,9 +3,10 @@
 # Licensed under Simplified BSD License (see LICENSE)
 
 # std
-from collections import defaultdict
 import logging
 import simplejson as json
+from collections import defaultdict
+from copy import deepcopy
 from os import path
 
 # 3p
@@ -34,6 +35,141 @@ class KeyNotFound(Exception):
     pass
 
 
+class _TemplateCache(object):
+    """
+    Store templates coming from the configuration store and files from auto_conf.
+
+    Templates from different sources are stored in separate attributes, and
+    reads will look up identifiers in both of them in the right order.
+
+    read_func is expected to return raw templates coming from the config store.
+
+    The cache must be invalidated when an update is made to templates.
+    """
+
+    def __init__(self, read_func, root_template_path):
+        self.read_func = read_func
+        self.root_path = root_template_path
+        self.kv_templates = defaultdict(lambda: [[]] * 3)
+        self.auto_conf_templates = defaultdict(lambda: [[]] * 3)
+        self._populate_auto_conf()
+
+    def invalidate(self):
+        """Clear out the KV cache"""
+        log.debug("Clearing the cache for configuration templates.")
+        self.kv_templates = defaultdict(lambda: [[]] * 3)
+
+    def _populate_auto_conf(self):
+        """Retrieve auto_conf templates"""
+        raw_templates = get_auto_conf_images(full_tpl=True)
+        for image, tpls in raw_templates.iteritems():
+            for check_name, init_tpl, instance_tpl in zip(*tpls):
+                if image in self.auto_conf_templates:
+                    if check_name in self.auto_conf_templates[image][0]:
+                        log.warning("Conflicting templates in auto_conf for image %s and check %s. "
+                                    "Please check your template files." % (image, check_name))
+                        continue
+                    self.auto_conf_templates[image][0].append(check_name)
+                    self.auto_conf_templates[image][1].append(init_tpl)
+                    self.auto_conf_templates[image][2].append(instance_tpl)
+                else:
+                    self.auto_conf_templates[image][0] = [check_name]
+                    self.auto_conf_templates[image][1] = [init_tpl or {}]
+                    # no list wrapping because auto_conf files already have a list of instances
+                    self.auto_conf_templates[image][2] = instance_tpl or [{}]
+
+    def _issue_read(self, identifier):
+        """Perform a read against the KV store"""
+
+        # templates from the config store
+        try:
+            check_names = json.loads(
+                self.read_func(path.join(self.root_path, identifier, CHECK_NAMES).lstrip('/')))
+            init_config_tpls = json.loads(
+                self.read_func(path.join(self.root_path, identifier, INIT_CONFIGS).lstrip('/')))
+            instance_tpls = json.loads(
+                self.read_func(path.join(self.root_path, identifier, INSTANCES).lstrip('/')))
+            return [check_names, init_config_tpls, instance_tpls]
+        except KeyNotFound:
+            return None
+
+    def get_templates(self, identifier):
+        """
+        Return a dict of templates coming from the config store and
+        the auto_conf folder and their source for a given identifier.
+        Templates from kv_templates take precedence.
+        """
+        templates = {
+            # source: [[check_names], [init_configs], [instances]]
+            CONFIG_FROM_TEMPLATE: None,
+            CONFIG_FROM_AUTOCONF: None
+        }
+
+        # cache miss
+        if identifier not in self.kv_templates:
+            try:
+                tpls = self._issue_read(identifier)
+            except NotImplementedError:
+                # expected when get_check_names is called in auto-conf mode
+                tpls = None
+            except Exception:
+                tpls = None
+                log.exception('Failed to retrieve a template for %s.' % identifier)
+            # create a key in the cache even if _issue_read doesn't return a tpl
+            # so that subsequent reads don't trigger issue_read
+            self.kv_templates[identifier] = tpls
+
+        templates[CONFIG_FROM_TEMPLATE] = deepcopy(self.kv_templates[identifier])
+
+        if identifier in self.auto_conf_templates:
+            auto_conf_tpls = [[], [], []]
+            unfiltered_tpls = self.auto_conf_templates[identifier]
+
+            # add auto_conf templates only if the same check is
+            # not already configured by a user-provided template.
+            for idx, check_name in enumerate(unfiltered_tpls[0]):
+                if not templates[CONFIG_FROM_TEMPLATE] or \
+                        check_name not in templates[CONFIG_FROM_TEMPLATE][0]:
+                    auto_conf_tpls[0].append(check_name)
+                    auto_conf_tpls[1].append(unfiltered_tpls[1][idx])
+                    auto_conf_tpls[2].append(unfiltered_tpls[2][idx])
+
+            templates[CONFIG_FROM_AUTOCONF] = deepcopy(auto_conf_tpls)
+
+        if templates[CONFIG_FROM_TEMPLATE] or templates[CONFIG_FROM_AUTOCONF]:
+            return templates
+
+        return None
+
+    def get_check_names(self, identifier):
+        """Return a set of all check names associated with an identifier"""
+        check_names = set()
+
+        # cache miss
+        if identifier not in self.kv_templates and identifier not in self.auto_conf_templates:
+            tpls = self.get_templates(identifier)
+
+            if not tpls:
+                return check_names
+
+            auto_conf = tpls[CONFIG_FROM_AUTOCONF]
+            if auto_conf:
+                check_names.update(auto_conf[0])
+
+            kv_conf = tpls[CONFIG_FROM_TEMPLATE]
+            if kv_conf:
+                check_names.update(kv_conf[0])
+
+        if identifier in self.kv_templates and self.kv_templates[identifier]:
+            check_names.update(set(self.kv_templates[identifier][0]))
+
+        if identifier in self.auto_conf_templates and self.auto_conf_templates[identifier]:
+            check_names.update(set(self.auto_conf_templates[identifier][0]))
+
+        return check_names
+
+
 class AbstractConfigStore(object):
     """Singleton for config stores"""
     __metaclass__ = Singleton
@@ -46,18 +182,13 @@ def __init__(self, agentConfig):
         self.settings = self._extract_settings(agentConfig)
         self.client = self.get_client()
         self.sd_template_dir = agentConfig.get('sd_template_dir')
-        self.auto_conf_images = get_auto_conf_images(agentConfig)
+        self.auto_conf_images = get_auto_conf_images()
 
-        # cache used by dockerutil to determine which check to reload based on the image linked to an event
+        # this cache is used to determine which check to
+        # reload based on the image linked to a docker event
         #
-        # it is invalidated entirely when a change is detected in the kv store
-        #
-        # this is a defaultdict(set) and some calls to it rely on this property
-        # so if you're planning on changing that, track its references
-        #
-        # TODO Haissam: this should be fleshed out a bit more and used as a cache instead
-        # of querying the kv store for each template
-        self.identifier_to_checks = self._populate_identifier_to_checks()
+        # it is invalidated entirely when a change is detected in the config store
+        self.template_cache = _TemplateCache(self.client_read, self.sd_template_dir)
 
     @classmethod
     def _drop(cls):
@@ -77,28 +208,6 @@ def client_read(self, path, **kwargs):
     def dump_directory(self, path, **kwargs):
         raise NotImplementedError()
 
-    def _populate_identifier_to_checks(self):
-        """Populate the identifier_to_checks cache with templates pulled
-        from the config store and from the auto-config folder"""
-        identifier_to_checks = defaultdict(set)
-        # config store templates
-        try:
-            templates = self.client_read(self.sd_template_dir.lstrip('/'), all=True)
-        except (NotImplementedError, TimeoutError, AttributeError):
-            templates = []
-        for tpl in templates:
-            split_tpl = tpl[0].split('/')
-            ident, var = split_tpl[-2], split_tpl[-1]
-            if var == CHECK_NAMES:
-                identifier_to_checks[ident].update(set(json.loads(tpl[1])))
-
-        # auto-config templates
-        templates = get_auto_conf_images(self.agentConfig)
-        for image, check in templates.iteritems():
-            identifier_to_checks[image].add(check)
-
-        return identifier_to_checks
-
     def _get_kube_config(self, identifier, kube_annotations, kube_container_name):
         try:
             prefix = '{}/{}.'.format(KUBE_ANNOTATION_PREFIX, kube_container_name)
@@ -117,28 +226,28 @@ def _get_auto_config(self, image_name):
         from jmxfetch import JMX_CHECKS
 
         ident = self._get_image_ident(image_name)
+        templates = []
         if ident in self.auto_conf_images:
-            check_name = self.auto_conf_images[ident]
+            check_names = self.auto_conf_images[ident]
 
-            # get the check class to verify it matches
-            check = get_check_class(self.agentConfig, check_name) if check_name not in JMX_CHECKS else True
-            if check is None:
-                log.info("Could not find an auto configuration template for %s."
-                         " Leaving it unconfigured." % image_name)
-                return None
+            for check_name in check_names:
+                # get the check class to verify it matches
+                check = get_check_class(self.agentConfig, check_name) if check_name not in JMX_CHECKS else True
+                if check is None:
+                    log.info("Failed auto configuring check %s for %s." % (check_name, image_name))
+                    continue
 
-            auto_conf = get_auto_conf(self.agentConfig, check_name)
-            init_config, instances = auto_conf.get('init_config', {}), auto_conf.get('instances', [])
-            return (check_name, init_config, instances[0] or {})
+                auto_conf = get_auto_conf(check_name)
+                init_config, instances = auto_conf.get('init_config', {}), auto_conf.get('instances', [])
+                templates.append((check_name, init_config, instances[0] or {}))
 
-        return None
+        return templates
 
     def get_checks_to_refresh(self, identifier, **kwargs):
         to_check = set()
 
         # try from the cache
-        if identifier in self.identifier_to_checks:
-            to_check.update(self.identifier_to_checks[identifier])
+        to_check.update(self.template_cache.get_check_names(identifier))
 
         kube_annotations = kwargs.get(KUBE_ANNOTATIONS)
         kube_container_name = kwargs.get(KUBE_CONTAINER_NAME)
@@ -149,14 +258,14 @@ def get_checks_to_refresh(self, identifier, **kwargs):
             if kube_config is not None:
                 to_check.update(kube_config[0])
 
-        if to_check:
-            return to_check
-        else:
-            # lastly fallback to auto_conf
-            return set(self.identifier_to_checks[self._get_image_ident(identifier)])
+        # lastly, try with legacy name for auto-conf
+        to_check.update(self.template_cache.get_check_names(self._get_image_ident(identifier)))
+
+        return to_check
 
     def get_check_tpls(self, identifier, **kwargs):
         """Retrieve template configs for an identifier from the config_store or auto configuration."""
+
         # this flag is used when no valid configuration store was provided
         # it makes the method skip directly to the auto_conf
         if kwargs.get('auto_conf') is True:
@@ -174,84 +283,56 @@ def get_check_tpls(self, identifier, **kwargs):
             # in auto config mode, identifier is the image name
             auto_config = self._get_auto_config(identifier)
-            if auto_config is not None:
+            if auto_config:
                 source = CONFIG_FROM_AUTOCONF
-                return [(source, auto_config)]
+                return [(source, conf) for conf in auto_config]
             else:
                 log.debug('No auto config was found for image %s, leaving it alone.' % identifier)
                 return []
         else:
-            config = self.read_config_from_store(identifier)
-            if config:
-                source, check_names, init_config_tpls, instance_tpls = config
-            else:
+            configs = self.read_config_from_store(identifier)
+
+            if not configs:
                 return []
 
-            if len(check_names) != len(init_config_tpls) or len(check_names) != len(instance_tpls):
-                log.error('Malformed configuration template: check_names, init_configs '
-                          'and instances are not all the same length. Container with identifier {} '
-                          'will not be configured by the service discovery'.format(identifier))
-                return []
+            res = []
+
+            for source, config in configs.iteritems():
+                if not config:
+                    continue
 
-            # Try to update the identifier_to_checks cache
-            self._update_identifier_to_checks(identifier, check_names)
+                check_names, init_config_tpls, instance_tpls = config
+                if len(check_names) != len(init_config_tpls) or len(check_names) != len(instance_tpls):
+                    log.error('Malformed configuration template: check_names, init_configs '
+                              'and instances are not all the same length. Container with identifier {} '
+                              'will not be configured by the service discovery'.format(identifier))
+                    continue
 
-            return [(source, values)
+                res += [(source, values)
                     for values in zip(check_names, init_config_tpls, instance_tpls)]
+
+            return res
 
     def read_config_from_store(self, identifier):
-        """Try to read from the config store, falls back to auto-config in case of failure."""
+        """Query templates from the cache. Fallback to canonical identifier for auto-config."""
         try:
-            try:
-                res = self._issue_read(identifier)
-            except KeyNotFound:
-                log.debug("Could not find directory {} in the config store, "
-                          "trying to convert to the old format.".format(identifier))
-                image_ident = self._get_image_ident(identifier)
-                res = self._issue_read(image_ident)
+            res = self.template_cache.get_templates(identifier)
 
-            if res and len(res) == 3:
-                source = CONFIG_FROM_TEMPLATE
-                check_names, init_config_tpls, instance_tpls = res
-            else:
-                log.debug("Could not find directory {} in the config store, "
-                          "trying to convert to the old format...".format(identifier))
+            if not res:
+                log.debug("No template found for {}, trying with auto-config...".format(identifier))
                 image_ident = self._get_image_ident(identifier)
-                res = self._issue_read(image_ident)
-                if res and len(res) == 3:
-                    source = CONFIG_FROM_TEMPLATE
-                    check_names, init_config_tpls, instance_tpls = res
-                else:
-                    raise KeyError
-        except (KeyError, KeyNotFound, TimeoutError, json.JSONDecodeError) as ex:
-            # this is kind of expected, it means that no template was provided for this container
-            if isinstance(ex, KeyError) or isinstance(ex, KeyNotFound):
-                log.debug("Could not find directory {} in the config store, "
-                          "trying to auto-configure a check...".format(identifier))
-            # this case is not expected, the agent can't reach the config store
-            if isinstance(ex, TimeoutError):
-                log.warning("Connection to the config backend timed out. Is it reachable?\n"
-                            "Trying to auto-configure a check for the container with ident %s." % identifier)
-            # the template is reachable but invalid
-            elif isinstance(ex, json.JSONDecodeError):
-                log.error('Could not decode the JSON configuration template '
-                          'for the container with ident %s...' % identifier)
-                return []
-            # try to read from auto-config templates
-            auto_config = self._get_auto_config(identifier)
-            if auto_config is not None:
-                # create list-format config based on an autoconf template
-                check_names, init_config_tpls, instance_tpls = map(lambda x: [x], auto_config)
-                source = CONFIG_FROM_AUTOCONF
-            else:
-                log.debug('No config was found for container with ident %s, leaving it alone.' % identifier)
-                return []
+                res = self.template_cache.get_templates(image_ident)
+
+            if not res:
+                # at this point no check is considered applicable to this identifier.
+                return []
+
         except Exception as ex:
-            log.warning(
-                'Fetching the value for {0} in the config store failed, this check '
-                'will not be configured by the service discovery. Error: {1}'.format(identifier, str(ex)))
+            log.debug(
+                'No config template found for {0}. Error: {1}'.format(identifier, str(ex)))
             return []
-        return source, check_names, init_config_tpls, instance_tpls
+
+        return res
 
     def _get_image_ident(self, ident):
         """Extract an identifier from the image"""
@@ -262,24 +343,12 @@ def _get_image_ident(self, ident):
         else:
             return ident.split(':')[0].split('/')[-1]
 
-    def _issue_read(self, identifier):
-        try:
-            check_names = json.loads(
-                self.client_read(path.join(self.sd_template_dir, identifier, CHECK_NAMES).lstrip('/')))
-            init_config_tpls = json.loads(
-                self.client_read(path.join(self.sd_template_dir, identifier, INIT_CONFIGS).lstrip('/')))
-            instance_tpls = json.loads(
-                self.client_read(path.join(self.sd_template_dir, identifier, INSTANCES).lstrip('/')))
-            return [check_names, init_config_tpls, instance_tpls]
-        except KeyError:
-            return None
-
     def crawl_config_template(self):
         """Return whether or not configuration templates have changed since the previous crawl"""
         try:
             config_index = self.client_read(self.sd_template_dir.lstrip('/'), recursive=True, watch=True)
         except KeyNotFound:
-            log.debug('Config template not found (normal if running on auto-config alone).'
+            log.debug('No config template found (expected if running on auto-config alone).'
                       ' Not Triggering a config reload.')
             return False
         except TimeoutError:
@@ -294,17 +363,6 @@ def crawl_config_template(self):
         if config_index != self.previous_config_index:
             log.info('Detected an update in config templates, reloading check configs...')
             self.previous_config_index = config_index
-            self.identifier_to_checks = self._populate_identifier_to_checks()
+            self.template_cache.invalidate()
             return True
         return False
-
-    def _update_identifier_to_checks(self, identifier, check_names):
-        """Try to insert in the identifier_to_checks cache the mapping between
-        an identifier and its check names.
-        This should very rarely happen.
-        When/If it does we can correct the cache if the key was missing but not if there is a conflict."""
-        if identifier not in self.identifier_to_checks:
-            self.identifier_to_checks[identifier] = set(check_names)
-        elif self.identifier_to_checks[identifier] != set(check_names):
-            log.warning("Trying to cache check names for ident %s but a different value is already there."
-                        "Not updating." % identifier)
diff --git a/utils/service_discovery/consul_config_store.py b/utils/service_discovery/consul_config_store.py
index fb6d51b9c2..2032bd86e2 100644
--- a/utils/service_discovery/consul_config_store.py
+++ b/utils/service_discovery/consul_config_store.py
@@ -50,9 +50,6 @@ def client_read(self, path, **kwargs):
         res = self.client.kv.get(path, recurse=recurse)
         if kwargs.get('watch', False):
             return res[0]
-        elif kwargs.get('all', False):
-            # we use it in _populate_identifier_to_checks
-            return [(child.get('Key'), child.get('Value')) for child in res[1]]
         else:
             if res[1] is not None:
                 return res[1].get('Value') if not recurse else res[1]
diff --git a/utils/service_discovery/etcd_config_store.py b/utils/service_discovery/etcd_config_store.py
index 11c35b3b3a..21958b6908 100644
--- a/utils/service_discovery/etcd_config_store.py
+++ b/utils/service_discovery/etcd_config_store.py
@@ -48,9 +48,6 @@ def client_read(self, path, **kwargs):
             if kwargs.get('watch', False):
                 modified_indices = (res.modifiedIndex, ) + tuple(leaf.modifiedIndex for leaf in res.leaves)
                 return max(modified_indices)
-            elif kwargs.get('all', False):
-                # we use it in _populate_identifier_to_checks
-                return [(child.key, child.value) for child in res.children]
             else:
                 return res.value
         except EtcdKeyNotFound:
diff --git a/utils/service_discovery/sd_docker_backend.py b/utils/service_discovery/sd_docker_backend.py
index bb429327da..5e0a4515f2 100644
--- a/utils/service_discovery/sd_docker_backend.py
+++ b/utils/service_discovery/sd_docker_backend.py
@@ -102,7 +102,8 @@ def update_checks(self, changed_containers):
         conf_reload_set = set()
         for c_id in changed_containers:
             checks = self._get_checks_to_refresh(state, c_id)
-            conf_reload_set.update(set(checks))
+            if checks:
+                conf_reload_set.update(set(checks))
 
         if conf_reload_set:
             self.reload_check_configs = conf_reload_set
@@ -112,9 +113,13 @@ def _get_checks_to_refresh(self, state, c_id):
         Use the DATADOG_ID label or the image."""
         inspect = state.inspect_container(c_id)
 
-        # if the container was removed we can't tell which check is concerned
-        # so we have to reload everything
-        if not inspect:
+        # If the container was removed we can't tell which check is concerned
+        # so we have to reload everything.
+        # Same thing if it's stopped and we're on Kubernetes in auto_conf mode
+        # because the pod was deleted and its template could have been in the annotations.
+        if not inspect or \
+                (not inspect.get('State', {}).get('Running')
+                 and Platform.is_k8s() and not self.agentConfig.get('sd_config_backend')):
             self.reload_check_configs = True
             return
 
@@ -172,7 +177,7 @@ def _extract_ip_from_networks(self, ip_dict, tpl_var):
 
         # no specifier
        if len(tpl_parts) < 2:
-            log.warning("No key was passed for template variable %s." % tpl_var)
+            log.debug("No key was passed for template variable %s." % tpl_var)
             return self._get_fallback_ip(ip_dict)
         else:
             res = ip_dict.get(tpl_parts[-1])
@@ -185,11 +190,11 @@ def _extract_ip_from_networks(self, ip_dict, tpl_var):
     def _get_fallback_ip(self, ip_dict):
         """try to pick the bridge key, falls back to the value of the last key"""
         if 'bridge' in ip_dict:
-            log.warning("Using the bridge network.")
+            log.debug("Using the bridge network.")
             return ip_dict['bridge']
         else:
             last_key = sorted(ip_dict.iterkeys())[-1]
-            log.warning("Trying with the last (sorted) network: '%s'." % last_key)
+            log.debug("Trying with the last (sorted) network: '%s'." % last_key)
             return ip_dict[last_key]
 
     def _get_port(self, state, c_id, tpl_var):
@@ -316,8 +321,6 @@ def _get_check_configs(self, state, c_id, identifier):
         }
         config_templates = self._get_config_templates(identifier, **platform_kwargs)
         if not config_templates:
-            log.debug('No config template for container %s with identifier %s. '
-                      'It will be left unconfigured.' % (c_id[:12], identifier))
             return None
 
         check_configs = []
@@ -342,7 +345,6 @@ def _get_config_templates(self, identifier, **platform_kwargs):
         templates = []
         if config_backend is None:
             auto_conf = True
-            log.warning('No supported configuration backend was provided, using auto-config only.')
         else:
             auto_conf = False
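
Editor's note, not part of the patch: the core behavior introduced above is that _TemplateCache.get_templates() merges user templates from the config store with bundled auto_conf templates, with the config-store source taking precedence per check name. The standalone sketch below illustrates that precedence rule under assumed data (the 'redis' entries, the custom_check name, and the 'autoconf' source string are made up for illustration; only CONFIG_FROM_TEMPLATE == 'template' is visible in the patch's tests).

# Illustrative sketch only -- a simplified stand-in for _TemplateCache.get_templates().
CONFIG_FROM_TEMPLATE = 'template'   # user-provided template from the config store
CONFIG_FROM_AUTOCONF = 'autoconf'   # stand-in value for the auto_conf source

# hypothetical cached data: [[check_names], [init_configs], [instances]] per identifier
kv_templates = {'redis': [['redisdb'], [{}], [{'host': '%%host%%', 'port': '%%port%%'}]]}
auto_conf_templates = {'redis': [['redisdb', 'custom_check'], [{}, {}], [{}, {}]]}


def get_templates(identifier):
    """Merge both sources; a check already configured by a config-store
    template is filtered out of the auto_conf result, so user templates win."""
    kv = kv_templates.get(identifier)
    auto = auto_conf_templates.get(identifier)
    filtered = [[], [], []]
    if auto:
        for idx, check_name in enumerate(auto[0]):
            if not kv or check_name not in kv[0]:
                filtered[0].append(check_name)
                filtered[1].append(auto[1][idx])
                filtered[2].append(auto[2][idx])
    if not kv and not filtered[0]:
        return None
    return {CONFIG_FROM_TEMPLATE: kv,
            CONFIG_FROM_AUTOCONF: filtered if auto else None}


# 'redisdb' is kept only under the 'template' source and only 'custom_check'
# remains under the auto_conf source, mirroring what test_get_templates asserts.
print(get_templates('redis'))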