From 7ea6787b177280778790b81035bacfedc9b6a80c Mon Sep 17 00:00:00 2001 From: Yann Mahe Date: Tue, 26 Jan 2016 17:11:45 -0500 Subject: [PATCH 1/2] [core][wmi] `list` to cache connections Use a `list` to cache WMI connections instead of a `set`. --- checks/libs/wmi/sampler.py | 4 ++-- tests/core/test_wmi.py | 2 +- tests/core/test_wmi_calculator.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/checks/libs/wmi/sampler.py b/checks/libs/wmi/sampler.py index 1e2a8ae614..9a9ae8e925 100644 --- a/checks/libs/wmi/sampler.py +++ b/checks/libs/wmi/sampler.py @@ -53,7 +53,7 @@ class WMISampler(object): """ # Shared resources _wmi_locators = {} - _wmi_connections = defaultdict(set) + _wmi_connections = defaultdict(list) def __init__(self, logger, class_name, property_names, filters="", host="localhost", namespace="root\\cimv2", username="", password="", timeout_duration=10): @@ -293,7 +293,7 @@ def get_connection(self): yield connection # Release it - self._wmi_connections[self.connection_key].add(connection) + self._wmi_connections[self.connection_key].append(connection) @staticmethod def _format_filter(filters): diff --git a/tests/core/test_wmi.py b/tests/core/test_wmi.py index bea3ca3c93..b4fe572b97 100644 --- a/tests/core/test_wmi.py +++ b/tests/core/test_wmi.py @@ -224,7 +224,7 @@ def tearDown(self): # Flush cache from checks.libs.wmi.sampler import WMISampler WMISampler._wmi_locators = {} - WMISampler._wmi_connections = defaultdict(set) + WMISampler._wmi_connections = defaultdict(list) def assertWMIConn(self, wmi_sampler, param=None, count=None): """ diff --git a/tests/core/test_wmi_calculator.py b/tests/core/test_wmi_calculator.py index bcaa2f2337..488e4ca9eb 100644 --- a/tests/core/test_wmi_calculator.py +++ b/tests/core/test_wmi_calculator.py @@ -36,7 +36,7 @@ def test_calculator_decorator(self): Asssign a calculator to a counter_type. Raise when the calculator is missing. """ @calculator(123456) - def do_something(): + def do_something(*args, **kwargs): """A function that does something.""" pass From 657328011018a5c2eca5350476cfaa40fea0a138 Mon Sep 17 00:00:00 2001 From: Yann Mahe Date: Tue, 26 Jan 2016 19:04:38 -0500 Subject: [PATCH 2/2] [system][wmi_check] handle `TimeoutException` WMI queries can potentially raise a `TimeoutException` on query timeouts. Handle it. --- checks.d/wmi_check.py | 28 +++++++++++--- checks/libs/wmi/sampler.py | 11 ++++-- checks/system/win32.py | 58 +++++++++++++++++++++++++---- tests/checks/mock/test_wmi_check.py | 31 ++++++++++++++- tests/core/test_wmi.py | 26 ++++++------- 5 files changed, 123 insertions(+), 31 deletions(-) diff --git a/checks.d/wmi_check.py b/checks.d/wmi_check.py index 640ab9180a..7b1ab884bb 100644 --- a/checks.d/wmi_check.py +++ b/checks.d/wmi_check.py @@ -4,6 +4,7 @@ # project from checks import AgentCheck from checks.libs.wmi.sampler import WMISampler +from utils.timeout import TimeoutException WMIMetric = namedtuple('WMIMetric', ['name', 'value', 'tags']) @@ -37,6 +38,8 @@ class WMICheck(AgentCheck): """ def __init__(self, name, init_config, agentConfig, instances): AgentCheck.__init__(self, name, init_config, agentConfig, instances) + + # Cache self.wmi_samplers = {} self.wmi_props = {} @@ -69,13 +72,25 @@ def check(self, instance): wmi_class, properties, filters=filters, host=host, namespace=namespace, - username=username, password=password + username=username, password=password, ) # Sample, extract & submit metrics - wmi_sampler.sample() - metrics = self._extract_metrics(wmi_sampler, tag_by, tag_queries, constant_tags) - self._submit_metrics(metrics, metric_name_and_type_by_property) + metrics = [] + try: + wmi_sampler.sample() + metrics = self._extract_metrics(wmi_sampler, tag_by, tag_queries, constant_tags) + except TimeoutException: + self.log.warning( + u"WMI query timed out." + u" class={wmi_class} - properties={wmi_properties} -" + u" filters={filters} - tag_queries={tag_queries}".format( + wmi_class=wmi_class, wmi_properties=properties, + filters=filters, tag_queries=tag_queries + ) + ) + else: + self._submit_metrics(metrics, metric_name_and_type_by_property) def _format_tag_query(self, sampler, wmi_obj, tag_query): """ @@ -177,7 +192,10 @@ def _extract_metrics(self, wmi_sampler, tag_by, tag_queries, constant_tags): """ Extract and tag metrics from the WMISampler. - Raise when multiple WMIObject were returned by the sampler with no `tag_by` specified. + Raise + * `MissingTagBy` when multiple WMIObject were returned by the sampler + with no `tag_by` specified. + * `TimeoutException` on WMI query timeouts. Returns: List of WMIMetric ``` diff --git a/checks/libs/wmi/sampler.py b/checks/libs/wmi/sampler.py index 9a9ae8e925..7706153c9a 100644 --- a/checks/libs/wmi/sampler.py +++ b/checks/libs/wmi/sampler.py @@ -146,11 +146,12 @@ def sample(self): self.previous_sample = self.current_sample self.current_sample = self._query() except TimeoutException: - self.logger.warning( + self.logger.debug( u"Query timeout after {timeout}s".format( timeout=self._timeout_duration ) ) + raise else: self._sampling = False self.logger.debug(u"Sample: {0}".format(self.current_sample)) @@ -161,7 +162,9 @@ def __len__(self): """ # No data is returned while sampling if self._sampling: - return 0 + raise TypeError( + u"Sampling `WMISampler` object has no len()" + ) return len(self.current_sample) @@ -171,7 +174,9 @@ def __iter__(self): """ # No data is returned while sampling if self._sampling: - return + raise TypeError( + u"Sampling `WMISampler` object is not iterable" + ) if self.is_raw_perf_class: # Format required diff --git a/checks/system/win32.py b/checks/system/win32.py index 6de8672bf3..cdf940e674 100644 --- a/checks/system/win32.py +++ b/checks/system/win32.py @@ -16,6 +16,9 @@ def WMISampler(*args, **kwargs): """ return +# datadog +from utils.timeout import TimeoutException + # Device WMI drive types class DriveType(object): @@ -44,7 +47,14 @@ def __init__(self, logger): self.gauge('system.proc.count') def check(self, agentConfig): - self.wmi_sampler.sample() + try: + self.wmi_sampler.sample() + except TimeoutException: + self.logger.warning( + u"Timeout while querying Win32_PerfRawData_PerfOS_System WMI class." + u" Processes metrics will be returned at next iteration." + ) + return if not (len(self.wmi_sampler)): self.logger.info('Missing Win32_PerfRawData_PerfOS_System WMI class.' @@ -100,7 +110,14 @@ def __init__(self, logger): self.gauge('system.mem.pct_usable') def check(self, agentConfig): - self.os_wmi_sampler.sample() + try: + self.os_wmi_sampler.sample() + except TimeoutException: + self.logger.warning( + u"Timeout while querying Win32_OperatingSystem WMI class." + u" Memory metrics will be returned at next iteration." + ) + return if not (len(self.os_wmi_sampler)): self.logger.info('Missing Win32_OperatingSystem WMI class.' @@ -123,7 +140,14 @@ def check(self, agentConfig): self.save_sample('system.mem.free', free) self.save_sample('system.mem.used', total - free) - self.mem_wmi_sampler.sample() + try: + self.mem_wmi_sampler.sample() + except TimeoutException: + self.logger.warning( + u"Timeout while querying Win32_PerfRawData_PerfOS_Memory WMI class." + u" Memory metrics will be returned at next iteration." + ) + return if not (len(self.mem_wmi_sampler)): self.logger.info('Missing Win32_PerfRawData_PerfOS_Memory WMI class.' @@ -173,8 +197,14 @@ def __init__(self, logger): self.counter('system.cpu.system') def check(self, agentConfig): - - self.wmi_sampler.sample() + try: + self.wmi_sampler.sample() + except TimeoutException: + self.logger.warning( + u"Timeout while querying Win32_PerfRawData_PerfOS_Processor WMI class." + u" CPU metrics will be returned at next iteration." + ) + return if not (len(self.wmi_sampler)): self.logger.info('Missing Win32_PerfRawData_PerfOS_Processor WMI class.' @@ -230,7 +260,14 @@ def __init__(self, logger): self.gauge('system.net.bytes_sent') def check(self, agentConfig): - self.wmi_sampler.sample() + try: + self.wmi_sampler.sample() + except TimeoutException: + self.logger.warning( + u"Timeout while querying Win32_PerfRawData_Tcpip_NetworkInterface WMI class." + u" Network metrics will be returned at next iteration." + ) + return if not (len(self.wmi_sampler)): self.logger.info('Missing Win32_PerfRawData_Tcpip_NetworkInterface WMI class.' @@ -271,7 +308,14 @@ def __init__(self, logger): self.gauge('system.io.avg_q_sz') def check(self, agentConfig): - self.wmi_sampler.sample() + try: + self.wmi_sampler.sample() + except TimeoutException: + self.logger.warning( + u"Timeout while querying Win32_PerfRawData_PerfDisk_LogicalDiskUnable WMI class." + u" I/O metrics will be returned at next iteration." + ) + return if not (len(self.wmi_sampler)): self.logger.info('Missing Win32_PerfRawData_PerfDisk_LogicalDiskUnable WMI class.' diff --git a/tests/checks/mock/test_wmi_check.py b/tests/checks/mock/test_wmi_check.py index 0b021592c6..97b5be3ff8 100644 --- a/tests/checks/mock/test_wmi_check.py +++ b/tests/checks/mock/test_wmi_check.py @@ -3,7 +3,7 @@ # project from tests.checks.common import AgentCheckTest -from tests.core.test_wmi import TestCommonWMI +from tests.core.test_wmi import SWbemServices, TestCommonWMI class WMITestCase(AgentCheckTest, TestCommonWMI): @@ -188,6 +188,35 @@ def test_missing_property(self): self.run_check(config, mocks={'log': logger}) self.assertTrue(logger.warning.called) + def test_query_timeouts(self): + """ + Gracefully handle WMI query timeouts. + """ + def __patched_init__(*args, **kwargs): + """ + Force `timeout_duration` value. + """ + kwargs['timeout_duration'] = 0.5 + return wmi_constructor(*args, **kwargs) + + # Increase WMI queries' runtime + SWbemServices._exec_query_run_time = 0.5 + + # Patch WMISampler to decrease timeout tolerancy + WMISampler = self.load_class("WMISampler") + wmi_constructor = WMISampler.__init__ + WMISampler.__init__ = __patched_init__ + + # Set up the check + config = { + 'instances': [self.WMI_CONFIG] + } + logger = Mock() + + # No exception is raised but a WARNING is logged + self.run_check(config, mocks={'log': logger}) + self.assertTrue(logger.warning.called) + def test_mandatory_tag_by(self): """ Exception is raised when the result returned by the WMI query contains multiple rows diff --git a/tests/core/test_wmi.py b/tests/core/test_wmi.py index b4fe572b97..662340229a 100644 --- a/tests/core/test_wmi.py +++ b/tests/core/test_wmi.py @@ -10,6 +10,7 @@ # project from tests.checks.common import Fixtures +from utils.timeout import TimeoutException log = logging.getLogger(__name__) @@ -17,13 +18,6 @@ WMISampler = None -# Thoughts -# Log WMI activity -# Mechanism to timeout -# Check when pywintypes.com_error are raised -# Check the role of the flags - - def load_fixture(f, args=None): """ Build a WMI query result from a file and given parameters. @@ -100,9 +94,11 @@ def __init__(self, wmi_conn_args): @classmethod def reset(cls): """ - FIXME - Dirty patch to reset `SWbemServices.ExecQuery` to 0. + Dirty patch to reset `SWbemServices.ExecQuery.call_count` and + `SWbemServices._exec_query_run_time` to 0. """ cls._exec_query_call_count.reset() + cls._exec_query_run_time = 0 def get_conn_args(self): """ @@ -451,7 +447,7 @@ def test_wmi_sampler_iterator_getter(self): def test_wmi_sampler_timeout(self): """ - Gracefully handle WMI queries' timeouts. + Gracefully handle WMI query timeouts. """ from checks.libs.wmi.sampler import WMISampler logger = Mock() @@ -462,14 +458,14 @@ def test_wmi_sampler_timeout(self): timeout_duration=0.5) SWbemServices._exec_query_run_time = 0.5 - # Gracefully timeout with a warning message but no exception - wmi_sampler.sample() + # `TimeoutException` exception is raised, DEBUG message logged + self.assertRaises(TimeoutException, wmi_sampler.sample) self.assertTrue(wmi_sampler._sampling) - self.assertTrue(logger.warning.called) + self.assertTrue(logger.debug.called) - # Show no data - self.assertEquals(len(wmi_sampler), 0) - self.assertEquals(sum(1 for _ in wmi_sampler), 0) + # Cannot iterate on data + self.assertRaises(TypeError, lambda: len(wmi_sampler)) + self.assertRaises(TypeError, lambda: sum(1 for _ in wmi_sampler)) # Recover from timeout at next iteration wmi_sampler.sample()