From 5abbb7d0b3fef3f5139e2603c05d65990343291d Mon Sep 17 00:00:00 2001 From: Olivier Vielpeau Date: Tue, 25 Aug 2015 14:15:19 -0400 Subject: [PATCH 1/4] [network] Use `ss` instead of `netstat` on linux `netstat` is deprecated on many distributions, `ss` is its replacement. It brings perf improvements especially on systems that have lots of connections. We still use `netstat` as a fallback if `ss` is not installed. --- checks.d/network.py | 129 +++++++++++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 42 deletions(-) diff --git a/checks.d/network.py b/checks.d/network.py index 852482fbe9..febb467390 100644 --- a/checks.d/network.py +++ b/checks.d/network.py @@ -27,20 +27,35 @@ class Network(AgentCheck): SOURCE_TYPE_NAME = 'system' TCP_STATES = { - "ESTABLISHED": "established", - "SYN_SENT": "opening", - "SYN_RECV": "opening", - "FIN_WAIT1": "closing", - "FIN_WAIT2": "closing", - "TIME_WAIT": "time_wait", - "CLOSE": "closing", - "CLOSE_WAIT": "closing", - "LAST_ACK": "closing", - "LISTEN": "listening", - "CLOSING": "closing", + "ss": { + "ESTAB": "established", + "SYN-SENT": "opening", + "SYN-RECV": "opening", + "FIN-WAIT-1": "closing", + "FIN-WAIT-2": "closing", + "TIME-WAIT": "time_wait", + "UNCONN": "closing", + "CLOSE-WAIT": "closing", + "LAST-ACK": "closing", + "LISTEN": "listening", + "CLOSING": "closing", + }, + "netstat": { + "ESTABLISHED": "established", + "SYN_SENT": "opening", + "SYN_RECV": "opening", + "FIN_WAIT1": "closing", + "FIN_WAIT2": "closing", + "TIME_WAIT": "time_wait", + "CLOSE": "closing", + "CLOSE_WAIT": "closing", + "LAST_ACK": "closing", + "LISTEN": "listening", + "CLOSING": "closing", + } } - NETSTAT_GAUGE = { + CX_STATE_GAUGE = { ('udp4', 'connections') : 'system.net.udp4.connections', ('udp6', 'connections') : 'system.net.udp6.connections', ('tcp4', 'established') : 'system.net.tcp4.established', @@ -122,39 +137,51 @@ def _submit_regexed_values(self, output, regex_list): def _check_linux(self, instance): if self._collect_cx_state: - netstat = subprocess.Popen(["netstat", "-n", "-u", "-t", "-a"], - stdout=subprocess.PIPE, - close_fds=True).communicate()[0] - # Active Internet connections (w/o servers) - # Proto Recv-Q Send-Q Local Address Foreign Address State - # tcp 0 0 46.105.75.4:80 79.220.227.193:2032 SYN_RECV - # tcp 0 0 46.105.75.4:143 90.56.111.177:56867 ESTABLISHED - # tcp 0 0 46.105.75.4:50468 107.20.207.175:443 TIME_WAIT - # tcp6 0 0 46.105.75.4:80 93.15.237.188:58038 FIN_WAIT2 - # tcp6 0 0 46.105.75.4:80 79.220.227.193:2029 ESTABLISHED - # udp 0 0 0.0.0.0:123 0.0.0.0:* - # udp6 0 0 :::41458 :::* - - lines = netstat.split("\n") - - metrics = dict.fromkeys(self.NETSTAT_GAUGE.values(), 0) - for l in lines[2:-1]: - cols = l.split() - # 0 1 2 3 4 5 + try: + self.log.debug("Using `ss` to collect connection state") + # Try using `ss` for increased performance over `netstat` + for ip_version in ['4', '6']: + # Call `ss` for each IP version because there's no built-in way of distinguishing + # between the IP versions in the output + ss = subprocess.Popen(["ss", "-n", "-u", "-t", "-a", "-{0}".format(ip_version)], + stdout=subprocess.PIPE, + close_fds=True).communicate()[0] + # Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port + # udp UNCONN 0 0 127.0.0.1:8125 *:* + # udp ESTAB 0 0 127.0.0.1:37036 127.0.0.1:8125 + # udp UNCONN 0 0 fe80::a00:27ff:fe1c:3c4:123 :::* + # tcp TIME-WAIT 0 0 90.56.111.177:56867 46.105.75.4:143 + # tcp LISTEN 0 0 ::ffff:127.0.0.1:33217 ::ffff:127.0.0.1:7199 + # tcp ESTAB 0 0 ::ffff:127.0.0.1:58975 ::ffff:127.0.0.1:2181 + + lines = ss.split("\n") + + metrics = self._parse_linux_cx_state(lines[1:-1], self.TCP_STATES['ss'], 1, ip_version=ip_version) + # Only send the metrics which match the loop iteration's ip version + for stat, metric in self.CX_STATE_GAUGE.iteritems(): + if stat[0].endswith(ip_version): + self.gauge(metric, metrics.get(metric)) + + except OSError: + self.log.info("`ss` not found: using `netstat` as a fallback") + netstat = subprocess.Popen(["netstat", "-n", "-u", "-t", "-a"], + stdout=subprocess.PIPE, + close_fds=True).communicate()[0] + # Active Internet connections (w/o servers) + # Proto Recv-Q Send-Q Local Address Foreign Address State + # tcp 0 0 46.105.75.4:80 79.220.227.193:2032 SYN_RECV # tcp 0 0 46.105.75.4:143 90.56.111.177:56867 ESTABLISHED - if cols[0].startswith("tcp"): - protocol = ("tcp4", "tcp6")[cols[0] == "tcp6"] - if cols[5] in self.TCP_STATES: - metric = self.NETSTAT_GAUGE[protocol, self.TCP_STATES[cols[5]]] - metrics[metric] += 1 - elif cols[0].startswith("udp"): - protocol = ("udp4", "udp6")[cols[0] == "udp6"] - metric = self.NETSTAT_GAUGE[protocol, 'connections'] - metrics[metric] += 1 + # tcp 0 0 46.105.75.4:50468 107.20.207.175:443 TIME_WAIT + # tcp6 0 0 46.105.75.4:80 93.15.237.188:58038 FIN_WAIT2 + # tcp6 0 0 46.105.75.4:80 79.220.227.193:2029 ESTABLISHED + # udp 0 0 0.0.0.0:123 0.0.0.0:* + # udp6 0 0 :::41458 :::* - for metric, value in metrics.iteritems(): - self.gauge(metric, value) + lines = netstat.split("\n") + metrics = self._parse_linux_cx_state(lines[2:-1], self.TCP_STATES['netstat'], 5) + for metric, value in metrics.iteritems(): + self.gauge(metric, value) proc = open('/proc/net/dev', 'r') try: @@ -223,6 +250,24 @@ def _check_linux(self, instance): # On Openshift, /proc/net/snmp is only readable by root self.log.debug("Unable to read /proc/net/snmp.") + # Parse the output of the command that retrieves the connection state (either `ss` or `netstat`) + # Returns a dict metric_name -> value + def _parse_linux_cx_state(self, lines, tcp_states, state_col, ip_version=None): + metrics = dict.fromkeys(self.CX_STATE_GAUGE.values(), 0) + for l in lines: + cols = l.split() + if cols[0].startswith('tcp'): + protocol = "tcp{0}".format(ip_version) if ip_version else ("tcp4", "tcp6")[cols[0] == "tcp6"] + if cols[state_col] in tcp_states: + metric = self.CX_STATE_GAUGE[protocol, tcp_states[cols[state_col]]] + metrics[metric] += 1 + elif cols[0].startswith('udp'): + protocol = "udp{0}".format(ip_version) if ip_version else ("udp4", "udp6")[cols[0] == "udp6"] + metric = self.CX_STATE_GAUGE[protocol, 'connections'] + metrics[metric] += 1 + + return metrics + def _check_bsd(self, instance): netstat_flags = ['-i', '-b'] From 11590017e32384ad5e76895a56fef9956afc4f96 Mon Sep 17 00:00:00 2001 From: Olivier Vielpeau Date: Tue, 25 Aug 2015 19:47:19 -0400 Subject: [PATCH 2/4] [network] Add mock test to verify `ss`/`netstat` equivalence --- tests/checks/fixtures/network/netstat | 16 ++++++ tests/checks/fixtures/network/ss_ipv4 | 8 +++ tests/checks/fixtures/network/ss_ipv6 | 8 +++ tests/checks/mock/test_network.py | 71 +++++++++++++++++++++++++++ 4 files changed, 103 insertions(+) create mode 100644 tests/checks/fixtures/network/netstat create mode 100644 tests/checks/fixtures/network/ss_ipv4 create mode 100644 tests/checks/fixtures/network/ss_ipv6 create mode 100644 tests/checks/mock/test_network.py diff --git a/tests/checks/fixtures/network/netstat b/tests/checks/fixtures/network/netstat new file mode 100644 index 0000000000..d1d907b1f7 --- /dev/null +++ b/tests/checks/fixtures/network/netstat @@ -0,0 +1,16 @@ +Active Internet connections (servers and established) +Proto Recv-Q Send-Q Local Address Foreign Address State +tcp 0 128 0.0.0.0:6379 0.0.0.0:* LISTEN +tcp 0 128 0.0.0.0:6380 0.0.0.0:* LISTEN +tcp 0 0 127.0.0.1:80 127.0.0.1:51650 TIME_WAIT +tcp 0 0 127.0.0.1:58414 127.0.0.1:9200 TIME_WAIT +tcp 0 0 10.0.2.15:45637 10.0.2.15:9300 ESTABLISHED +tcp6 0 128 :::6380 :::* LISTEN +tcp6 0 0 127.0.0.1:58488 127.0.0.1:7199 TIME_WAIT +tcp6 0 0 127.0.0.1:42395 127.0.0.1:2181 ESTABLISHED +tcp6 0 0 127.0.0.1:58439 127.0.0.1:7199 CLOSING +udp 0 0 127.0.0.1:48135 127.0.0.1:8125 ESTABLISHED +udp 0 0 127.0.0.1:8125 0.0.0.0:* +udp6 0 0 fe80::a00:27ff:fee9:123 :::* ESTABLISHED +udp6 0 0 fe80::a00:27ff:fe1c:123 :::* +udp6 0 0 :::111 :::* diff --git a/tests/checks/fixtures/network/ss_ipv4 b/tests/checks/fixtures/network/ss_ipv4 new file mode 100644 index 0000000000..20a9c28229 --- /dev/null +++ b/tests/checks/fixtures/network/ss_ipv4 @@ -0,0 +1,8 @@ +Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port +udp ESTAB 0 0 127.0.0.1:48135 127.0.0.1:8125 +udp UNCONN 0 0 127.0.0.1:8125 *:* +tcp LISTEN 0 128 *:6379 *:* +tcp LISTEN 0 128 *:6380 *:* +tcp TIME-WAIT 0 0 127.0.0.1:80 127.0.0.1:51650 +tcp TIME-WAIT 0 0 127.0.0.1:58414 127.0.0.1:9200 +tcp ESTAB 0 0 10.0.2.15:45637 10.0.2.15:9300 diff --git a/tests/checks/fixtures/network/ss_ipv6 b/tests/checks/fixtures/network/ss_ipv6 new file mode 100644 index 0000000000..71d0380630 --- /dev/null +++ b/tests/checks/fixtures/network/ss_ipv6 @@ -0,0 +1,8 @@ +Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port +udp UNCONN 0 0 :::111 :::* +udp UNCONN 0 0 fe80::a00:27ff:fe1c:3c4:123 :::* +udp ESTAB 0 0 fe80::a00:27ff:fee9:10ee:123 :::* +tcp LISTEN 0 128 :::6380 :::* +tcp TIME-WAIT 0 0 ::ffff:127.0.0.1:58488 ::ffff:127.0.0.1:7199 +tcp ESTAB 0 0 ::ffff:127.0.0.1:42395 ::ffff:127.0.0.1:2181 +tcp CLOSING 0 0 ::ffff:127.0.0.1:58439 ::ffff:127.0.0.1:7199 diff --git a/tests/checks/mock/test_network.py b/tests/checks/mock/test_network.py new file mode 100644 index 0000000000..ff5fbe96d5 --- /dev/null +++ b/tests/checks/mock/test_network.py @@ -0,0 +1,71 @@ +# 3p +import mock + +# project +from tests.checks.common import AgentCheckTest, Fixtures + + +def ss_popen_mock(*args, **kwargs): + popen_mock = mock.Mock() + if args[0][-1] == '-4': + popen_mock.communicate.return_value = (Fixtures.read_file('ss_ipv4'), None) + elif args[0][-1] == '-6': + popen_mock.communicate.return_value = (Fixtures.read_file('ss_ipv6'), None) + + return popen_mock + + +def netstat_popen_mock(*args, **kwargs): + if args[0][0] == 'ss': + raise OSError + elif args[0][0] == 'netstat': + popen_mock = mock.Mock() + popen_mock.communicate.return_value = (Fixtures.read_file('netstat'), None) + return popen_mock + + +class TestCheckNetwork(AgentCheckTest): + CHECK_NAME = 'network' + + def setUp(self): + self.config = { + "instances": [ + { + "collect_connection_state": True + } + ] + } + self.load_check(self.config) + + CX_STATE_GAUGES_VALUES = { + 'system.net.udp4.connections': 2, + 'system.net.udp6.connections': 3, + 'system.net.tcp4.established': 1, + 'system.net.tcp4.opening': 0, + 'system.net.tcp4.closing': 0, + 'system.net.tcp4.listening': 2, + 'system.net.tcp4.time_wait': 2, + 'system.net.tcp6.established': 1, + 'system.net.tcp6.opening': 0, + 'system.net.tcp6.closing': 1, + 'system.net.tcp6.listening': 1, + 'system.net.tcp6.time_wait': 1, + } + + @mock.patch('subprocess.Popen', side_effect=ss_popen_mock) + @mock.patch('network.Platform.is_linux', return_value=True) + def test_cx_state_linux_ss(self, mock_popen, mock_platform): + self.run_check({}) + + # Assert metrics + for metric, value in self.CX_STATE_GAUGES_VALUES.iteritems(): + self.assertMetric(metric, value=value) + + @mock.patch('subprocess.Popen', side_effect=netstat_popen_mock) + @mock.patch('network.Platform.is_linux', return_value=True) + def test_cx_state_linux_netstat(self, mock_popen, mock_platform): + self.run_check({}) + + # Assert metrics + for metric, value in self.CX_STATE_GAUGES_VALUES.iteritems(): + self.assertMetric(metric, value=value) From 21ab3fba9e3622a398ad88ed2f525667c1511027 Mon Sep 17 00:00:00 2001 From: Olivier Vielpeau Date: Thu, 10 Sep 2015 17:51:15 -0400 Subject: [PATCH 3/4] [network] Use `subprocess_output` util function for `ss` and `netstat` --- checks.d/network.py | 9 +++------ tests/checks/mock/test_network.py | 23 +++++++++-------------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/checks.d/network.py b/checks.d/network.py index febb467390..0aad34baf9 100644 --- a/checks.d/network.py +++ b/checks.d/network.py @@ -8,6 +8,7 @@ # project from checks import AgentCheck from utils.platform import Platform +from utils.subprocess_output import get_subprocess_output BSD_TCP_METRICS = [ (re.compile("^\s*(\d+) data packets \(\d+ bytes\) retransmitted\s*$"), 'system.net.tcp.retrans_packs'), @@ -143,9 +144,7 @@ def _check_linux(self, instance): for ip_version in ['4', '6']: # Call `ss` for each IP version because there's no built-in way of distinguishing # between the IP versions in the output - ss = subprocess.Popen(["ss", "-n", "-u", "-t", "-a", "-{0}".format(ip_version)], - stdout=subprocess.PIPE, - close_fds=True).communicate()[0] + ss = get_subprocess_output(["ss", "-n", "-u", "-t", "-a", "-{0}".format(ip_version)], self.log) # Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port # udp UNCONN 0 0 127.0.0.1:8125 *:* # udp ESTAB 0 0 127.0.0.1:37036 127.0.0.1:8125 @@ -164,9 +163,7 @@ def _check_linux(self, instance): except OSError: self.log.info("`ss` not found: using `netstat` as a fallback") - netstat = subprocess.Popen(["netstat", "-n", "-u", "-t", "-a"], - stdout=subprocess.PIPE, - close_fds=True).communicate()[0] + netstat = get_subprocess_output(["netstat", "-n", "-u", "-t", "-a"], self.log) # Active Internet connections (w/o servers) # Proto Recv-Q Send-Q Local Address Foreign Address State # tcp 0 0 46.105.75.4:80 79.220.227.193:2032 SYN_RECV diff --git a/tests/checks/mock/test_network.py b/tests/checks/mock/test_network.py index ff5fbe96d5..977243f05c 100644 --- a/tests/checks/mock/test_network.py +++ b/tests/checks/mock/test_network.py @@ -5,23 +5,18 @@ from tests.checks.common import AgentCheckTest, Fixtures -def ss_popen_mock(*args, **kwargs): - popen_mock = mock.Mock() +def ss_subprocess_mock(*args, **kwargs): if args[0][-1] == '-4': - popen_mock.communicate.return_value = (Fixtures.read_file('ss_ipv4'), None) + return Fixtures.read_file('ss_ipv4') elif args[0][-1] == '-6': - popen_mock.communicate.return_value = (Fixtures.read_file('ss_ipv6'), None) + return Fixtures.read_file('ss_ipv6') - return popen_mock - -def netstat_popen_mock(*args, **kwargs): +def netstat_subprocess_mock(*args, **kwargs): if args[0][0] == 'ss': raise OSError elif args[0][0] == 'netstat': - popen_mock = mock.Mock() - popen_mock.communicate.return_value = (Fixtures.read_file('netstat'), None) - return popen_mock + return Fixtures.read_file('netstat') class TestCheckNetwork(AgentCheckTest): @@ -52,18 +47,18 @@ def setUp(self): 'system.net.tcp6.time_wait': 1, } - @mock.patch('subprocess.Popen', side_effect=ss_popen_mock) + @mock.patch('network.get_subprocess_output', side_effect=ss_subprocess_mock) @mock.patch('network.Platform.is_linux', return_value=True) - def test_cx_state_linux_ss(self, mock_popen, mock_platform): + def test_cx_state_linux_ss(self, mock_subprocess, mock_platform): self.run_check({}) # Assert metrics for metric, value in self.CX_STATE_GAUGES_VALUES.iteritems(): self.assertMetric(metric, value=value) - @mock.patch('subprocess.Popen', side_effect=netstat_popen_mock) + @mock.patch('network.get_subprocess_output', side_effect=netstat_subprocess_mock) @mock.patch('network.Platform.is_linux', return_value=True) - def test_cx_state_linux_netstat(self, mock_popen, mock_platform): + def test_cx_state_linux_netstat(self, mock_subprocess, mock_platform): self.run_check({}) # Assert metrics From 0fb9eb3ce0e4a123be73773dd0eb833d35dc6d65 Mon Sep 17 00:00:00 2001 From: Olivier Vielpeau Date: Thu, 10 Sep 2015 18:14:13 -0400 Subject: [PATCH 4/4] [network] Use `splitlines()` instead of `split("\n")` --- checks.d/network.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/checks.d/network.py b/checks.d/network.py index 0aad34baf9..247bea59ed 100644 --- a/checks.d/network.py +++ b/checks.d/network.py @@ -129,7 +129,7 @@ def _parse_value(self, v): return 0 def _submit_regexed_values(self, output, regex_list): - lines = output.split("\n") + lines = output.splitlines() for line in lines: for regex, metric in regex_list: value = re.match(regex, line) @@ -144,7 +144,7 @@ def _check_linux(self, instance): for ip_version in ['4', '6']: # Call `ss` for each IP version because there's no built-in way of distinguishing # between the IP versions in the output - ss = get_subprocess_output(["ss", "-n", "-u", "-t", "-a", "-{0}".format(ip_version)], self.log) + lines = get_subprocess_output(["ss", "-n", "-u", "-t", "-a", "-{0}".format(ip_version)], self.log).splitlines() # Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port # udp UNCONN 0 0 127.0.0.1:8125 *:* # udp ESTAB 0 0 127.0.0.1:37036 127.0.0.1:8125 @@ -153,9 +153,7 @@ def _check_linux(self, instance): # tcp LISTEN 0 0 ::ffff:127.0.0.1:33217 ::ffff:127.0.0.1:7199 # tcp ESTAB 0 0 ::ffff:127.0.0.1:58975 ::ffff:127.0.0.1:2181 - lines = ss.split("\n") - - metrics = self._parse_linux_cx_state(lines[1:-1], self.TCP_STATES['ss'], 1, ip_version=ip_version) + metrics = self._parse_linux_cx_state(lines[1:], self.TCP_STATES['ss'], 1, ip_version=ip_version) # Only send the metrics which match the loop iteration's ip version for stat, metric in self.CX_STATE_GAUGE.iteritems(): if stat[0].endswith(ip_version): @@ -163,7 +161,7 @@ def _check_linux(self, instance): except OSError: self.log.info("`ss` not found: using `netstat` as a fallback") - netstat = get_subprocess_output(["netstat", "-n", "-u", "-t", "-a"], self.log) + lines = get_subprocess_output(["netstat", "-n", "-u", "-t", "-a"], self.log).splitlines() # Active Internet connections (w/o servers) # Proto Recv-Q Send-Q Local Address Foreign Address State # tcp 0 0 46.105.75.4:80 79.220.227.193:2032 SYN_RECV @@ -174,9 +172,7 @@ def _check_linux(self, instance): # udp 0 0 0.0.0.0:123 0.0.0.0:* # udp6 0 0 :::41458 :::* - lines = netstat.split("\n") - - metrics = self._parse_linux_cx_state(lines[2:-1], self.TCP_STATES['netstat'], 5) + metrics = self._parse_linux_cx_state(lines[2:], self.TCP_STATES['netstat'], 5) for metric, value in metrics.iteritems(): self.gauge(metric, value) @@ -294,7 +290,7 @@ def _check_bsd(self, instance): # ham0 1404 seneca.loca fe80:6::7879:5ff: 30100 - 6815204 18742 - 8494811 - # ham0 1404 2620:9b::54 2620:9b::54d:bff5 30100 - 6815204 18742 - 8494811 - - lines = netstat.split("\n") + lines = netstat.splitlines() headers = lines[0].split() # Given the irregular structure of the table above, better to parse from the end of each line @@ -461,7 +457,7 @@ def _parse_solaris_netstat(self, netstat_output): 'oerrors':'packets_out.error', } - lines = [l for l in netstat_output.split("\n") if len(l) > 0] + lines = [l for l in netstat_output.splitlines() if len(l) > 0] metrics_by_interface = {}