Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[network] Use ss instead of netstat on linux #1859

Merged
merged 4 commits into from
Sep 24, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 84 additions & 46 deletions checks.d/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# project
from checks import AgentCheck
from utils.platform import Platform
from utils.subprocess_output import get_subprocess_output

BSD_TCP_METRICS = [
(re.compile("^\s*(\d+) data packets \(\d+ bytes\) retransmitted\s*$"), 'system.net.tcp.retrans_packs'),
Expand All @@ -27,20 +28,35 @@ class Network(AgentCheck):
SOURCE_TYPE_NAME = 'system'

TCP_STATES = {
"ESTABLISHED": "established",
"SYN_SENT": "opening",
"SYN_RECV": "opening",
"FIN_WAIT1": "closing",
"FIN_WAIT2": "closing",
"TIME_WAIT": "time_wait",
"CLOSE": "closing",
"CLOSE_WAIT": "closing",
"LAST_ACK": "closing",
"LISTEN": "listening",
"CLOSING": "closing",
"ss": {
"ESTAB": "established",
"SYN-SENT": "opening",
"SYN-RECV": "opening",
"FIN-WAIT-1": "closing",
"FIN-WAIT-2": "closing",
"TIME-WAIT": "time_wait",
"UNCONN": "closing",
"CLOSE-WAIT": "closing",
"LAST-ACK": "closing",
"LISTEN": "listening",
"CLOSING": "closing",
},
"netstat": {
"ESTABLISHED": "established",
"SYN_SENT": "opening",
"SYN_RECV": "opening",
"FIN_WAIT1": "closing",
"FIN_WAIT2": "closing",
"TIME_WAIT": "time_wait",
"CLOSE": "closing",
"CLOSE_WAIT": "closing",
"LAST_ACK": "closing",
"LISTEN": "listening",
"CLOSING": "closing",
}
}

NETSTAT_GAUGE = {
CX_STATE_GAUGE = {
('udp4', 'connections') : 'system.net.udp4.connections',
('udp6', 'connections') : 'system.net.udp6.connections',
('tcp4', 'established') : 'system.net.tcp4.established',
Expand Down Expand Up @@ -113,7 +129,7 @@ def _parse_value(self, v):
return 0

def _submit_regexed_values(self, output, regex_list):
lines = output.split("\n")
lines = output.splitlines()
for line in lines:
for regex, metric in regex_list:
value = re.match(regex, line)
Expand All @@ -122,39 +138,43 @@ def _submit_regexed_values(self, output, regex_list):

def _check_linux(self, instance):
if self._collect_cx_state:
netstat = subprocess.Popen(["netstat", "-n", "-u", "-t", "-a"],
stdout=subprocess.PIPE,
close_fds=True).communicate()[0]
# Active Internet connections (w/o servers)
# Proto Recv-Q Send-Q Local Address Foreign Address State
# tcp 0 0 46.105.75.4:80 79.220.227.193:2032 SYN_RECV
# tcp 0 0 46.105.75.4:143 90.56.111.177:56867 ESTABLISHED
# tcp 0 0 46.105.75.4:50468 107.20.207.175:443 TIME_WAIT
# tcp6 0 0 46.105.75.4:80 93.15.237.188:58038 FIN_WAIT2
# tcp6 0 0 46.105.75.4:80 79.220.227.193:2029 ESTABLISHED
# udp 0 0 0.0.0.0:123 0.0.0.0:*
# udp6 0 0 :::41458 :::*

lines = netstat.split("\n")

metrics = dict.fromkeys(self.NETSTAT_GAUGE.values(), 0)
for l in lines[2:-1]:
cols = l.split()
# 0 1 2 3 4 5
try:
self.log.debug("Using `ss` to collect connection state")
# Try using `ss` for increased performance over `netstat`
for ip_version in ['4', '6']:
# Call `ss` for each IP version because there's no built-in way of distinguishing
# between the IP versions in the output
lines = get_subprocess_output(["ss", "-n", "-u", "-t", "-a", "-{0}".format(ip_version)], self.log).splitlines()
# Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
# udp UNCONN 0 0 127.0.0.1:8125 *:*
# udp ESTAB 0 0 127.0.0.1:37036 127.0.0.1:8125
# udp UNCONN 0 0 fe80::a00:27ff:fe1c:3c4:123 :::*
# tcp TIME-WAIT 0 0 90.56.111.177:56867 46.105.75.4:143
# tcp LISTEN 0 0 ::ffff:127.0.0.1:33217 ::ffff:127.0.0.1:7199
# tcp ESTAB 0 0 ::ffff:127.0.0.1:58975 ::ffff:127.0.0.1:2181

metrics = self._parse_linux_cx_state(lines[1:], self.TCP_STATES['ss'], 1, ip_version=ip_version)
# Only send the metrics which match the loop iteration's ip version
for stat, metric in self.CX_STATE_GAUGE.iteritems():
if stat[0].endswith(ip_version):
self.gauge(metric, metrics.get(metric))

except OSError:
self.log.info("`ss` not found: using `netstat` as a fallback")
lines = get_subprocess_output(["netstat", "-n", "-u", "-t", "-a"], self.log).splitlines()
# Active Internet connections (w/o servers)
# Proto Recv-Q Send-Q Local Address Foreign Address State
# tcp 0 0 46.105.75.4:80 79.220.227.193:2032 SYN_RECV
# tcp 0 0 46.105.75.4:143 90.56.111.177:56867 ESTABLISHED
if cols[0].startswith("tcp"):
protocol = ("tcp4", "tcp6")[cols[0] == "tcp6"]
if cols[5] in self.TCP_STATES:
metric = self.NETSTAT_GAUGE[protocol, self.TCP_STATES[cols[5]]]
metrics[metric] += 1
elif cols[0].startswith("udp"):
protocol = ("udp4", "udp6")[cols[0] == "udp6"]
metric = self.NETSTAT_GAUGE[protocol, 'connections']
metrics[metric] += 1

for metric, value in metrics.iteritems():
self.gauge(metric, value)
# tcp 0 0 46.105.75.4:50468 107.20.207.175:443 TIME_WAIT
# tcp6 0 0 46.105.75.4:80 93.15.237.188:58038 FIN_WAIT2
# tcp6 0 0 46.105.75.4:80 79.220.227.193:2029 ESTABLISHED
# udp 0 0 0.0.0.0:123 0.0.0.0:*
# udp6 0 0 :::41458 :::*

metrics = self._parse_linux_cx_state(lines[2:], self.TCP_STATES['netstat'], 5)
for metric, value in metrics.iteritems():
self.gauge(metric, value)

proc = open('/proc/net/dev', 'r')
try:
Expand Down Expand Up @@ -223,6 +243,24 @@ def _check_linux(self, instance):
# On Openshift, /proc/net/snmp is only readable by root
self.log.debug("Unable to read /proc/net/snmp.")

# Parse the output of the command that retrieves the connection state (either `ss` or `netstat`)
# Returns a dict metric_name -> value
def _parse_linux_cx_state(self, lines, tcp_states, state_col, ip_version=None):
metrics = dict.fromkeys(self.CX_STATE_GAUGE.values(), 0)
for l in lines:
cols = l.split()
if cols[0].startswith('tcp'):
protocol = "tcp{0}".format(ip_version) if ip_version else ("tcp4", "tcp6")[cols[0] == "tcp6"]
if cols[state_col] in tcp_states:
metric = self.CX_STATE_GAUGE[protocol, tcp_states[cols[state_col]]]
metrics[metric] += 1
elif cols[0].startswith('udp'):
protocol = "udp{0}".format(ip_version) if ip_version else ("udp4", "udp6")[cols[0] == "udp6"]
metric = self.CX_STATE_GAUGE[protocol, 'connections']
metrics[metric] += 1

return metrics

def _check_bsd(self, instance):
netstat_flags = ['-i', '-b']

Expand Down Expand Up @@ -252,7 +290,7 @@ def _check_bsd(self, instance):
# ham0 1404 seneca.loca fe80:6::7879:5ff: 30100 - 6815204 18742 - 8494811 -
# ham0 1404 2620:9b::54 2620:9b::54d:bff5 30100 - 6815204 18742 - 8494811 -

lines = netstat.split("\n")
lines = netstat.splitlines()
headers = lines[0].split()

# Given the irregular structure of the table above, better to parse from the end of each line
Expand Down Expand Up @@ -419,7 +457,7 @@ def _parse_solaris_netstat(self, netstat_output):
'oerrors':'packets_out.error',
}

lines = [l for l in netstat_output.split("\n") if len(l) > 0]
lines = [l for l in netstat_output.splitlines() if len(l) > 0]

metrics_by_interface = {}

Expand Down
16 changes: 16 additions & 0 deletions tests/checks/fixtures/network/netstat
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Active Internet connections (servers and established)
Proto Recv-Q Send-Q Local Address Foreign Address State
tcp 0 128 0.0.0.0:6379 0.0.0.0:* LISTEN
tcp 0 128 0.0.0.0:6380 0.0.0.0:* LISTEN
tcp 0 0 127.0.0.1:80 127.0.0.1:51650 TIME_WAIT
tcp 0 0 127.0.0.1:58414 127.0.0.1:9200 TIME_WAIT
tcp 0 0 10.0.2.15:45637 10.0.2.15:9300 ESTABLISHED
tcp6 0 128 :::6380 :::* LISTEN
tcp6 0 0 127.0.0.1:58488 127.0.0.1:7199 TIME_WAIT
tcp6 0 0 127.0.0.1:42395 127.0.0.1:2181 ESTABLISHED
tcp6 0 0 127.0.0.1:58439 127.0.0.1:7199 CLOSING
udp 0 0 127.0.0.1:48135 127.0.0.1:8125 ESTABLISHED
udp 0 0 127.0.0.1:8125 0.0.0.0:*
udp6 0 0 fe80::a00:27ff:fee9:123 :::* ESTABLISHED
udp6 0 0 fe80::a00:27ff:fe1c:123 :::*
udp6 0 0 :::111 :::*
8 changes: 8 additions & 0 deletions tests/checks/fixtures/network/ss_ipv4
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
udp ESTAB 0 0 127.0.0.1:48135 127.0.0.1:8125
udp UNCONN 0 0 127.0.0.1:8125 *:*
tcp LISTEN 0 128 *:6379 *:*
tcp LISTEN 0 128 *:6380 *:*
tcp TIME-WAIT 0 0 127.0.0.1:80 127.0.0.1:51650
tcp TIME-WAIT 0 0 127.0.0.1:58414 127.0.0.1:9200
tcp ESTAB 0 0 10.0.2.15:45637 10.0.2.15:9300
8 changes: 8 additions & 0 deletions tests/checks/fixtures/network/ss_ipv6
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
udp UNCONN 0 0 :::111 :::*
udp UNCONN 0 0 fe80::a00:27ff:fe1c:3c4:123 :::*
udp ESTAB 0 0 fe80::a00:27ff:fee9:10ee:123 :::*
tcp LISTEN 0 128 :::6380 :::*
tcp TIME-WAIT 0 0 ::ffff:127.0.0.1:58488 ::ffff:127.0.0.1:7199
tcp ESTAB 0 0 ::ffff:127.0.0.1:42395 ::ffff:127.0.0.1:2181
tcp CLOSING 0 0 ::ffff:127.0.0.1:58439 ::ffff:127.0.0.1:7199
66 changes: 66 additions & 0 deletions tests/checks/mock/test_network.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# 3p
import mock

# project
from tests.checks.common import AgentCheckTest, Fixtures


def ss_subprocess_mock(*args, **kwargs):
if args[0][-1] == '-4':
return Fixtures.read_file('ss_ipv4')
elif args[0][-1] == '-6':
return Fixtures.read_file('ss_ipv6')


def netstat_subprocess_mock(*args, **kwargs):
if args[0][0] == 'ss':
raise OSError
elif args[0][0] == 'netstat':
return Fixtures.read_file('netstat')


class TestCheckNetwork(AgentCheckTest):
CHECK_NAME = 'network'

def setUp(self):
self.config = {
"instances": [
{
"collect_connection_state": True
}
]
}
self.load_check(self.config)

CX_STATE_GAUGES_VALUES = {
'system.net.udp4.connections': 2,
'system.net.udp6.connections': 3,
'system.net.tcp4.established': 1,
'system.net.tcp4.opening': 0,
'system.net.tcp4.closing': 0,
'system.net.tcp4.listening': 2,
'system.net.tcp4.time_wait': 2,
'system.net.tcp6.established': 1,
'system.net.tcp6.opening': 0,
'system.net.tcp6.closing': 1,
'system.net.tcp6.listening': 1,
'system.net.tcp6.time_wait': 1,
}

@mock.patch('network.get_subprocess_output', side_effect=ss_subprocess_mock)
@mock.patch('network.Platform.is_linux', return_value=True)
def test_cx_state_linux_ss(self, mock_subprocess, mock_platform):
self.run_check({})

# Assert metrics
for metric, value in self.CX_STATE_GAUGES_VALUES.iteritems():
self.assertMetric(metric, value=value)

@mock.patch('network.get_subprocess_output', side_effect=netstat_subprocess_mock)
@mock.patch('network.Platform.is_linux', return_value=True)
def test_cx_state_linux_netstat(self, mock_subprocess, mock_platform):
self.run_check({})

# Assert metrics
for metric, value in self.CX_STATE_GAUGES_VALUES.iteritems():
self.assertMetric(metric, value=value)