Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support XDR metrics for Aerospike Enterprise 5.0+ #8696

Merged
merged 19 commits into from
Apr 6, 2021
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions aerospike/assets/configuration/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ files:
- migrate_rx_objs
- migrate_tx_objs
- name: namespaces
description: all namespaces are collected by default. If you want to collect specific namespaces specify them here.
description: All namespaces are collected by default. If you want to collect specific namespaces specify them here.
value:
type: array
items:
Expand All @@ -81,7 +81,11 @@ files:
- example_namespace
- another_example_namespace
- name: datacenters
description: all datacenters are collected by default. If you want to collect specific datacenters specify them here.
description: |
For Aerospike v4 or lower, all datacenters are collected by default.
If you want to collect specific datacenters specify them here.

For Aerospike v5 or higher, you must specify a list of datacenters to monitor XDR metrics.
value:
type: array
items:
Expand Down
65 changes: 55 additions & 10 deletions aerospike/datadog_checks/aerospike/aerospike.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
DATACENTER_SERVICE_CHECK_CONNECT = '%s.datacenter.can_connect' % SOURCE_TYPE_NAME
CLUSTER_METRIC_TYPE = SOURCE_TYPE_NAME
DATACENTER_METRIC_TYPE = '%s.datacenter' % SOURCE_TYPE_NAME
XDR_DATACENTER_METRIC_TYPE = '%s.xdr_dc' % SOURCE_TYPE_NAME
NAMESPACE_METRIC_TYPE = '%s.namespace' % SOURCE_TYPE_NAME
NAMESPACE_TPS_METRIC_TYPE = '%s.namespace.tps' % SOURCE_TYPE_NAME
NAMESPACE_LATENCY_METRIC_TYPE = '%s.namespace.latency' % SOURCE_TYPE_NAME
Expand Down Expand Up @@ -150,22 +151,24 @@ def check(self, _):
self.log.debug("Could not determine version, assuming Aerospike v5.1")
version = V5_1

# Handle metric compatibility for latency/throughput
if version < V5_1:
self.collect_throughput(namespaces)
self.collect_latency(namespaces)

if version < V5_0:
try:
datacenters = self.get_datacenters()

for dc in datacenters:
self.collect_datacenter(dc)

except Exception as e:
self.log.debug("There were no datacenters found: %s", e)
else:
self.collect_latencies(namespaces)

# Handle metric compatibility for xdr/dc
if version >= V5_0:
self.collect_xdr()
else:
try:
datacenters = self.get_datacenters()
for dc in datacenters:
self.collect_datacenter(dc)
except Exception as e:
self.log.debug("There were no datacenters found: %s", e)

self.service_check(SERVICE_CHECK_UP, self.OK, tags=self._tags)

def collect_version(self):
Expand Down Expand Up @@ -226,6 +229,46 @@ def get_datacenters(self):

return datacenters

def collect_xdr(self):
    """
    Collect XDR metrics for every datacenter listed in ``self._required_datacenters``.

    XDR metrics are available from the get-stats command as of Aerospike 5.0.0

    https://www.aerospike.com/docs/reference/info/#get-stats

    The raw response is read line by line. A line containing ``returned`` is
    treated as a header naming the remote node, e.g.
    ``ip-10-10-17-247.ec2.internal:3000 (10.10.17.247) returned:``; every other
    non-empty line is treated as a ``;``-separated list of ``key=value`` stats.
    Each stat is submitted through ``self.send`` under ``XDR_DATACENTER_METRIC_TYPE``.

    Malformed pieces (a header without a port, an empty or ``=``-less stat item)
    are skipped instead of raising, so one bad token does not abort the
    collection of the remaining metrics.
    """
    if not self._required_datacenters:
        self.log.debug("No datacenters were specified to collect XDR metrics: %s", self._required_datacenters)
        return

    for dc in self._required_datacenters:
        datacenter_tags = ['datacenter:{}'.format(dc)]
        data = self.get_info('get-stats:context=xdr;dc={}'.format(dc), separator=None)
        if not data:
            self.log.debug("Got invalid data for dc %s", dc)
            continue
        self.log.debug("Got data for dc `%s`: %s", dc, data)

        # Until a `returned:` header is seen, stats are still attributed to the
        # datacenter that was queried rather than being sent without any tag.
        tags = list(datacenter_tags)
        for line in data.split("\n"):
            line = line.strip()
            if not line:
                continue

            if 'returned' in line:
                # Parse remote dc host and port from
                # `ip-10-10-17-247.ec2.internal:3000 (10.10.17.247) returned:`
                endpoint = line.split(" (")[0]
                host, sep, port = endpoint.rpartition(":")
                if sep:
                    tags = [
                        'remote_dc_host:{}'.format(host),
                        'remote_dc_port:{}'.format(port),
                    ] + datacenter_tags
                else:
                    # No `:port` suffix: keep the host tag, omit the port tag.
                    tags = ['remote_dc_host:{}'.format(endpoint)] + datacenter_tags
                continue

            # Parse metrics from
            # lag=0;in_queue=0;in_progress=0;success=98344698;abandoned=0;not_found=0;filtered_out=0;...
            xdr_metrics = line.split(';')
            self.log.debug("For dc host tags %s, got: %s", tags, xdr_metrics)
            for item in xdr_metrics:
                key, sep, value = item.partition('=')
                if not sep or not key:
                    # Empty item (e.g. from a trailing `;`) or a token without `=`.
                    if item:
                        self.log.debug("Skipping malformed XDR metric `%s` for dc %s", item, dc)
                    continue
                self.send(XDR_DATACENTER_METRIC_TYPE, key, value, tags)

def get_client(self):
client_config = {'hosts': [self._host]}
if self._tls_config:
Expand Down Expand Up @@ -255,11 +298,13 @@ def get_info(self, command, separator=';'):
except Exception as e:
self.log.warning("Command `%s` was unsuccessful: %s", command, str(e))
return []

# Get rid of command and whitespace
data = data[len(command) :].strip()

if not separator:
return data

if not data:
return []

Expand Down
7 changes: 5 additions & 2 deletions aerospike/datadog_checks/aerospike/data/conf.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,17 @@ instances:
# - migrate_tx_objs

## @param namespaces - list of strings - optional
## all namespaces are collected by default. If you want to collect specific namespaces specify them here.
## All namespaces are collected by default. If you want to collect specific namespaces specify them here.
#
# namespaces:
# - example_namespace
# - another_example_namespace

## @param datacenters - list of strings - optional
## all datacenters are collected by default. If you want to collect specific datacenters specify them here.
## For Aerospike v4 or lower, all datacenters are collected by default.
## If you want to collect specific datacenters specify them here.
##
## For Aerospike v5 or higher, you must specify a list of datacenters to monitor XDR metrics.
#
# datacenters:
# - example_datacenter
Expand Down
25 changes: 25 additions & 0 deletions aerospike/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@
'dc_as_size=2',
]

# Sample `get-stats:context=xdr` payload: a leading blank line, one
# `<host>:<port> (<ip>) returned:` header followed by a blank line, then a
# single line of `;`-separated `key=value` stats.
MOCK_XDR_DATACENTER_METRICS = (
    "\n"
    "ip-10-10-17-247.ec2.internal:3000 (10.10.17.247) returned:\n"
    "\n"
    "lag=0;in_queue=0;in_progress=0;success=98344698;abandoned=0;not_found=0;filtered_out=0;"
    "retry_no_node=0;retry_conn_reset=775483;retry_dest=0;recoveries=293;recoveries_pending=0;"
    "hot_keys=20291210;uncompressed_pct=0.000;compression_ratio=1.000;throughput=0;latency_ms=17;lap_us=348"
    "\n"
)

DATACENTER_METRICS = [
'aerospike.datacenter.dc_timelag',
Expand All @@ -123,3 +127,24 @@
'aerospike.datacenter.dc_as_open_conn',
'aerospike.datacenter.dc_as_size',
]

# Fully-qualified names of the per-datacenter XDR metrics, built from the
# shared `aerospike.xdr_dc.` prefix and each stat name.
XDR_DC_METRICS = [
    'aerospike.xdr_dc.' + stat
    for stat in (
        'lag',
        'in_queue',
        'in_progress',
        'success',
        'abandoned',
        'not_found',
        'filtered_out',
        'retry_no_node',
        'retry_conn_reset',
        'retry_dest',
        'recoveries',
        'recoveries_pending',
        'hot_keys',
        'uncompressed_pct',
        'compression_ratio',
        'throughput',
        'latency_ms',
        'lap_us',
    )
]
18 changes: 18 additions & 0 deletions aerospike/tests/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,24 @@ def mock_get_info(command, separator=";"):
aggregator.assert_metric(metric)


def test_xdr_metrics(aggregator):
    """Check that `collect_xdr` submits every metric in `common.XDR_DC_METRICS`.

    `get_info` is replaced by a mock returning a canned response that holds two
    `returned:` sections (nodes 10.10.17.247 and 10.10.17.144).
    """
    # Canned `get-stats` response: a header line plus a stats line per remote node.
    xdr_response = (
        "ip-10-10-17-247.ec2.internal:3000 (10.10.17.247) returned:\nlag=0;in_queue=0;in_progress=0;"
        "success=98344698;abandoned=0;not_found=0;filtered_out=0;retry_no_node=0;retry_conn_reset=775483;"
        "retry_dest=0;recoveries=293;recoveries_pending=0;hot_keys=20291210;uncompressed_pct=0.000;"
        "compression_ratio=1.000;throughput=0;latency_ms=17;lap_us=348 \n\nip-10-10-17-144.ec2.internal"
        ":3000 (10.10.17.144) returned:\nlag=0;in_queue=0;in_progress=0;success=98294822;abandoned=0;"
        "not_found=0;filtered_out=0;retry_no_node=0;retry_conn_reset=813513;retry_dest=0;recoveries=293;"
        "recoveries_pending=0;hot_keys=20286479;uncompressed_pct=0.000;compression_ratio=1.000;"
        "throughput=0;latency_ms=14;lap_us=232\n\n"
    )

    check = AerospikeCheck('aerospike', {}, [common.INSTANCE])
    check.get_info = mock.MagicMock(return_value=xdr_response)

    check.collect_xdr()

    for expected_metric in common.XDR_DC_METRICS:
        aggregator.assert_metric(expected_metric)


def test_connection_uses_tls():
instance = copy.deepcopy(common.INSTANCE)
tls_config = {'cafile': 'my-ca-file', 'certfile': 'my-certfile', 'keyfile': 'my-keyfile'}
Expand Down