-
Notifications
You must be signed in to change notification settings - Fork 814
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add service check to the SNMP check #1236
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,9 @@ def reply_invalid(oid): | |
class SnmpCheck(AgentCheck): | ||
|
||
cmd_generator = None | ||
# pysnmp default values | ||
RETRIES = 5 | ||
TIMEOUT = 1 | ||
|
||
def __init__(self, name, init_config, agentConfig, instances=None): | ||
AgentCheck.__init__(self, name, init_config, agentConfig, instances) | ||
|
@@ -91,15 +94,15 @@ def get_auth_data(cls, instance): | |
raise Exception("An authentication method needs to be provided") | ||
|
||
@classmethod | ||
def get_transport_target(cls, instance): | ||
def get_transport_target(cls, instance, timeout, retries): | ||
''' | ||
Generate a Transport target object based on the instance's configuration | ||
''' | ||
if "ip_address" not in instance: | ||
raise Exception("An IP address needs to be specified") | ||
ip_address = instance["ip_address"] | ||
port = instance.get("port", 161) # Default SNMP port | ||
return cmdgen.UdpTransportTarget((ip_address, port)) | ||
return cmdgen.UdpTransportTarget((ip_address, port), timeout=timeout, retries=retries) | ||
|
||
def check_table(self, instance, oids, lookup_names): | ||
''' | ||
|
@@ -112,7 +115,7 @@ def check_table(self, instance, oids, lookup_names): | |
dict[oid/metric_name][row index] = value | ||
In case of scalar objects, the row index is just 0 | ||
''' | ||
transport_target = self.get_transport_target(instance) | ||
transport_target = self.get_transport_target(instance, self.TIMEOUT, self.RETRIES) | ||
auth_data = self.get_auth_data(instance) | ||
|
||
snmp_command = self.cmd_generator.nextCmd | ||
|
@@ -126,12 +129,16 @@ def check_table(self, instance, oids, lookup_names): | |
|
||
results = defaultdict(dict) | ||
if error_indication: | ||
raise Exception("{0} for instance {1}".format(error_indication, | ||
instance["ip_address"])) | ||
message = "{0} for instance {1}".format(error_indication, | ||
instance["ip_address"]) | ||
instance["service_check_error"] = message | ||
raise Exception(message) | ||
else: | ||
if error_status: | ||
self.log.warning("{0} for instance {1}".format(error_status.prettyPrint(), | ||
instance["ip_address"])) | ||
message = "{0} for instance {1}".format(error_status.prettyPrint(), | ||
instance["ip_address"]) | ||
instance["service_check_error"] = message | ||
self.log.warning(message) | ||
else: | ||
for table_row in var_binds: | ||
for result_oid, value in table_row: | ||
|
@@ -177,16 +184,29 @@ def check(self, instance): | |
raw_oids.append(metric['OID']) | ||
else: | ||
raise Exception('Unsupported metric in config file: %s' % metric) | ||
|
||
if table_oids: | ||
self.log.debug("Querying device %s for %s oids", ip_address, len(table_oids)) | ||
table_results = self.check_table(instance, table_oids, True) | ||
self.report_table_metrics(instance, table_results) | ||
|
||
if raw_oids: | ||
self.log.debug("Querying device %s for %s oids", ip_address, len(raw_oids)) | ||
raw_results = self.check_table(instance, raw_oids, False) | ||
self.report_raw_metrics(instance, raw_results) | ||
try: | ||
if table_oids: | ||
self.log.debug("Querying device %s for %s oids", ip_address, len(table_oids)) | ||
table_results = self.check_table(instance, table_oids, True) | ||
self.report_table_metrics(instance, table_results) | ||
|
||
if raw_oids: | ||
self.log.debug("Querying device %s for %s oids", ip_address, len(raw_oids)) | ||
raw_results = self.check_table(instance, raw_oids, False) | ||
self.report_raw_metrics(instance, raw_results) | ||
except Exception as e: | ||
if "service_check_error" not in instance: | ||
instance["service_check_error"] = "Fail to collect metrics: {0}".format(e) | ||
raise | ||
finally: | ||
# Report service checks | ||
service_check_name = "snmp.can_check" | ||
tags = ["snmp_device:%s" % ip_address] | ||
if "service_check_error" in instance: | ||
self.service_check(service_check_name, AgentCheck.CRITICAL, tags=tags, | ||
message=instance["service_check_error"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we look at line 140/141, we can set There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I kept the previous logic around error_indication and error_status, however in both case I'll return a ERROR service check. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You sneaky deepcopy 🙈 |
||
else: | ||
self.service_check(service_check_name, AgentCheck.OK, tags=tags) | ||
|
||
def report_raw_metrics(self, instance, results): | ||
''' | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you set this variable as a class-level variable as it's done for the other checks for consistency