diff --git a/tests/unit/test_vmware_exporter.py b/tests/unit/test_vmware_exporter.py index 557f892..e05bc5b 100644 --- a/tests/unit/test_vmware_exporter.py +++ b/tests/unit/test_vmware_exporter.py @@ -656,8 +656,6 @@ def test_collect_hosts(): }, 'triggeredAlarmState': '', 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo': '', - 'runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo': '', - 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo': '', }, 'host:2': { 'id': 'host:2', @@ -667,8 +665,6 @@ def test_collect_hosts(): 'summary.customValue': {}, 'triggeredAlarmState': '', 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo': '', - 'runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo': '', - 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo': '', }, 'host:3': { 'id': 'host:3', @@ -690,8 +686,6 @@ def test_collect_hosts(): 'summary.customValue': {}, 'triggeredAlarmState': '', 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo': '', - 'runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo': '', - 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo': '', }, 'host:4': { 'id': 'host:4', @@ -713,8 +707,6 @@ def test_collect_hosts(): 'summary.customValue': {}, 'triggeredAlarmState': '', 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo': '', - 'runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo': '', - 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo': '', }, 'host:5': { 'id': 'host:5', @@ -740,9 +732,27 @@ def test_collect_hosts(): 'triggeredAlarm:HostCPUUsageAlarm:yellow' ) ), - 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo': 'sensorInfo:OtherAlarm:red', - 'runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo': 'memoryStatusInfo:OtherAlarm:yellow', - 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo': 'cpuStatusInfo:OtherAlarm:yellow' + 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo': ','.join( + ( + 'numericSensorInfo:name=Fan Device 12 System Fan ' + '6B:type=fan:sensorStatus=yellow:value=821700:unitModifier=-2:unit=rpm', + 'numericSensorInfo:name=Power Supply 2 PS2 ' + 'Temperature:type=temperature:sensorStatus=green:value=2900:unitModifier=-2:unit=degrees c', + 'numericSensorInfo:name=System Board 1 VR Watchdog ' + '0:type=voltage:sensorStatus=red:value=2000:unitModifier=0:unit=volts', + 'numericSensorInfo:name=Power Supply 2 Current ' + '2:type=power:sensorStatus=green:value=20:unitModifier=-2:unit=amps', + 'numericSensorInfo:name=System Board 1 Pwr ' + 'Consumption:type=power:sensorStatus=green:value=7000:unitModifier=-2:unit=watts', + 'numericSensorInfo:name=Cooling Unit 1 Fan Redundancy ' + '0:type=power:sensorStatus=green:value=1:unitModifier=0:unit=redundancy-discrete', + 'numericSensorInfo:name=Management Controller Firmware 2 NM ' + 'Capabilities:type=other:sensorStatus=unknown:value=5:unitModifier=0:unit=unspecified', + 'cpuStatusInfo:name=CPU 1:type=n/a:sensorStatus=green:value=n/a:unitModifier=n/a:unit=n/a', + 'memoryStatusInfo:name=Memory 12:type=n/a:sensorStatus=yellow:value=n/a:unitModifier=n/a' + ':unit=n/a', + ) + ), }, }) yield collector._vmware_get_hosts(metrics) @@ -845,8 +855,8 @@ def test_collect_hosts(): } # Host:5 testing alarms - assert metrics['vmware_host_yellow_alarms'].samples[4][2] == 3 - assert metrics['vmware_host_red_alarms'].samples[4][2] == 2 + assert metrics['vmware_host_yellow_alarms'].samples[4][2] == 1 + assert metrics['vmware_host_red_alarms'].samples[4][2] == 1 assert metrics['vmware_host_yellow_alarms'].samples[4][1] == { 'cluster_name': 'cluster', @@ -854,7 +864,99 @@ def test_collect_hosts(): 'customValue2': 'n/a', 'dc_name': 'dc', 'host_name': 'host-5', - 'alarms': 'triggeredAlarm:HostCPUUsageAlarm,cpuStatusInfo:OtherAlarm,memoryStatusInfo:OtherAlarm' + 'alarms': 'triggeredAlarm:HostCPUUsageAlarm' + } + + # Host:5 testing sensors + assert len(metrics['vmware_host_sensor_state'].samples) == 9 + assert metrics['vmware_host_sensor_state'].samples[3][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'Power Supply 2 Current 2', + 'type': 'power' + } + + assert metrics['vmware_host_sensor_fan'].samples[0][2] == 8217 + assert metrics['vmware_host_sensor_fan'].samples[0][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'Fan Device 12 System Fan 6B', + } + + assert metrics['vmware_host_sensor_temperature'].samples[0][2] == 29 + assert metrics['vmware_host_sensor_temperature'].samples[0][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'Power Supply 2 PS2 Temperature', + } + + assert metrics['vmware_host_sensor_power_voltage'].samples[0][2] == 2000 + assert metrics['vmware_host_sensor_power_voltage'].samples[0][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'System Board 1 VR Watchdog 0', + } + + assert metrics['vmware_host_sensor_power_current'].samples[0][2] == 0.2 + assert metrics['vmware_host_sensor_power_current'].samples[0][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'Power Supply 2 Current 2', + } + + assert metrics['vmware_host_sensor_power_watt'].samples[0][2] == 70 + assert metrics['vmware_host_sensor_power_watt'].samples[0][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'System Board 1 Pwr Consumption', + } + + assert metrics['vmware_host_sensor_redundancy'].samples[0][2] == 1 + assert metrics['vmware_host_sensor_redundancy'].samples[0][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'Cooling Unit 1 Fan Redundancy 0', + } + + assert metrics['vmware_host_sensor_state'].samples[7][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'CPU 1', + 'type': 'n/a' + } + + assert metrics['vmware_host_sensor_state'].samples[8][1] == { + 'cluster_name': 'cluster', + 'customValue1': 'n/a', + 'customValue2': 'n/a', + 'dc_name': 'dc', + 'host_name': 'host-5', + 'name': 'Memory 12', + 'type': 'n/a' } diff --git a/vmware_exporter/helpers.py b/vmware_exporter/helpers.py index 3779afe..537d48a 100644 --- a/vmware_exporter/helpers.py +++ b/vmware_exporter/helpers.py @@ -107,55 +107,41 @@ def batch_fetch_properties(content, obj_type, properties): elif 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo' == prop.name: """ - host hardware sensors alarms + handle numericSensorInfo """ - try: - alarms = list( - 'sensorInfo:{}:{}'.format(item.name.replace(' ', ''), item.healthState.key.lower()) - for item in prop.val if item.healthState.key.lower() not in ('green', 'unknown') - ) - except Exception: - alarms = ['sensorInfo:AlarmsUnavailable:yellow'] - - properties[prop.name] = ','.join(alarms) - - elif 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo' == prop.name: - """ - cpu status info alarms - """ - try: - alarms = list( - 'cpuStatusInfo:{}:{}'.format(item.name.replace(' ', ''), item.status.key.lower()) - for item in prop.val if item.status.key.lower() not in ('green', 'unknown') + sensors = list( + 'numericSensorInfo:name={}:type={}:sensorStatus={}:value={}:unitModifier={}:unit={}'.format( + item.name, + item.sensorType, + item.healthState.key, + item.currentReading, + item.unitModifier, + item.baseUnits.lower() ) - except Exception: - alarms = ['cpuStatusInfo:AlarmsUnavailable:yellow'] - - properties[prop.name] = ','.join(alarms) + for item in prop.val + ) + properties[prop.name] = ','.join(sensors) - elif 'runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo' == prop.name: + elif prop.name in [ + 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo', + 'runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo', + ]: """ - memory status info alarms + handle hardwareStatusInfo """ - try: - alarms = list( - 'memoryStatusInfo:{}:{}'.format(item.name.replace(' ', ''), item.status.key.lower()) - for item in prop.val if item.status.key.lower() not in ('green', 'unknown') + sensors = list( + 'numericSensorInfo:name={}:type={}:sensorStatus={}:value={}:unitModifier={}:unit={}'.format( + item.name, + "n/a", + item.status.key, + "n/a", + "n/a", + "n/a", ) - except Exception: - alarms = ['memoryStatusInfo:AlarmsUnavailable:yellow'] - - properties[prop.name] = ','.join(alarms) + for item in prop.val + ) + properties[prop.name] = ','.join(sensors) - # storage status info alarms - not included because they made no sense in here - # sine there are specific datastore alarms - # - # elif 'runtime.healthSystemRuntime.hardwareStatusInfo.storageStatusInfo' == prop.name: - # alarms = list( - # 'storageStatusInfo:{}:{}'.format(item.name.replace(' ',''), item.status.key.lower()) - # for item in prop.val if item.status.key.lower() not in ('green', 'unknown') - # ) - # properties[prop.name] = ','.join(alarms) else: properties[prop.name] = prop.val diff --git a/vmware_exporter/vmware_exporter.py b/vmware_exporter/vmware_exporter.py index bfafc85..706b482 100755 --- a/vmware_exporter/vmware_exporter.py +++ b/vmware_exporter/vmware_exporter.py @@ -23,6 +23,7 @@ disable annoying urllib3 warning messages for connecting to servers with non verified certificate Doh! """ from requests.packages.urllib3.exceptions import InsecureRequestWarning + requests.packages.urllib3.disable_warnings(InsecureRequestWarning) """ @@ -51,16 +52,16 @@ class VmwareCollector(): def __init__( - self, - host, - username, - password, - collect_only, - specs_size, - fetch_custom_attributes=False, - ignore_ssl=False, - fetch_tags=False, - fetch_alarms=False + self, + host, + username, + password, + collect_only, + specs_size, + fetch_custom_attributes=False, + ignore_ssl=False, + fetch_tags=False, + fetch_alarms=False ): self.host = host @@ -261,13 +262,41 @@ def _create_metric_containers(self): 'vmware_host_hardware_info', 'A metric with a constant "1" value labeled by model and cpu model from the host.', labels=self._labelNames['hosts'] + ['hardware_model', 'hardware_cpu_model']), + 'vmware_host_sensor_state': GaugeMetricFamily( + 'vmware_host_sensor_state', + 'VMWare sensor state value (0=red / 1=yellow / 2=green / 3=unknown) labeled by sensor name and type ' + 'from the host.', + labels=self._labelNames['hosts'] + ['name', 'type']), + 'vmware_host_sensor_fan': GaugeMetricFamily( + 'vmware_host_sensor_fan', + 'VMWare sensor fan speed value in RPM labeled by sensor name from the host.', + labels=self._labelNames['hosts'] + ['name']), + 'vmware_host_sensor_temperature': GaugeMetricFamily( + 'vmware_host_sensor_temperature', + 'VMWare sensor temperature value in degree C labeled by sensor name from the host.', + labels=self._labelNames['hosts'] + ['name']), + 'vmware_host_sensor_power_voltage': GaugeMetricFamily( + 'vmware_host_sensor_power_voltage', + 'VMWare sensor power voltage value in volt labeled by sensor name from the host.', + labels=self._labelNames['hosts'] + ['name']), + 'vmware_host_sensor_power_current': GaugeMetricFamily( + 'vmware_host_sensor_power_current', + 'VMWare sensor power current value in amp labeled by sensor name from the host.', + labels=self._labelNames['hosts'] + ['name']), + 'vmware_host_sensor_power_watt': GaugeMetricFamily( + 'vmware_host_sensor_power_watt', + 'VMWare sensor power watt value in watt labeled by sensor name from the host.', + labels=self._labelNames['hosts'] + ['name']), + 'vmware_host_sensor_redundancy': GaugeMetricFamily( + 'vmware_host_sensor_redundancy', + 'VMWare sensor redundancy value (1=ok / 0=ko) labeled by sensor name from the host.', + labels=self._labelNames['hosts'] + ['name']), } """ if alarms are being retrieved, metrics have to been created here """ if self.fetch_alarms: - """ for hosts """ @@ -389,7 +418,7 @@ def collect(self): # Collect Datastore metrics if collect_only['datastores'] is True: - tasks.append(self._vmware_get_datastores(metrics,)) + tasks.append(self._vmware_get_datastores(metrics, )) if collect_only['hosts'] is True: tasks.append(self._vmware_get_hosts(metrics)) @@ -401,7 +430,7 @@ def collect(self): logging.info("Finished collecting metrics from {vsphere_host}".format(vsphere_host=vsphere_host)) - return list(metrics.values()) # noqa: F705 + return list(metrics.values()) # noqa: F705 def _to_epoch(self, my_date): """ convert to epoch time """ @@ -652,6 +681,9 @@ def host_system_inventory(self): 'summary.quickStats.overallMemoryUsage', 'summary.hardware.cpuModel', 'summary.hardware.model', + 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo', + 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo', + 'runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo', ] """ @@ -668,10 +700,6 @@ def host_system_inventory(self): """ if self.fetch_alarms: properties.append('triggeredAlarmState') - properties.append('runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo') - properties.append('runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo') - properties.append('runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo') - # properties.append('runtime.healthSystemRuntime.hardwareStatusInfo.storageStatusInfo') host_systems = yield self.batch_fetch_properties( vim.HostSystem, @@ -1641,15 +1669,7 @@ def _vmware_get_hosts(self, host_metrics): filter red and yellow alarms """ if self.fetch_alarms: - - alarms = host.get('triggeredAlarmState').split(',') + \ - host.get('runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo').split(',') + \ - host.get('runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo', '').split(',') + \ - host.get('runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo', '').split(',') - - # host.get('runtime.healthSystemRuntime.hardwareStatusInfo.storageStatusInfo', '').split(',') - - alarms = [a for a in alarms if ':' in a] + alarms = [a for a in host.get('triggeredAlarmState', '').split(',') if ':' in a] # Red alarms red_alarms = [':'.join(a.split(':')[:-1]) for a in alarms if a.split(':')[-1] == 'red'] @@ -1667,6 +1687,70 @@ def _vmware_get_hosts(self, host_metrics): len(yellow_alarms) ) + # Numeric Sensor Info + sensors = host.get('runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo', '').split(',') + \ + host.get('runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo', '').split(',') + \ + host.get('runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo', '').split(',') + + sensors = [s for s in sensors if ':' in s] + + for s in sensors: + sensor = dict(item.split("=") for item in s.split(":")[1:]) + + sensor_status = { + 'red': 0, + 'yellow': 1, + 'green': 2, + 'unknown': 3, + }[sensor['sensorStatus']] + + host_metrics['vmware_host_sensor_state'].add_metric( + labels + [sensor['name'], sensor['type']], + sensor_status + ) + + # FAN speed + if sensor["unit"] == 'rpm': + host_metrics['vmware_host_sensor_fan'].add_metric( + labels + [sensor['name']], + int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + ) + + # Temperature + if sensor["unit"] == 'degrees c': + host_metrics['vmware_host_sensor_temperature'].add_metric( + labels + [sensor['name']], + int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + ) + + # Power Voltage + if sensor["unit"] == 'volts': + host_metrics['vmware_host_sensor_power_voltage'].add_metric( + labels + [sensor['name']], + int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + ) + + # Power Current + if sensor["unit"] == 'amps': + host_metrics['vmware_host_sensor_power_current'].add_metric( + labels + [sensor['name']], + int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + ) + + # Power Watt + if sensor["unit"] == 'watts': + host_metrics['vmware_host_sensor_power_watt'].add_metric( + labels + [sensor['name']], + int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + ) + + # Redundancy + if sensor["unit"] == 'redundancy-discrete': + host_metrics['vmware_host_sensor_redundancy'].add_metric( + labels + [sensor['name']], + int(sensor['value']) + ) + # Standby Mode standby_mode = 1 if host.get('runtime.standbyMode') == 'in' else 0 standby_mode_state = host.get('runtime.standbyMode', 'unknown') @@ -1697,7 +1781,6 @@ def _vmware_get_hosts(self, host_metrics): continue if host.get('runtime.bootTime'): - # Host uptime host_metrics['vmware_host_boot_timestamp_seconds'].add_metric( labels, @@ -1760,7 +1843,6 @@ def collect(self): class VMWareMetricsResource(Resource): - isLeaf = True def __init__(self, args): @@ -1893,7 +1975,6 @@ def generate_latest_metrics(self, request): class HealthzResource(Resource): - isLeaf = True def render_GET(self, request):