-
Notifications
You must be signed in to change notification settings - Fork 3
/
ipmi_exporter.py
81 lines (69 loc) · 2.79 KB
/
ipmi_exporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import subprocess
import itertools
import time
import logging
import os
from multiprocessing import Process, Manager
from prometheus_client import start_http_server, Summary
from prometheus_client.core import GaugeMetricFamily, REGISTRY
# --- Configuration (read once from the environment at import time) ---------

# Comma-separated list of hosts handed to ``ipmitool -H``. Mandatory.
_target_ips = os.getenv('TARGET_IPS')
if _target_ips is None:
    raise Exception("Mandatory `TARGET_IPS` environment variable is not set")

# Strip surrounding whitespace and drop empty entries so that values such as
# "10.0.0.1, 10.0.0.2" or a trailing comma do not produce bogus host names.
IPS = [entry.strip() for entry in _target_ips.split(',') if entry.strip()]
if not IPS:
    raise Exception("Mandatory `TARGET_IPS` environment variable is empty")

# Credentials handed to ``ipmitool``; both fall back to 'ADMIN' when unset.
IPMI_USER = os.getenv('IPMI_USER', 'ADMIN')
IPMI_PASSWD = os.getenv('IPMI_PASSWD', 'ADMIN')

# Sensor names (matched as substrings) that are exported as metrics.
# The misspelled identifier is kept because other code in this file uses it.
REQURED = [
    "CPU1 Temp",
    "System Temp",
    "FAN1"
]

# Summary used to time metric collection (applied as a decorator on the
# collector's ``collect`` method).
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request')
def _run_cmd(ip, raw):
    """Run ``ipmitool ... sdr`` against one host and append its output to *raw*.

    The command's standard output is split on ``|``; every resulting piece,
    with trailing whitespace removed, is appended to *raw* in order.

    Args:
        ip: Host address passed to ``ipmitool -H``.
        raw: Mutable list-like object that is extended in place (a
            ``multiprocessing`` manager list when called from a worker
            process).
    """
    logging.info("Collecting from target %s", ip)
    # NOTE(review): passing the password with ``-P`` makes it visible in the
    # process list; consider ipmitool's environment/file based options.
    command = [
        "ipmitool",
        "-H", ip,
        "-U", IPMI_USER,
        "-P", IPMI_PASSWD,
        "sdr",
    ]
    # Text mode: without it Python 3 returns bytes from the pipe and
    # ``out.split('|')`` raises TypeError. On Python 2 the result is a str
    # either way.
    proc = subprocess.Popen(command,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    out = proc.communicate()[0]
    if proc.returncode != 0:
        # Previously a failing ipmitool run went completely unnoticed.
        logging.error("ipmitool exited with status %s for target %s",
                      proc.returncode, ip)
    raw += [field.rstrip() for field in out.split('|')]
def _pair_fields(fields):
    """Pair consecutive items of *fields* into a ``{name: reading}`` dict.

    Items at even positions become keys and the following item becomes the
    value; a trailing unpaired item is given the value ``""``. This mirrors
    ``dict(izip_longest(*[iter(fields)] * 2, fillvalue=""))`` without relying
    on a function name that differs between Python 2 and Python 3.
    """
    names = fields[0::2]
    readings = fields[1::2]
    readings += [""] * (len(names) - len(readings))
    return dict(zip(names, readings))


def _first_int(text):
    """Return the first all-digit token of *text* as an int, or None."""
    for token in text.split():
        if token.isdigit():
            return int(token)
    return None


class IpmiCollector(object):
    """Prometheus collector exposing CPU/system temperature and fan speed.

    Each scrape runs ``_run_cmd`` once per address in ``IPS`` and turns the
    sensors listed in ``REQURED`` into gauge samples labelled with the host
    address.
    """

    @staticmethod
    def _fetch_all():
        """Query every host concurrently; return ``[(ip, fields), ...]``.

        One worker process per host is started before any of them is joined,
        so the ``ipmitool`` calls overlap. Every host gets its own result
        list so the output of different hosts is never mixed.
        """
        manager = Manager()
        try:
            jobs = []
            for ip in IPS:
                raw = manager.list()
                worker = Process(target=_run_cmd, args=(ip, raw))
                logging.info("Start collecting the metrics")
                worker.start()
                jobs.append((ip, raw, worker))

            results = []
            for ip, raw, worker in jobs:
                worker.join()
                # Copy out of the proxy before the manager goes away.
                results.append((ip, list(raw)))
            return results
        finally:
            # Stop the manager's server process instead of leaking one per
            # scrape.
            manager.shutdown()

    @REQUEST_TIME.time()
    def collect(self):
        """Collect the sensor readings of all hosts.

        Returns:
            A list with the three ``GaugeMetricFamily`` objects
            (``ipmi_cpu_temp``, ``ipmi_system_temp``, ``ipmi_fan_speed``).
            A list rather than a generator is returned so that the
            ``REQUEST_TIME`` decorator measures the actual collection work
            and not merely the creation of a generator object.
        """
        sys_metrics = {
            'cpu_temp': GaugeMetricFamily('ipmi_cpu_temp', 'CPU temp', labels=['ip']),
            'system_temp': GaugeMetricFamily('ipmi_system_temp', 'System temp', labels=['ip']),
            'fan_speed': GaugeMetricFamily('ipmi_fan_speed', 'Fan speed', labels=['ip'])
        }

        for ip, fields in self._fetch_all():
            for name, reading in _pair_fields(fields).items():
                if not any(required in name for required in REQURED):
                    continue

                value = _first_int(reading)
                if value is None:
                    # Skip readings without a number instead of failing the
                    # whole scrape with an IndexError.
                    logging.warning("No numeric reading for %r on %s: %r",
                                    name, ip, reading)
                    continue

                if 'CPU' in name:
                    sys_metrics['cpu_temp'].add_metric([ip], value)
                elif 'System' in name:
                    sys_metrics['system_temp'].add_metric([ip], value)
                elif 'FAN' in name:
                    sys_metrics['fan_speed'].add_metric([ip], value)
                else:
                    logging.error("Undefined metric: %s", name)

        return list(sys_metrics.values())
def main(port=8000):
    """Register the IPMI collector and serve its metrics over HTTP forever.

    Args:
        port: TCP port for the metrics endpoint. Defaults to 8000, the value
            that used to be hard-coded.
    """
    REGISTRY.register(IpmiCollector())
    start_http_server(port)
    # The call above returns immediately, so keep the main thread alive.
    while True:
        time.sleep(5)
# Script entry point: configure logfmt-style logging, then start the exporter.
if __name__ == "__main__":
    log_format = 'ts=%(asctime)s level=%(levelname)s msg=%(message)s'
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    main()