Skip to content

Commit

Permalink
[BFN]: Implement getting psu related sensors in sonic_platform direct…
Browse files Browse the repository at this point in the history
…ly from BMC (sonic-net#12786)

Why I did it
The platform interface doesn't provide all sensors, and using it is inefficient.

How I did it
Request the sensor readings from the BMC server over HTTP and parse the result.

How to verify it
The related daemon in pmon populates the Redis DB; run this command to view the contents:
  • Loading branch information
dmytroxIntel authored and pull[bot] committed Jun 6, 2024
1 parent dffae6e commit 29d2663
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 6 deletions.
4 changes: 2 additions & 2 deletions dockers/docker-platform-monitor/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ RUN apt-get update && \
RUN pip3 install grpcio==1.39.0 \
grpcio-tools==1.39.0

# Barefoot platform vendors' sonic_platform packages import the Python 'thrift' library
RUN pip3 install thrift==0.13.0
# Barefoot platform vendors' sonic_platform packages import these Python libraries
RUN pip3 install thrift==0.13.0 netifaces

# We install the libpci module in order to be able to do PCI transactions
RUN pip3 install libpci
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import json
import os
import syslog
from subprocess import Popen, PIPE, DEVNULL

from netifaces import ifaddresses, AF_INET6

class Metric(object):
    """A single sensor reading plus the identifiers it was reported under.

    Concrete subclasses supply a ``parse`` class attribute (a callable that
    converts the raw value) and may supply a ``unit`` string used by
    ``__repr__``.
    """

    def __init__(self, sensor_id, sensor_key, value, label):
        """Store the identifiers and the value converted by ``parse_value``."""
        self._sensor_id = sensor_id
        self._sensor_key = sensor_key
        self._label = label
        self._value = self.parse_value(value)

    @classmethod
    def parse_value(cls, value):
        """Convert ``value`` using the ``parse`` callable of the concrete class."""
        return cls.parse(value)

    # For debug purposes
    def __repr__(self):
        """Render as ``<id>, <key>: <value> <unit> [<label>]``; unit falls back to "?"."""
        unit = getattr(self, "unit", "?")
        return "{!s}, {!s}: {!s} {!s} [{!s}]".format(
            self._sensor_id, self._sensor_key, self._value, unit, self._label)

class Temperature(Metric):
    """Temperature reading; the raw value is parsed as a float, unit is °C."""
    unit = "°C"
    parse = float

class FanRpm(Metric):
    """Fan speed reading; the raw value is parsed as a float, unit is RPM."""
    unit = "RPM"
    parse = float

class FanFault(Metric):
    """Fan fault reading; the raw value is parsed as a float and no unit is defined."""
    parse = float

class Voltage(Metric):
    """Voltage reading; the raw value is parsed as a float, unit is V."""
    unit = "V"
    parse = float

class Power(Metric):
    """Power reading; the raw value is parsed as a float, unit is W."""
    unit = "W"
    parse = float

class Current(Metric):
    """Current reading; the raw value is parsed as a float, unit is A."""
    unit = "A"
    parse = float

def get_metric_value(metrics, name):
    """Look up a metric value by its composite name.

    Args:
        metrics: Iterable of metric objects exposing ``_label``,
            ``_sensor_id``, ``_sensor_key`` and ``_value``. May be ``None``
            or empty when no sensor data is available.
        name: ``"<label>_<sensor id>_<sensor key>"``, for example
            ``"PSU1 Input Voltage_in0_input"``.

    Returns:
        The ``_value`` of the first metric whose label, sensor id and sensor
        key all match, or ``None`` when nothing matches or ``metrics`` is
        ``None``/empty.

    Raises:
        ValueError: if ``name`` contains fewer than two underscores.
    """
    # Split from the right so that a label which itself contains "_" still
    # resolves; the sensor id and key are always the last two components.
    label, sensor_id, sensor_key = name.rsplit("_", 2)
    if not metrics:
        # No sensor data (e.g. the cache was never filled): report "unknown"
        # instead of failing while iterating over None.
        return None
    wanted = (label, sensor_id, sensor_key)
    return next(
        (metric._value for metric in metrics
         if (metric._label, metric._sensor_id, metric._sensor_key) == wanted),
        None)

def get_link_local_interface():
    """Return the name of the network interface of a cdc_ether USB device.

    Walks the entries of ``/sys/bus/usb/drivers/cdc_ether`` and, for the
    first entry that is a directory containing a non-empty ``net``
    directory, returns the first name listed there.

    Returns:
        The interface name, or ``None`` when no entry exposes an interface.

    Raises:
        OSError: if the driver directory cannot be listed (for example when
            it does not exist).
    """
    cdc_ether_path = "/sys/bus/usb/drivers/cdc_ether"
    for ether in os.listdir(cdc_ether_path):
        concrete_ether_net = os.path.join(cdc_ether_path, ether, 'net')
        if not os.path.isdir(concrete_ether_net):
            continue
        interfaces = os.listdir(concrete_ether_net)
        # An empty "net" directory carries no interface name; keep looking
        # rather than failing with IndexError.
        if interfaces:
            return interfaces[0]
    return None

def get_link_local_address(link_local_interface):
    """Derive the BMC's IPv6 link-local address from this host's address.

    Args:
        link_local_interface: Name of the interface to inspect.

    Returns:
        The first ``fe80:`` address found on the interface with its last
        character replaced by ``'1'``, or ``None`` when the interface has no
        such address.
    """
    # ifaddresses() only contains the address families that are configured,
    # so fall back to an empty sequence when there is no IPv6 address at all.
    for addr in ifaddresses(link_local_interface).get(AF_INET6, ()):
        # drop the "%<zone>" suffix that may follow the address
        address = addr['addr'].split('%')[0]
        # according to rfc4291 this ipv6 address is used for link local connection
        if address.startswith('fe80:'):
            # first address is taken for BMC and second for this host
            return address[:-1] + '1'
    return None

def _parse_sensor_output(output):
    """Parse the sensor text returned by the BMC into a list of metrics.

    The text is treated as blank-line separated sections. The first line of
    each section is skipped (driver name). Within a section, an unindented
    line sets the current label (its last character, the trailing ":", is
    dropped), an indented ``<id>_<key>: <value>`` line is a sensor input
    belonging to that label, and a line starting with "ERROR" is logged.
    """
    # Sensor-id prefix -> metric class used for "<id>_input" readings.
    input_metrics = (
        ("temp", Temperature),
        ("in", Voltage),
        ("power", Power),
        ("curr", Current),
        ("fan", FanRpm),
    )

    metrics = []
    # iterating through drivers and their sensors
    for section in output.split("\n\n"):
        label = None
        # iterating through sensors and their inputs
        for field in section.split("\n")[1:]:  # skipping driver name
            if field.startswith(" "):
                # Strip only the surrounding whitespace: removing every space
                # would also eat the one after the colon and break the split.
                field_key, separator, field_value = field.strip().partition(":")
                if not separator or "_" not in field_key:
                    continue
                sensor_id, sensor_key = field_key.split("_", 1)
                field_value = field_value.strip()
                if sensor_key == "input":
                    for prefix, metric_cls in input_metrics:
                        if sensor_id.startswith(prefix):
                            metrics.append(
                                metric_cls(sensor_id, sensor_key, field_value, label=label))
                            break
                elif sensor_key == "fault" and sensor_id.startswith("fan"):
                    metrics.append(
                        FanFault(sensor_id, sensor_key, field_value, label=label))
            elif field.startswith("ERROR"):
                syslog.syslog(syslog.LOG_INFO, field)
            else:
                label = field[:-1]  # strip off trailing ":" character
    return metrics


def get_psu_metrics():
    """Fetch the sensor readings from the BMC over HTTP and parse them.

    Resolves the link-local interface and address, runs ``curl`` against
    port 8080 of that address and reads the first element of
    ``["Information"]["Description"]`` from the JSON reply.

    Returns:
        A list of Temperature, Voltage, Power, Current, FanRpm and FanFault
        objects, one per recognised ``*_input`` / fan ``*_fault`` line.

    Raises:
        Whatever the lookup helpers, ``Popen``, ``json.loads`` or the value
        parsers raise; nothing is caught here.
    """
    link_local_interface = get_link_local_interface()
    link_local_address = get_link_local_address(link_local_interface)

    http_address = "http://[%s%%%s]:8080" % (link_local_address, link_local_interface)
    args = "/api/sys/bmc/sensors/%20-A%20-u%20"
    # communicate() drains stdout and waits for curl to exit, and the context
    # manager closes the pipe, so no child process or descriptor is left behind.
    # NOTE(review): curl runs without a timeout; confirm that is acceptable.
    with Popen(["curl", http_address + args], stdout=PIPE, stderr=DEVNULL) as curl:
        output, _ = curl.communicate()
    output = json.loads(output.decode())["Information"]["Description"][0].strip()
    return _parse_sensor_output(output)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import time
import signal
import syslog
import logging
import threading

sys.path.append(os.path.dirname(__file__))

Expand All @@ -14,12 +16,18 @@
from sonic_platform_base.psu_base import PsuBase
from sonic_platform.thermal import psu_thermals_list_get
from platform_utils import cancel_on_sigterm
from sonic_platform.bfn_extensions.psu_sensors import get_psu_metrics
from sonic_platform.bfn_extensions.psu_sensors import get_metric_value
except ImportError as e:
raise ImportError (str(e) + "- required module not found")

class Psu(PsuBase):
"""Platform-specific PSU class"""

__lock = threading.Lock()
__sensors_info = None
__timestamp = 0

sigterm = False
sigterm_default_handler = None
cls_inited = False
Expand Down Expand Up @@ -48,6 +56,20 @@ def signal_handler(cls, sig, frame):
syslog.syslog(syslog.LOG_INFO, "Canceling PSU platform API calls...")
cls.sigterm = True

@classmethod
def __sensors_get(cls, cached=True):
    """Return the PSU sensor metrics shared by all instances.

    The class-level cache is refreshed through ``get_psu_metrics()`` when
    more than 15 seconds have passed since the last successful refresh. A
    failed refresh is logged and the previous cache content (``None`` if
    there never was one) is returned.

    Args:
        cached: Currently unused; kept for interface compatibility.

    Returns:
        The cached result of ``get_psu_metrics()``, or ``None``.
    """
    # "with" releases the lock on every exit path, including errors that are
    # not subclasses of Exception (e.g. KeyboardInterrupt) during the refresh.
    with cls.__lock:
        if time.time() > cls.__timestamp + 15:
            # Update cache once per 15 seconds
            try:
                cls.__sensors_info = get_psu_metrics()
                cls.__timestamp = time.time()
            except Exception as e:
                logging.warning("Failed to update sensors cache: " + str(e))
        return cls.__sensors_info

'''
Units of returned info object values:
vin - V
Expand Down Expand Up @@ -105,8 +127,7 @@ def get_voltage(self):
A float number, the output voltage in volts,
e.g. 12.1
"""
info = self.__info_get()
return float(info.vout) if info else 0
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Voltage_in1_input" % self.__index)

def get_current(self):
"""
Expand All @@ -115,8 +136,24 @@ def get_current(self):
Returns:
A float number, the electric current in amperes, e.g 15.4
"""
info = self.__info_get()
return info.iout / 1000 if info else 0
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Current_curr2_input" % self.__index)

def get_input_voltage(self):
    """
    Retrieves current PSU voltage input

    Returns:
        A float number, the input voltage in volts, e.g. 220;
        None when the sensor is not present in the fetched metrics
    """
    metrics = Psu.__sensors_get()
    sensor_name = "PSU%d Input Voltage_in0_input" % self.__index
    return get_metric_value(metrics, sensor_name)

def get_input_current(self):
    """
    Retrieves the input current draw of the power supply

    Returns:
        A float number, the electric current in amperes, e.g 0.8;
        None when the sensor is not present in the fetched metrics
    """
    metrics = Psu.__sensors_get()
    sensor_name = "PSU%d Input Current_curr1_input" % self.__index
    return get_metric_value(metrics, sensor_name)

def get_power(self):
"""
Expand Down

0 comments on commit 29d2663

Please sign in to comment.