Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial version of pcied #60

Merged
merged 14 commits into from
Jul 17, 2020
156 changes: 156 additions & 0 deletions sonic-pcied/scripts/pcied
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#!/usr/bin/env python2

"""
pcied
PCIe device monitoring daemon for SONiC
"""

try:
import os
import sys
import signal
import threading
import subprocess
jleveque marked this conversation as resolved.
Show resolved Hide resolved
from sonic_daemon_base.daemon_base import Logger
from sonic_daemon_base.daemon_base import DaemonBase
import swsssdk
except ImportError, e:
raise ImportError(str(e) + " - required module not found")

#
# Constants ====================================================================
#
# Identifier handed to the module-level Logger instance
SYSLOG_IDENTIFIER = "pcied"

# Text that identifies the summary line in the `pcieutil pcie-check` output
PCIE_RESULT_REGIX = "PCIe Device Checking All Test"
jleveque marked this conversation as resolved.
Show resolved Hide resolved
# NOTE(review): not referenced anywhere in the visible code - TODO confirm it is still needed
PCIE_TABLE_NAME = "PCIE_STATUS"

# Pieces used to build the path of the per-platform PCIe configuration file
PLATFORM_ROOT_PATH = '/usr/share/sonic/device'
PCIE_CONF_FILE = 'pcie.yaml'
# sonic-cfggen executable and the keys it is asked for to get hwsku and platform
SONIC_CFGGEN_PATH = '/usr/local/bin/sonic-cfggen'
HWSKU_KEY = 'DEVICE_METADATA.localhost.hwsku'
PLATFORM_KEY = 'DEVICE_METADATA.localhost.platform'

# Timeout, in seconds, of each wait on the stop event in the daemon main loop
PCIED_MAIN_THREAD_SLEEP_SECS = 60
# Host passed to SonicV2Connector when the daemon connects to STATE_DB
REDIS_HOSTIP = "127.0.0.1"

# Global logger class instance
logger = Logger(SYSLOG_IDENTIFIER)

#
# Helper Function ==============================================================
#


def _query_cfggen(source_flag, key):
    """Run sonic-cfggen for a single key and return what it printed.

    Args:
        source_flag: Data-source option forwarded to sonic-cfggen
            ('-H' or '-d' in this file).
        key: Variable name passed to sonic-cfggen's '-v' option.

    Returns:
        The command output with trailing newlines removed. stderr is merged
        into stdout, so presumably any error text printed by sonic-cfggen
        ends up in the returned value - TODO confirm callers tolerate that.

    Raises:
        OSError: propagated from subprocess.Popen when the executable cannot
            be started.
    """
    proc = subprocess.Popen([SONIC_CFGGEN_PATH, source_flag, '-v', key],
                            stdout=subprocess.PIPE,
                            shell=False,
                            stderr=subprocess.STDOUT)
    # communicate() reads to EOF and waits for the child to exit, so no
    # separate wait() call is required afterwards.
    output = proc.communicate()[0]
    return output.rstrip('\n')


def get_platform_and_hwsku():
    """Return the (platform, hwsku) pair reported by sonic-cfggen.

    The platform is queried with the '-H' option and PLATFORM_KEY, the hwsku
    with the '-d' option and HWSKU_KEY.

    Returns:
        Tuple of two strings: (platform, hwsku).

    Raises:
        OSError: with the message "Cannot detect platform" when sonic-cfggen
            cannot be executed.
    """
    try:
        platform = _query_cfggen('-H', PLATFORM_KEY)
        hwsku = _query_cfggen('-d', HWSKU_KEY)
    except OSError:
        raise OSError("Cannot detect platform")

    return (platform, hwsku)

#
# Daemon =======================================================================
#


class DaemonPcied(DaemonBase):
    """Daemon that periodically checks PCIe devices and records the result.

    Each pass of the main loop runs `sudo pcieutil pcie-check`, looks for the
    summary line in its output and writes "PASSED" or "FAILED" to STATE_DB.
    """

    def __init__(self):
        """Set up the loop timeout, the stop event and the STATE_DB connection."""
        DaemonBase.__init__(self)

        self.timeout = PCIED_MAIN_THREAD_SLEEP_SECS
        self.stop_event = threading.Event()

        self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP)
        self.state_db.connect("STATE_DB")

    def _read_pcie_check_result(self):
        """Run the PCIe check and return its summary line.

        Returns:
            The first output line of `sudo pcieutil pcie-check` that contains
            PCIE_RESULT_REGIX, or "" when there is no such line or the
            command could not be started.
        """
        # The output is read through a pipe instead of a shell pipeline that
        # redirects into a fixed /tmp file: no shell is involved, no file
        # handle is left open and nothing is written to a predictable
        # world-writable location.
        try:
            proc = subprocess.Popen(['sudo', 'pcieutil', 'pcie-check'],
                                    stdout=subprocess.PIPE,
                                    shell=False,
                                    universal_newlines=True)
            output = proc.communicate()[0]
        except OSError as e:
            logger.log_error("Failed to run pcieutil pcie-check: " + str(e))
            return ""

        for line in output.splitlines():
            if PCIE_RESULT_REGIX in line:
                return line
        return ""

    def check_pcie_devices(self):
        """Run one PCIe check and publish the result to STATE_DB.

        Exits the process via sys.exit() when the platform PCIe configuration
        file does not exist.
        """
        platform, _ = get_platform_and_hwsku()
        # NOTE(review): a reviewer asked whether PCIE_CONF_FILE is a plugin or
        # a config file and suggested placing it directly under the platform
        # folder if it is not a plugin; the author deferred that to follow-up
        # work on sonic-pcie.
        pciefilePath = "/".join([PLATFORM_ROOT_PATH, platform, "plugins", PCIE_CONF_FILE])

        # This method runs on every loop pass; only add the entry once so
        # sys.path does not grow by one element per pass.
        conf_abspath = os.path.abspath(pciefilePath)
        if conf_abspath not in sys.path:
            sys.path.append(conf_abspath)

        # NOTE(review): a reviewer suggested doing this existence check once
        # before entering the main loop instead of on every pass.
        if not os.path.exists(pciefilePath):
            logger.log_error("Platform pcie configuration file doesn't exist! exit pcied")
            sys.exit("Platform PCIe Configuration file doesn't exist!")

        pcie_status = self._read_pcie_check_result()

        # NOTE(review): the review discussion favoured logging only when the
        # status changes rather than on every pass; not implemented here.
        if "PASSED" in pcie_status:
            self.update_state_db("PCIE_STATUS|", "PCIE_DEVICES", "PASSED")
            logger.log_info("PCIe device status check : PASSED")
        else:
            # Also reached when no summary line was found at all.
            self.update_state_db("PCIE_STATUS|", "PCIE_DEVICES", "FAILED")
            logger.log_info("PCIe device status check : FAILED")

    def update_state_db(self, key1, key2, value):
        """Write `value` to STATE_DB under `key1` / `key2`.

        The arguments are forwarded unchanged to the connector's set() call.
        """
        self.state_db.set('STATE_DB', key1, key2, value)

    # Signal handler
    def signal_handler(self, sig, frame):
        """React to a signal: ignore SIGHUP, request shutdown on SIGINT/SIGTERM.

        Presumably registered by DaemonBase - the registration is not visible
        in this file.

        Args:
            sig: Signal number.
            frame: Current stack frame (unused).
        """
        if sig == signal.SIGHUP:
            logger.log_info("Caught SIGHUP - ignoring...")
        elif sig == signal.SIGINT:
            logger.log_info("Caught SIGINT - exiting...")
            self.stop_event.set()
        elif sig == signal.SIGTERM:
            logger.log_info("Caught SIGTERM - exiting...")
            self.stop_event.set()
        else:
            # sig is an integer; it must be converted before concatenation or
            # this branch raises TypeError instead of logging.
            logger.log_warning("Caught unhandled signal '" + str(sig) + "'")

    # Initialize daemon
    def init(self):
        """Log the start of daemon initialization."""
        logger.log_info("Start daemon init...")

    # Deinitialize daemon
    def deinit(self):
        """Log the start of daemon deinitialization."""
        logger.log_info("Start daemon deinit...")

    # Run daemon
    def run(self):
        """Run the daemon until the stop event is set.

        Each loop pass waits up to `self.timeout` seconds on the stop event
        and then checks the PCIe devices, so the first check happens only
        after the first timeout has elapsed.
        """
        logger.log_info("Starting up...")

        # Start daemon initialization sequence
        self.init()

        # Start main loop
        logger.log_info("Start daemon main loop")

        while not self.stop_event.wait(self.timeout):
            # Check the Pcie device status
            self.check_pcie_devices()

        logger.log_info("Stop daemon main loop")

        # Start daemon deinitialization sequence
        self.deinit()

        logger.log_info("Shutting down...")

#
# Main =========================================================================
#


def main():
    """Create the pcied daemon and run it until it stops."""
    DaemonPcied().run()

if __name__ == '__main__':
main()
29 changes: 29 additions & 0 deletions sonic-pcied/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from setuptools import setup

# Package definition for sonic-pcied; installs scripts/pcied as a script.
setup(
name='sonic-pcied',
version='1.0',
description='PCIe check daemon for SONiC',
license='Apache 2.0',
author='SONiC Team',
author_email='[email protected]',
url='https://github.com/Azure/sonic-platform-daemons',
maintainer='Sujin Kang',
maintainer_email='[email protected]',
scripts=[
'scripts/pcied',
],
classifiers=[
'Development Status :: 4 - Beta',
'Environment :: No Input/Output (Daemon)',
'Intended Audience :: Developers',
'Intended Audience :: Information Technology',
'Intended Audience :: System Administrators',
'License :: OSI Approved :: Apache Software License',
'Natural Language :: English',
'Operating System :: POSIX :: Linux',
'Programming Language :: Python :: 2.7',
'Topic :: System :: Hardware',
],
keywords='sonic SONiC PCIe pcie PCIED pcied',
)