Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add script to periodically update oper status of management interface #21245

Merged
merged 9 commits into from
Jan 9, 2025
2 changes: 2 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ sudo cp $IMAGE_CONFIGS/monit/arp_update_checker $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/arp_update_checker
sudo cp $IMAGE_CONFIGS/monit/control_plane_drop_check $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/control_plane_drop_check
sudo cp $IMAGE_CONFIGS/monit/mgmt_oper_status $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/mgmt_oper_status

# Installed smartmontools version should match installed smartmontools in docker-platform-monitor Dockerfile
# TODO: are mismatching versions fine for bookworm?
Expand Down
5 changes: 5 additions & 0 deletions files/image_config/monit/conf.d/sonic-host
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,8 @@ check program arp_update_checker with path "/usr/bin/arp_update_checker" every 1
check program controlPlaneDropCheck with path "/usr/bin/control_plane_drop_check"
every 5 cycles
if status != 0 for 3 cycle then alert repeat every 1 cycles

# Periodically update oper status of mgmt interface in STATE_DB
check program mgmtOperStatus with path "/usr/bin/mgmt_oper_status"
every 1 cycles
if status != 0 for 3 cycle then alert repeat every 1 cycles
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have we tested the config reload / minigraph scenario? Are we sure we are not getting a monit error there, as that can impact the nightly runs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Verified reload/reboot and did not see the monit log message. Is there any other specific concern about why this error might get logged?

39 changes: 39 additions & 0 deletions files/image_config/monit/mgmt_oper_status
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
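#!/usr/bin/env python3

"""
mgmt_oper_status

Read the operational state of every management port listed in CONFIG_DB
(keys matching MGMT_PORT|<port>) from /sys/class/net/<port>/operstate and
record it in STATE_DB under MGMT_PORT_TABLE|<port> as the 'oper_status' field.
Results and errors are reported through syslog.
"""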

import docker
import sys
import subprocess
import syslog

from sonic_py_common import multi_asic, device_info
from swsscommon.swsscommon import SonicV2Connector


def main():
db = SonicV2Connector(use_unix_socket_path=True)
db.connect('CONFIG_DB')
db.connect('STATE_DB')
mgmt_ports_keys = db.keys(db.CONFIG_DB, 'MGMT_PORT|*' )
if not mgmt_ports_keys:
syslog.syslog(syslog.LOG_DEBUG, 'No management interface found')
else:
try:
mgmt_ports = [key.split('MGMT_PORT|')[-1] for key in mgmt_ports_keys]
for port in mgmt_ports:
state_db_key = "MGMT_PORT_TABLE|{}".format(port)
# Reset status of mgmt port before updating with latest status
db.set(db.STATE_DB, state_db_key, 'oper_status', 'unknown')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do a read first and, only when there is a change of oper state, log that there is a change, whether it is an up-to-down or a down-to-up transition?
Let's not reset it to unknown, as we want to avoid making any changes unless there is a need to do so (state change detected). For a state change from up to down, let's log it as a warning. For a state change from down to up, let's log it as an INFO-level syslog message.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

modified as suggested

port_operstate_path = '/sys/class/net/{}/operstate'.format(port)
oper_status = subprocess.run(['cat', port_operstate_path], capture_output=True, text=True)
db.set(db.STATE_DB, state_db_key, 'oper_status', oper_status.stdout.strip())
syslog.syslog(syslog.LOG_INFO, "mgmt_oper_status_check: {}".format(oper_status.stdout.strip()))
except Exception as e:
syslog.syslog(syslog.LOG_ERR, "mgmt_oper_status_check exception : {}".format(str(e)))


if __name__ == "__main__":
    # main() logs its own failures to syslog and does not raise them, so the
    # script always reports success (exit status 0) to whoever invoked it.
    main()
    sys.exit(0)
60 changes: 60 additions & 0 deletions files/image_config/monit/tests/test_mgmt_oper_status_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import unittest
from unittest.mock import patch, MagicMock
import subprocess
import syslog
import sys
import mgmt_oper_status

class TestMgmtOperStatusCheck(unittest.TestCase):
    """Unit tests for ``mgmt_oper_status.main()``.

    Each test replaces the three external touch points of the script with
    mocks: the ``SonicV2Connector`` database client, ``subprocess.run``
    (used to read ``/sys/class/net/<port>/operstate``) and ``syslog.syslog``.
    The patch targets name the ``mgmt_oper_status`` module because that is
    where the script looks these names up.
    """

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_no_mgmt_ports(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """With no MGMT_PORT keys, only a debug message is logged."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mock_db.keys.return_value = []

        mgmt_oper_status.main()

        mock_syslog.assert_called_with(syslog.LOG_DEBUG, 'No management interface found')
        # Nothing to poll, so no operstate file may be read.
        mock_subprocess.assert_not_called()

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_with_mgmt_ports(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """Each configured port is read from sysfs and written to STATE_DB."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mgmt_ports_keys = ['MGMT_PORT|eth0', 'MGMT_PORT|eth1']
        mock_db.keys.return_value = mgmt_ports_keys
        mock_db.set.return_value = None

        # The same "up" result is returned for every port that is polled.
        mock_subprocess.return_value = subprocess.CompletedProcess(
            args=['cat', '/sys/class/net/eth0/operstate'], returncode=0, stdout='up', stderr='')

        mgmt_oper_status.main()

        # One operstate read per configured port.
        mock_subprocess.assert_any_call(
            ['cat', '/sys/class/net/eth0/operstate'], capture_output=True, text=True)
        mock_subprocess.assert_any_call(
            ['cat', '/sys/class/net/eth1/operstate'], capture_output=True, text=True)

        # The message prefix must match what the script actually emits.
        mock_syslog.assert_any_call(syslog.LOG_INFO, 'mgmt_oper_status_check: up')

        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth0', 'oper_status', 'up')
        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth1', 'oper_status', 'up')

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_exception_handling(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """A failing operstate read is logged as an error and not re-raised."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mgmt_ports_keys = ['MGMT_PORT|eth0']
        mock_db.keys.return_value = mgmt_ports_keys
        mock_db.set.return_value = None

        mock_subprocess.side_effect = Exception("File not found")

        mgmt_oper_status.main()

        mock_syslog.assert_called_with(
            syslog.LOG_ERR, "mgmt_oper_status_check exception : File not found")
        # The status was reset before the read failed, so it stays 'unknown'.
        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth0', 'oper_status', 'unknown')

if __name__ == '__main__':
    # Allow running this test file directly, outside a test runner.
    unittest.main()
Loading