diff --git a/scripts/disk_check.py b/scripts/disk_check.py
new file mode 100644
index 0000000000..8417f50918
--- /dev/null
+++ b/scripts/disk_check.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+What:
+    There have been cases where the disk turns Read-only due to a kernel bug.
+    In the Read-only state, the system blocks new remote user logins via TACACS.
+    This utility checks for that state and makes a transient recovery as needed.
+
+How:
+    Check for Read-Write permission. If Read-only, create a writable overlay
+    using tmpfs.
+
+    By default "/etc" & "/home" are checked and, if in the Read-only state,
+    made Read-Write using an overlay on top of tmpfs.
+
+    Making /etc & /home writable lets new remote user logins succeed.
+
+    If in the Read-only state, or in the Read-Write state with the help of the
+    tmpfs overlay, syslog ERR messages are written to help raise alerts.
+
+    Monit may be used to invoke it periodically, to help scan & fix and
+    report via syslog.
+
+Tidbit:
+    If you would like to test this script, you could simulate a RO disk
+    with the following command. Reboot will revert the effect.
+        sudo bash -c "echo u > /proc/sysrq-trigger"
+
+"""
+
+import argparse
+import os
+import sys
+import syslog
+import subprocess
+
+UPPER_DIR = "/run/mount/upper"
+WORK_DIR = "/run/mount/work"
+MOUNTS_FILE = "/proc/mounts"
+
+chk_log_level = syslog.LOG_ERR
+
+
+def _log_msg(lvl, pfx, msg):
+    """Print and syslog msg if lvl is at least as severe as chk_log_level."""
+    if lvl <= chk_log_level:
+        print("{}: {}".format(pfx, msg))
+        syslog.syslog(lvl, msg)
+
+
+def log_err(m):
+    _log_msg(syslog.LOG_ERR, "Err", m)
+
+
+def log_info(m):
+    _log_msg(syslog.LOG_INFO, "Info", m)
+
+
+def log_debug(m):
+    _log_msg(syslog.LOG_DEBUG, "Debug", m)
+
+
+def test_writable(dirs):
+    """Return True if every dir is writable; log and stop at the first that is not."""
+    for d in dirs:
+        rw = os.access(d, os.W_OK)
+        if not rw:
+            log_err("{} is not read-write".format(d))
+            return False
+        else:
+            log_debug("{} is Read-Write".format(d))
+    return True
+
+
+def run_cmd(cmd):
+    """Run cmd through the shell, log its result and output, return its exit code."""
+    proc = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
+    ret = proc.returncode
+    if ret:
+        log_err("failed: ret={} cmd={}".format(ret, cmd))
+    else:
+        log_info("ret={} cmd: {}".format(ret, cmd))
+
+    if proc.stdout:
+        log_info("stdout: {}".format(proc.stdout.decode("utf-8")))
+    if proc.stderr:
+        log_info("stderr: {}".format(proc.stderr.decode("utf-8")))
+
+    return ret
+
+
+def get_dname(path_name):
+    """Return the last component of path_name, ignoring any trailing slash."""
+    return os.path.basename(os.path.normpath(path_name))
+
+
+def do_mnt(dirs):
+    """
+    Mount a writable overlay on top of each dir in dirs.
+
+    Returns 0 on success and non-zero on failure. On failure, overlays mounted
+    by this call are unmounted and UPPER_DIR / WORK_DIR are removed.
+    """
+    if os.path.exists(UPPER_DIR):
+        log_err("Already mounted")
+        return 1
+
+    for i in (UPPER_DIR, WORK_DIR):
+        try:
+            os.mkdir(i)
+        except OSError as error:
+            log_err("Failed to create {}: {}".format(i, error))
+            return 1
+
+    ret = 0
+    mounted = []
+    for d in dirs:
+        d_name = get_dname(d)
+        d_upper = os.path.join(UPPER_DIR, d_name)
+        d_work = os.path.join(WORK_DIR, d_name)
+        try:
+            os.mkdir(d_upper)
+            os.mkdir(d_work)
+        except OSError as error:
+            log_err("Failed to create overlay dirs for {}: {}".format(d, error))
+            ret = 1
+            break
+
+        ret = run_cmd("mount -t overlay overlay_{} -o lowerdir={},"
+                      "upperdir={},workdir={} {}".format(
+                          d_name, d, d_upper, d_work, d))
+        if ret:
+            break
+        mounted.append(d)
+
+    if ret:
+        # Roll back: unmount what this call mounted before removing the
+        # upper/work dirs that back those overlays.
+        for d in reversed(mounted):
+            run_cmd("umount {}".format(d))
+
+        # Track the cleanup result separately so a successful "rm" cannot
+        # overwrite the failure code and report the mount as successful.
+        for i in (UPPER_DIR, WORK_DIR):
+            if os.path.exists(i) and run_cmd("rm -rf {}".format(i)):
+                log_err("Failed to remove {}".format(i))
+
+        log_err("Failed to mount {} as Read-Write".format(dirs))
+    else:
+        log_info("{} are mounted as Read-Write".format(dirs))
+    return ret
+
+
+def is_mounted(dirs):
+    """Return True if UPPER_DIR exists and MOUNTS_FILE lists an overlay for any dir."""
+    if not os.path.exists(UPPER_DIR):
+        return False
+
+    onames = set()
+    for d in dirs:
+        onames.add("overlay_{}".format(get_dname(d)))
+
+    with open(MOUNTS_FILE, "r") as s:
+        for ln in s:
+            fields = ln.split()
+            # Blank lines have no device field; skip them rather than raise.
+            if fields and fields[0] in onames:
+                log_debug("Mount exists for {}".format(fields[0]))
+                return True
+    return False
+
+
+def do_check(skip_mount, dirs):
+    """Check dirs; mount overlays unless skipped; return do_mnt's result or 0."""
+    ret = 0
+    if not test_writable(dirs):
+        if not skip_mount:
+            ret = do_mnt(dirs)
+
+    # Check if mounted
+    if (not ret) and is_mounted(dirs):
+        log_err("READ-ONLY: Mounted {} to make Read-Write".format(dirs))
+
+    return ret
+
+
+def main():
+    global chk_log_level
+
+    parser=argparse.ArgumentParser(
+            description="check disk for Read-Write and mount etc & home as Read-Write")
+    parser.add_argument('-s', "--skip-mount", action='store_true', default=False,
+            help="Skip mounting /etc & /home as Read-Write")
+    parser.add_argument('-d', "--dirs", default="/etc,/home",
+            help="dirs to mount")
+    parser.add_argument('-l', "--loglvl", default=syslog.LOG_ERR, type=int,
+            help="log level")
+    args = parser.parse_args()
+
+    chk_log_level = args.loglvl
+    ret = do_check(args.skip_mount, args.dirs.split(","))
+    return ret
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/setup.py b/setup.py
index 28ca7376fd..b21e73cec1 100644
--- a/setup.py
+++ b/setup.py
@@ -85,6 +85,7 @@
         'scripts/db_migrator.py',
         'scripts/decode-syseeprom',
         'scripts/dropcheck',
+        'scripts/disk_check.py',
         'scripts/dropconfig',
         'scripts/dropstat',
         'scripts/dump_nat_entries.py',
diff --git a/tests/disk_check_test.py b/tests/disk_check_test.py
new file mode 100644
index 0000000000..ce4faad900
--- /dev/null
+++ b/tests/disk_check_test.py
@@ -0,0 +1,189 @@
+import shutil
+import sys
+import syslog
+from unittest.mock import patch
+import pytest
+import subprocess
+
+sys.path.append("scripts")
+import disk_check
+
+disk_check.MOUNTS_FILE = "/tmp/proc_mounts"
+
+test_data = {
+    "0": {
+        "desc": "All good as /tmp is read-write",
+        "args": ["", "-d", "/tmp"],
+        "err": ""
+    },
+    "1": {
+        "desc": "Not good as /tmpx is not read-write; But fix skipped",
+        "args": ["", "-d", "/tmpx", "-s"],
+        "err": "/tmpx is not read-write"
+    },
+    "2": {
+        "desc": "Not good as /tmpx is not read-write; expect mount",
+        "args": ["", "-d", "/tmpx"],
+        "upperdir": "/tmp/tmpx",
+        "workdir": "/tmp/tmpy",
+        "cleanup": ["/tmp/tmpx", "/tmp/tmpy"],
+        "mounts": "overlay_tmpx blahblah",
+        "err": "/tmpx is not read-write|READ-ONLY: Mounted ['/tmpx'] to make Read-Write",
+        "cmds": ['mount -t overlay overlay_tmpx -o lowerdir=/tmpx,upperdir=/tmp/tmpx/tmpx,workdir=/tmp/tmpy/tmpx /tmpx']
+    },
+    "3": {
+        "desc": "Not good as /tmpx is not read-write; mount fail as create of upper fails",
+        "args": ["", "-d", "/tmpx"],
+        "upperdir": "/tmpx",
+        "expect_ret": 1
+    },
+    "4": {
+        "desc": "Not good as /tmpx is not read-write; mount fail as upper exist",
+        "args": ["", "-d", "/tmpx"],
+        "upperdir": "/tmp",
+        "err": "/tmpx is not read-write|Already mounted",
+        "expect_ret": 1
+    },
+    "5": {
+        "desc": "/tmp is read-write, but as well mount exists; hence report",
+        "args": ["", "-d", "/tmp"],
+        "upperdir": "/tmp",
+        "mounts": "overlay_tmp blahblah",
+        "err": "READ-ONLY: Mounted ['/tmp'] to make Read-Write"
+    },
+    "6": {
+        "desc": "Test another code path for good case",
+        "args": ["", "-d", "/tmp"],
+        "upperdir": "/tmp"
+    }
+}
+
+err_data = ""
+max_log_lvl = -1
+cmds = []
+current_tc = None
+
+
+def mount_file(d):
+    with open(disk_check.MOUNTS_FILE, "w") as s:
+        s.write(d)
+
+
+def report_err_msg(lvl, m):
+    global err_data
+    global max_log_lvl
+
+    if lvl > max_log_lvl:
+        max_log_lvl = lvl
+
+    if lvl == syslog.LOG_ERR:
+        if err_data:
+            err_data += "|"
+        err_data += m
+
+
+class proc:
+    returncode = 0
+    stdout = None
+    stderr = None
+
+    def __init__(self, proc_upd = None):
+        if proc_upd:
+            self.returncode = proc_upd.get("ret", 0)
+            self.stdout = proc_upd.get("stdout", None)
+            self.stderr = proc_upd.get("stderr", None)
+
+
+def mock_subproc_run(cmd, shell, stdout, stderr):
+    global cmds
+
+    assert shell is True
+    assert stdout == subprocess.PIPE
+    assert stderr == subprocess.PIPE
+
+    upd = (current_tc["proc"][len(cmds)]
+            if len(current_tc.get("proc", [])) > len(cmds) else None)
+    cmds.append(cmd)
+
+    return proc(upd)
+
+
+def init_tc(tc):
+    global err_data, cmds, current_tc
+
+    err_data = ""
+    cmds = []
+    mount_file(tc.get("mounts", ""))
+    # Drop dirs left by an earlier run so do_mnt() does not see a stale
+    # upper dir and bail out with "Already mounted".
+    for d in tc.get("cleanup", []):
+        shutil.rmtree(d, ignore_errors=True)
+    current_tc = tc
+
+
+def swap_upper(tc):
+    tmp_u = tc["upperdir"]
+    tc["upperdir"] = disk_check.UPPER_DIR
+    disk_check.UPPER_DIR = tmp_u
+
+
+def swap_work(tc):
+    tmp_w = tc["workdir"]
+    # Stash the previous WORK_DIR under "workdir" (not "upperdir") so the
+    # second swap restores both module globals correctly.
+    tc["workdir"] = disk_check.WORK_DIR
+    disk_check.WORK_DIR = tmp_w
+
+
+class TestDiskCheck(object):
+    def setup(self):
+        pass
+
+
+    @patch("disk_check.syslog.syslog")
+    @patch("disk_check.subprocess.run")
+    def test_readonly(self, mock_proc, mock_log):
+        global err_data, cmds, max_log_lvl
+
+        mock_proc.side_effect = mock_subproc_run
+        mock_log.side_effect = report_err_msg
+
+        with patch('sys.argv', ["", "-l", "7", "-d", "/tmp"]):
+            disk_check.main()
+            assert max_log_lvl == syslog.LOG_DEBUG
+            max_log_lvl = -1
+
+        for i, tc in test_data.items():
+            print("-----------Start tc {}---------".format(i))
+            init_tc(tc)
+
+            with patch('sys.argv', tc["args"]):
+                if "upperdir" in tc:
+                    swap_upper(tc)
+
+                if "workdir" in tc:
+                    swap_work(tc)
+
+                ret = disk_check.main()
+
+                if "upperdir" in tc:
+                    # restore
+                    swap_upper(tc)
+
+                if "workdir" in tc:
+                    # restore
+                    swap_work(tc)
+
+            print("ret = {}".format(ret))
+            print("err_data={}".format(err_data))
+            print("cmds: {}".format(cmds))
+
+            assert ret == tc.get("expect_ret", 0)
+            if "err" in tc:
+                assert err_data == tc["err"]
+            assert cmds == tc.get("cmds", [])
+            print("-----------End tc {}-----------".format(i))
+
+
+        assert max_log_lvl == syslog.LOG_ERR
+