Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

disk_Check: Scan & mount as RW when disk turns into Read-only #1872

Merged
merged 5 commits into from
Nov 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 180 additions & 0 deletions scripts/disk_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
What:
There have been cases where the disk turns Read-only due to a kernel bug.
In the Read-only state, the system blocks new remote user logins via TACACS.
This utility checks for that state and makes a transient recovery as needed.

How:
check for Read-Write permission. If Read-only, create writable overlay using tmpfs.

By default "/etc" & "/home" are checked and if in Read-only state, make them Read-Write
using overlay on top of tmpfs.

Making /etc & /home writable lets new remote users log in successfully.

If in Read-only state or in Read-Write state with the help of tmpfs overlay,
syslog ERR messages are written, to help raise alerts.

Monit may be used to invoke it periodically, to help scan & fix and
report via syslog.

Tidbit:
If you would like to test this script, you could simulate a RO disk
with the following command. Reboot will revert the effect.
sudo bash -c "echo u > /proc/sysrq-trigger"

"""

import argparse
import os
import sys
import syslog
import subprocess

# Parent directories under which do_mnt() creates one overlay "upperdir" and
# one "workdir" per checked directory.  The existence of UPPER_DIR is also used
# as the "already mounted" marker by do_mnt() and is_mounted().
# NOTE(review): presumably /run is tmpfs-backed, per the module docstring -- confirm.
UPPER_DIR = "/run/mount/upper"
WORK_DIR = "/run/mount/work"
# Mount table scanned by is_mounted() for "overlay_<name>" entries.
MOUNTS_FILE = "/proc/mounts"

# Messages with a syslog level numerically above this threshold are dropped by
# _log_msg(); main() overrides it from the --loglvl option.
chk_log_level = syslog.LOG_ERR

def _log_msg(lvl, pfx, msg):
    """Emit *msg* to stdout and syslog unless it is filtered out.

    lvl -- syslog priority of the message.
    pfx -- short tag printed in front of the message on stdout only.
    msg -- message text.

    Messages whose level is numerically greater than the module-level
    chk_log_level threshold are dropped.
    """
    if lvl > chk_log_level:
        return

    console_line = "{}: {}".format(pfx, msg)
    print(console_line)
    syslog.syslog(lvl, msg)

def log_err(m):
    """Log *m* at syslog.LOG_ERR priority with the "Err" console prefix."""
    _log_msg(syslog.LOG_ERR, "Err", m)


def log_info(m):
    """Log *m* at syslog.LOG_INFO priority with the "Info" console prefix."""
    _log_msg(syslog.LOG_INFO, "Info", m)


def log_debug(m):
    """Log *m* at syslog.LOG_DEBUG priority with the "Debug" console prefix."""
    _log_msg(syslog.LOG_DEBUG, "Debug", m)


def test_writable(dirs):
    """Return True when every path in *dirs* passes an os.W_OK access check.

    Stops at the first path that is not writable, logs it as an error and
    returns False; writable paths are logged at debug level.
    """
    for path in dirs:
        if os.access(path, os.W_OK):
            log_debug("{} is Read-Write".format(path))
            continue

        log_err("{} is not read-write".format(path))
        return False

    return True


def run_cmd(cmd):
    """Run *cmd* through the shell, log the outcome and return its exit status.

    cmd -- command line string, executed with shell=True.

    Returns the process return code (0 on success).  A non-zero status is
    logged as an error; the command line, stdout and stderr are logged at
    info level.
    """
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    (output, err) = p.communicate()
    # communicate() returns only after the process has exited, so returncode
    # is already final here; no extra wait() is needed.
    ret = p.returncode
    if ret:
        log_err("failed: ret={} cmd={}".format(ret, cmd))
    else:
        log_info("ret={} cmd: {}".format(ret, cmd))

    # errors="replace" keeps logging from raising UnicodeDecodeError when the
    # command emits bytes that are not valid UTF-8.
    if output:
        log_info("stdout: {}".format(output.decode("utf-8", errors="replace")))
    if err:
        log_info("stderr: {}".format(err.decode("utf-8", errors="replace")))

    return ret


def get_dname(path_name):
    """Return the last path component of *path_name* after normalisation.

    Normalising first means a trailing slash does not produce an empty name.
    """
    normalized = os.path.normpath(path_name)
    _parent, leaf = os.path.split(normalized)
    return leaf


def do_mnt(dirs):
    """Mount a writable overlay on top of every directory in *dirs*.

    For each directory an "upper" and a "work" sub-directory named after the
    directory's basename are created under UPPER_DIR / WORK_DIR and an
    overlay called "overlay_<basename>" is mounted over the directory.

    Returns 0 when all overlays were mounted, non-zero otherwise:
      * 1 if UPPER_DIR already exists (treated as "already mounted"),
      * 1 if UPPER_DIR / WORK_DIR or a per-directory sub-directory cannot be
        created,
      * the exit status of the failing mount command.
    On a per-directory failure UPPER_DIR and WORK_DIR are removed again.
    """
    if os.path.exists(UPPER_DIR):
        log_err("Already mounted")
        return 1

    for i in (UPPER_DIR, WORK_DIR):
        try:
            os.mkdir(i)
        except OSError:
            log_err("Failed to create {}".format(i))
            return 1

    # Start from success so an empty dirs list does not leave ret unbound.
    ret = 0
    for d in dirs:
        d_name = get_dname(d)
        d_upper = os.path.join(UPPER_DIR, d_name)
        d_work = os.path.join(WORK_DIR, d_name)
        try:
            os.mkdir(d_upper)
            os.mkdir(d_work)
        except OSError:
            # e.g. two entries share a basename; report it as a failure
            # instead of dying with a traceback.
            log_err("Failed to create {} or {}".format(d_upper, d_work))
            ret = 1
            break

        ret = run_cmd("mount -t overlay overlay_{} -o lowerdir={},"
                      "upperdir={},workdir={} {}".format(
                          d_name, d, d_upper, d_work, d))
        if ret:
            break

    if ret:
        # NOTE(review): overlays mounted for earlier entries of dirs are not
        # unmounted before their upper/work directories are removed -- confirm
        # whether an explicit umount is wanted here.
        for i in (UPPER_DIR, WORK_DIR):
            if os.path.exists(i):
                # Keep the cleanup status separate: a successful "rm" must not
                # turn the failure being reported into a 0 return value.
                if run_cmd("rm -rf {}".format(i)):
                    log_err("Failed to remove {}".format(i))

        log_err("Failed to mount {} as Read-Write".format(dirs))
    else:
        log_info("{} are mounted as Read-Write".format(dirs))
    return ret


def is_mounted(dirs):
    """Return True if an overlay created for any of *dirs* is listed as mounted.

    The check is skipped (False) when UPPER_DIR does not exist.  Otherwise the
    first field of each line of MOUNTS_FILE is compared against the expected
    "overlay_<basename>" names; the first match is logged at debug level.
    """
    if not os.path.exists(UPPER_DIR):
        return False

    onames = {"overlay_{}".format(get_dname(d)) for d in dirs}

    with open(MOUNTS_FILE, "r") as s:
        for ln in s:
            fields = ln.split()
            if not fields:
                # Blank line: nothing to compare, and fields[0] would raise.
                continue
            n = fields[0]
            if n in onames:
                log_debug("Mount exists for {}".format(n))
                return True
    return False


def do_check(skip_mount, dirs):
    """Check *dirs* for write access and mount overlays when needed.

    skip_mount -- when true, only report; never attempt the overlay mount.
    dirs       -- list of directory paths to check.

    Returns 0 unless a mount attempt was made and failed, in which case the
    status from do_mnt() is returned.  Whenever the result is 0 and an
    overlay for *dirs* is found mounted, an error-level message is logged.
    """
    needs_mount = (not test_writable(dirs)) and (not skip_mount)
    ret = do_mnt(dirs) if needs_mount else 0

    if ret:
        return ret

    # Report an active overlay even when the directories look writable now.
    if is_mounted(dirs):
        log_err("READ-ONLY: Mounted {} to make Read-Write".format(dirs))

    return ret


def main():
    """Parse the command line, run the check and return its status.

    Options:
      -s/--skip-mount  only report, do not mount overlays.
      -d/--dirs        comma-separated directories to check (default /etc,/home).
      -l/--loglvl      numeric syslog level threshold (default syslog.LOG_ERR).

    Returns the value of do_check(), suitable for sys.exit().
    """
    global chk_log_level

    parser = argparse.ArgumentParser(
        description="check disk for Read-Write and mount etc & home as Read-Write")
    parser.add_argument('-s', "--skip-mount", action='store_true', default=False,
                        help="Skip mounting /etc & /home as Read-Write")
    parser.add_argument('-d', "--dirs", default="/etc,/home",
                        help="dirs to mount")
    parser.add_argument('-l', "--loglvl", default=syslog.LOG_ERR, type=int,
                        help="log level")
    args = parser.parse_args()

    chk_log_level = args.loglvl

    # Tolerate "a, b" and trailing commas: an empty or padded entry would be
    # reported as read-only and then break the overlay setup.
    dirs = [d.strip() for d in args.dirs.split(",") if d.strip()]
    return do_check(args.skip_mount, dirs)


# Script entry point: the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
'scripts/db_migrator.py',
'scripts/decode-syseeprom',
'scripts/dropcheck',
'scripts/disk_check.py',
'scripts/dropconfig',
'scripts/dropstat',
'scripts/dump_nat_entries.py',
Expand Down
180 changes: 180 additions & 0 deletions tests/disk_check_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
import sys
import syslog
from unittest.mock import patch
import pytest
import subprocess

sys.path.append("scripts")
import disk_check

# Point the module under test at a scratch mounts file that init_tc() rewrites
# for every test case, instead of the real /proc/mounts.
disk_check.MOUNTS_FILE = "/tmp/proc_mounts"

# Table of test cases, keyed by an id string.  Keys used by the test driver:
#   "args"       -- value patched into sys.argv for disk_check.main()
#   "upperdir"   -- optional; swapped into disk_check.UPPER_DIR for the run
#   "workdir"    -- optional; swapped into disk_check.WORK_DIR for the run
#   "mounts"     -- optional; content written to the scratch mounts file
#   "err"        -- optional; expected LOG_ERR messages joined by "|"
#   "cmds"       -- optional; expected list of commands run (default [])
#   "expect_ret" -- optional; expected return value of main() (default 0)
#   "proc"       -- optional; per-command result overrides read by
#                   mock_subproc_run() (not used by any case below)
# "desc" is a human-readable description only.
test_data = {
    "0": {
        "desc": "All good as /tmp is read-write",
        "args": ["", "-d", "/tmp"],
        "err": ""
    },
    "1": {
        "desc": "Not good as /tmpx is not read-write; But fix skipped",
        "args": ["", "-d", "/tmpx", "-s"],
        "err": "/tmpx is not read-write"
    },
    "2": {
        "desc": "Not good as /tmpx is not read-write; expect mount",
        "args": ["", "-d", "/tmpx"],
        "upperdir": "/tmp/tmpx",
        "workdir": "/tmp/tmpy",
        "mounts": "overlay_tmpx blahblah",
        "err": "/tmpx is not read-write|READ-ONLY: Mounted ['/tmpx'] to make Read-Write",
        "cmds": ['mount -t overlay overlay_tmpx -o lowerdir=/tmpx,upperdir=/tmp/tmpx/tmpx,workdir=/tmp/tmpy/tmpx /tmpx']
    },
    "3": {
        "desc": "Not good as /tmpx is not read-write; mount fail as create of upper fails",
        "args": ["", "-d", "/tmpx"],
        "upperdir": "/tmpx",
        "expect_ret": 1
    },
    "4": {
        "desc": "Not good as /tmpx is not read-write; mount fail as upper exist",
        "args": ["", "-d", "/tmpx"],
        "upperdir": "/tmp",
        "err": "/tmpx is not read-write|Already mounted",
        "expect_ret": 1
    },
    "5": {
        "desc": "/tmp is read-write, but as well mount exists; hence report",
        "args": ["", "-d", "/tmp"],
        "upperdir": "/tmp",
        "mounts": "overlay_tmp blahblah",
        "err": "READ-ONLY: Mounted ['/tmp'] to make Read-Write"
    },
    "6": {
        "desc": "Test another code path for good case",
        "args": ["", "-d", "/tmp"],
        "upperdir": "/tmp"
    }
}

# Mutable state shared between the mocks and the test driver.
err_data = ""       # LOG_ERR messages seen so far, joined by "|"
max_log_lvl = -1    # numerically highest syslog level passed to the syslog mock
cmds = []           # commands recorded by mock_subproc_run()
current_tc = None   # test-case dict currently being executed (set by init_tc)

def mount_file(d):
    """Overwrite the scratch mounts file (disk_check.MOUNTS_FILE) with *d*."""
    with open(disk_check.MOUNTS_FILE, "w") as mounts_fh:
        mounts_fh.write(d)


def report_err_msg(lvl, m):
    """Stand-in for syslog.syslog that records what was logged.

    Tracks the numerically highest level seen in max_log_lvl and appends
    every LOG_ERR message to err_data, separating entries with "|".
    """
    global err_data, max_log_lvl

    max_log_lvl = max(max_log_lvl, lvl)

    if lvl != syslog.LOG_ERR:
        return

    # Only put a separator in front of the second and later messages.
    prefix = err_data + "|" if err_data else err_data
    err_data = prefix + m


class proc:
    """Minimal stand-in for a finished process result.

    Class-level defaults describe a successful run with no captured output;
    an optional dict can override them per instance using the keys
    "ret", "stdout" and "stderr".
    """
    returncode = 0
    stdout = None
    stderr = None

    # (attribute name, key in the override dict, fallback value)
    _FIELDS = (
        ("returncode", "ret", 0),
        ("stdout", "stdout", None),
        ("stderr", "stderr", None),
    )

    def __init__(self, proc_upd = None):
        if not proc_upd:
            return
        for attr, key, fallback in self._FIELDS:
            setattr(self, attr, proc_upd.get(key, fallback))


def mock_subproc_run(cmd, shell, stdout):
    """Record *cmd* and return a canned process result.

    Asserts that the caller asked for a shell and for piped stdout.  The
    n-th recorded command picks the n-th entry of the current test case's
    optional "proc" list as result overrides; when there is none, a default
    successful result is returned.
    """
    global cmds

    assert shell == True
    assert stdout == subprocess.PIPE

    call_index = len(cmds)
    scripted = current_tc.get("proc", [])
    if len(scripted) > call_index:
        upd = current_tc["proc"][call_index]
    else:
        upd = None

    cmds.append(cmd)
    return proc(upd)


def init_tc(tc):
    """Reset the recorded state and install *tc* as the current test case.

    Clears the collected error text and command list, rewrites the scratch
    mounts file from the case's optional "mounts" entry (empty by default)
    and remembers *tc* for the mocks.
    """
    global err_data, cmds, current_tc

    err_data, cmds = "", []
    mounts_content = tc.get("mounts", "")
    mount_file(mounts_content)
    current_tc = tc


def swap_upper(tc):
    """Exchange tc["upperdir"] with disk_check.UPPER_DIR.

    Calling it twice restores both values, so the same helper is used to
    install a test directory and to put the original back.
    """
    tc["upperdir"], disk_check.UPPER_DIR = disk_check.UPPER_DIR, tc["upperdir"]


def swap_work(tc):
    """Exchange tc["workdir"] with disk_check.WORK_DIR.

    Calling it twice restores both values.  The saved module value is kept
    under "workdir" so the case's "upperdir" entry is left untouched and the
    second call really puts the original WORK_DIR back.
    """
    tmp_w = tc["workdir"]
    tc["workdir"] = disk_check.WORK_DIR
    disk_check.WORK_DIR = tmp_w


class TestDiskCheck(object):
    """Table-driven checks of disk_check.main() with syslog and Popen mocked."""

    def setup(self):
        pass


    @patch("disk_check.syslog.syslog")
    @patch("disk_check.subprocess.Popen")
    def test_readonly(self, mock_proc, mock_log):
        """Run every entry of test_data and compare return code, errors and commands.

        subprocess.Popen is patched because that is what disk_check.run_cmd()
        calls; patching any other subprocess entry point would let the mount
        commands run for real and leave the recorded command list empty.
        """
        global err_data, cmds, max_log_lvl

        class fake_popen(object):
            # Popen-shaped adapter: records the command through
            # mock_subproc_run() and replays its canned result.
            def __init__(self, cmd, shell=False, stdout=None, stderr=None):
                res = mock_subproc_run(cmd, shell, stdout)
                self.returncode = res.returncode
                self._streams = (res.stdout, res.stderr)

            def communicate(self):
                return self._streams

            def wait(self):
                return self.returncode

        mock_proc.side_effect = fake_popen
        mock_log.side_effect = report_err_msg

        # With the threshold raised to 7, debug-level messages must reach syslog.
        with patch('sys.argv', ["", "-l", "7", "-d", "/tmp"]):
            disk_check.main()
            assert max_log_lvl == syslog.LOG_DEBUG
            max_log_lvl = -1

        for i, tc in test_data.items():
            print("-----------Start tc {}---------".format(i))
            init_tc(tc)

            with patch('sys.argv', tc["args"]):
                # swap in the per-case directories
                if "upperdir" in tc:
                    swap_upper(tc)

                if "workdir" in tc:
                    swap_work(tc)

                try:
                    ret = disk_check.main()
                finally:
                    # restore, even if main() raised, so later cases are not
                    # run against a previous case's directories
                    if "upperdir" in tc:
                        swap_upper(tc)

                    if "workdir" in tc:
                        swap_work(tc)

            print("ret = {}".format(ret))
            print("err_data={}".format(err_data))
            print("cmds: {}".format(cmds))

            assert ret == tc.get("expect_ret", 0)
            if "err" in tc:
                assert err_data == tc["err"]
            assert cmds == tc.get("cmds", [])
            print("-----------End tc {}-----------".format(i))


        # At the default threshold nothing above LOG_ERR may reach syslog.
        assert max_log_lvl == syslog.LOG_ERR