Skip to content

Commit

Permalink
disk_check: Check & mount RO as RW using tmpfs (sonic-net#1569)
Browse files Browse the repository at this point in the history
What I did
There is a bug that occasionally turn root-overlay as RO. This makes /etc & /home as RO. This blocks any new remote user login, as that needs to write into /etc & /home.

This tool scans /etc & /home (or given dirs) as in RW or RO state. If RO, it could create a writable overlay using tmpfs.
This is transient and stays until next reboot. Any write after the overlay will be lost upon reboot.

But this allows new remote users login.

How I did it
Create upper & work dirs in /run/mount (tmpfs). Mount /etc & /home as lowerdirs and use the same name for final merge. This allows anyone opening a file in /etc or /home to operate on the merged overlay, transparently.

How to verify it
Mount any dir on tmpfs ( mount -t tmpfs tmpfs test_dir)
remount as RO (mount -o remount,ro test_dir)
Pass that dir to this script. (disk_check.py -d ./test_dir)
Now it should be RW
  • Loading branch information
renukamanavalan committed Oct 11, 2021
1 parent 3b78032 commit 82b02ec
Show file tree
Hide file tree
Showing 3 changed files with 313 additions and 0 deletions.
151 changes: 151 additions & 0 deletions scripts/disk_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
What:
There have been cases, where disk turns Read-only due to kernel bug.
In Read-only state, system blocks new remote user login via TACACS.
This utility is to check & make transient recovery as needed.
How:
check for Read-Write permission. If Read-only, create writable overlay using tmpfs.
By default "/etc" & "/home" are checked and if in Read-only state, make them Read-Write
using overlay on top of tmpfs.
Making /etc & /home as writable lets successful new remote user login.
If in Read-only state or in Read-Write state with the help of tmpfs overlay,
syslog ERR messages are written, to help raise alerts.
Monit may be used to invoke it periodically, to help scan & fix and
report via syslog.
"""

import argparse
import os
import sys
import syslog
import subprocess

UPPER_DIR = "/run/mount/upper"
WORK_DIR = "/run/mount/work"
MOUNTS_FILE = "/proc/mounts"

def log_err(m):
print("Err: {}".format(m), file=sys.stderr)
syslog.syslog(syslog.LOG_ERR, m)


def log_info(m):
print("Info: {}".format(m))
syslog.syslog(syslog.LOG_INFO, m)


def log_debug(m):
print("debug: {}".format(m))
syslog.syslog(syslog.LOG_DEBUG, m)


def test_writable(dirs):
for d in dirs:
rw = os.access(d, os.W_OK)
if not rw:
log_err("{} is not read-write".format(d))
return False
else:
log_debug("{} is Read-Write".format(d))
return True


def run_cmd(cmd):
proc = subprocess.run(cmd, shell=True, text=True, capture_output=True)
ret = proc.returncode
if ret:
log_err("failed: ret={} cmd={}".format(ret, cmd))
else:
log_info("ret={} cmd: {}".format(ret, cmd))

if proc.stdout:
log_info("stdout: {}".format(str(proc.stdout)))
if proc.stderr:
log_info("stderr: {}".format(str(proc.stderr)))
return ret


def get_dname(path_name):
return os.path.basename(os.path.normpath(path_name))


def do_mnt(dirs):
if os.path.exists(UPPER_DIR):
log_err("Already mounted")
return 1

for i in (UPPER_DIR, WORK_DIR):
try:
os.mkdir(i)
except OSError as error:
log_err("Failed to create {}".format(i))
return 1

for d in dirs:
ret = run_cmd("mount -t overlay overlay_{} -o lowerdir={},"
"upperdir={},workdir={} {}".format(
get_dname(d), d, UPPER_DIR, WORK_DIR, d))
if ret:
break

if ret:
log_err("Failed to mount {} as Read-Write".format(dirs))
else:
log_info("{} are mounted as Read-Write".format(dirs))
return ret


def is_mounted(dirs):
if not os.path.exists(UPPER_DIR):
return False

onames = set()
for d in dirs:
onames.add("overlay_{}".format(get_dname(d)))

with open(MOUNTS_FILE, "r") as s:
for ln in s.readlines():
n = ln.strip().split()[0]
if n in onames:
log_debug("Mount exists for {}".format(n))
return True
return False


def do_check(skip_mount, dirs):
ret = 0
if not test_writable(dirs):
if not skip_mount:
ret = do_mnt(dirs)

# Check if mounted
if (not ret) and is_mounted(dirs):
log_err("READ-ONLY: Mounted {} to make Read-Write".format(dirs))

return ret


def main():
parser=argparse.ArgumentParser(
description="check disk for Read-Write and mount etc & home as Read-Write")
parser.add_argument('-s', "--skip-mount", action='store_true', default=False,
help="Skip mounting /etc & /home as Read-Write")
parser.add_argument('-d', "--dirs", default="/etc,/home",
help="dirs to mount")
args = parser.parse_args()

ret = do_check(args.skip_mount, args.dirs.split(","))
return ret


if __name__ == "__main__":
sys.exit(main())
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
'scripts/db_migrator.py',
'scripts/decode-syseeprom',
'scripts/dropcheck',
'scripts/disk_check.py',
'scripts/dropconfig',
'scripts/dropstat',
'scripts/dump_nat_entries.py',
Expand Down
161 changes: 161 additions & 0 deletions tests/disk_check_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import sys
import syslog
from unittest.mock import patch
import pytest

sys.path.append("scripts")
import disk_check

disk_check.MOUNTS_FILE = "/tmp/proc_mounts"

test_data = {
"0": {
"desc": "All good as /tmp is read-write",
"args": ["", "-d", "/tmp"],
"err": ""
},
"1": {
"desc": "Not good as /tmpx is not read-write; But fix skipped",
"args": ["", "-d", "/tmpx", "-s"],
"err": "/tmpx is not read-write"
},
"2": {
"desc": "Not good as /tmpx is not read-write; expect mount",
"args": ["", "-d", "/tmpx"],
"upperdir": "/tmp/tmpx",
"workdir": "/tmp/tmpy",
"mounts": "overlay_tmpx blahblah",
"err": "/tmpx is not read-write|READ-ONLY: Mounted ['/tmpx'] to make Read-Write",
"cmds": ['mount -t overlay overlay_tmpx -o lowerdir=/tmpx,upperdir=/tmp/tmpx,workdir=/tmp/tmpy /tmpx']
},
"3": {
"desc": "Not good as /tmpx is not read-write; mount fail as create of upper fails",
"args": ["", "-d", "/tmpx"],
"upperdir": "/tmpx",
"expect_ret": 1
},
"4": {
"desc": "Not good as /tmpx is not read-write; mount fail as upper exist",
"args": ["", "-d", "/tmpx"],
"upperdir": "/tmp",
"err": "/tmpx is not read-write|Already mounted",
"expect_ret": 1
},
"5": {
"desc": "/tmp is read-write, but as well mount exists; hence report",
"args": ["", "-d", "/tmp"],
"upperdir": "/tmp",
"mounts": "overlay_tmp blahblah",
"err": "READ-ONLY: Mounted ['/tmp'] to make Read-Write"
},
"6": {
"desc": "Test another code path for good case",
"args": ["", "-d", "/tmp"],
"upperdir": "/tmp"
}
}

err_data = ""
cmds = []
current_tc = None

def mount_file(d):
with open(disk_check.MOUNTS_FILE, "w") as s:
s.write(d)


def report_err_msg(lvl, m):
global err_data
if lvl == syslog.LOG_ERR:
if err_data:
err_data += "|"
err_data += m


class proc:
returncode = 0
stdout = None
stderr = None

def __init__(self, proc_upd = None):
if proc_upd:
self.returncode = proc_upd.get("ret", 0)
self.stdout = proc_upd.get("stdout", None)
self.stderr = proc_upd.get("stderr", None)


def mock_subproc_run(cmd, shell, text, capture_output):
global cmds

upd = (current_tc["proc"][len(cmds)]
if len(current_tc.get("proc", [])) > len(cmds) else None)
cmds.append(cmd)

return proc(upd)


def init_tc(tc):
global err_data, cmds, current_tc

err_data = ""
cmds = []
mount_file(tc.get("mounts", ""))
current_tc = tc


def swap_upper(tc):
tmp_u = tc["upperdir"]
tc["upperdir"] = disk_check.UPPER_DIR
disk_check.UPPER_DIR = tmp_u


def swap_work(tc):
tmp_w = tc["workdir"]
tc["upperdir"] = disk_check.WORK_DIR
disk_check.WORK_DIR = tmp_w


class TestDiskCheck(object):
def setup(self):
pass


@patch("disk_check.syslog.syslog")
@patch("disk_check.subprocess.run")
def test_readonly(self, mock_proc, mock_log):
global err_data, cmds

mock_proc.side_effect = mock_subproc_run
mock_log.side_effect = report_err_msg

for i, tc in test_data.items():
print("-----------Start tc {}---------".format(i))
init_tc(tc)

with patch('sys.argv', tc["args"]):
if "upperdir" in tc:
swap_upper(tc)

if "workdir" in tc:
# restore
swap_work(tc)

ret = disk_check.main()

if "upperdir" in tc:
# restore
swap_upper(tc)

if "workdir" in tc:
# restore
swap_work(tc)

print("ret = {}".format(ret))
print("err_data={}".format(err_data))
print("cmds: {}".format(cmds))

assert ret == tc.get("expect_ret", 0)
if "err" in tc:
assert err_data == tc["err"]
assert cmds == tc.get("cmds", [])
print("-----------End tc {}-----------".format(i))

0 comments on commit 82b02ec

Please sign in to comment.