Skip to content

Commit

Permalink
[config] Adding sanity checks for config reload (#1664)
Browse files Browse the repository at this point in the history
Fixed config reload to add some system sanity checks

1) To check if the system is in running state
2) Check if the services which are grouped under delayed target up
3) Check if swss is running for at least 120 seconds
To force config reload and to avoid these checks an extra option -f/--force is added
  • Loading branch information
dgsudharsan authored Jun 21, 2021
1 parent 2cdadb5 commit 9041ba0
Showing 1 changed file with 41 additions and 1 deletion.
42 changes: 41 additions & 1 deletion config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,32 @@ def _restart_services():
click.echo("Reloading Monit configuration ...")
clicommon.run_command("sudo monit reload")

def _get_delay_timers():
out = clicommon.run_command("systemctl list-dependencies sonic-delayed.target --plain |sed '1d'", return_cmd=True)
return [timer.strip() for timer in out.splitlines()]

def _delay_timers_elapsed():
for timer in _get_delay_timers():
out = clicommon.run_command("systemctl show {} --property=LastTriggerUSecMonotonic --value".format(timer), return_cmd=True)
if out.strip() == "0":
return False
return True

def _swss_ready():
out = clicommon.run_command("systemctl show swss.service --property ActiveState --value", return_cmd=True)
if out.strip() != "active":
return False
out = clicommon.run_command("systemctl show swss.service --property ActiveEnterTimestampMonotonic --value", return_cmd=True)
swss_up_time = float(out.strip())/1000000
now = time.monotonic()
if (now - swss_up_time > 120):
return True
else:
return False

def _system_running():
out = clicommon.run_command("sudo systemctl is-system-running", return_cmd=True)
return out.strip() == "running"

def interface_is_in_vlan(vlan_member_table, interface_name):
""" Check if an interface is in a vlan """
Expand Down Expand Up @@ -1190,12 +1216,26 @@ def list_checkpoints(ctx, verbose):
@click.option('-l', '--load-sysinfo', is_flag=True, help='load system default information (mac, portmap etc) first.')
@click.option('-n', '--no_service_restart', default=False, is_flag=True, help='Do not restart docker services')
@click.option('-d', '--disable_arp_cache', default=False, is_flag=True, help='Do not cache ARP table before reloading (applies to dual ToR systems only)')
@click.option('-f', '--force', default=False, is_flag=True, help='Force config reload without system checks')
@click.argument('filename', required=False)
@clicommon.pass_db
def reload(db, filename, yes, load_sysinfo, no_service_restart, disable_arp_cache):
def reload(db, filename, yes, load_sysinfo, no_service_restart, disable_arp_cache, force):
"""Clear current configuration and import a previous saved config DB dump file.
<filename> : Names of configuration file(s) to load, separated by comma with no spaces in between
"""
if not force and not no_service_restart:
if not _system_running():
click.echo("System is not up. Retry later or use -f to avoid system checks")
return

if not _delay_timers_elapsed():
click.echo("Relevant services are not up. Retry later or use -f to avoid system checks")
return

if not _swss_ready():
click.echo("SwSS container is not ready. Retry later or use -f to avoid system checks")
return

if filename is None:
message = 'Clear current config and reload config from the default config file(s) ?'
else:
Expand Down

1 comment on commit 9041ba0

@JulyLuoyang
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello, I would like to ask why the minimum runtime check for the swss container is set to 120 seconds.Thank you!

Please sign in to comment.