Skip to content

Commit

Permalink
[config] Implement a process level lock (sonic-net#857)
Browse files Browse the repository at this point in the history
Changes:
1.) Implement a class, which uses hsetnx for lock.
2.) lock is expired within timeout period or will be released by owner.
3.) After -y prompt, lock is reacquired, because timer could have expired,
    before user enters yes.
Signed-off-by: Praveen Chaudhary pchaudhary@linkedin.com
  • Loading branch information
Praveen Chaudhary authored Apr 14, 2020
1 parent cff92dd commit 3f651dc
Showing 1 changed file with 107 additions and 0 deletions.
107 changes: 107 additions & 0 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,98 @@ def is_ipaddress(val):
return False
return True

# class for locking entire config process
class ConfigDbLock():
def __init__(self):
self.lockName = "LOCK|configDbLock"
self.timeout = 10
self.pid = os.getpid()
self.client = None

self._acquireLock()
return

def _acquireLock(self):
try:
# connect to db
db_kwargs = dict()
configdb = ConfigDBConnector(**db_kwargs)
configdb.connect()

self.client = configdb.get_redis_client('CONFIG_DB')
# Set lock and expire time. Process may get killed b/w set lock and
# expire call.
if self.client.hsetnx(self.lockName, "PID", self.pid):
self.client.expire(self.lockName, self.timeout)
# if lock exists but expire timer not running, run expire time and
# abort.
elif not self.client.ttl(self.lockName):
click.echo(":::Unable to acquire lock. Resetting timer and aborting:::");
self.client.expire(self.lockName, self.timeout)
sys.exit(1)
else:
click.echo(":::Unable to acquire lock. Aborting:::");
sys.exit(1)
except Exception as e:
click.echo(":::Exception: {}:::".format(e))
sys.exit(1)
return

def reacquireLock(self):
try:
# Try to set lock first
if self.client.hsetnx(self.lockName, "PID", self.pid):
self.client.expire(self.lockName, self.timeout)
# if lock exists, check who owns it
else:
p = self.client.pipeline(True)
# watch, we do not want to work on modified lock
p.watch(self.lockName)
# if current process holding then extend the timer
if p.hget(self.lockName, "PID") == str(self.pid):
self.client.expire(self.lockName, self.timeout)
p.unwatch()
return
else:
# some other process is holding the lock.
click.echo(":::Unable to reacquire lock (lock PID: {}, self.pid: {}):::".\
format(p.hget(self.lockName, "PID"), self.pid))
p.unwatch()
sys.exit(1)
except Exception as e:
click.echo(":::Exception: {}:::".format(e))
sys.exit(1)
return

def _releaseLock(self):
try:
p = self.client.pipeline(True)
# watch, we do not want to work on modified lock
p.watch(self.lockName)
# if current process holding the lock then release it.
if p.hget(self.lockName, "PID") == str(self.pid):
p.multi()
p.delete(self.lockName)
p.execute()
return
# lock may be None, if timer has expired before releasing lock.
elif not self.lockName:
return
else:
# some other process is holding the lock.
click.echo(":::Unable to release lock (lock PID: {}, self.pid: {}):::".\
format(p.hget(self.lockName, "PID"), self.pid))
p.unwatch()
except Exception as e:
click.echo(":::Exception: {}:::".format(e))
return

def __del__(self):
self._releaseLock()
return
# end of class configdblock

configdb_lock = ConfigDbLock()
# This is our main entrypoint - the main 'config' command
@click.group(context_settings=CONTEXT_SETTINGS)
def config():
Expand All @@ -547,6 +638,8 @@ def config():
@click.argument('filename', default='/etc/sonic/config_db.json', type=click.Path())
def save(filename):
"""Export current config DB to a file on disk."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
command = "{} -d --print-data > {}".format(SONIC_CFGGEN_PATH, filename)
run_command(command, display_cmd=True)

Expand All @@ -557,6 +650,9 @@ def load(filename, yes):
"""Import a previous saved config DB dump file."""
if not yes:
click.confirm('Load config from the file %s?' % filename, abort=True)
# reacquire lock after prompt
configdb_lock.reacquireLock()

command = "{} -j {} --write-to-db".format(SONIC_CFGGEN_PATH, filename)
run_command(command, display_cmd=True)

Expand All @@ -568,6 +664,8 @@ def reload(filename, yes, load_sysinfo):
"""Clear current configuration and import a previous saved config DB dump file."""
if not yes:
click.confirm('Clear current config and reload config from the file %s?' % filename, abort=True)
# reacquire lock after prompt
configdb_lock.reacquireLock()

log_info("'reload' executing...")

Expand Down Expand Up @@ -617,6 +715,8 @@ def reload(filename, yes, load_sysinfo):
@click.argument('filename', default='/etc/sonic/device_desc.xml', type=click.Path(exists=True))
def load_mgmt_config(filename):
"""Reconfigure hostname and mgmt interface based on device description file."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
command = "{} -M {} --write-to-db".format(SONIC_CFGGEN_PATH, filename)
run_command(command, display_cmd=True)
#FIXME: After config DB daemon for hostname and mgmt interface is implemented, we'll no longer need to do manual configuration here
Expand All @@ -639,7 +739,10 @@ def load_mgmt_config(filename):
@click.option('-y', '--yes', is_flag=True, callback=_abort_if_false,
expose_value=False, prompt='Reload config from minigraph?')
def load_minigraph():

"""Reconfigure based on minigraph."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
log_info("'load_minigraph' executing...")

# get the device type
Expand Down Expand Up @@ -2304,6 +2407,8 @@ def ztp():
@click.argument('run', required=False, type=click.Choice(["run"]))
def run(run):
"""Restart ZTP of the device."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
command = "ztp run -y"
run_command(command, display_cmd=True)

Expand All @@ -2313,6 +2418,8 @@ def run(run):
@click.argument('disable', required=False, type=click.Choice(["disable"]))
def disable(disable):
"""Administratively Disable ZTP."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
command = "ztp disable -y"
run_command(command, display_cmd=True)

Expand Down

0 comments on commit 3f651dc

Please sign in to comment.