From f9be17a14b5ac8f9d7fa218b0fc0884c6d819bf2 Mon Sep 17 00:00:00 2001 From: Rodny Molina Date: Wed, 5 Sep 2018 00:35:10 -0700 Subject: [PATCH] Adjusting schema as per latest discussions Signed-off-by: Rodny Molina --- doc/swss-schema.md | 36 ++++++++++++++++++++++++-------- tests/test_warm_reboot.py | 40 ++++++++++++++++++------------------ warmrestart/warm_restart.cpp | 31 ++++++++++++++-------------- 3 files changed, 63 insertions(+), 44 deletions(-) diff --git a/doc/swss-schema.md b/doc/swss-schema.md index 7a5169a3e4d..ea89e29fb1a 100644 --- a/doc/swss-schema.md +++ b/doc/swss-schema.md @@ -647,6 +647,15 @@ Equivalent RedisDB entry: ; and push the delta to appDB ; Valid value is 1-9999. 0 is invalid. + bgp_timer = 1*4DIGIT ; bgp_timer holds the time interval utilized by fpmsyncd during warm-restart episodes. + ; During this interval fpmsyncd will recover all the routing state previously pushed to + ; AppDB, as well as all the new state coming from zebra/bgpd. Upon expiration of this + ; timer, fpmsyncd will execute the reconciliation logic to eliminate all the stale + ; state from AppDB. This timer should match the BGP-GR restart-timer configured within + ; the elected routing-stack. + ; Supported range: 1-9999. + + ### VXLAN\_TUNNEL Stores vxlan tunnels configuration Status: ready @@ -674,15 +683,25 @@ Status: ready ;Status: work in progress key = WARM_RESTART_TABLE:process_name ; process_name is a unique process identifier. - restart_count = 1*10DIGIT ; a number between 0 and 2147483647, - ; count of warm start times. - state = "init" / "restored" / "reconciled" ; init: process init with warm start enabled. - ; restored: process restored to the previous - ; state using saved data. - ; reconciled: process reconciled with up to date - ; dynanic data like port state, neighbor, routes - ; and so on. 
+ restore_count = 1*10DIGIT ; a value between 0 and 2147483647 to keep track + ; of the number of times that an application has + ; 'restored' its state from its associated redis + ; data-store; which is equivalent to the number + ; of times an application has iterated through + ; a warm-restart cycle. + + state = "initialized" / "restored" / "reconciled" ; initialized: default/initial state for processes + ; with warm-restart capabilities turned on. This + ; state will be applied permanently for processes + ; with warm-restart feature being turned off. + ; + ; restored: process restored to the previous + ; state using saved data. + ; + ; reconciled: process reconciled with updated + ; dynamic data like port state, neighbor, routes + ; and so on. ## Configuration files What configuration files should we have? Do apps, orch agent each need separate files? @@ -692,4 +711,3 @@ What configuration files should we have? Do apps, orch agent each need separate portsyncd reads from port_config.ini and updates PORT_TABLE in APP_DB All other apps (intfsyncd) read from PORT_TABLE in APP_DB - diff --git a/tests/test_warm_reboot.py b/tests/test_warm_reboot.py index 50057646bd1..32830de4407 100644 --- a/tests/test_warm_reboot.py +++ b/tests/test_warm_reboot.py @@ -4,9 +4,9 @@ import time import json -# Get restart count of all processes supporting warm restart -def swss_get_RestartCount(state_db): - restart_count = {} +# Get restore count of all processes supporting warm restart +def swss_get_RestoreCount(state_db): + restore_count = {} warmtbl = swsscommon.Table(state_db, swsscommon.STATE_WARM_RESTART_TABLE_NAME) keys = warmtbl.getKeys() assert len(keys) != 0 @@ -14,13 +14,13 @@ def swss_get_RestartCount(state_db): (status, fvs) = warmtbl.get(key) assert status == True for fv in fvs: - if fv[0] == "restart_count": - restart_count[key] = int(fv[1]) - print(restart_count) - return restart_count + if fv[0] == "restore_count": + restore_count[key] = int(fv[1]) + print(restore_count) 
+ return restore_count -# function to check the restart count incremented by 1 for all processes supporting warm restart -def swss_check_RestartCount(state_db, restart_count): +# function to check the restore count incremented by 1 for all processes supporting warm restart +def swss_check_RestoreCount(state_db, restore_count): warmtbl = swsscommon.Table(state_db, swsscommon.STATE_WARM_RESTART_TABLE_NAME) keys = warmtbl.getKeys() print(keys) @@ -29,8 +29,8 @@ def swss_check_RestartCount(state_db, restart_count): (status, fvs) = warmtbl.get(key) assert status == True for fv in fvs: - if fv[0] == "restart_count": - assert int(fv[1]) == restart_count[key] + 1 + if fv[0] == "restore_count": + assert int(fv[1]) == restore_count[key] + 1 elif fv[0] == "state": assert fv[1] == "reconciled" @@ -46,12 +46,12 @@ def check_port_oper_status(appl_db, port_name, state): break assert oper_status == state -# function to check the restart count incremented by 1 for a single process -def swss_app_check_RestartCount_single(state_db, restart_count, name): +# function to check the restore count incremented by 1 for a single process +def swss_app_check_RestoreCount_single(state_db, restore_count, name): warmtbl = swsscommon.Table(state_db, swsscommon.STATE_WARM_RESTART_TABLE_NAME) keys = warmtbl.getKeys() print(keys) - print(restart_count) + print(restore_count) assert len(keys) > 0 for key in keys: if key != name: @@ -59,8 +59,8 @@ def swss_app_check_RestartCount_single(state_db, restart_count, name): (status, fvs) = warmtbl.get(key) assert status == True for fv in fvs: - if fv[0] == "restart_count": - assert int(fv[1]) == restart_count[key] + 1 + if fv[0] == "restore_count": + assert int(fv[1]) == restore_count[key] + 1 elif fv[0] == "state": assert fv[1] == "reconciled" def create_entry(tbl, key, pairs): @@ -146,7 +146,7 @@ def test_PortSyncdWarmRestart(dvs): (status, fvs) = neighTbl.get("Ethernet20:11.0.0.10") assert status == True - restart_count = swss_get_RestartCount(state_db) + 
restore_count = swss_get_RestoreCount(state_db) # restart portsyncd dvs.runcmd(['sh', '-c', 'pkill -x portsyncd; cp /var/log/swss/sairedis.rec /var/log/swss/sairedis.rec.b; echo > /var/log/swss/sairedis.rec']) @@ -175,7 +175,7 @@ def test_PortSyncdWarmRestart(dvs): check_port_oper_status(appl_db, "Ethernet24", "up") - swss_app_check_RestartCount_single(state_db, restart_count, "portsyncd") + swss_app_check_RestoreCount_single(state_db, restore_count, "portsyncd") def test_VlanMgrdWarmRestart(dvs): @@ -263,7 +263,7 @@ def test_VlanMgrdWarmRestart(dvs): (exitcode, bv_before) = dvs.runcmd("bridge vlan") print(bv_before) - restart_count = swss_get_RestartCount(state_db) + restore_count = swss_get_RestoreCount(state_db) dvs.runcmd(['sh', '-c', 'pkill -x vlanmgrd; cp /var/log/swss/sairedis.rec /var/log/swss/sairedis.rec.b; echo > /var/log/swss/sairedis.rec']) dvs.runcmd(['sh', '-c', 'supervisorctl start vlanmgrd']) @@ -284,4 +284,4 @@ def test_VlanMgrdWarmRestart(dvs): (status, fvs) = tbl.get("Vlan20:11.0.0.11") assert status == True - swss_app_check_RestartCount_single(state_db, restart_count, "vlanmgrd") + swss_app_check_RestoreCount_single(state_db, restore_count, "vlanmgrd") diff --git a/warmrestart/warm_restart.cpp b/warmrestart/warm_restart.cpp index d3e5d6327b2..0e8013d09c6 100644 --- a/warmrestart/warm_restart.cpp +++ b/warmrestart/warm_restart.cpp @@ -55,13 +55,14 @@ void WarmStart::initialize(const std::string &app_name, * No need to check docker level knobs in this case since the whole system is being rebooted . * <2> Upon docker service start, first to check system knob. - * if enabled, docker warm start should be performed, otherwise system warm reboot will be ruined. - * If system knob disabled, while docker knob enabled, this is likely an individual docker warm restart request. + * if enabled, docker warm-start should be performed, otherwise system warm-reboot will be ruined. 
+ * If system knob disabled, while docker knob enabled, this is likely an individual docker + * warm-restart request. * Within each application which should take care warm start case, * when the system knob or docker knob enabled, we do further check on the - * actual warm start state ( restart_count), if no warm start state data available, - * the database has been flushed, do cold start. Otherwise warm start. + * actual warm-start state ( restore_count), if no warm-start state data available, + * the database has been flushed, do cold start. Otherwise warm-start. */ /* @@ -95,31 +96,31 @@ bool WarmStart::checkWarmStart(const std::string &app_name, // Create the entry for this app here. if (!warmStart.m_enabled) { - warmStart.m_stateWarmRestartTable->hset(app_name, "restart_count", "0"); + warmStart.m_stateWarmRestartTable->hset(app_name, "restore_count", "0"); return false; } - uint32_t restart_count = 0; - warmStart.m_stateWarmRestartTable->hget(app_name, "restart_count", value); + uint32_t restore_count = 0; + warmStart.m_stateWarmRestartTable->hget(app_name, "restore_count", value); if (value == "") { - SWSS_LOG_WARN("%s doing warm start, but restart_count not found in stateDB %s table, fall back to cold start", + SWSS_LOG_WARN("%s doing warm start, but restore_count not found in stateDB %s table, fall back to cold start", app_name.c_str(), STATE_WARM_RESTART_TABLE_NAME); warmStart.m_enabled = false; - warmStart.m_stateWarmRestartTable->hset(app_name, "restart_count", "0"); + warmStart.m_stateWarmRestartTable->hset(app_name, "restore_count", "0"); return false; } else { - restart_count = (uint32_t)stoul(value); + restore_count = (uint32_t)stoul(value); } - restart_count++; - warmStart.m_stateWarmRestartTable->hset(app_name, "restart_count", - std::to_string(restart_count)); + restore_count++; + warmStart.m_stateWarmRestartTable->hset(app_name, "restore_count", + std::to_string(restore_count)); - SWSS_LOG_NOTICE("%s doing warm start, restart count %d", 
app_name.c_str(), - restart_count); + SWSS_LOG_NOTICE("%s doing warm start, restore count %d", app_name.c_str(), + restore_count); return true; }