Skip to content

Commit

Permalink
[TSA] Reliable TSA: Addressing pizza box issues (sonic-net#19217)
Browse files Browse the repository at this point in the history
* [TSA] Reliable TSA: Addressing pizza box issues

- Why I did it
Implement HLD https://github.com/skeesara-nokia/SONiC/blob/master/doc/voq/Reliable_TSA.md

- How I did it
A new attribute "tsa_enabled" has been added to CHASSIS_APP_DB; its value changes whenever TSA/TSB is issued on the supervisor (the default value is false). bgpcfgd subscribes to CHASSIS_APP_DB to receive updates on the newly added "tsa_enabled" attribute and, in conjunction with the CONFIG_DB "tsa_enabled" attribute value, determines whether the BGP operational state is TSA or TSB.

Signed-off-by: fountzou <ioannis.fountzoulas@nokia.com>
  • Loading branch information
fountzou authored Jun 19, 2024
1 parent 76069eb commit 99e0e1a
Show file tree
Hide file tree
Showing 11 changed files with 365 additions and 19 deletions.
22 changes: 18 additions & 4 deletions dockers/docker-fpm-frr/base_image_files/TS
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,31 @@
[ -f /etc/sonic/sonic-environment ] && . /etc/sonic/sonic-environment

PLATFORM=${PLATFORM:-`sonic-cfggen -H -v DEVICE_METADATA.localhost.platform`}
switch_type=`sonic-db-cli CONFIG_DB hget 'DEVICE_METADATA|localhost' 'switch_type'`
TSA_CHASSIS_STATE=false

if [[ $switch_type == 'voq' ]]; then
TSA_CHASSIS_STATE="$(sonic-db-cli CHASSIS_APP_DB HGET "BGP_DEVICE_GLOBAL|STATE" tsa_enabled)"
fi

if [[ $1 == "TSA" ]]; then
TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "true"}}}'
log_msg='System Mode: Normal -> Maintenance'
if [[ $TSA_CHASSIS_STATE == true ]]; then
log_msg='System Mode: Maintenance -> Maintenance'
else
log_msg='System Mode: Normal -> Maintenance'
fi
err_msg='System is already in Maintenance'
desired_tsa_state=true
desired_tsa_state=true
elif [[ $1 == "TSB" ]]; then
TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "false"}}}'
log_msg='System Mode: Maintenance -> Normal'
if [[ $TSA_CHASSIS_STATE == true ]]; then
log_msg='System Mode: Maintenance -> Maintenance'
else
log_msg='System Mode: Maintenance -> Normal'
fi
err_msg='System is already in Normal mode'
desired_tsa_state=false
desired_tsa_state=false
fi

# Parse the device specific asic conf file, if it exists
Expand Down
13 changes: 12 additions & 1 deletion dockers/docker-fpm-frr/base_image_files/TSA
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,18 @@ if [ "$EUID" -ne 0 ] ; then
fi

if [ -f /etc/sonic/chassisdb.conf ]; then
rexec all -c "sudo TSA chassis"
CHASSIS_TSA_STATE_UPDATE="CHASSIS_APP_DB HMSET "BGP_DEVICE_GLOBAL\|STATE" tsa_enabled "true""
CONFIG_DB_TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "true"}}}'
current_tsa_state="$(sonic-cfggen -d -v BGP_DEVICE_GLOBAL.STATE.tsa_enabled)"
if [[ $current_tsa_state == true ]]; then
echo "Chassis is already in Maintenance"
logger -t TSA -p user.info "Chassis is already in Maintenance"
else
sonic-db-cli $CHASSIS_TSA_STATE_UPDATE
sonic-cfggen -a "$CONFIG_DB_TSA_STATE_UPDATE" -w
echo "Chassis Mode: Normal -> Maintenance"
logger -t TSA -p user.info "Chassis Mode: Normal -> Maintenance"
fi
echo "Please execute \"rexec all -c 'sudo config save -y'\" to preserve System mode in Maintenance after reboot\
or config reload on all linecards"
exit 0
Expand Down
16 changes: 13 additions & 3 deletions dockers/docker-fpm-frr/base_image_files/TSB
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,21 @@ if [ "$EUID" -ne 0 ] ; then
exit 1
fi

# If run on supervisor of chassis, trigger remote execution of TSB on all linecards
if [ -f /etc/sonic/chassisdb.conf ]; then
rexec all -c "sudo TSB chassis"
CHASSIS_TSA_STATE_UPDATE="CHASSIS_APP_DB HMSET "BGP_DEVICE_GLOBAL\|STATE" tsa_enabled "false""
CONFIG_DB_TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "false"}}}'
current_tsa_state="$(sonic-cfggen -d -v BGP_DEVICE_GLOBAL.STATE.tsa_enabled)"
if [[ $current_tsa_state == false ]]; then
echo "Chassis is already in Normal mode"
logger -t TSB -p user.info "Chassis is already in Normal mode"
else
sonic-db-cli $CHASSIS_TSA_STATE_UPDATE
sonic-cfggen -a "$CONFIG_DB_TSA_STATE_UPDATE" -w
echo "Chassis Mode: Maintenance -> Normal"
logger -t TSB -p user.info "Chassis Mode: Maintenance -> Normal"
fi
echo "Please execute \"rexec all -c 'sudo config save -y'\" to preserve System mode in Normal state after reboot\
or config reload on all linecards"
or config reload on all linecards"
exit 0
fi

Expand Down
12 changes: 12 additions & 0 deletions files/build_templates/docker_image_ctl.j2
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,18 @@ function postStartAction()
$SONIC_DB_CLI CONFIG_DB SET "CONFIG_DB_INITIALIZED" "1"
fi

# In SUP, enforce CHASSIS_APP_DB.tsa_enabled to be in sync with BGP_DEVICE_GLOBAL.STATE.tsa_enabled
if [[ -z "$DEV" ]] && [[ -f /etc/sonic/chassisdb.conf ]]; then
tsa_cfg="$($SONIC_DB_CLI CONFIG_DB HGET "BGP_DEVICE_GLOBAL|STATE" "tsa_enabled")"
if [[ -n "$tsa_cfg" ]]; then
docker exec -i ${DOCKERNAME} $SONIC_DB_CLI CHASSIS_APP_DB HMSET "BGP_DEVICE_GLOBAL|STATE" tsa_enabled ${tsa_cfg}
OP_CODE=$?
if [ $OP_CODE -ne 0 ]; then
echo "Err: Cmd failed (exit code $OP_CODE). CHASSIS_APP_DB and CONFIG_DB may be incosistent wrt tsa_enabled"
fi
fi
fi

# Add redis UDS to the redis group and give read/write access to the group
REDIS_SOCK="/var/run/redis${DEV}/redis.sock"
else
Expand Down
13 changes: 13 additions & 0 deletions files/image_config/config-setup/config-setup
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,19 @@ do_db_migration()
/usr/local/bin/db_migrator.py -o migrate
fi
sonic-db-cli CONFIG_DB SET "CONFIG_DB_INITIALIZED" "1"

#Enforce CHASSIS_APP_DB.tsa_enabled to be in sync with BGP_DEVICE_GLOBAL.STATE.tsa_enabled
if [[ -f /etc/sonic/chassisdb.conf ]]; then
tsa_cfg="$(sonic-db-cli CONFIG_DB HGET "BGP_DEVICE_GLOBAL|STATE" "tsa_enabled")"
sonic-db-cli CHASSIS_APP_DB HMSET "BGP_DEVICE_GLOBAL|STATE" tsa_enabled ${tsa_cfg}
OP_CODE=$?

if [ $OP_CODE -ne 0 ]; then
err_msg="Cmd failed (exit code $OP_CODE). CHASSIS_APP_DB and CONFIG_DB may be incosistent wrt tsa_enabled."
echo "$err_msg"
logger -t CHASSIS_APP_DB -p user.info "$err_msg"
fi
fi
}

# Perform configuration migration from backup copy.
Expand Down
6 changes: 6 additions & 0 deletions src/sonic-bgpcfgd/bgpcfgd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import traceback

from swsscommon import swsscommon
from sonic_py_common import device_info

from .config import ConfigMgr
from .directory import Directory
Expand All @@ -20,6 +21,7 @@
from .managers_static_rt import StaticRouteMgr
from .managers_rm import RouteMapMgr
from .managers_device_global import DeviceGlobalCfgMgr
from .managers_chassis_app_db import ChassisAppDbMgr
from .static_rt_timer import StaticRouteTimer
from .runner import Runner, signal_handler
from .template import TemplateFabric
Expand Down Expand Up @@ -74,6 +76,10 @@ def do_work():
# Device Global Manager
DeviceGlobalCfgMgr(common_objs, "CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME),
]

if device_info.is_chassis():
managers.append(ChassisAppDbMgr(common_objs, "CHASSIS_APP_DB", "BGP_DEVICE_GLOBAL"))

runner = Runner(common_objs['cfg_mgr'])
for mgr in managers:
runner.add_manager(mgr)
Expand Down
50 changes: 50 additions & 0 deletions src/sonic-bgpcfgd/bgpcfgd/managers_chassis_app_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from .manager import Manager
from .managers_device_global import DeviceGlobalCfgMgr
from .log import log_err, log_debug, log_notice
import re
from swsscommon import swsscommon

class ChassisAppDbMgr(Manager):
    """React to changes of the supervisor's tsa_enabled state.

    The value received in set_handler() is only applied to the device while
    the cached CONFIG_DB "tsa_enabled" value of this device is "false".
    """

    def __init__(self, common_objs, db, table):
        """
        Initialize the object
        :param common_objs: common object dictionary
        :param db: name of the db
        :param table: name of the table in the db
        """
        # Cached CONFIG_DB "tsa_enabled" value of this device. Stays "" until
        # on_lc_tsa_status_change() finds the slot in the directory.
        self.lc_tsa = ""
        self.directory = common_objs['directory']
        # Dedicated DeviceGlobalCfgMgr instance; set_handler() uses its
        # cfg_mgr and its isolate_unisolate_device() method.
        self.dev_cfg_mgr = DeviceGlobalCfgMgr(common_objs, "CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME)
        # Register on_lc_tsa_status_change as the callback for the CONFIG_DB
        # "tsa_enabled" slot (presumably invoked on every change of that slot
        # -- confirm against Directory.subscribe).
        self.directory.subscribe([("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME, "tsa_enabled"),], self.on_lc_tsa_status_change)
        super(ChassisAppDbMgr, self).__init__(
            common_objs,
            [],  # presumably the dependency list (none) -- confirm against Manager.__init__
            db,
            table,
        )

    def on_lc_tsa_status_change(self):
        """Refresh the cached CONFIG_DB "tsa_enabled" value from the directory."""
        if self.directory.path_exist("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME, "tsa_enabled"):
            self.lc_tsa = self.directory.get_slot("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME)["tsa_enabled"]
        log_debug("ChassisAppDbMgr:: LC TSA update handler status %s" % self.lc_tsa)

    def set_handler(self, key, data):
        """
        Handle a SET notification for the subscribed table
        :param key: key of the changed entry (not used by this handler)
        :param data: field/value mapping of the changed entry
        :return: False when data is empty or has no "tsa_enabled" field,
                 True otherwise
        """
        log_debug("ChassisAppDbMgr:: set handler")

        if not data:
            log_err("ChassisAppDbMgr:: data is None")
            return False

        if "tsa_enabled" in data:
            # The received state is only pushed to the device while the
            # device's own CONFIG_DB tsa_enabled value is "false".
            if self.lc_tsa == "false":
                self.dev_cfg_mgr.cfg_mgr.commit()
                self.dev_cfg_mgr.cfg_mgr.update()
                self.dev_cfg_mgr.isolate_unisolate_device(data["tsa_enabled"])
            # NOTE(review): nesting reconstructed from a flattened diff; this
            # return is assumed to belong to the outer `if` -- confirm.
            return True
        return False

    def del_handler(self, key):
        """
        Handle a DEL notification; only logs the event
        :param key: key of the removed entry (not used by this handler)
        :return: always True
        """
        log_debug("ChassisAppDbMgr:: del handler")
        return True
32 changes: 28 additions & 4 deletions src/sonic-bgpcfgd/bgpcfgd/managers_device_global.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .manager import Manager
from .log import log_err, log_debug, log_notice
from swsscommon import swsscommon
from sonic_py_common import device_info

class DeviceGlobalCfgMgr(Manager):
"""This class responds to change in device-specific state"""
Expand All @@ -20,6 +21,7 @@ def __init__(self, common_objs, db, table):
:param table: name of the table in the db
"""
self.switch_type = ""
self.chassis_tsa = ""
self.directory = common_objs['directory']
self.cfg_mgr = common_objs['cfg_mgr']
self.constants = common_objs['constants']
Expand Down Expand Up @@ -97,11 +99,16 @@ def configure_tsa(self, data=None):
if "tsa_enabled" in data:
state = data["tsa_enabled"]

if self.is_update_required("tsa_enabled", state):
self.chassis_tsa = self.get_chassis_tsa_status()
requires_update = self.is_update_required("tsa_enabled", state)

if state in ["true", "false"] and self.directory.path_exist(self.db_name, self.table_name, "tsa_enabled"):
self.directory.put(self.db_name, self.table_name, "tsa_enabled", state)

if requires_update and self.chassis_tsa == "false":
self.cfg_mgr.commit()
self.cfg_mgr.update()
if self.isolate_unisolate_device(state):
self.directory.put(self.db_name, self.table_name, "tsa_enabled", state)
self.isolate_unisolate_device(state)
else:
log_notice("DeviceGlobalCfgMgr:: TSA configuration is up-to-date")

Expand Down Expand Up @@ -167,7 +174,9 @@ def check_state_and_get_tsa_routemaps(self, cfg):
cmd = ""
if self.directory.path_exist("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME, "tsa_enabled"):
tsa_status = self.directory.get_slot("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME)["tsa_enabled"]
if tsa_status == "true":
chassis_tsa = self.get_chassis_tsa_status()

if tsa_status == "true" or chassis_tsa == "true":
cmds = cfg.replace("#012", "\n").split("\n")
log_notice("DeviceGlobalCfgMgr:: Device is isolated. Applying TSA route-maps")
cmd = self.get_ts_routemaps(cmds, self.tsa_template)
Expand Down Expand Up @@ -228,6 +237,21 @@ def __extract_out_route_map_names(self, cmds):
route_map_names.add(result.group(1))
return route_map_names

def get_chassis_tsa_status(self):
    """
    Read the chassis-level tsa_enabled value from CHASSIS_APP_DB
    :return: the value stored under "BGP_DEVICE_GLOBAL|STATE" / "tsa_enabled";
             "false" when the device is not a chassis, when the lookup
             raises, or when the lookup yields no value
    """
    chassis_tsa_status = "false"

    if not device_info.is_chassis():
        return chassis_tsa_status

    try:
        ch = swsscommon.SonicV2Connector(use_unix_socket_path=False)
        ch.connect(ch.CHASSIS_APP_DB, False)
        value = ch.get(ch.CHASSIS_APP_DB, "BGP_DEVICE_GLOBAL|STATE", 'tsa_enabled')
    except Exception as e:
        # Deliberately best-effort: a failed lookup is logged and the
        # "false" default is returned.
        log_err("Got an exception {}".format(e))
    else:
        # Keep the "false" default when the lookup yields nothing (e.g. the
        # entry has not been written yet). Callers compare the result against
        # the strings "true"/"false", so None must not leak out.
        if value is not None:
            chassis_tsa_status = value

    return chassis_tsa_status

def downstream_isolate_unisolate(self, idf_isolation_state):
""" API to apply IDF configuration """

Expand Down
5 changes: 4 additions & 1 deletion src/sonic-bgpcfgd/bgpcfgd/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ def add_manager(self, manager):
table_name = manager.get_table_name()
db = swsscommon.SonicDBConfig.getDbId(db_name)
if db not in self.db_connectors:
self.db_connectors[db] = swsscommon.DBConnector(db_name, 0)
if db_name == "CHASSIS_APP_DB":
self.db_connectors[db] = swsscommon.DBConnector(db_name, 0, True, '')
else:
self.db_connectors[db] = swsscommon.DBConnector(db_name, 0)

if table_name not in self.callbacks[db]:
conn = self.db_connectors[db]
Expand Down
Loading

0 comments on commit 99e0e1a

Please sign in to comment.