Skip to content

Commit

Permalink
[chassisd] Monitor midplane status (#127)
Browse files Browse the repository at this point in the history
Enhance chassisd to monitor midplane status of the cards in modular chassis

HLD: sonic-net/SONiC#646

-What I did
Added monitoring of the midplane (the internal Ethernet network) between the supervisor and the line-card modules.

-How I did it
Along with module status monitoring, chassisd now also monitors midplane reachability between the supervisor and the modules.
It updates the STATE_DB with the status information; 'show chassis-modules midplane-status' reads it from the STATE_DB.
  • Loading branch information
mprabhu-nokia authored Dec 16, 2020
1 parent b0be7ca commit b674dff
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 5 deletions.
64 changes: 59 additions & 5 deletions sonic-chassisd/scripts/chassisd
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ CHASSIS_MODULE_INFO_DESC_FIELD = 'desc'
CHASSIS_MODULE_INFO_SLOT_FIELD = 'slot'
CHASSIS_MODULE_INFO_OPERSTATUS_FIELD = 'oper_status'

CHASSIS_MIDPLANE_INFO_TABLE = 'CHASSIS_MIDPLANE_TABLE'
CHASSIS_MIDPLANE_INFO_KEY_TEMPLATE = 'CHASSIS_MIDPLANE {}'
CHASSIS_MIDPLANE_INFO_NAME_FIELD = 'name'
CHASSIS_MIDPLANE_INFO_IP_FIELD = 'ip_address'
CHASSIS_MIDPLANE_INFO_ACCESS_FIELD = 'access'

CHASSIS_INFO_UPDATE_PERIOD_SECS = 10

CHASSIS_LOAD_ERROR = 1
Expand All @@ -57,6 +63,7 @@ SELECT_TIMEOUT = 1000
NOT_AVAILABLE = 'N/A'
INVALID_SLOT = ModuleBase.MODULE_INVALID_SLOT
INVALID_MODULE_INDEX = -1
INVALID_IP = '0.0.0.0'

MODULE_ADMIN_DOWN = 0
MODULE_ADMIN_UP = 1
Expand All @@ -72,7 +79,8 @@ def try_get(callback, *args, **kwargs):
"""
Handy function to invoke the callback and catch NotImplementedError
:param callback: Callback to be invoked
:param default: Default return value if exception occur
:param args: Arguments to be passed to callback
:param kwargs: Default return value if exception occur
:return: Default return value if exception occur else return value of the callback
"""
default = kwargs.get('default', NOT_AVAILABLE)
Expand Down Expand Up @@ -149,11 +157,16 @@ class ModuleUpdater(logger.Logger):
state_db = daemon_base.db_connect("STATE_DB")
self.chassis_table = swsscommon.Table(state_db, CHASSIS_INFO_TABLE)
self.module_table = swsscommon.Table(state_db, CHASSIS_MODULE_INFO_TABLE)
self.midplane_table = swsscommon.Table(state_db, CHASSIS_MIDPLANE_INFO_TABLE)
self.info_dict_keys = [CHASSIS_MODULE_INFO_NAME_FIELD,
CHASSIS_MODULE_INFO_DESC_FIELD,
CHASSIS_MODULE_INFO_SLOT_FIELD,
CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]

self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False)
if not self.midplane_initialized:
self.log_error("Chassisd midplane intialization failed")

def deinit(self):
"""
Destructor of ModuleUpdater
Expand All @@ -163,6 +176,8 @@ class ModuleUpdater(logger.Logger):
for module_index in range(0, self.num_modules):
name = try_get(self.chassis.get_module(module_index).get_name)
self.module_table._del(name)
if self.midplane_table.get(name) is not None:
self.midplane_table._del(name)

if self.chassis_table is not None:
self.chassis_table._del(CHASSIS_INFO_KEY_TEMPLATE.format(1))
Expand Down Expand Up @@ -218,6 +233,41 @@ class ModuleUpdater(logger.Logger):

return module_info_dict

def _is_supervisor(self):
if self.my_slot == self.supervisor_slot:
return True
else:
return False

def check_midplane_reachability(self):
    """
    Write midplane IP address and reachability of peer modules to the midplane table

    Returns immediately if midplane initialization did not succeed. Fabric cards
    are always skipped. On the supervisor every module except the supervisor
    itself is checked; on a line-card only the supervisor is checked.
    :return: None
    """
    if not self.midplane_initialized:
        return

    for index, module in enumerate(self.chassis.get_all_modules()):
        # Skip fabric cards
        if module.get_type() == ModuleBase.MODULE_TYPE_FABRIC:
            continue

        if self._is_supervisor():
            # On supervisor skip checking for supervisor
            if module.get_slot() == self.supervisor_slot:
                continue
        else:
            # On line-card check only supervisor
            if module.get_slot() != self.supervisor_slot:
                continue

        # Each getter falls back to its default when try_get takes its
        # exception path; the key falls back to the module's position.
        module_key = try_get(module.get_name, default='MODULE {}'.format(index))
        midplane_ip = try_get(module.get_midplane_ip, default=INVALID_IP)
        midplane_access = try_get(module.is_midplane_reachable, default=False)

        # Update db with midplane information
        fvs = swsscommon.FieldValuePairs([(CHASSIS_MIDPLANE_INFO_IP_FIELD, midplane_ip),
                                          (CHASSIS_MIDPLANE_INFO_ACCESS_FIELD, str(midplane_access))])
        self.midplane_table.set(module_key, fvs)

#
# Config Manager task ========================================================
#
Expand Down Expand Up @@ -306,14 +356,17 @@ class ChassisdDaemon(daemon_base.DaemonBase):
self.module_updater.modules_num_update()

# Check for valid slot numbers
my_slot = try_get(platform_chassis.get_my_slot, default=INVALID_SLOT)
supervisor_slot = try_get(platform_chassis.get_supervisor_slot, default=INVALID_SLOT)
if (my_slot == INVALID_SLOT) or (supervisor_slot == INVALID_SLOT):
self.module_updater.my_slot = try_get(platform_chassis.get_my_slot,
default=INVALID_SLOT)
self.module_updater.supervisor_slot = try_get(platform_chassis.get_supervisor_slot,
default=INVALID_SLOT)
if ((self.module_updater.my_slot == INVALID_SLOT) or
(self.module_updater.supervisor_slot == INVALID_SLOT)):
self.log_error("Chassisd not supported for this platform")
sys.exit(CHASSIS_NOT_SUPPORTED)

# Start configuration manager task on supervisor module
if supervisor_slot == my_slot:
if self.module_updater.supervisor_slot == self.module_updater.my_slot:
config_manager = ConfigManagerTask()
config_manager.task_run()

Expand All @@ -322,6 +375,7 @@ class ChassisdDaemon(daemon_base.DaemonBase):

while not self.stop.wait(CHASSIS_INFO_UPDATE_PERIOD_SECS):
self.module_updater.module_db_update()
self.module_updater.check_midplane_reachability()

self.log_info("Stop daemon main loop")

Expand Down
20 changes: 20 additions & 0 deletions sonic-chassisd/tests/mock_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def __init__(self, module_index, module_name, module_desc, module_type, module_s
self.hw_slot = module_slot
self.module_status = ''
self.admin_state = 1
self.supervisor_slot = 16
self.midplane_access = False

def get_name(self):
return self.module_name
Expand All @@ -52,10 +54,25 @@ def set_admin_state(self, up):
def get_admin_state(self):
    """Return the value currently stored in self.admin_state."""
    return self.admin_state

def get_midplane_ip(self):
    """Return the midplane IP address stored on this mock module."""
    # NOTE(review): in the visible code midplane_ip is only assigned by
    # set_midplane_ip(); calling this first would raise AttributeError --
    # confirm whether __init__ should provide a default.
    return self.midplane_ip

def set_midplane_ip(self):
    """
    Assign this mock module's midplane IP address from its slot

    The module in the supervisor slot gets '192.168.1.100'; any other module
    gets '192.168.1.<slot>'.
    """
    slot = self.get_slot()
    on_supervisor_slot = self.supervisor_slot == slot
    self.midplane_ip = '192.168.1.100' if on_supervisor_slot else '192.168.1.{}'.format(slot)

def is_midplane_reachable(self):
    """Return the midplane reachability flag held in self.midplane_access."""
    return self.midplane_access

def set_midplane_reachable(self, up):
    """
    Store the midplane reachability flag of this mock module
    :param up: Value to store in self.midplane_access
    """
    self.midplane_access = up

class MockChassis:
def __init__(self):
    """Create a mock chassis that starts with an empty module list."""
    # Modules belonging to this chassis
    self.module_list = []
    # NOTE(review): not read anywhere in the visible code -- confirm it is still needed
    self.midplane_supervisor_access = False

def get_num_modules(self):
return len(self.module_list)
Expand All @@ -72,3 +89,6 @@ def get_module_index(self, module_name):
if module.module_name == module_name:
return module.module_index
return -1

def init_midplane_switch(self):
    """Mock of midplane switch initialization; always returns True."""
    return True
2 changes: 2 additions & 0 deletions sonic-chassisd/tests/mock_swsscommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ def get(self, key):
return self.mock_dict[key]
return None

def size(self):
    """Return the number of entries held in self.mock_dict."""
    return len(self.mock_dict)

class FieldValuePairs:
def __init__(self, fvs):
Expand Down
128 changes: 128 additions & 0 deletions sonic-chassisd/tests/test_chassisd.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,131 @@ def test_configupdater_check_num_modules():
module_updater.deinit()
fvs = module_updater.chassis_table.get(CHASSIS_INFO_KEY_TEMPLATE.format(1))
assert fvs == None

def test_midplane_presence_modules():
    """
    Midplane status as seen from the supervisor.

    Builds a chassis with a supervisor, a line-card and a fabric card, runs the
    updater with my_slot set to the supervisor slot and checks that only the
    line-card gets a midplane table entry, that the entry follows the
    line-card's reachability, and that deinit removes it.
    """
    chassis = MockChassis()

    # Supervisor
    index = 0
    name = "SUPERVISOR0"
    desc = "Supervisor card"
    slot = 16
    module_type = ModuleBase.MODULE_TYPE_SUPERVISOR
    supervisor = MockModule(index, name, desc, module_type, slot)
    supervisor.set_midplane_ip()
    chassis.module_list.append(supervisor)

    # Linecard
    index = 1
    name = "LINE-CARD0"
    desc = "36 port 400G card"
    slot = 1
    module_type = ModuleBase.MODULE_TYPE_LINE
    module = MockModule(index, name, desc, module_type, slot)
    module.set_midplane_ip()
    chassis.module_list.append(module)

    # Fabric-card: third entry in module_list, so it must not reuse index 1
    index = 2
    name = "FABRIC-CARD0"
    desc = "Switch fabric card"
    slot = 17
    module_type = ModuleBase.MODULE_TYPE_FABRIC
    fabric = MockModule(index, name, desc, module_type, slot)
    chassis.module_list.append(fabric)

    # Run on supervisor
    module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis)
    module_updater.supervisor_slot = supervisor.get_slot()
    module_updater.my_slot = supervisor.get_slot()
    module_updater.modules_num_update()
    module_updater.module_db_update()
    module_updater.check_midplane_reachability()

    midplane_table = module_updater.midplane_table
    # Check only one entry in database
    assert 1 == midplane_table.size()

    # Check fields in database
    name = "LINE-CARD0"
    fvs = midplane_table.get(name)
    assert fvs is not None
    assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD]
    assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD]

    # Access defaults to down on the mock, so bring the line-card up first and
    # then take it down again; both transitions must show up in the database.
    for reachable in (True, False):
        module.set_midplane_reachable(reachable)
        module_updater.check_midplane_reachability()
        fvs = midplane_table.get(name)
        assert fvs is not None
        assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD]
        assert str(reachable) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD]

    # Deinit
    module_updater.deinit()
    fvs = midplane_table.get(name)
    assert fvs is None

def test_midplane_presence_supervisor():
    """
    Midplane status as seen from a line-card.

    Builds a chassis with a supervisor, a line-card and a fabric card, runs the
    updater with my_slot set to the line-card slot and checks that only the
    supervisor gets a midplane table entry, that the entry follows the
    supervisor's reachability, and that deinit removes it.
    """
    chassis = MockChassis()

    # Supervisor
    index = 0
    name = "SUPERVISOR0"
    desc = "Supervisor card"
    slot = 16
    module_type = ModuleBase.MODULE_TYPE_SUPERVISOR
    supervisor = MockModule(index, name, desc, module_type, slot)
    supervisor.set_midplane_ip()
    chassis.module_list.append(supervisor)

    # Linecard
    index = 1
    name = "LINE-CARD0"
    desc = "36 port 400G card"
    slot = 1
    module_type = ModuleBase.MODULE_TYPE_LINE
    module = MockModule(index, name, desc, module_type, slot)
    module.set_midplane_ip()
    chassis.module_list.append(module)

    # Fabric-card: third entry in module_list, so it must not reuse index 1
    index = 2
    name = "FABRIC-CARD0"
    desc = "Switch fabric card"
    slot = 17
    module_type = ModuleBase.MODULE_TYPE_FABRIC
    fabric = MockModule(index, name, desc, module_type, slot)
    chassis.module_list.append(fabric)

    # Run on line-card
    module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis)
    module_updater.supervisor_slot = supervisor.get_slot()
    module_updater.my_slot = module.get_slot()
    module_updater.modules_num_update()
    module_updater.module_db_update()
    module_updater.check_midplane_reachability()

    midplane_table = module_updater.midplane_table
    # Check only one entry in database
    assert 1 == midplane_table.size()

    # Check fields in database
    name = "SUPERVISOR0"
    fvs = midplane_table.get(name)
    assert fvs is not None
    assert supervisor.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD]
    assert str(supervisor.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD]

    # Access defaults to down on the mock, so bring the supervisor up first and
    # then take it down again; both transitions must show up in the database.
    for reachable in (True, False):
        supervisor.set_midplane_reachable(reachable)
        module_updater.check_midplane_reachability()
        fvs = midplane_table.get(name)
        assert fvs is not None
        assert supervisor.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD]
        assert str(reachable) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD]

    # Deinit
    module_updater.deinit()
    fvs = midplane_table.get(name)
    assert fvs is None

0 comments on commit b674dff

Please sign in to comment.