Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed bug in chassisd causing incorrect number of ASICs in CHASSIS_STATE_DB #560

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sonic-chassisd/scripts/chassisd
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,13 @@ class ModuleUpdater(logger.Logger):

def module_db_update(self):
notOnlineModules = []
my_index = None

for module_index in range(0, self.num_modules):
module_info_dict = self._get_module_info(module_index)
if self.my_slot == int(module_info_dict['slot']):
my_index = module_index

if module_info_dict is not None:
key = module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD]

Expand Down Expand Up @@ -353,6 +357,7 @@ class ModuleUpdater(logger.Logger):

# In line card push the hostname of the module and num_asics to the chassis state db.
# The hostname is used as key to access chassis app db entries
module_info_dict = self._get_module_info(my_index)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should move line 360 after line 361, so that it is only called when the module is not the Supervisor.

if not self._is_supervisor():
hostname_key = "{}{}".format(ModuleBase.MODULE_TYPE_LINE, int(self.my_slot) - 1)
hostname = try_get(device_info.get_hostname, default="None")
Expand Down
88 changes: 76 additions & 12 deletions sonic-chassisd/tests/test_chassisd.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,21 @@ def test_midplane_presence_supervisor():
fvs = midplane_table.get(name)
assert fvs == None

def verify_asic(asic_name, asic_pci_address, module_name, asic_id_in_module, asic_table):
    """Assert that the ``asic_name`` entry in ``asic_table`` carries the expected fields.

    The value returned by ``asic_table.get(asic_name)`` is used as-is unless it
    is a list, in which case its last element is converted to a dict first.
    The PCI address, owning module name and ASIC id within the module are then
    compared against the expected values.
    """
    entry = asic_table.get(asic_name)
    # A list result keeps the field/value pairs in its final element.
    fields = dict(entry[-1]) if isinstance(entry, list) else entry
    assert fields[CHASSIS_ASIC_PCI_ADDRESS_FIELD] == asic_pci_address
    assert fields[CHASSIS_MODULE_INFO_NAME_FIELD] == module_name
    assert fields[CHASSIS_ASIC_ID_IN_MODULE_FIELD] == asic_id_in_module

def verify_asic_in_module_table(lc, slot, num_asics, chassis_module_table):
    """Assert that the ``lc`` entry in ``chassis_module_table`` records the slot and ASIC count.

    The value returned by ``chassis_module_table.get(lc)`` is used as-is unless
    it is a list, in which case its last element is converted to a dict first.
    Both ``slot`` and ``num_asics`` are compared in their ``str()`` form.
    """
    entry = chassis_module_table.get(lc)
    # A list result keeps the field/value pairs in its final element.
    fields = dict(entry[-1]) if isinstance(entry, list) else entry
    assert fields['slot'] == str(slot)
    assert fields['num_asics'] == str(num_asics)

def test_asic_presence():
chassis = MockChassis()

Expand Down Expand Up @@ -603,16 +618,8 @@ def test_asic_presence():
fabric_asic_table = module_updater.asic_table
assert len(fabric_asic_table.getKeys()) == 2

def verify_fabric_asic(asic_name, asic_pci_address, module_name, asic_id_in_module):
fvs = fabric_asic_table.get(asic_name)
if isinstance(fvs, list):
fvs = dict(fvs[-1])
assert fvs[CHASSIS_ASIC_PCI_ADDRESS_FIELD] == asic_pci_address
assert fvs[CHASSIS_MODULE_INFO_NAME_FIELD] == module_name
assert fvs[CHASSIS_ASIC_ID_IN_MODULE_FIELD] == asic_id_in_module

verify_fabric_asic("asic4", "0000:04:00.0", name, "0")
verify_fabric_asic("asic5", "0000:05:00.0", name, "1")
verify_asic("asic4", "0000:04:00.0", name, "0", fabric_asic_table)
verify_asic("asic5", "0000:05:00.0", name, "1", fabric_asic_table)

#Card goes down and asics should be gone
fabric.set_oper_status(ModuleBase.MODULE_STATUS_OFFLINE)
Expand All @@ -626,8 +633,65 @@ def verify_fabric_asic(asic_name, asic_pci_address, module_name, asic_id_in_modu
midplane_table = module_updater.midplane_table
fvs = midplane_table.get(name)
assert fvs == None
verify_fabric_asic("asic4", "0000:04:00.0", name, "0")
verify_fabric_asic("asic5", "0000:05:00.0", name, "1")
verify_asic("asic4", "0000:04:00.0", name, "0", fabric_asic_table)
verify_asic("asic5", "0000:05:00.0", name, "1", fabric_asic_table)

def test_forwarding_asic_presence():
    """Check the ASIC and module tables of a ModuleUpdater built with the line card's slot.

    Builds a mock chassis with one supervisor and one two-ASIC line card, then
    checks that the ASIC table holds both ASICs while the card is online, is
    empty once the card goes offline, and is repopulated when the card comes
    back online. Finally checks the slot/num_asics entry in the hostname table.
    """
    chassis = MockChassis()

    # Supervisor
    supervisor = MockModule(0, "SUPERVISOR0", "Supervisor card",
                            ModuleBase.MODULE_TYPE_SUPERVISOR, 16, "RP1000101")
    supervisor.set_midplane_ip()
    chassis.module_list.append(supervisor)

    # Linecard
    name = "LINE-CARD0"
    slot = 1
    asic_list = [("4", "0000:04:00.0"), ("5", "0000:05:00.0")]
    module = MockModule(1, name, "36 port 400G card with 2 ASICs",
                        ModuleBase.MODULE_TYPE_LINE, slot, "LC1000101", asic_list)
    module.set_midplane_ip()
    chassis.module_list.append(module)

    # Run on linecard
    module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, module.supervisor_slot)

    module_updater.modules_num_update()
    module_updater.check_midplane_reachability()
    module.set_oper_status(ModuleBase.MODULE_STATUS_ONLINE)
    module_updater.module_db_update()
    asic_table = module_updater.asic_table
    assert len(asic_table.getKeys()) == 2

    # (table key, PCI address, ASIC id within the module) expected for each ASIC
    expected_asics = (
        ("LINE-CARD0|asic4", "0000:04:00.0", "0"),
        ("LINE-CARD0|asic5", "0000:05:00.0", "1"),
    )

    def check_asic_entries():
        for asic_key, pci_address, asic_id in expected_asics:
            verify_asic(asic_key, pci_address, name, asic_id, asic_table)

    # Check CHASSIS_ASIC_TABLE
    check_asic_entries()

    # Card goes down and asics should be gone
    module.set_oper_status(ModuleBase.MODULE_STATUS_OFFLINE)
    module_updater.module_db_update()
    assert len(asic_table.getKeys()) == 0

    # Card comes back online and the same entries are expected again
    module.set_oper_status(ModuleBase.MODULE_STATUS_ONLINE)
    module_updater.module_db_update()
    assert len(asic_table.getKeys()) == 2

    check_asic_entries()

    # Check CHASSIS_MODULE_TABLE
    verify_asic_in_module_table(name, slot, len(asic_list), module_updater.hostname_table)

def test_signal_handler():
exit_code = 0
Expand Down
Loading