Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

container_checker on supervisor should check containers based on asic presence #11442

Merged
merged 5 commits into from
Aug 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions files/image_config/monit/container_checker
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import swsssdk
from sonic_py_common import multi_asic, device_info
from swsscommon import swsscommon


def get_expected_running_containers():
"""
@summary: This function will get the expected running & always-enabled containers by following the rule:
Expand All @@ -41,7 +40,19 @@ def get_expected_running_containers():

expected_running_containers = set()
always_running_containers = set()


# Get current asic presence list. For multi_asic system, multi instance containers
# should be checked only for asics present.
asics_id_presence = multi_asic.get_asic_presence_list()

# Some services may run all the instances irrespective of asic presence.
# Add those to exception list.
anamehra marked this conversation as resolved.
Show resolved Hide resolved
# database service: Currently services have dependency on all database services to
# be up irrespective of asic presence.
# bgp service: Currently bgp runs all instances. Once this is fixed to be config driven,
# it will be removed from exception list.
run_all_instance_list = ['database', 'bgp']

for container_name in feature_table.keys():
if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]:
if multi_asic.is_multi_asic():
Expand All @@ -50,7 +61,8 @@ def get_expected_running_containers():
if feature_table[container_name]["has_per_asic_scope"] == "True":
num_asics = multi_asic.get_num_asics()
for asic_id in range(num_asics):
expected_running_containers.add(container_name + str(asic_id))
if asic_id in asics_id_presence or container_name in run_all_instance_list:
expected_running_containers.add(container_name + str(asic_id))
else:
expected_running_containers.add(container_name)
if feature_table[container_name]["state"] == 'always_enabled':
Expand All @@ -60,9 +72,11 @@ def get_expected_running_containers():
if feature_table[container_name]["has_per_asic_scope"] == "True":
num_asics = multi_asic.get_num_asics()
for asic_id in range(num_asics):
always_running_containers.add(container_name + str(asic_id))
if asic_id in asics_id_presence or container_name in run_all_instance_list:
always_running_containers.add(container_name + str(asic_id))
else:
always_running_containers.add(container_name)

if device_info.is_supervisor():
always_running_containers.add("database-chassis")
return expected_running_containers, always_running_containers
Expand Down
31 changes: 30 additions & 1 deletion src/sonic-py-common/sonic_py_common/multi_asic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
NEIGH_DEVICE_METADATA_CFG_DB_TABLE = 'DEVICE_NEIGHBOR_METADATA'
DEFAULT_NAMESPACE = ''
PORT_ROLE = 'role'

# Database name handed to swsscommon.DBConnector by get_asic_presence_list().
CHASSIS_STATE_DB='CHASSIS_STATE_DB'
# Table inside CHASSIS_STATE_DB whose keys (asic0, asic1, ...) are read by
# get_asic_presence_list() to learn which asics are present on a supervisor.
CHASSIS_ASIC_INFO_TABLE='CHASSIS_ASIC_TABLE'

# Dictionary to cache config_db connection handle per namespace
# to prevent duplicate connections from being opened
Expand Down Expand Up @@ -480,3 +481,31 @@ def validate_namespace(namespace):
return True
else:
return False

def get_asic_presence_list():
    """
    @summary: Return the ids of the asics that are present on this card.
              - Non multi-asic system: the list is empty.
              - Multi-asic, non-supervisor card (e.g. line card): asics are assumed not to be
                removable, so every id from 0 to get_num_asics() - 1 is reported.
              - Supervisor card: only the asics listed in CHASSIS_ASIC_TABLE of CHASSIS_STATE_DB
                are reported, i.e. the asics of fabric cards that were inserted and detected.
                The function assumes that the first N asic ids (asic0 to asic(N-1)) in
                CHASSIS_ASIC_TABLE belong to the supervisor, where N is the max number of asics
                supported by the chassis.
    @return: List of asic ids (int) present
    """
    # Nothing to report on a single-asic platform.
    if not is_multi_asic():
        return []

    # Line cards and other non-supervisor cards: all supported asics count as present.
    if not is_supervisor():
        return list(range(get_num_asics()))

    # Supervisor: some fabric cards may be missing, so consult the table the platform
    # populates on fabric card detection.
    chassis_state_db = swsscommon.DBConnector(CHASSIS_STATE_DB, 0, True)
    asic_info_table = swsscommon.Table(chassis_state_db, CHASSIS_ASIC_INFO_TABLE)
    if not asic_info_table:
        return []

    # Each key is an asic name (asic0, asic1, ... asicN); keep only its numeric id.
    return [
        int(get_asic_id_from_name(asic_name))
        for asic_name in asic_info_table.getKeys()
    ]