You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
duthosts = <tests.common.devices.duthosts.DutHosts object at 0x7eff9ed30890>
enum_dut_feature = 'str2-7050cx3-acs-07|teamd'
enum_rand_one_per_hwsku_frontend_hostname = 'str2-7050cx3-acs-07'
tbinfo = {'auto_recover': 'True', 'comment': 'hellogemini', 'conf-name': 'vms17-dual-t0-7050-1', 'duts': ['str2-7050cx3-acs-06', 'str2-7050cx3-acs-07'], ...}
def test_containers_autorestart(duthosts, enum_dut_feature, enum_rand_one_per_hwsku_frontend_hostname, tbinfo):
"""
@summary: Test the auto-restart feature of each container against two scenarios: killing
a non-critical process to verify the container is still running; killing each
critical process to verify the container will be stopped and restarted
"""
dut_name, feature = decode_dut_port_name(enum_dut_feature)
pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname and feature != "unknown",
"Skip test on dut host {} (chosen {}) feature {}"
.format(dut_name, enum_rand_one_per_hwsku_frontend_hostname, feature))
duthost = duthosts[dut_name]
> run_test_on_single_container(duthost, feature, tbinfo)
dut_name = 'str2-7050cx3-acs-07'
duthost = <MultiAsicSonicHost> str2-7050cx3-acs-07
duthosts = <tests.common.devices.duthosts.DutHosts object at 0x7eff9ed30890>
enum_dut_feature = 'str2-7050cx3-acs-07|teamd'
enum_rand_one_per_hwsku_frontend_hostname = 'str2-7050cx3-acs-07'
feature = 'teamd'
tbinfo = {'auto_recover': 'True', 'comment': 'hellogemini', 'conf-name': 'vms17-dual-t0-7050-1', 'duts': ['str2-7050cx3-acs-06', 'str2-7050cx3-acs-07'], ...}
autorestart/test_container_autorestart.py:389:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
duthost = <MultiAsicSonicHost> str2-7050cx3-acs-07, container_name = 'teamd'
tbinfo = {'auto_recover': 'True', 'comment': 'hellogemini', 'conf-name': 'vms17-dual-t0-7050-1', 'duts': ['str2-7050cx3-acs-06', 'str2-7050cx3-acs-07'], ...}
def run_test_on_single_container(duthost, container_name, tbinfo):
container_autorestart_states = duthost.get_container_autorestart_states()
disabled_containers = get_disabled_container_list(duthost)
skip_condition = disabled_containers[:]
skip_condition.append("database")
skip_condition.append("acms")
if tbinfo["topo"]["type"] != "t0":
skip_condition.append("radv")
# Skip testing the database container, radv container on T1 devices and containers/services which are disabled
pytest_require(container_name not in skip_condition,
"Skipping test for container {}".format(container_name))
is_running = is_container_running(duthost, container_name)
pytest_assert(is_running, "Container '{}' is not running. Exiting...".format(container_name))
bgp_neighbors = duthost.get_bgp_neighbors()
up_bgp_neighbors = [ k.lower() for k, v in bgp_neighbors.items() if v["state"] == "established" ]
logger.info("Start testing the container '{}'...".format(container_name))
restore_disabled_state = False
if container_autorestart_states[container_name] == "disabled":
logger.info("Change auto-restart state of container '{}' to be 'enabled'".format(container_name))
duthost.shell("sudo config feature autorestart {} enabled".format(container_name))
restore_disabled_state = True
# Currently we select 'rsyslogd' as non-critical processes for testing based on
# the assumption that every container has an 'rsyslogd' process running and it is not
# considered to be a critical process
program_status, program_pid = get_program_info(duthost, container_name, "rsyslogd")
verify_no_autorestart_with_non_critical_process(duthost, container_name, "rsyslogd",
program_status, program_pid)
critical_group_list, critical_process_list, succeeded = duthost.get_critical_group_and_process_lists(container_name)
pytest_assert(succeeded, "Failed to get critical group and process lists of container '{}'".format(container_name))
for critical_process in critical_process_list:
# Skip 'dsserve' process since it was not managed by supervisord
# TODO: Should remove the following two lines once the issue was solved in the image.
if container_name == "syncd" and critical_process == "dsserve":
continue
program_status, program_pid = get_program_info(duthost, container_name, critical_process)
verify_autorestart_with_critical_process(duthost, container_name, critical_process,
program_status, program_pid)
# Sleep 20 seconds in order to let the processes come into live after container is restarted.
# We will uncomment the following line once the "extended" mode is added
# time.sleep(20)
# We are currently only testing one critical process, that is why we use 'break'. Once
# we add the "extended" mode, we will remove this statement
break
for critical_group in critical_group_list:
group_program_info = get_group_program_info(duthost, container_name, critical_group)
for program_name in group_program_info:
verify_autorestart_with_critical_process(duthost, container_name, program_name,
group_program_info[program_name][0],
group_program_info[program_name][1])
# We are currently only testing one critical program for each critical group, which is
# why we use 'break' statement. Once we add the "extended" mode, we will remove this
# statement
break
if restore_disabled_state:
logger.info("Restore auto-restart state of container '{}' to 'disabled'".format(container_name))
duthost.shell("sudo config feature autorestart {} disabled".format(container_name))
if not postcheck_critical_processes_status(duthost, container_autorestart_states, up_bgp_neighbors):
config_reload(duthost)
> pytest.fail("Some post check failed after testing feature {}".format(container_name))
E Failed: Some post check failed after testing feature teamd
bgp_neighbors = {'10.0.1.57': {'accepted prefixes': 2, 'admin': u'up', 'capabilities': {'peer restart timer': 300}, 'connections dropp...ccepted prefixes': 2, 'admin': u'up', 'capabilities': {'peer restart timer': 300}, 'connections dropped': 0, ...}, ...}
container_autorestart_states = {'acms': 'disabled', 'bgp': 'disabled', 'dhcp_relay': 'disabled', 'lldp': 'disabled', ...}
container_name = 'teamd'
critical_group_list = []
critical_process = 'teammgrd'
critical_process_list = ['teammgrd', 'teamsyncd', 'tlm_teamd']
disabled_containers = []
duthost = <MultiAsicSonicHost> str2-7050cx3-acs-07
is_running = True
k = 'fc00::1:7e'
program_pid = 20
program_status = 'RUNNING'
restore_disabled_state = True
skip_condition = ['database', 'acms']
succeeded = True
tbinfo = {'auto_recover': 'True', 'comment': 'hellogemini', 'conf-name': 'vms17-dual-t0-7050-1', 'duts': ['str2-7050cx3-acs-06', 'str2-7050cx3-acs-07'], ...}
up_bgp_neighbors = ['10.0.1.63', '10.0.1.61', '10.0.1.59', '10.0.1.57', 'fc00::1:76', 'fc00::1:72', ...]
v = {'accepted prefixes': 2, 'admin': u'up', 'capabilities': {'peer restart timer': 300}, 'connections dropped': 0, ...}
autorestart/test_container_autorestart.py:372: Failed
The text was updated successfully, but these errors were encountered:
The root cause is that both of these nightly tests failed in check_bgp_session_state(...), which checks whether the BGP sessions are in the established state. According to the logs, the BGP sessions are currently in the active state rather than the established state.
The text was updated successfully, but these errors were encountered: