Skip to content

Commit

Permalink
[teamd]: Increase wait timeout for teamd docker stop to clean Port ch…
Browse files Browse the repository at this point in the history
…annels. (#6537)

The Portchannels were not getting cleaned up because the cleanup activity was taking more than 10 seconds, which is the default docker stop timeout, after which a SIGKILL is sent.
Fixes #6199
To be checked: whether this also resolves the issue seen in 201911 (#6503).

This issue is seen significantly more often in the master branch than in 201911 because the Portchannel cleanup takes more time in master. Tested on a DUT with 8 Port Channels.

master

    admin@str-s6000-acs-8:~$ time sudo systemctl stop teamd
    real    0m15.599s
    user    0m0.061s
    sys     0m0.038s
Sonic 201911.v58

    admin@str-s6000-acs-8:~$ time sudo systemctl stop teamd
    real    0m5.541s
    user    0m0.020s
    sys     0m0.028s
  • Loading branch information
judyjoseph authored and daall committed Feb 6, 2021
1 parent eccff4b commit 0c17839
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 15 deletions.
3 changes: 3 additions & 0 deletions files/build_templates/docker_image_ctl.j2
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,9 @@ stop() {
if [ "$DEV" ]; then
ip netns delete "$NET_NS"
fi
{%- elif docker_container_name == "teamd" %}
# Longer timeout of 60 sec to wait for Portchannels to be cleaned.
/usr/local/bin/container stop -t 60 $DOCKERNAME
{%- else %}
/usr/local/bin/container stop $DOCKERNAME
{%- endif %}
Expand Down
34 changes: 19 additions & 15 deletions src/sonic-ctrmgrd/ctrmgr/container
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@ def read_state(feature):
[(CURRENT_OWNER, "none"), (REMOTE_STATE, "none"), (CONTAINER_ID, "")])


def docker_action(action, feature):
def docker_action(action, feature, **kwargs):
""" Execute docker action """
try:
client = docker.from_env()
container = client.containers.get(feature)
getattr(container, action)()
getattr(container, action)(**kwargs)
syslog.syslog(syslog.LOG_INFO, "docker cmd: {} for {}".format(action, feature))
return 0

Expand Down Expand Up @@ -161,7 +161,7 @@ def container_id(feature):
return data.get(CONTAINER_ID, feature)


def container_start(feature):
def container_start(feature, **kwargs):
"""
Starts a container for given feature.
Expand Down Expand Up @@ -219,15 +219,15 @@ def container_start(feature):
update_data(feature, data)

if (start_val & START_LOCAL):
ret = docker_action("start", feature)
ret = docker_action("start", feature, **kwargs)

if (start_val & START_KUBE):
set_label(feature, True)
debug_msg("END")
return ret


def container_stop(feature):
def container_stop(feature, **kwargs):
"""
Stops the running container for this feature.
Expand Down Expand Up @@ -257,7 +257,7 @@ def container_stop(feature):
set_label(feature, False)

if docker_id:
docker_action("stop", docker_id)
docker_action("stop", docker_id, **kwargs)
else:
syslog.syslog(
syslog.LOG_ERR if current_owner != "none" else syslog.LOG_INFO,
Expand Down Expand Up @@ -289,7 +289,7 @@ def container_stop(feature):
debug_msg("END")


def container_kill(feature):
def container_kill(feature, **kwargs):
"""
Kills the running container for this feature.
Expand All @@ -314,7 +314,7 @@ def container_kill(feature):
set_label(feature, False)

if docker_id:
docker_action("kill", docker_id)
docker_action("kill", docker_id, **kwargs)

else:
syslog.syslog(
Expand All @@ -325,7 +325,7 @@ def container_kill(feature):
debug_msg("END")


def container_wait(feature):
def container_wait(feature, **kwargs):
"""
Waits on the running container for this feature.
Expand Down Expand Up @@ -378,30 +378,34 @@ def container_wait(feature):
format(feature))
else:
debug_msg("END -- transitioning to docker wait")
docker_action("wait", docker_id)
docker_action("wait", docker_id, **kwargs)


def main():
parser=argparse.ArgumentParser(description="container commands for start/stop/wait/kill/id")
parser.add_argument("action", choices=["start", "stop", "wait", "kill", "id"])
parser.add_argument('-t', '--timeout', type=int, help='container action timeout value', default=None)
parser.add_argument("name")

args = parser.parse_args()
kwargs = {}

if args.action == "start":
container_start(args.name)
container_start(args.name, **kwargs)

elif args.action == "stop":
container_stop(args.name)
if args.timeout is not None:
kwargs['timeout'] = args.timeout
container_stop(args.name, **kwargs)

elif args.action == "kill":
container_kill(args.name)
container_kill(args.name, **kwargs)

elif args.action == "wait":
container_wait(args.name)
container_wait(args.name, **kwargs)

elif args.action == "id":
id = container_id(args.name)
id = container_id(args.name, **kwargs)
print(id)


Expand Down

0 comments on commit 0c17839

Please sign in to comment.