From e797dc4253de42ff4ab809153ab3acebf1fec6b8 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 28 Feb 2020 15:03:25 -0800 Subject: [PATCH 1/3] [Services] Add a mechanism to auto-restart the nat container if one of its critical processes crashes or exits unexpectedly. Signed-off-by: Yong Zhao --- dockers/docker-nat/Dockerfile.j2 | 2 ++ dockers/docker-nat/critical_processes | 2 ++ dockers/docker-nat/supervisord.conf | 6 ++++++ files/build_templates/nat.service.j2 | 4 ++++ rules/docker-nat.mk | 2 +- 5 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 dockers/docker-nat/critical_processes diff --git a/dockers/docker-nat/Dockerfile.j2 b/dockers/docker-nat/Dockerfile.j2 index 3cfbd99e95e1..a74147cc26fd 100644 --- a/dockers/docker-nat/Dockerfile.j2 +++ b/dockers/docker-nat/Dockerfile.j2 @@ -38,6 +38,8 @@ RUN apt-get update \ COPY ["start.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] COPY ["restore_nat_entries.py", "/usr/bin/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor"] RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y RUN rm -rf /debs diff --git a/dockers/docker-nat/critical_processes b/dockers/docker-nat/critical_processes new file mode 100644 index 000000000000..d442976143f1 --- /dev/null +++ b/dockers/docker-nat/critical_processes @@ -0,0 +1,2 @@ +natmgrd +natsyncd diff --git a/dockers/docker-nat/supervisord.conf b/dockers/docker-nat/supervisord.conf index bb42d23fe355..f83fad272f64 100644 --- a/dockers/docker-nat/supervisord.conf +++ b/dockers/docker-nat/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name nat +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 diff --git a/files/build_templates/nat.service.j2 
b/files/build_templates/nat.service.j2 index 2e3e17439ef7..79a56f67ca89 100644 --- a/files/build_templates/nat.service.j2 +++ b/files/build_templates/nat.service.j2 @@ -3,12 +3,16 @@ Description=NAT container Requires=updategraph.service swss.service After=updategraph.service swss.service syncd.service Before=ntp-config.service +StartLimitIntervalSec=1200 +StartLimitBurst=3 [Service] User={{ sonicadmin_user }} ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always +RestartSec=30 [Install] WantedBy=multi-user.target swss.service diff --git a/rules/docker-nat.mk b/rules/docker-nat.mk index dcccc24ba2ff..51da6203f39b 100644 --- a/rules/docker-nat.mk +++ b/rules/docker-nat.mk @@ -31,4 +31,4 @@ $(DOCKER_NAT)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_NAT)_RUN_OPT += -v /host/warmboot:/var/warmboot $(DOCKER_NAT)_BASE_IMAGE_FILES += natctl:/usr/bin/natctl - +$(DOCKER_NAT)_BASE_IMAGE_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) From 5d29a26a35abefb908b022243062a91b4231833f Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 28 Feb 2020 15:10:44 -0800 Subject: [PATCH 2/3] [Services] Set autorestart to unexpected for rsyslogd in the supervisord.conf file so that it is restarted if it exits unexpectedly. 
Signed-off-by: Yong Zhao --- dockers/docker-nat/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-nat/supervisord.conf b/dockers/docker-nat/supervisord.conf index f83fad272f64..839d6f59ab3c 100644 --- a/dockers/docker-nat/supervisord.conf +++ b/dockers/docker-nat/supervisord.conf @@ -21,7 +21,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=false +autorestart=unexpected stdout_logfile=syslog stderr_logfile=syslog From b672963eebb7762515a1f852af4ebc1df0568616 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 3 Mar 2020 16:52:46 -0800 Subject: [PATCH 3/3] [Services] Fix an error: the macro of the event listener script should be added to the container image (_FILES), not to _BASE_IMAGE_FILES. Signed-off-by: Yong Zhao --- rules/docker-nat.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rules/docker-nat.mk b/rules/docker-nat.mk index 51da6203f39b..eb6bd16ccd46 100644 --- a/rules/docker-nat.mk +++ b/rules/docker-nat.mk @@ -30,5 +30,6 @@ $(DOCKER_NAT)_RUN_OPT += --privileged -t $(DOCKER_NAT)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_NAT)_RUN_OPT += -v /host/warmboot:/var/warmboot +$(DOCKER_NAT)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) + $(DOCKER_NAT)_BASE_IMAGE_FILES += natctl:/usr/bin/natctl -$(DOCKER_NAT)_BASE_IMAGE_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)