Skip to content

Commit

Permalink
[Services] Restart NAT service upon unexpected critical process exit.
Browse files (browse the repository at this point in the history)
  • Loading branch information
jleveque authored and tiantianlv committed Apr 24, 2020
1 parent dd499ee commit 8689613
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 2 deletions.
2 changes: 2 additions & 0 deletions dockers/docker-nat/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ RUN apt-get update \
COPY ["start.sh", "/usr/bin/"]
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
COPY ["restore_nat_entries.py", "/usr/bin/"]
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
COPY ["critical_processes", "/etc/supervisor"]

RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y
RUN rm -rf /debs
Expand Down
2 changes: 2 additions & 0 deletions dockers/docker-nat/critical_processes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
natmgrd
natsyncd
8 changes: 7 additions & 1 deletion dockers/docker-nat/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name nat
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

[program:start.sh]
command=/usr/bin/start.sh
priority=1
Expand All @@ -15,7 +21,7 @@ stderr_logfile=syslog
command=/usr/sbin/rsyslogd -n
priority=2
autostart=false
autorestart=false
autorestart=unexpected
stdout_logfile=syslog
stderr_logfile=syslog

Expand Down
4 changes: 4 additions & 0 deletions files/build_templates/nat.service.j2
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@ Description=NAT container
Requires=updategraph.service swss.service
After=updategraph.service swss.service syncd.service
Before=ntp-config.service
StartLimitIntervalSec=1200
StartLimitBurst=3

[Service]
User={{ sonicadmin_user }}
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
Restart=always
RestartSec=30

[Install]
WantedBy=multi-user.target swss.service
Expand Down
3 changes: 2 additions & 1 deletion rules/docker-nat.mk
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@ $(DOCKER_NAT)_RUN_OPT += --privileged -t
$(DOCKER_NAT)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_NAT)_RUN_OPT += -v /host/warmboot:/var/warmboot

$(DOCKER_NAT)_BASE_IMAGE_FILES += natctl:/usr/bin/natctl
$(DOCKER_NAT)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)

$(DOCKER_NAT)_BASE_IMAGE_FILES += natctl:/usr/bin/natctl

0 comments on commit 8689613

Please sign in to comment.