From e334536f87194af33000988ba685de36cefe7192 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:01:15 -0800 Subject: [PATCH 01/35] [Event_listener] modify event listener such that it will read the flag of auto-restart feature from database and then decide whether to enable/disable this feature. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 26 +++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 8d1735cd2b0c..b634c997218f 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -4,6 +4,8 @@ import os import signal import sys import syslog +import swsssdk +import getopt from supervisor import childutils @@ -11,7 +13,17 @@ from supervisor import childutils # supervisor.conf file), one per line CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes' -def main(): +# This table in databse contains the features for container and each +# feature for a row will be configured a state or number. 
+CONTAINER_FEATURE_TABLE_NAME = 'CONTAINER_FEATURE' + +def main(argv): + container_name = '' + opts, args = getopt.getopt(argv, "c:", ["container-name"]) + for opt, arg in opts: + if opt in ("-c", "--container-name"): + container_name = arg + # Read the list of critical processes from a file with open(CRITICAL_PROCESSES_FILE, 'r') as f: critical_processes = [line.rstrip('\n') for line in f] @@ -35,12 +47,18 @@ def main(): processname = payload_headers['processname'] groupname = payload_headers['groupname'] - # If a critical process exited unexpectedly, terminate supervisor - if expected == 0 and processname in critical_processes or groupname in critical_processes: + config_db = swsssdk.ConfigDBConnector() + config_db.connect() + docker_config = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) + if docker_config and docker_config.has_key(container_name): + restart_feature = docker_config[container_name].get('auto_restart') + + # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor + if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." msg = MSG_FORMAT_STR.format(payload_headers['processname']) syslog.syslog(syslog.LOG_INFO, msg) os.kill(os.getppid(), signal.SIGTERM) if __name__ == "__main__": - main() + main(sys.argv[1:]) From d0bf16b9d9bd82e62a1078bd41e831b5fc8fc0fd Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:21:23 -0800 Subject: [PATCH 02/35] [Docker-teamd] Modify supervisord config file to pass the container name to event listener. 
Signed-off-by: Yong Zhao --- dockers/docker-teamd/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-teamd/supervisord.conf b/dockers/docker-teamd/supervisord.conf index 3a420e0fcdcf..b5929fce2f16 100644 --- a/dockers/docker-teamd/supervisord.conf +++ b/dockers/docker-teamd/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name teamd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 59537400c4bc87c688a104d916c6518d7313ed1a Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:23:16 -0800 Subject: [PATCH 03/35] [Docker-telemetry] Modify the supervisord config file to pass container name to event listener. Signed-off-by: Yong Zhao --- dockers/docker-sonic-telemetry/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index e1346fe7db4e..54f4c5b2348d 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name telemetry events=PROCESS_STATE_EXITED autostart=true autorestart=false From 5d007b114a8179e0c8188678559813c39885d95e Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:24:11 -0800 Subject: [PATCH 04/35] [Docker-sflow] Modify supervisord config file to pass container name to event listener. 
Signed-off-by: Yong Zhao --- dockers/docker-sflow/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-sflow/supervisord.conf b/dockers/docker-sflow/supervisord.conf index 50986f197d88..8eb1bdc05e57 100644 --- a/dockers/docker-sflow/supervisord.conf +++ b/dockers/docker-sflow/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name sflow events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 4439c4ac0e6b197161d88bed219673eb4e55c84f Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:25:10 -0800 Subject: [PATCH 05/35] [Docker-lldp] Modify supervisord config file to pass container name to event listener. Signed-off-by: Yong Zhao --- dockers/docker-lldp-sv2/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-lldp-sv2/supervisord.conf b/dockers/docker-lldp-sv2/supervisord.conf index 3f3f5beabc8d..73ff52f4420e 100644 --- a/dockers/docker-lldp-sv2/supervisord.conf +++ b/dockers/docker-lldp-sv2/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name lldp events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From e2ef60680fa2fd4126048457eef4d3fe5a6a667d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:26:20 -0800 Subject: [PATCH 06/35] [Docker-swss] Modify supervisord config file to pass container name to event listener. 
Signed-off-by: Yong Zhao --- dockers/docker-orchagent/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index 9ae2776f6d26..6b21d73f3c81 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name swss events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 8a963d6215d906a72adebd45788890486900c5a7 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:28:41 -0800 Subject: [PATCH 07/35] [Docker-pmon] Modify supervisord config file to pass container name to event listener. Signed-off-by: Yong Zhao --- dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 index 9a2414c30d05..13ae0e767ab2 100644 --- a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 +++ b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name pmon events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From f460268929774f2fbfa124bba3ff3bb714262c24 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:29:38 -0800 Subject: [PATCH 08/35] [Docker-radv] Modify supervisord config file to pass container name to event listener. 
Signed-off-by: Yong Zhao --- .../docker-router-advertiser.supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf index 4ea84ab11c92..bf9320acc776 100644 --- a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf +++ b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-script] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name radv events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From e1580dd76994ea902c0dba4901dbb680dd243869 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:31:01 -0800 Subject: [PATCH 09/35] [Docker-dhcp-relay] Modify supervisord config file to pass container name to event listener. Signed-off-by: Yong Zhao --- dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 index 94fdbfdaff2f..d52400480775 100644 --- a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 +++ b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 091a550b4475a9937058bc2e8cc445741f79a736 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:31:56 -0800 Subject: [PATCH 10/35] [Docker-snmp] Modify supervisord config file to pass container name to event listener. 
Signed-off-by: Yong Zhao --- dockers/docker-snmp-sv2/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/docker-snmp-sv2/supervisord.conf b/dockers/docker-snmp-sv2/supervisord.conf index 7fd16eec5bbe..992292330552 100644 --- a/dockers/docker-snmp-sv2/supervisord.conf +++ b/dockers/docker-snmp-sv2/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name snmp events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From ec4d6ee1088761dcc4af5bab60ec13d0038f147d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:37:56 -0800 Subject: [PATCH 11/35] [Docker-bgp] Add an event listener, critical process list for container bgp and event listener will monitor these critical processes and if one of them exited, the event listener will restart this container based on the configuration flag set in database. 
Signed-off-by: Yong Zhao --- dockers/docker-fpm-frr/Dockerfile.j2 | 2 ++ dockers/docker-fpm-frr/critical_processes | 5 +++++ dockers/docker-fpm-frr/supervisord.conf | 6 ++++++ 3 files changed, 13 insertions(+) create mode 100644 dockers/docker-fpm-frr/critical_processes diff --git a/dockers/docker-fpm-frr/Dockerfile.j2 b/dockers/docker-fpm-frr/Dockerfile.j2 index 418676c18329..b092e42e19fd 100644 --- a/dockers/docker-fpm-frr/Dockerfile.j2 +++ b/dockers/docker-fpm-frr/Dockerfile.j2 @@ -50,6 +50,8 @@ COPY ["snmp.conf", "/etc/snmp/frr.conf"] COPY ["TSA", "/usr/bin/TSA"] COPY ["TSB", "/usr/bin/TSB"] COPY ["TSC", "/usr/bin/TSC"] +COPY ["files/supervisor-proc-exit-listener","/usr/bin"] +COPY ["critical_processes", "/etc/supervisor"] RUN chmod a+x /usr/bin/TSA && \ chmod a+x /usr/bin/TSB && \ chmod a+x /usr/bin/TSC diff --git a/dockers/docker-fpm-frr/critical_processes b/dockers/docker-fpm-frr/critical_processes new file mode 100644 index 000000000000..8ea09e1bb538 --- /dev/null +++ b/dockers/docker-fpm-frr/critical_processes @@ -0,0 +1,5 @@ +zebra +staticd +bgpd +fpmsyncd +bgpcfgd diff --git a/dockers/docker-fpm-frr/supervisord.conf b/dockers/docker-fpm-frr/supervisord.conf index fe0ce6eda1a4..a6c8013b3f11 100644 --- a/dockers/docker-fpm-frr/supervisord.conf +++ b/dockers/docker-fpm-frr/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name bgp +event=PROCESS_STAT_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 From 816190348eaad150c1f6217cfa912066bbf29d50 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:41:02 -0800 Subject: [PATCH 12/35] [Docker-bgp] Set the maximum restart times to 3 during 20 mins. 
Signed-off-by: Yong Zhao --- files/build_templates/bgp.service.j2 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/files/build_templates/bgp.service.j2 b/files/build_templates/bgp.service.j2 index 7200a0e3ecf2..fdf9d9c78c04 100644 --- a/files/build_templates/bgp.service.j2 +++ b/files/build_templates/bgp.service.j2 @@ -3,12 +3,16 @@ Description=BGP container Requires=updategraph.service After=updategraph.service Before=ntp-config.service +StartLimitIntervalSec=1200 +StartLimitBurst=3 [Service] User={{ sonicadmin_user }} ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always +RestartSec=30 [Install] WantedBy=multi-user.target From 909798852b932aae8c6487b86a5062962c019e40 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:43:17 -0800 Subject: [PATCH 13/35] [Docker-bgp] Add a macro in docker's makefile. Signed-off-by: Yong Zhao --- rules/docker-fpm-frr.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/rules/docker-fpm-frr.mk b/rules/docker-fpm-frr.mk index e833d360bec2..4f0b52269587 100644 --- a/rules/docker-fpm-frr.mk +++ b/rules/docker-fpm-frr.mk @@ -31,3 +31,4 @@ $(DOCKER_FPM_FRR)_BASE_IMAGE_FILES += TSA:/usr/bin/TSA $(DOCKER_FPM_FRR)_BASE_IMAGE_FILES += TSB:/usr/bin/TSB $(DOCKER_FPM_FRR)_BASE_IMAGE_FILES += TSC:/usr/bin/TSC $(DOCKER_FPM_FRR)_BASE_IMAGE_FILES += monit_bgp:/etc/monit/conf.d +$(DOCKER_FPM_FRR)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) From 05170f3eaf941761067c2936de782271768ceecd Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:47:57 -0800 Subject: [PATCH 14/35] [Docker-syncd-barefoot] Modify the supervisord config file to pass container name to event listener. 
Signed-off-by: Yong Zhao --- platform/barefoot/docker-syncd-bfn/supervisord.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/platform/barefoot/docker-syncd-bfn/supervisord.conf b/platform/barefoot/docker-syncd-bfn/supervisord.conf index 1e015fef931f..1744d6ffefb5 100644 --- a/platform/barefoot/docker-syncd-bfn/supervisord.conf +++ b/platform/barefoot/docker-syncd-bfn/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 From 0d52f4ee313fb83d0d9926504ff717c13e7ec6af Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:49:02 -0800 Subject: [PATCH 15/35] [Docker-syncd-broadcom] Modify supervisord config file to pass container name to event listener. Signed-off-by: Yong Zhao --- platform/broadcom/docker-syncd-brcm/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/broadcom/docker-syncd-brcm/supervisord.conf b/platform/broadcom/docker-syncd-brcm/supervisord.conf index cd6712acbf22..3fa8febb85d8 100644 --- a/platform/broadcom/docker-syncd-brcm/supervisord.conf +++ b/platform/broadcom/docker-syncd-brcm/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 4b9b0091204246710c9ee870dba91759b01bde89 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:50:12 -0800 Subject: [PATCH 16/35] [Docker-syncd-cavium] Modify supervisord config file to pass container name to event listener. 
Signed-off-by: Yong Zhao --- platform/cavium/docker-syncd-cavm/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/cavium/docker-syncd-cavm/supervisord.conf b/platform/cavium/docker-syncd-cavm/supervisord.conf index c823ab5680ef..0c6285d46ae0 100644 --- a/platform/cavium/docker-syncd-cavm/supervisord.conf +++ b/platform/cavium/docker-syncd-cavm/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 81cb12be01af5fe287c997f68d49cfc4abd0844b Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:51:16 -0800 Subject: [PATCH 17/35] [Docker-syncd-centec] Modify supervisord config file to pass container name to event listener. Signed-off-by: Yong Zhao --- platform/centec/docker-syncd-centec/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/centec/docker-syncd-centec/supervisord.conf b/platform/centec/docker-syncd-centec/supervisord.conf index c823ab5680ef..0c6285d46ae0 100644 --- a/platform/centec/docker-syncd-centec/supervisord.conf +++ b/platform/centec/docker-syncd-centec/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 3b84e80832ffefef4309250608a6e87418bb7a31 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:52:27 -0800 Subject: [PATCH 18/35] [Docker-syncd-marvell] Modify supervisord config file to pass container name to event listener. 
Signed-off-by: Yong Zhao --- platform/marvell/docker-syncd-mrvl/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/marvell/docker-syncd-mrvl/supervisord.conf b/platform/marvell/docker-syncd-mrvl/supervisord.conf index aea4d45b9afd..43de2426f981 100644 --- a/platform/marvell/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell/docker-syncd-mrvl/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 44970a097d2ed898fa1db1c61e0bd1332156a33d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:53:57 -0800 Subject: [PATCH 19/35] [Docker-syncd-marvell-arm64] Modify supervisord config file to pass container name to event listener. Signed-off-by: Yong Zhao --- platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf index 1af5d70a1d0c..b11e045fac7e 100644 --- a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 From e73a86d1b2dd89d16fe064696cf592744fb76a31 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:55:15 -0800 Subject: [PATCH 20/35] [Docker-syncd-marvell-armhf] Modify supervisord config file to pass container name as parameter to event listener. 
Signed-off-by: Yong Zhao --- platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf index 1af5d70a1d0c..b11e045fac7e 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 From bcf43c70397858bebd62ee2d46432f0db00bcd9e Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:56:10 -0800 Subject: [PATCH 21/35] [Docker-syncd-mellanox] Modify supervisord config file to pass container name as parameter to event listener. Signed-off-by: Yong Zhao --- platform/mellanox/docker-syncd-mlnx/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/mellanox/docker-syncd-mlnx/supervisord.conf b/platform/mellanox/docker-syncd-mlnx/supervisord.conf index c823ab5680ef..0c6285d46ae0 100644 --- a/platform/mellanox/docker-syncd-mlnx/supervisord.conf +++ b/platform/mellanox/docker-syncd-mlnx/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From cbebfe05dd4671e75ff765ae286a4ae8fa54e7c4 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 15:57:10 -0800 Subject: [PATCH 22/35] [Docker-syncd-nephos] Modify the supervisord config file to pass container name as parameter to event listener. 
Signed-off-by: Yong Zhao --- platform/nephos/docker-syncd-nephos/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/nephos/docker-syncd-nephos/supervisord.conf b/platform/nephos/docker-syncd-nephos/supervisord.conf index c823ab5680ef..0c6285d46ae0 100644 --- a/platform/nephos/docker-syncd-nephos/supervisord.conf +++ b/platform/nephos/docker-syncd-nephos/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From b8a2f393cf368b9bbfd47703f0b3d24c056b499f Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 27 Jan 2020 16:28:49 -0800 Subject: [PATCH 23/35] [Event listener] Add '=' for getopt parameters. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index b634c997218f..60c87de73671 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -19,7 +19,7 @@ CONTAINER_FEATURE_TABLE_NAME = 'CONTAINER_FEATURE' def main(argv): container_name = '' - opts, args = getopt.getopt(argv, "c:", ["container-name"]) + opts, args = getopt.getopt(argv, "c:", ["container-name="]) for opt, arg in opts: if opt in ("-c", "--container-name"): container_name = arg From 5dcb1ece00ecd3eb6d77faec5718e958c7af91c0 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 28 Jan 2020 09:51:15 -0800 Subject: [PATCH 24/35] [Event-listener] Reorganize the import commands in event listener. 
Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 60c87de73671..14eeae6bca0a 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -1,11 +1,12 @@ #!/usr/bin/env python +import getopt import os import signal import sys import syslog + import swsssdk -import getopt from supervisor import childutils From b30a5a25afd72cdf580df87ab9561b50583d6656 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 28 Jan 2020 09:56:47 -0800 Subject: [PATCH 25/35] [Docker-bgp] Delete all the relevant files of auto-restart feature. Signed-off-by: Yong Zhao --- dockers/docker-fpm-frr/Dockerfile.j2 | 2 -- dockers/docker-fpm-frr/critical_processes | 5 ----- dockers/docker-fpm-frr/supervisord.conf | 6 ------ files/build_templates/bgp.service.j2 | 4 ---- rules/docker-fpm-frr.mk | 1 - 5 files changed, 18 deletions(-) delete mode 100644 dockers/docker-fpm-frr/critical_processes diff --git a/dockers/docker-fpm-frr/Dockerfile.j2 b/dockers/docker-fpm-frr/Dockerfile.j2 index b092e42e19fd..418676c18329 100644 --- a/dockers/docker-fpm-frr/Dockerfile.j2 +++ b/dockers/docker-fpm-frr/Dockerfile.j2 @@ -50,8 +50,6 @@ COPY ["snmp.conf", "/etc/snmp/frr.conf"] COPY ["TSA", "/usr/bin/TSA"] COPY ["TSB", "/usr/bin/TSB"] COPY ["TSC", "/usr/bin/TSC"] -COPY ["files/supervisor-proc-exit-listener","/usr/bin"] -COPY ["critical_processes", "/etc/supervisor"] RUN chmod a+x /usr/bin/TSA && \ chmod a+x /usr/bin/TSB && \ chmod a+x /usr/bin/TSC diff --git a/dockers/docker-fpm-frr/critical_processes b/dockers/docker-fpm-frr/critical_processes deleted file mode 100644 index 8ea09e1bb538..000000000000 --- a/dockers/docker-fpm-frr/critical_processes +++ /dev/null @@ -1,5 +0,0 @@ -zebra -staticd -bgpd -fpmsyncd -bgpcfgd diff --git a/dockers/docker-fpm-frr/supervisord.conf 
b/dockers/docker-fpm-frr/supervisord.conf index a6c8013b3f11..fe0ce6eda1a4 100644 --- a/dockers/docker-fpm-frr/supervisord.conf +++ b/dockers/docker-fpm-frr/supervisord.conf @@ -3,12 +3,6 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true -[eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener --container-name bgp -event=PROCESS_STAT_EXITED -autostart=true -autorestart=unexpected - [program:start.sh] command=/usr/bin/start.sh priority=1 diff --git a/files/build_templates/bgp.service.j2 b/files/build_templates/bgp.service.j2 index fdf9d9c78c04..7200a0e3ecf2 100644 --- a/files/build_templates/bgp.service.j2 +++ b/files/build_templates/bgp.service.j2 @@ -3,16 +3,12 @@ Description=BGP container Requires=updategraph.service After=updategraph.service Before=ntp-config.service -StartLimitIntervalSec=1200 -StartLimitBurst=3 [Service] User={{ sonicadmin_user }} ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStop=/usr/bin/{{docker_container_name}}.sh stop -Restart=always -RestartSec=30 [Install] WantedBy=multi-user.target diff --git a/rules/docker-fpm-frr.mk b/rules/docker-fpm-frr.mk index 4f0b52269587..e833d360bec2 100644 --- a/rules/docker-fpm-frr.mk +++ b/rules/docker-fpm-frr.mk @@ -31,4 +31,3 @@ $(DOCKER_FPM_FRR)_BASE_IMAGE_FILES += TSA:/usr/bin/TSA $(DOCKER_FPM_FRR)_BASE_IMAGE_FILES += TSB:/usr/bin/TSB $(DOCKER_FPM_FRR)_BASE_IMAGE_FILES += TSC:/usr/bin/TSC $(DOCKER_FPM_FRR)_BASE_IMAGE_FILES += monit_bgp:/etc/monit/conf.d -$(DOCKER_FPM_FRR)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) From 51cdf73a03aed051b8de9cb1421c7f437a144b9d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 28 Jan 2020 11:38:05 -0800 Subject: [PATCH 26/35] [Docker-dhcp-relay] Pass container name as parameter in sample generated configuration file. 
Signed-off-by: Yong Zhao --- .../tests/sample_output/docker-dhcp-relay.supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf index a29982a646f4..fde1d6c7714d 100644 --- a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf +++ b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected From 6a2932b0b8a244d7404b96508c5a02e6d3011cdd Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 29 Jan 2020 11:00:00 -0800 Subject: [PATCH 27/35] [Event-listener] Add the logic to handle if the container name is not provided or container name is invalid. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 14eeae6bca0a..e549432e9519 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -19,12 +19,16 @@ CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes' CONTAINER_FEATURE_TABLE_NAME = 'CONTAINER_FEATURE' def main(argv): - container_name = '' + container_name = None opts, args = getopt.getopt(argv, "c:", ["container-name="]) for opt, arg in opts: if opt in ("-c", "--container-name"): container_name = arg + if not container_name: + syslog.syslog(syslog.LOG_ERROR, "Container name not specified. 
Exiting...") + sys.exit(0) + # Read the list of critical processes from a file with open(CRITICAL_PROCESSES_FILE, 'r') as f: critical_processes = [line.rstrip('\n') for line in f] @@ -54,6 +58,10 @@ def main(argv): if docker_config and docker_config.has_key(container_name): restart_feature = docker_config[container_name].get('auto_restart') + if not restart_feature: + syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting".format(container_name)) + sys.exit(0) + # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." From 2457dbf58d0f3db4d3395eba1875d29ccbb3af17 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 29 Jan 2020 11:53:48 -0800 Subject: [PATCH 28/35] [Event-listener] Change the exit code. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index e549432e9519..6965d3c74a35 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -27,7 +27,7 @@ def main(argv): if not container_name: syslog.syslog(syslog.LOG_ERROR, "Container name not specified. 
Exiting...") - sys.exit(0) + sys.exit(1) # Read the list of critical processes from a file with open(CRITICAL_PROCESSES_FILE, 'r') as f: @@ -57,10 +57,9 @@ def main(argv): docker_config = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) if docker_config and docker_config.has_key(container_name): restart_feature = docker_config[container_name].get('auto_restart') - - if not restart_feature: - syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting".format(container_name)) - sys.exit(0) + if not restart_feature: + syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting".format(container_name)) + sys.exit(1) # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): From 5e6ab1f3f081621a15297332d600c3ae16ad9ffc Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 29 Jan 2020 12:04:58 -0800 Subject: [PATCH 29/35] [Event-listener] Change the exit code from 1 to 2. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 6965d3c74a35..d4eef043d2a9 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -27,7 +27,7 @@ def main(argv): if not container_name: syslog.syslog(syslog.LOG_ERROR, "Container name not specified. 
Exiting...") - sys.exit(1) + sys.exit(2) # Read the list of critical processes from a file with open(CRITICAL_PROCESSES_FILE, 'r') as f: @@ -59,7 +59,7 @@ def main(argv): restart_feature = docker_config[container_name].get('auto_restart') if not restart_feature: syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting".format(container_name)) - sys.exit(1) + sys.exit(2) # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): From 693fd78d02ce010f17dc4aaf3e28556d783b620b Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 29 Jan 2020 13:46:02 -0800 Subject: [PATCH 30/35] [Event-listener] Change the exit code from 2 to 1 in line 30. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index d4eef043d2a9..57341ca26daf 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -27,7 +27,7 @@ def main(argv): if not container_name: syslog.syslog(syslog.LOG_ERROR, "Container name not specified. Exiting...") - sys.exit(2) + sys.exit(1) # Read the list of critical processes from a file with open(CRITICAL_PROCESSES_FILE, 'r') as f: From 324dd4f9544832f4d9a1a894a68ec77b727e79df Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 29 Jan 2020 13:51:27 -0800 Subject: [PATCH 31/35] [Event-listener] Add three periods in the line 61. 
Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 57341ca26daf..1bf46c1daba8 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -58,7 +58,7 @@ def main(argv): if docker_config and docker_config.has_key(container_name): restart_feature = docker_config[container_name].get('auto_restart') if not restart_feature: - syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting".format(container_name)) + syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) sys.exit(2) # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor From 8201389f6f21efa8175fb988a201ad138af54b32 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 29 Jan 2020 14:57:40 -0800 Subject: [PATCH 32/35] [Event-listener] Add logic to decide the invalid container name. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 1bf46c1daba8..8b5b47d08a4a 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -55,11 +55,15 @@ def main(argv): config_db = swsssdk.ConfigDBConnector() config_db.connect() docker_config = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) - if docker_config and docker_config.has_key(container_name): - restart_feature = docker_config[container_name].get('auto_restart') - if not restart_feature: - syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. 
Exiting...".format(container_name)) - sys.exit(2) + if docker_config: + if docker_config.has_key(container_name): + restart_feature = docker_config[container_name].get('auto_restart') + if not restart_feature: + syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) + sys.exit(2) + else: + syslog.syslog(syslog.LOG_ERROR, "Unable to find container '{}' in feature table. Exiting...".format(container_name)) + sys.exit(3) # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): From 3df1a3db1494498496e71d8092184dbe29d21a4b Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 29 Jan 2020 15:04:33 -0800 Subject: [PATCH 33/35] [Event-listener] Change the logic to avoid invalid container name. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 28 +++++++++------------ 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 8b5b47d08a4a..39e8e2971a8a 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -55,22 +55,18 @@ def main(argv): config_db = swsssdk.ConfigDBConnector() config_db.connect() docker_config = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) - if docker_config: - if docker_config.has_key(container_name): - restart_feature = docker_config[container_name].get('auto_restart') - if not restart_feature: - syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) - sys.exit(2) - else: - syslog.syslog(syslog.LOG_ERROR, "Unable to find container '{}' in feature table. 
Exiting...".format(container_name)) - sys.exit(3) - - # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor - if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): - MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." - msg = MSG_FORMAT_STR.format(payload_headers['processname']) - syslog.syslog(syslog.LOG_INFO, msg) - os.kill(os.getppid(), signal.SIGTERM) + if docker_config and docker_config.has_key(container_name): + restart_feature = docker_config[container_name].get('auto_restart') + if not restart_feature: + syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) + sys.exit(2) + + # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor + if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): + MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." + msg = MSG_FORMAT_STR.format(payload_headers['processname']) + syslog.syslog(syslog.LOG_INFO, msg) + os.kill(os.getppid(), signal.SIGTERM) if __name__ == "__main__": main(sys.argv[1:]) From 0833a03114aac311efe24f3fcd78df28ac5b41fa Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Thu, 30 Jan 2020 14:38:11 -0800 Subject: [PATCH 34/35] [Event-listener] Add the logic to handle the corner cases if the table is not found in config db and container name is not found in the table.
Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 34 +++++++++++++-------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 39e8e2971a8a..d89d9e5bfb33 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -54,19 +54,27 @@ def main(argv): config_db = swsssdk.ConfigDBConnector() config_db.connect() - docker_config = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) - if docker_config and docker_config.has_key(container_name): - restart_feature = docker_config[container_name].get('auto_restart') - if not restart_feature: - syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) - sys.exit(2) - - # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor - if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): - MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." - msg = MSG_FORMAT_STR.format(payload_headers['processname']) - syslog.syslog(syslog.LOG_INFO, msg) - os.kill(os.getppid(), signal.SIGTERM) + container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) + if not container_features_table: + syslog.syslog(syslog.LOG_ERROR, "Unable to retrieve container features table from Config DB. Exiting...") + sys.exit(2) + + if not container_features_table.has_key(container_name): + syslog.syslog(syslog.LOG_ERROR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name)) + sys.exit(3) + + restart_feature = container_features_table[container_name].get('auto_restart') + if not restart_feature: + syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. 
Exiting...".format(container_name)) + sys.exit(4) + + # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor + if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): + MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." + msg = MSG_FORMAT_STR.format(payload_headers['processname']) + syslog.syslog(syslog.LOG_INFO, msg) + os.kill(os.getppid(), signal.SIGTERM) + if __name__ == "__main__": main(sys.argv[1:]) From 56ed2622b9c7c342f57a32bfe82951f90921ef94 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 3 Feb 2020 11:36:29 -0800 Subject: [PATCH 35/35] [Event-listener] Use the syslog.LOG_ERR to write message into syslog file. Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index d89d9e5bfb33..cf26d5383074 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -26,7 +26,7 @@ def main(argv): container_name = arg if not container_name: - syslog.syslog(syslog.LOG_ERROR, "Container name not specified. Exiting...") + syslog.syslog(syslog.LOG_ERR, "Container name not specified. Exiting...") sys.exit(1) # Read the list of critical processes from a file @@ -56,16 +56,16 @@ def main(argv): config_db.connect() container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) if not container_features_table: - syslog.syslog(syslog.LOG_ERROR, "Unable to retrieve container features table from Config DB. Exiting...") + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...") sys.exit(2) if not container_features_table.has_key(container_name): - syslog.syslog(syslog.LOG_ERROR, "Unable to retrieve features for container '{}'. 
Exiting...".format(container_name)) + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name)) sys.exit(3) restart_feature = container_features_table[container_name].get('auto_restart') if not restart_feature: - syslog.syslog(syslog.LOG_ERROR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) + syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) sys.exit(4) # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor