diff --git a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 index 94fdbfdaff2f..d52400480775 100644 --- a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 +++ b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/dockers/docker-lldp-sv2/supervisord.conf b/dockers/docker-lldp-sv2/supervisord.conf index 3f3f5beabc8d..73ff52f4420e 100644 --- a/dockers/docker-lldp-sv2/supervisord.conf +++ b/dockers/docker-lldp-sv2/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name lldp events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index 9ae2776f6d26..6b21d73f3c81 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name swss events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 index 9a2414c30d05..13ae0e767ab2 100644 --- a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 +++ b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name pmon events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf index 4ea84ab11c92..bf9320acc776 100644 --- a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf +++ b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-script] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name radv events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/dockers/docker-sflow/supervisord.conf b/dockers/docker-sflow/supervisord.conf index 50986f197d88..8eb1bdc05e57 100644 --- a/dockers/docker-sflow/supervisord.conf +++ b/dockers/docker-sflow/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name sflow events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/dockers/docker-snmp-sv2/supervisord.conf b/dockers/docker-snmp-sv2/supervisord.conf index 7fd16eec5bbe..992292330552 100644 --- a/dockers/docker-snmp-sv2/supervisord.conf +++ b/dockers/docker-snmp-sv2/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name snmp events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index e1346fe7db4e..54f4c5b2348d 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name telemetry events=PROCESS_STATE_EXITED autostart=true autorestart=false diff --git a/dockers/docker-teamd/supervisord.conf b/dockers/docker-teamd/supervisord.conf index 3a420e0fcdcf..b5929fce2f16 100644 --- a/dockers/docker-teamd/supervisord.conf +++ b/dockers/docker-teamd/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name teamd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 8d1735cd2b0c..cf26d5383074 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -1,17 +1,34 @@ #!/usr/bin/env python +import getopt import os import signal import sys import syslog +import swsssdk + from supervisor import childutils # Contents of file should be the names of critical processes (as defined in # supervisor.conf file), one per line CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes' -def main(): +# This table in databse contains the features for container and each +# feature for a row will be configured a state or number. +CONTAINER_FEATURE_TABLE_NAME = 'CONTAINER_FEATURE' + +def main(argv): + container_name = None + opts, args = getopt.getopt(argv, "c:", ["container-name="]) + for opt, arg in opts: + if opt in ("-c", "--container-name"): + container_name = arg + + if not container_name: + syslog.syslog(syslog.LOG_ERR, "Container name not specified. Exiting...") + sys.exit(1) + # Read the list of critical processes from a file with open(CRITICAL_PROCESSES_FILE, 'r') as f: critical_processes = [line.rstrip('\n') for line in f] @@ -35,12 +52,29 @@ def main(): processname = payload_headers['processname'] groupname = payload_headers['groupname'] - # If a critical process exited unexpectedly, terminate supervisor - if expected == 0 and processname in critical_processes or groupname in critical_processes: + config_db = swsssdk.ConfigDBConnector() + config_db.connect() + container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) + if not container_features_table: + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...") + sys.exit(2) + + if not container_features_table.has_key(container_name): + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name)) + sys.exit(3) + + restart_feature = container_features_table[container_name].get('auto_restart') + if not restart_feature: + syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) + sys.exit(4) + + # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor + if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." msg = MSG_FORMAT_STR.format(payload_headers['processname']) syslog.syslog(syslog.LOG_INFO, msg) os.kill(os.getppid(), signal.SIGTERM) + if __name__ == "__main__": - main() + main(sys.argv[1:]) diff --git a/platform/barefoot/docker-syncd-bfn/supervisord.conf b/platform/barefoot/docker-syncd-bfn/supervisord.conf index 1e015fef931f..1744d6ffefb5 100644 --- a/platform/barefoot/docker-syncd-bfn/supervisord.conf +++ b/platform/barefoot/docker-syncd-bfn/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 diff --git a/platform/broadcom/docker-syncd-brcm/supervisord.conf b/platform/broadcom/docker-syncd-brcm/supervisord.conf index cd6712acbf22..3fa8febb85d8 100644 --- a/platform/broadcom/docker-syncd-brcm/supervisord.conf +++ b/platform/broadcom/docker-syncd-brcm/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/platform/cavium/docker-syncd-cavm/supervisord.conf b/platform/cavium/docker-syncd-cavm/supervisord.conf index c823ab5680ef..0c6285d46ae0 100644 --- a/platform/cavium/docker-syncd-cavm/supervisord.conf +++ b/platform/cavium/docker-syncd-cavm/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/platform/centec/docker-syncd-centec/supervisord.conf b/platform/centec/docker-syncd-centec/supervisord.conf index c823ab5680ef..0c6285d46ae0 100644 --- a/platform/centec/docker-syncd-centec/supervisord.conf +++ b/platform/centec/docker-syncd-centec/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf index 1af5d70a1d0c..b11e045fac7e 100644 --- a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 diff --git a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf index 1af5d70a1d0c..b11e045fac7e 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 diff --git a/platform/marvell/docker-syncd-mrvl/supervisord.conf b/platform/marvell/docker-syncd-mrvl/supervisord.conf index aea4d45b9afd..43de2426f981 100644 --- a/platform/marvell/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell/docker-syncd-mrvl/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/platform/mellanox/docker-syncd-mlnx/supervisord.conf b/platform/mellanox/docker-syncd-mlnx/supervisord.conf index c823ab5680ef..0c6285d46ae0 100644 --- a/platform/mellanox/docker-syncd-mlnx/supervisord.conf +++ b/platform/mellanox/docker-syncd-mlnx/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/platform/nephos/docker-syncd-nephos/supervisord.conf b/platform/nephos/docker-syncd-nephos/supervisord.conf index c823ab5680ef..0c6285d46ae0 100644 --- a/platform/nephos/docker-syncd-nephos/supervisord.conf +++ b/platform/nephos/docker-syncd-nephos/supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name syncd events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected diff --git a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf index a29982a646f4..fde1d6c7714d 100644 --- a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf +++ b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf @@ -4,7 +4,7 @@ logfile_backups=2 nodaemon=true [eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener +command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay events=PROCESS_STATE_EXITED autostart=true autorestart=unexpected