From cc9c3f567e65d253ca924fb3a766e76905eb2ed6 Mon Sep 17 00:00:00 2001 From: yozhao101 <56170650+yozhao101@users.noreply.github.com> Date: Thu, 21 Jan 2021 12:57:49 -0800 Subject: [PATCH] [supervisord] Monitoring the critical processes with supervisord. (#6242) - Why I did it Initially, we used Monit to monitor critical processes in each container. If one of the critical processes was not running or crashed for some reason, Monit would write an alerting message into syslog periodically. If we add a new process to a container, the corresponding Monit configuration file also needs to be updated, which makes maintenance harder. We now employ the event listener of Supervisord to do this monitoring. Since processes in each container are managed by Supervisord, we only need to focus on the monitoring logic. - How I did it We borrowed the event listener of Supervisord to monitor critical processes in containers. The event listener will take the following steps if it is notified that one of the critical processes exited unexpectedly: The event listener will first check whether the auto-restart mechanism is enabled for this container. If the auto-restart mechanism is enabled, the event listener will kill the Supervisord process, which should cause the container to exit and subsequently get restarted. If the auto-restart mechanism is not enabled for this container, the event listener will enter a loop which will first sleep 1 minute and then check whether the process is running. If yes, the event listener exits. If no, an alerting message will be written into syslog. - How to verify it First, we need to check whether the auto-restart mechanism of a container is enabled by running the command show feature status. If enabled, one critical process should be selected and killed manually, then we need to check whether the container is restarted. 
Second, we can disable the auto-restart mechanism if it was enabled at step 1 by running the command sudo config feature autorestart disabled. Then one critical process should be selected and killed. After that, we will see the alerting message which will appear in the syslog every 1 minute. - Which release branch to backport (provide reason below if selected) 201811 201911 [x ] 202006 --- dockers/docker-database/supervisord.conf.j2 | 2 +- .../docker-dhcp-relay.supervisord.conf.j2 | 2 +- .../frr/supervisord/supervisord.conf.j2 | 2 +- dockers/docker-fpm-gobgp/supervisord.conf | 2 +- dockers/docker-fpm-quagga/supervisord.conf | 2 +- dockers/docker-lldp/supervisord.conf.j2 | 2 +- dockers/docker-nat/supervisord.conf | 2 +- dockers/docker-orchagent/supervisord.conf | 2 +- .../docker-pmon.supervisord.conf.j2 | 2 +- ...cker-router-advertiser.supervisord.conf.j2 | 2 +- dockers/docker-sflow/supervisord.conf | 2 +- dockers/docker-snmp/supervisord.conf | 2 +- dockers/docker-sonic-restapi/supervisord.conf | 2 +- .../docker-sonic-telemetry/supervisord.conf | 2 +- dockers/docker-teamd/supervisord.conf | 2 +- files/scripts/supervisor-proc-exit-listener | 146 ++++++++++++------ .../docker-syncd-bfn/supervisord.conf | 2 +- .../docker-syncd-brcm/supervisord.conf | 2 +- .../cavium/docker-syncd-cavm/supervisord.conf | 2 +- .../docker-syncd-centec/supervisord.conf | 2 +- .../docker-syncd-centec/supervisord.conf | 2 +- .../docker-syncd-mrvl/supervisord.conf | 2 +- .../docker-syncd-mrvl/supervisord.conf | 2 +- .../docker-syncd-mrvl/supervisord.conf | 2 +- .../docker-syncd-mlnx/supervisord.conf | 2 +- .../docker-syncd-nephos/supervisord.conf | 2 +- .../vs/docker-gbsyncd-vs/supervisord.conf | 2 +- platform/vs/docker-syncd-vs/supervisord.conf | 2 +- .../py2/docker-dhcp-relay.supervisord.conf | 2 +- .../py3/docker-dhcp-relay.supervisord.conf | 2 +- 30 files changed, 130 insertions(+), 74 deletions(-) diff --git a/dockers/docker-database/supervisord.conf.j2 
b/dockers/docker-database/supervisord.conf.j2 index 65a172b3743a..616475fb07ce 100644 --- a/dockers/docker-database/supervisord.conf.j2 +++ b/dockers/docker-database/supervisord.conf.j2 @@ -5,7 +5,7 @@ nodaemon=true [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name database -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 index 20f631ad47c1..19a6cc294f7f 100644 --- a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 +++ b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 b/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 index a6327936954f..dd43e0cc4ec7 100644 --- a/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 +++ b/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name bgp -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-fpm-gobgp/supervisord.conf b/dockers/docker-fpm-gobgp/supervisord.conf index b814dc024fa3..e7e3ee9f301a 100644 --- a/dockers/docker-fpm-gobgp/supervisord.conf +++ b/dockers/docker-fpm-gobgp/supervisord.conf @@ -5,7 +5,7 @@ nodaemon=true [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name bgp 
-events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-fpm-quagga/supervisord.conf b/dockers/docker-fpm-quagga/supervisord.conf index 7397a7428a08..470dea18a16d 100644 --- a/dockers/docker-fpm-quagga/supervisord.conf +++ b/dockers/docker-fpm-quagga/supervisord.conf @@ -5,7 +5,7 @@ nodaemon=true [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name bgp -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-lldp/supervisord.conf.j2 b/dockers/docker-lldp/supervisord.conf.j2 index 4692f7bf2a18..3a84caee3040 100644 --- a/dockers/docker-lldp/supervisord.conf.j2 +++ b/dockers/docker-lldp/supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name lldp -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-nat/supervisord.conf b/dockers/docker-nat/supervisord.conf index 8555f2a48ae6..f03b0b3772b8 100644 --- a/dockers/docker-nat/supervisord.conf +++ b/dockers/docker-nat/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name nat -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index 37ddade2edcf..538f251c2621 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=100 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name swss 
-events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 index 3de20178b224..c205d3cc25dc 100644 --- a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 +++ b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=100 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name pmon -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 index ae487922859c..5cbfd60322e1 100644 --- a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 +++ b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-script] command=/usr/bin/supervisor-proc-exit-listener --container-name radv -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-sflow/supervisord.conf b/dockers/docker-sflow/supervisord.conf index 8d1bdc5059cb..3ff5ff564544 100644 --- a/dockers/docker-sflow/supervisord.conf +++ b/dockers/docker-sflow/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name sflow -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-snmp/supervisord.conf b/dockers/docker-snmp/supervisord.conf index d1e6d09a8213..414445fdd6d6 100644 --- 
a/dockers/docker-snmp/supervisord.conf +++ b/dockers/docker-snmp/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name snmp -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-sonic-restapi/supervisord.conf b/dockers/docker-sonic-restapi/supervisord.conf index 74bbc9241576..44508ce88138 100644 --- a/dockers/docker-sonic-restapi/supervisord.conf +++ b/dockers/docker-sonic-restapi/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name restapi -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=false diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index df1e6fa5a354..fa8c86f597c7 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name telemetry -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=false diff --git a/dockers/docker-teamd/supervisord.conf b/dockers/docker-teamd/supervisord.conf index 78549a7684f2..04432a31239a 100644 --- a/dockers/docker-teamd/supervisord.conf +++ b/dockers/docker-teamd/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name teamd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 
143e539a1689..06e402bd4f6a 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -2,11 +2,14 @@ import getopt import os +import select import signal import sys import syslog +import time import swsssdk + from supervisor import childutils # Each line of this file should specify either one critical process or one @@ -20,10 +23,18 @@ CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes' # The FEATURE table in config db contains auto-restart field FEATURE_TABLE_NAME = 'FEATURE' -# Read the critical processes/group names from CRITICAL_PROCESSES_FILE +# Value of parameter 'timeout' in select(...) method +SELECT_TIMEOUT_SECS = 1.0 + +# Alerting message will be written into syslog in the following interval +ALERTING_INTERVAL_SECS = 60 def get_critical_group_and_process_list(): + """ + @summary: Read the critical processes/group names from CRITICAL_PROCESSES_FILE. + @return: Two lists which contain critical processes and group names respectively. + """ critical_group_list = [] critical_process_list = [] @@ -49,6 +60,47 @@ def get_critical_group_and_process_list(): return critical_group_list, critical_process_list +def generate_alerting_message(process_name): + """ + @summary: If a critical process was not running, this function will determine it resides in host + or in a specific namespace. Then an alerting message will be written into syslog. + """ + namespace_prefix = os.environ.get("NAMESPACE_PREFIX") + namespace_id = os.environ.get("NAMESPACE_ID") + + if not namespace_prefix or not namespace_id: + namespace = "host" + else: + namespace = namespace_prefix + namespace_id + + syslog.syslog(syslog.LOG_ERR, "Process '{}' is not running in namespace '{}'.".format(process_name, namespace)) + + +def get_autorestart_state(container_name): + """ + @summary: Read the status of auto-restart feature from Config_DB. + @return: Return the status of auto-restart feature. 
+ """ + config_db = swsssdk.ConfigDBConnector() + config_db.connect() + features_table = config_db.get_table(FEATURE_TABLE_NAME) + if not features_table: + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features table from Config DB. Exiting...") + sys.exit(2) + + if container_name not in features_table: + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve feature '{}'. Exiting...".format(container_name)) + sys.exit(3) + + is_auto_restart = features_table[container_name].get('auto_restart') + if not is_auto_restart: + syslog.syslog( + syslog.LOG_ERR, "Unable to determine auto-restart feature status for '{}'. Exiting...".format(container_name)) + sys.exit(4) + + return is_auto_restart + + def main(argv): container_name = None opts, args = getopt.getopt(argv, "c:", ["container-name="]) @@ -62,51 +114,55 @@ def main(argv): critical_group_list, critical_process_list = get_critical_group_and_process_list() + process_under_alerting = {} + # Transition from ACKNOWLEDGED to READY + childutils.listener.ready() + while True: - # Transition from ACKNOWLEDGED to READY - childutils.listener.ready() - - line = sys.stdin.readline() - headers = childutils.get_headers(line) - payload = sys.stdin.read(int(headers['len'])) - - # Transition from READY to ACKNOWLEDGED - childutils.listener.ok() - - # We only care about PROCESS_STATE_EXITED events - if headers['eventname'] == 'PROCESS_STATE_EXITED': - payload_headers, payload_data = childutils.eventdata(payload + '\n') - - expected = int(payload_headers['expected']) - processname = payload_headers['processname'] - groupname = payload_headers['groupname'] - - # Read the status of auto-restart feature from Config_DB. - config_db = swsssdk.ConfigDBConnector() - config_db.connect() - features_table = config_db.get_table(FEATURE_TABLE_NAME) - if not features_table: - syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features table from Config DB. 
Exiting...") - sys.exit(2) - - if container_name not in features_table: - syslog.syslog(syslog.LOG_ERR, "Unable to retrieve feature '{}'. Exiting...".format(container_name)) - sys.exit(3) - - restart_feature = features_table[container_name].get('auto_restart') - if not restart_feature: - syslog.syslog( - syslog.LOG_ERR, "Unable to determine auto-restart feature status for '{}'. Exiting...".format(container_name)) - sys.exit(4) - - # If auto-restart feature is not disabled and at the same time - # a critical process exited unexpectedly, terminate supervisor - if (restart_feature != 'disabled' and expected == 0 and - (processname in critical_process_list or groupname in critical_group_list)): - MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." - msg = MSG_FORMAT_STR.format(payload_headers['processname']) - syslog.syslog(syslog.LOG_INFO, msg) - os.kill(os.getppid(), signal.SIGTERM) + file_descriptor_list = select.select([sys.stdin], [], [], SELECT_TIMEOUT_SECS)[0] + if len(file_descriptor_list) > 0: + line = file_descriptor_list[0].readline() + headers = childutils.get_headers(line) + payload = sys.stdin.read(int(headers['len'])) + + # Handle the PROCESS_STATE_EXITED event + if headers['eventname'] == 'PROCESS_STATE_EXITED': + payload_headers, payload_data = childutils.eventdata(payload + '\n') + + expected = int(payload_headers['expected']) + process_name = payload_headers['processname'] + group_name = payload_headers['groupname'] + + if (process_name in critical_process_list or group_name in critical_group_list) and expected == 0: + is_auto_restart = get_autorestart_state(container_name) + if is_auto_restart != "disabled": + MSG_FORMAT_STR = "Process '{}' exited unexpectedly. 
Terminating supervisor '{}'" + msg = MSG_FORMAT_STR.format(payload_headers['processname'], container_name) + syslog.syslog(syslog.LOG_INFO, msg) + os.kill(os.getppid(), signal.SIGTERM) + else: + process_under_alerting[process_name] = time.time() + + # Handle the PROCESS_STATE_RUNNING event + elif headers['eventname'] == 'PROCESS_STATE_RUNNING': + payload_headers, payload_data = childutils.eventdata(payload + '\n') + process_name = payload_headers['processname'] + + if process_name in process_under_alerting: + process_under_alerting.pop(process_name) + + # Transition from BUSY to ACKNOWLEDGED + childutils.listener.ok() + + # Transition from ACKNOWLEDGED to READY + childutils.listener.ready() + + # Check whether we need write alerting messages into syslog + for process in process_under_alerting.keys(): + epoch_time = time.time() + if epoch_time - process_under_alerting[process] >= ALERTING_INTERVAL_SECS: + process_under_alerting[process] = epoch_time + generate_alerting_message(process) if __name__ == "__main__": diff --git a/platform/barefoot/docker-syncd-bfn/supervisord.conf b/platform/barefoot/docker-syncd-bfn/supervisord.conf index 39ea308277d7..c83484e5e93b 100644 --- a/platform/barefoot/docker-syncd-bfn/supervisord.conf +++ b/platform/barefoot/docker-syncd-bfn/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/broadcom/docker-syncd-brcm/supervisord.conf b/platform/broadcom/docker-syncd-brcm/supervisord.conf index a8e594c47aeb..5e801106972f 100644 --- a/platform/broadcom/docker-syncd-brcm/supervisord.conf +++ b/platform/broadcom/docker-syncd-brcm/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd 
-events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/cavium/docker-syncd-cavm/supervisord.conf b/platform/cavium/docker-syncd-cavm/supervisord.conf index 0c6285d46ae0..91b94a258033 100644 --- a/platform/cavium/docker-syncd-cavm/supervisord.conf +++ b/platform/cavium/docker-syncd-cavm/supervisord.conf @@ -5,7 +5,7 @@ nodaemon=true [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/centec-arm64/docker-syncd-centec/supervisord.conf b/platform/centec-arm64/docker-syncd-centec/supervisord.conf index 2cf6814ddaa9..10f406129d9c 100755 --- a/platform/centec-arm64/docker-syncd-centec/supervisord.conf +++ b/platform/centec-arm64/docker-syncd-centec/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/centec/docker-syncd-centec/supervisord.conf b/platform/centec/docker-syncd-centec/supervisord.conf index 831b7256a43b..6df1893a0be0 100644 --- a/platform/centec/docker-syncd-centec/supervisord.conf +++ b/platform/centec/docker-syncd-centec/supervisord.conf @@ -13,7 +13,7 @@ events=PROCESS_STATE [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf index 2cf6814ddaa9..10f406129d9c 100644 --- 
a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf index c099bbccbf0f..e633b4fe115c 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/marvell/docker-syncd-mrvl/supervisord.conf b/platform/marvell/docker-syncd-mrvl/supervisord.conf index 85442933cf8e..94be9dd268f6 100644 --- a/platform/marvell/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell/docker-syncd-mrvl/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/mellanox/docker-syncd-mlnx/supervisord.conf b/platform/mellanox/docker-syncd-mlnx/supervisord.conf index 9311a255b0c8..8491d762bf51 100644 --- a/platform/mellanox/docker-syncd-mlnx/supervisord.conf +++ b/platform/mellanox/docker-syncd-mlnx/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED 
+events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/nephos/docker-syncd-nephos/supervisord.conf b/platform/nephos/docker-syncd-nephos/supervisord.conf index a05bf7bfec73..955021ad2d51 100644 --- a/platform/nephos/docker-syncd-nephos/supervisord.conf +++ b/platform/nephos/docker-syncd-nephos/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/vs/docker-gbsyncd-vs/supervisord.conf b/platform/vs/docker-gbsyncd-vs/supervisord.conf index 3583ef6b5a8f..52267c8fa58f 100644 --- a/platform/vs/docker-gbsyncd-vs/supervisord.conf +++ b/platform/vs/docker-gbsyncd-vs/supervisord.conf @@ -13,7 +13,7 @@ events=PROCESS_STATE [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name gbsyncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/vs/docker-syncd-vs/supervisord.conf b/platform/vs/docker-syncd-vs/supervisord.conf index 7416f23a45e5..6a6d946632e0 100644 --- a/platform/vs/docker-syncd-vs/supervisord.conf +++ b/platform/vs/docker-syncd-vs/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/src/sonic-config-engine/tests/sample_output/py2/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/py2/docker-dhcp-relay.supervisord.conf index a213a2517866..dad758947f22 100644 --- a/src/sonic-config-engine/tests/sample_output/py2/docker-dhcp-relay.supervisord.conf 
+++ b/src/sonic-config-engine/tests/sample_output/py2/docker-dhcp-relay.supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/src/sonic-config-engine/tests/sample_output/py3/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/py3/docker-dhcp-relay.supervisord.conf index d5338b18cfa1..e2135d05296b 100644 --- a/src/sonic-config-engine/tests/sample_output/py3/docker-dhcp-relay.supervisord.conf +++ b/src/sonic-config-engine/tests/sample_output/py3/docker-dhcp-relay.supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected