Skip to content

Commit

Permalink
[Service] Enable/disable container auto-restart based on configuratio…
Browse files Browse the repository at this point in the history
…n. (#4073)
  • Loading branch information
yozhao101 committed Feb 7, 2020
1 parent a6efbae commit 91e5fb5
Show file tree
Hide file tree
Showing 20 changed files with 72 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion dockers/docker-lldp-sv2/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name lldp
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion dockers/docker-orchagent/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name swss
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name pmon
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-script]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name radv
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion dockers/docker-sflow/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name sflow
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion dockers/docker-snmp-sv2/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name snmp
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion dockers/docker-sonic-telemetry/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name telemetry
events=PROCESS_STATE_EXITED
autostart=true
autorestart=false
Expand Down
2 changes: 1 addition & 1 deletion dockers/docker-teamd/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name teamd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
42 changes: 38 additions & 4 deletions files/scripts/supervisor-proc-exit-listener
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
#!/usr/bin/env python

import getopt
import os
import signal
import sys
import syslog

import swsssdk

from supervisor import childutils

# Contents of file should be the names of critical processes (as defined in
# supervisor.conf file), one per line
CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes'

def main():
# This table in the database contains the features of each container; each
# feature in a row is configured with a state or a number.
CONTAINER_FEATURE_TABLE_NAME = 'CONTAINER_FEATURE'

def main(argv):
container_name = None
opts, args = getopt.getopt(argv, "c:", ["container-name="])
for opt, arg in opts:
if opt in ("-c", "--container-name"):
container_name = arg

if not container_name:
syslog.syslog(syslog.LOG_ERR, "Container name not specified. Exiting...")
sys.exit(1)

# Read the list of critical processes from a file
with open(CRITICAL_PROCESSES_FILE, 'r') as f:
critical_processes = [line.rstrip('\n') for line in f]
Expand All @@ -35,12 +52,29 @@ def main():
processname = payload_headers['processname']
groupname = payload_headers['groupname']

# If a critical process exited unexpectedly, terminate supervisor
if expected == 0 and processname in critical_processes or groupname in critical_processes:
config_db = swsssdk.ConfigDBConnector()
config_db.connect()
container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
if not container_features_table:
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...")
sys.exit(2)

if not container_features_table.has_key(container_name):
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name))
sys.exit(3)

restart_feature = container_features_table[container_name].get('auto_restart')
if not restart_feature:
syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name))
sys.exit(4)

# If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor
if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes):
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
syslog.syslog(syslog.LOG_INFO, msg)
os.kill(os.getppid(), signal.SIGTERM)


if __name__ == "__main__":
main()
main(sys.argv[1:])
6 changes: 6 additions & 0 deletions platform/barefoot/docker-syncd-bfn/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

[program:start.sh]
command=/usr/bin/start.sh
priority=1
Expand Down
2 changes: 1 addition & 1 deletion platform/broadcom/docker-syncd-brcm/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion platform/cavium/docker-syncd-cavm/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion platform/centec/docker-syncd-centec/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
6 changes: 6 additions & 0 deletions platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

[program:start.sh]
command=/usr/bin/start.sh
priority=1
Expand Down
6 changes: 6 additions & 0 deletions platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

[program:start.sh]
command=/usr/bin/start.sh
priority=1
Expand Down
2 changes: 1 addition & 1 deletion platform/marvell/docker-syncd-mrvl/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion platform/mellanox/docker-syncd-mlnx/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
2 changes: 1 addition & 1 deletion platform/nephos/docker-syncd-nephos/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Expand Down

0 comments on commit 91e5fb5

Please sign in to comment.