diff --git a/dockers/docker-database/Dockerfile.j2 b/dockers/docker-database/Dockerfile.j2 index acb5e013fb84..8cd181614672 100644 --- a/dockers/docker-database/Dockerfile.j2 +++ b/dockers/docker-database/Dockerfile.j2 @@ -36,5 +36,7 @@ COPY ["supervisord.conf.j2", "/usr/share/sonic/templates/"] COPY ["docker-database-init.sh", "/usr/local/bin/"] COPY ["ping_pong_db_insts", "/usr/local/bin/"] COPY ["database_config.json", "/etc/default/sonic-db/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor"] ENTRYPOINT ["/usr/local/bin/docker-database-init.sh"] diff --git a/dockers/docker-database/critical_processes b/dockers/docker-database/critical_processes new file mode 100644 index 000000000000..7800f0fad3ff --- /dev/null +++ b/dockers/docker-database/critical_processes @@ -0,0 +1 @@ +redis diff --git a/dockers/docker-database/supervisord.conf.j2 b/dockers/docker-database/supervisord.conf.j2 index 110619f762be..442bec1438c8 100644 --- a/dockers/docker-database/supervisord.conf.j2 +++ b/dockers/docker-database/supervisord.conf.j2 @@ -3,6 +3,13 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name database +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + + [program:rsyslogd] command=/bin/bash -c "rm -f /var/run/rsyslogd.pid && /usr/sbin/rsyslogd -n" priority=1 diff --git a/files/build_templates/single_instance/database.service.j2 b/files/build_templates/single_instance/database.service.j2 index 472b9d328b7d..fd0063195e31 100644 --- a/files/build_templates/single_instance/database.service.j2 +++ b/files/build_templates/single_instance/database.service.j2 @@ -3,12 +3,16 @@ Description=Database container Requires=docker.service After=docker.service After=rc-local.service +StartLimitIntervalSec=1200 +StartLimitBurst=3 [Service] User=root ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always +RestartSec=30 [Install] WantedBy=multi-user.target diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index cf26d5383074..cf154b3a5c10 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -52,24 +52,28 @@ def main(argv): processname = payload_headers['processname'] groupname = payload_headers['groupname'] - config_db = swsssdk.ConfigDBConnector() - config_db.connect() - container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) - if not container_features_table: - syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...") - sys.exit(2) - - if not container_features_table.has_key(container_name): - syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name)) - sys.exit(3) - - restart_feature = container_features_table[container_name].get('auto_restart') - if not restart_feature: - syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) - sys.exit(4) - - # If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor - if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): + # Read the status of auto-restart feature from Config_DB. + if container_name != 'database': + config_db = swsssdk.ConfigDBConnector() + config_db.connect() + container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) + if not container_features_table: + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...") + sys.exit(2) + + if not container_features_table.has_key(container_name): + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name)) + sys.exit(3) + + restart_feature = container_features_table[container_name].get('auto_restart') + if not restart_feature: + syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) + sys.exit(4) + + # If container is database or auto-restart feature is enabled and at the same time + # a critical process exited unexpectedly, terminate supervisor + if ((container_name == 'database' or restart_feature == 'enabled') and expected == 0 and + (processname in critical_processes or groupname in critical_processes)): MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." msg = MSG_FORMAT_STR.format(payload_headers['processname']) syslog.syslog(syslog.LOG_INFO, msg) diff --git a/rules/docker-database.mk b/rules/docker-database.mk index 91fd06819a4b..7e372048afab 100644 --- a/rules/docker-database.mk +++ b/rules/docker-database.mk @@ -28,3 +28,4 @@ $(DOCKER_DATABASE)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_DATABASE)_BASE_IMAGE_FILES += redis-cli:/usr/bin/redis-cli $(DOCKER_DATABASE)_BASE_IMAGE_FILES += monit_database:/etc/monit/conf.d +$(DOCKER_DATABASE)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)