Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[container_checker] Use Feature table to get running containers #7474

Merged
merged 9 commits into from
May 7, 2021
112 changes: 76 additions & 36 deletions files/image_config/monit/container_checker
Original file line number Diff line number Diff line change
Expand Up @@ -16,50 +16,31 @@ check program container_checker with path "/usr/bin/container_checker"
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
"""

import subprocess
import docker
import sys

import swsssdk
from sonic_py_common import multi_asic


def get_command_result(command):
"""
@summary: This function will execute the command and return the resulting output.
@return: A string which contains the output of command.
"""
command_stdout = ""

try:
proc_instance = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
shell=True, universal_newlines=True)
command_stdout, command_stderr = proc_instance.communicate()
if proc_instance.returncode != 0:
print("Failed to execute the command '{}'. Return code: '{}'".format(
command, proc_instance.returncode))
sys.exit(1)
except (OSError, ValueError) as err:
print("Failed to execute the command '{}'. Error: '{}'".format(command, err))
sys.exit(2)

return command_stdout.rstrip().split("\n")
from swsscommon import swsscommon


def get_expected_running_containers():
"""
@summary: This function will get the expected running containers by following the rule:
@summary: This function will get the expected running & always-enabled containers by following the rule:
The 'state' field of container in 'FEATURE' table should not be 'disabled' or 'always_disabled'.
If the device has Multi-ASIC, this function will get container list by determining the
value of field 'has_global_scope', the number of ASICs and the value of field
'has_per_asic_scope'.
If the device has single ASIC, the container name was put into the list.
@return: A set which contains the expected running containers.
@return: A set which contains the expected running containers and a set that has
containers marked as "always_enabled".
"""
config_db = swsssdk.ConfigDBConnector()
config_db.connect()
feature_table = config_db.get_table("FEATURE")

expected_running_containers = set()
always_running_containers = set()

for container_name in feature_table.keys():
if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]:
Expand All @@ -70,37 +51,95 @@ def get_expected_running_containers():
num_asics = multi_asic.get_num_asics()
for asic_id in range(num_asics):
expected_running_containers.add(container_name + str(asic_id))
elif feature_table[container_name]["state"] == 'always_enabled':
always_running_containers.add(container_name)
else:
expected_running_containers.add(container_name)

return expected_running_containers
return expected_running_containers, always_running_containers


def get_current_running_containers():
def get_current_running_from_DB(always_running_containers):
"""
@summary: This function will get the current running container list by analyzing the
output of command `docker ps`.
@return: A set which contains the current running contianers.
@summary: This function will get the current running container list
from FEATURE table @ STATE_DB, if this table is available.
@return: a tuple
First: Return value indicating if info can be obtained from
DB or not.
Second: A set which contains the current running containers,
if this info is available in DB.
"""
yozhao101 marked this conversation as resolved.
Show resolved Hide resolved
running_containers = set()

command = "docker ps"
command_stdout = get_command_result(command)
for line in command_stdout[1:]:
running_containers.add(line.split()[-1].strip())
state_db = swsscommon.DBConnector("STATE_DB", 0)
tbl = swsscommon.Table(state_db, "FEATURE")
if not tbl.getKeys():
return False, None

for name in tbl.getKeys():
yozhao101 marked this conversation as resolved.
Show resolved Hide resolved
data = dict(tbl.get(name)[1])
if data.get('container_id'):
running_containers.add(name)

DOCKER_CLIENT = docker.DockerClient(base_url='unix://var/run/docker.sock')
RUNNING = 'running'
for name in always_running_containers:
try:
container = DOCKER_CLIENT.containers.get(name)
container_state = container.attrs.get('State', {})
if container_state.get('Status', "") == RUNNING:
running_containers.add(name)
except (docker.errors.NotFound, docker.errors.APIError) as err:
print("Failed to get container '{}'. Error: '{}'".format(name, err))
pass

return True, running_containers


def get_current_running_from_dockers():
"""
@summary: This function will get all running containers from
the list of docker containers in running state.
@return: A set which contains containers that are
in running state.
renukamanavalan marked this conversation as resolved.
Show resolved Hide resolved
"""
DOCKER_CLIENT = docker.DockerClient(base_url='unix://var/run/docker.sock')
running_containers = set()
ctrs = DOCKER_CLIENT.containers
try:
lst = ctrs.list(filters={"status": "running"})
for ctr in lst:
running_containers.add(ctr.name)
except docker.errors.APIError as err:
print("Failed to retrieve the running container list. Error: '{}'".format(err))
renukamanavalan marked this conversation as resolved.
Show resolved Hide resolved
pass
yozhao101 marked this conversation as resolved.
Show resolved Hide resolved
return running_containers


def get_current_running_containers(always_running_containers):
"""
@summary: This function will get the list of currently running containers.
If available in STATE-DB, get from DB else from list of dockers.

@return: A set of currently running containers.
"""

ret, current_running_containers = get_current_running_from_DB(always_running_containers)
if not ret:
current_running_containers = get_current_running_from_dockers()
return current_running_containers


def main():
"""
@summary: This function will compare the difference between the current running containers
and the containers which were expected to run. If containers which were exepcted
to run were not running, then an alerting message will be written into syslog.
"""
expected_running_containers = get_expected_running_containers()
current_running_containers = get_current_running_containers()
expected_running_containers, always_running_containers = get_expected_running_containers()
current_running_containers = get_current_running_containers(always_running_containers)

expected_running_containers |= always_running_containers
not_running_containers = expected_running_containers.difference(current_running_containers)
if not_running_containers:
print("Expected containers not running: " + ", ".join(not_running_containers))
Expand All @@ -114,3 +153,4 @@ def main():

if __name__ == "__main__":
main()
sys.exit(0)