use oc exec on fluentd pod if host is master

openshift · Jul 6, 2017 · 9bbc959 · 9bbc959
1 parent 7c0b63f
commit 9bbc959
Show file tree

Hide file tree

Showing 2 changed files with 277 additions and 9 deletions.
diff --git a/roles/openshift_health_checker/openshift_checks/fluentd_config.py b/roles/openshift_health_checker/openshift_checks/fluentd_config.py
@@ -2,7 +2,10 @@
 Module for performing checks on an Fluentd logging deployment configuration
 """
 
-from openshift_checks import OpenShiftCheck, get_var
+import os
+import json
+
+from openshift_checks import OpenShiftCheck, get_var, OpenShiftCheckException
 
 
 class FluentdConfig(OpenShiftCheck):
@@ -39,6 +42,13 @@ def check_logging_config(self, task_vars):
         Otherwise, the value of the Docker logging driver should be "json-file".
         Returns an error string if the above condition is not met, or None otherwise."""
         use_journald = get_var(task_vars, "openshift_logging_fluentd_use_journal", default=True)
+
+        # if check is running on a master, use `ocutil` to read the "USE_JOURNAL"
+        # environment variable inside one of the fluentd pods
+        group_names = get_var(task_vars, "group_names")
+        if "masters" in group_names:
+            use_journald = self.check_fluentd_env_var(task_vars)
+
         docker_info = self.execute_module("docker_info", {}, task_vars)
         if not docker_info.get("info", False):
             return "No information was returned by the Docker client. Unable to verify the logging driver in use."
@@ -81,3 +91,100 @@ def check_logging_config(self, task_vars):
                       'for more information.').format(driver=recommended_logging_driver)
 
         return error
+
+    def check_fluentd_env_var(self, task_vars):
+        """Read the 'USE_JOURNAL' environment variable from a running fluentd pod.
+
+        Runs `oc exec <pod> /bin/printenv USE_JOURNAL` against the first
+        running fluentd pod.
+
+        Returns False when the reported value is "false" (whether it arrives
+        as regular command output or as part of a failed `oc` invocation);
+        otherwise returns the command output unchanged.
+
+        Raises OpenShiftCheckException when no running fluentd pod is
+        available, or when the command fails for any other reason.
+        """
+        running_pods = self.running_fluentd_pods(task_vars)
+        pod_name = running_pods[0]["metadata"]["name"]
+        cmd_string = "exec {} /bin/printenv USE_JOURNAL".format(pod_name)
+
+        try:
+            use_journald = self.exec_oc(
+                self.execute_module,
+                self.logging_namespace,
+                cmd_string,
+                [],
+                task_vars,
+            )
+        except OpenShiftCheckException as error:
+            # a failed invocation that mentions "false" is interpreted as the
+            # variable being set to false; anything else is a genuine error
+            if "false" not in str(error):
+                msg = "Invalid value received from command {}: {}".format(cmd_string, str(error))
+                raise OpenShiftCheckException(msg)
+
+            return False
+
+        # a successful "false" output is a non-empty (truthy) string; convert
+        # it so that callers can safely use the value in a boolean context
+        if str(use_journald).strip().lower() == "false":
+            return False
+
+        return use_journald
+
+    def running_fluentd_pods(self, task_vars):
+        """Return the list of fluentd pods whose status phase is "Running".
+
+        Raises OpenShiftCheckException if the fluentd pods cannot be
+        retrieved, or if none of them is in the "Running" phase.
+        """
+        fluentd_pods, error = self.get_pods_for_component(
+            self.execute_module,
+            self.logging_namespace,
+            "fluentd",
+            task_vars,
+        )
+        if error:
+            msg = 'Unable to retrieve any pods for the "fluentd" logging component: {}'.format(str(error))
+            raise OpenShiftCheckException(msg)
+
+        # pods lacking a status or phase are simply considered "not running"
+        running_fluentd_pods = [
+            pod for pod in fluentd_pods
+            if pod.get('status', {}).get('phase') == 'Running'
+        ]
+        if not running_fluentd_pods:
+            msg = ('No Fluentd pods were found to be in the "Running" state. '
+                   'At least one Fluentd pod is required in order to perform this check.')
+
+            raise OpenShiftCheckException(msg)
+
+        return running_fluentd_pods
+
+    def get_pods_for_component(self, execute_module, namespace, logging_component, task_vars):
+        """Get all pods for a given logging component.
+
+        Runs `oc get pods -l component=<logging_component> -o json` in the
+        given namespace and parses the JSON output.
+
+        Returns a tuple (pods, error): the list of pod definitions and None on
+        success, or None and an error string when no pods could be found.
+
+        Raises OpenShiftCheckException if the `oc` command fails with an error
+        that does not mention "false".
+        """
+        try:
+            pod_output = self.exec_oc(
+                execute_module,
+                namespace,
+                "get pods -l component={} -o json".format(logging_component),
+                [],
+                task_vars
+            )
+        except OpenShiftCheckException as err:
+            if "false" not in str(err):
+                raise
+
+            # the failure is tolerated, but there is no output to parse;
+            # fall through to the "no pods" result below
+            pod_output = ""
+
+        try:
+            pods = json.loads(pod_output)
+        except (TypeError, ValueError):
+            pods = None
+
+        # successful run but non-parsing data generally means there were no pods in the namespace
+        items = pods.get('items') if isinstance(pods, dict) else None
+        if not items:
+            return None, 'There are no pods in the {} namespace. Is logging deployed?'.format(namespace)
+
+        return items, None
+
+    @staticmethod
+    def exec_oc(execute_module=None, namespace="logging", cmd_str="", extra_args=None, task_vars=None):
+        """Execute an 'oc' command on the remote host through the `ocutil` module.
+
+        The command runs in the given namespace using the master's
+        admin.kubeconfig found under the configured `config_base`.
+
+        Returns the "result" value reported by the module (an empty string if
+        the module did not report one).
+
+        Raises OpenShiftCheckException if the module reports a failure.
+        """
+        config_base = get_var(task_vars, "openshift", "common", "config_base")
+        args = {
+            "namespace": namespace,
+            "config_file": os.path.join(config_base, "master", "admin.kubeconfig"),
+            "cmd": cmd_str,
+            "extra_args": list(extra_args) if extra_args else [],
+        }
+
+        result = execute_module("ocutil", args, task_vars)
+        if not result.get("failed"):
+            return result.get("result", "")
+
+        # a failed result is not guaranteed to carry a "result" key
+        error = result.get("result", "")
+        if error == '[Errno 2] No such file or directory':
+            msg = (
+                "This host is supposed to be a master but does not have the `oc` command where expected.\n"
+                "Has an installation been run on this host yet?"
+            )
+        else:
+            msg = (
+                'Unexpected error using `oc` to validate the logging stack components.\n'
+                'Error executing `oc {cmd}`:\n'
+                '{error}'
+            ).format(cmd=args['cmd'], error=error)
+
+        raise OpenShiftCheckException(msg)
diff --git a/roles/openshift_health_checker/test/fluentd_config_test.py b/roles/openshift_health_checker/test/fluentd_config_test.py
@@ -1,6 +1,6 @@
 import pytest
 
-from openshift_checks.fluentd_config import FluentdConfig
+from openshift_checks.fluentd_config import FluentdConfig, OpenShiftCheckException
 
 
 def canned_fluentd_config(exec_oc=None):
@@ -11,37 +11,63 @@ def canned_fluentd_config(exec_oc=None):
     return check
 
 
-@pytest.mark.parametrize('use_journald, logging_driver, failed, extra_words', [
+# Canned pod definition for a fluentd pod in the "Running" phase whose only
+# container is ready; used as the `oc get pods` result in the tests below.
+fluentd_pod = {
+    "metadata": {
+        "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
+        "name": "logging-fluentd-1",
+    },
+    "spec": {"host": "node1", "nodeName": "node1"},
+    "status": {
+        "phase": "Running",
+        "containerStatuses": [{"ready": True}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+    }
+}
+
+# Canned pod definition for a fluentd pod that is not running: its phase is
+# "Unknown" and one of its two containers is not ready.
+# NOTE(review): not referenced by the tests visible in this change -- confirm it is used elsewhere.
+not_running_fluentd_pod = {
+    "metadata": {
+        "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
+        "name": "logging-fluentd-2",
+    },
+    "status": {
+        "phase": "Unknown",
+        "containerStatuses": [{"ready": True}, {"ready": False}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+    }
+}
+
+
+@pytest.mark.parametrize('name, use_journald, logging_driver, failed, extra_words', [
     (
+        'test failure with use_journald=false, but docker config set to use "journald"',
         False,
         "journald",
         True,
         ['json log files', 'has been set to use "journald"'],
     ),
-    # # result from ocutil returns false (not using journald), but check succeeds
-    # # since docker is set to use json-file
     (
+        'test success with use_journald=false, and docker config set to use "json-file"',
         False,
         "json-file",
         False,
         [],
     ),
-    # # fluentd not set to check journald, docker not set to use default json-file
     (
+        'test failure with use_journald=false, but docker config set to use an "unsupported" driver',
         False,
         "unsupported",
         True,
         ["json log files", 'has been set to use "unsupported"'],
     ),
-    # fluentd set to aggregate from journald, but docker config set to use json-file
     (
+        'test failure with use_journald=true, but docker config set to use "json-file"',
         True,
         "json-file",
         True,
         ['logs from "journald"', 'has been set to use "json-file"'],
     ),
-])
-def test_check_logging_config(use_journald, logging_driver, failed, extra_words):
+], ids=lambda argvals: argvals[0])
+def test_check_logging_config_non_master(name, use_journald, logging_driver, failed, extra_words):
     def execute_module(module_name, args, task_vars):
         if module_name == "docker_info":
             return {
@@ -53,6 +79,7 @@ def execute_module(module_name, args, task_vars):
         return {}
 
     task_vars = dict(
+        group_names=["nodes", "etcd"],
         openshift_logging_fluentd_use_journal=use_journald,
         openshift=dict(
             common=dict(config_base=""),
@@ -72,3 +99,137 @@ def execute_module(module_name, args, task_vars):
         return
 
     assert error is None
+
+
+@pytest.mark.parametrize('name, pods, response, logging_driver, extra_words', [
+    (
+        'test failure with use_journald=false, but docker config set to use "journald"',
+        [fluentd_pod],
+        {
+            "failed": True,
+            "result": "false",
+        },
+        "journald",
+        ['json log files', 'has been set to use "journald"'],
+    ),
+    (
+        'test failure with use_journald=true, but docker config set to use "json-file"',
+        [fluentd_pod],
+        {
+            "failed": False,
+            "result": "true",
+        },
+        "json-file",
+        ['logs from "journald"', 'has been set to use "json-file"'],
+    ),
+    (
+        'test failure with use_journald=false, but docker set to use an "unsupported" driver',
+        [fluentd_pod],
+        {
+            "failed": True,
+            "result": "false",
+        },
+        "unsupported",
+        ["json log files", 'has been set to use "unsupported"'],
+    ),
+    # use_journald returns false (not using journald), but check succeeds
+    # since docker is set to use json-file
+    (
+        'test success with use_journald=false, and docker config set to use default driver "json-file"',
+        [fluentd_pod],
+        {
+            "failed": True,
+            "result": "false",
+        },
+        "json-file",
+        [],
+    ),
+    # pytest calls `ids` once per argument value (not per argument set): only
+    # string values contribute to the test id, the rest use pytest's default
+], ids=lambda argval: argval if isinstance(argval, str) else None)
+def test_check_logging_config_master(name, pods, response, logging_driver, extra_words):
+    """Check the logging config on a master, where USE_JOURNAL is read through `ocutil`.
+
+        name: human-readable description of the case
+        pods: pods returned by the stubbed get_pods_for_component
+        response: canned result of the `ocutil` module
+        logging_driver: docker logging driver reported by the `docker_info` module
+        extra_words: fragments expected in the returned error; empty when the check must succeed
+    """
+    def execute_module(module_name, args, task_vars):
+        if module_name == "docker_info":
+            return {
+                "info": {
+                    "LoggingDriver": logging_driver,
+                }
+            }
+
+        if module_name == "ocutil":
+            return response
+
+        return {}
+
+    task_vars = dict(
+        group_names=["masters"],
+        openshift=dict(
+            common=dict(config_base=""),
+        ),
+    )
+
+    def get_pods(execute_module, namespace, logging_component, task_vars):
+        return pods, None
+
+    check = canned_fluentd_config(None)
+    check.execute_module = execute_module
+    check.get_pods_for_component = get_pods
+    error = check.check_logging_config(task_vars)
+
+    # an empty list of expected words identifies the success case
+    if not extra_words:
+        assert error is None
+        return
+
+    # a failure case must not pass silently when no error is returned
+    assert error
+    for word in extra_words:
+        assert word in error
+
+
+@pytest.mark.parametrize('name, pods, response, logging_driver, extra_words', [
+    (
+        'test OpenShiftCheckException with use_journald=false, but docker config set to use default "json-file" driver',
+        [fluentd_pod],
+        {
+            "failed": True,
+            "result": "unexpected",
+        },
+        "json-file",
+        ['Unexpected error', 'using `oc` to validate'],
+    ),
+    # pytest calls `ids` once per argument value (not per argument set): only
+    # string values contribute to the test id, the rest use pytest's default
+], ids=lambda argval: argval if isinstance(argval, str) else None)
+def test_failed_check_logging_config_master(name, pods, response, logging_driver, extra_words):
+    """Check that an unexpected `ocutil` failure on a master raises OpenShiftCheckException.
+
+        name: human-readable description of the case
+        pods: pods returned by the stubbed get_pods_for_component
+        response: canned (failed) result of the `ocutil` module
+        logging_driver: docker logging driver reported by the `docker_info` module
+        extra_words: fragments expected in the exception message
+    """
+    def execute_module(module_name, args, task_vars):
+        if module_name == "docker_info":
+            return {
+                "info": {
+                    "LoggingDriver": logging_driver,
+                }
+            }
+
+        if module_name == "ocutil":
+            return response
+
+        return {}
+
+    task_vars = dict(
+        group_names=["masters"],
+        openshift=dict(
+            common=dict(config_base=""),
+        ),
+    )
+
+    def get_pods(execute_module, namespace, logging_component, task_vars):
+        return pods, None
+
+    check = canned_fluentd_config(None)
+    check.execute_module = execute_module
+    check.get_pods_for_component = get_pods
+
+    with pytest.raises(OpenShiftCheckException) as error:
+        check.check_logging_config(task_vars)
+
+    # `error` is pytest's ExceptionInfo wrapper; the raised exception (and
+    # therefore its message) is available through `.value`
+    message = str(error.value)
+    for word in extra_words:
+        assert word in message