Skip to content

Commit

Permalink
DAOS-16217 test: Update run_local(). (#14748)
Browse files Browse the repository at this point in the history
Update the current run_local() command to return an object similar to
run_remote() to allow them to be used interchangeably.

Increase the verify_perms.py timeout.

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
  • Loading branch information
phender authored Jul 30, 2024
1 parent 18a7878 commit ed96c9d
Show file tree
Hide file tree
Showing 20 changed files with 488 additions and 415 deletions.
2 changes: 1 addition & 1 deletion src/tests/ftest/dfuse/pil4dfs_fio.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _get_bandwidth(self, fio_result, rw):
"""Returns FIO bandwidth of a given I/O pattern
Args:
fio_result (RemoteCommandResult): results of a FIO command.
fio_result (CommandResult): results of a FIO command.
rw (str): Type of I/O pattern.
Returns:
Expand Down
11 changes: 5 additions & 6 deletions src/tests/ftest/harness/core_files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2021-2023 Intel Corporation.
(C) Copyright 2021-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -9,7 +9,7 @@

from apricot import TestWithServers
from ClusterShell.NodeSet import NodeSet
from run_utils import RunException, run_local, run_remote
from run_utils import run_local, run_remote


class HarnessCoreFilesTest(TestWithServers):
Expand Down Expand Up @@ -40,11 +40,10 @@ def test_core_files(self):
"""
# create a core.gdb file
self.log.debug("Create a core.gdb.harness.advanced file in core_pattern dir.")
try:
results = run_local(self.log, "cat /proc/sys/kernel/core_pattern", check=True)
except RunException:
result = run_local(self.log, "cat /proc/sys/kernel/core_pattern")
if not result.passed:
self.fail("Unable to find local core file pattern")
core_path = os.path.split(results.stdout.splitlines()[-1])[0]
core_path = os.path.split(result.joined_stdout.splitlines()[-1])[0]
core_file = "{}/core.gdb.harness.advanced".format(core_path)

self.log.debug("Creating %s", core_file)
Expand Down
261 changes: 219 additions & 42 deletions src/tests/ftest/harness/unit.py

Large diffs are not rendered by default.

62 changes: 26 additions & 36 deletions src/tests/ftest/process_core_files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2022-2023 Intel Corporation.
(C) Copyright 2022-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -141,7 +141,8 @@ def process_core_files(self, directory, delete, test=None):
if os.path.splitext(core_name)[-1] == ".bz2":
# Decompress the file
command = f"lbzip2 -d -v '{os.path.join(core_dir, core_name)}'"
run_local(self.log, command)
if not run_local(self.log, command).passed:
raise CoreFileException(f"Error decompressing {core_name}")
core_name = os.path.splitext(core_name)[0]
exe_name = self._get_exe_name(os.path.join(core_dir, core_name))
self._create_stacktrace(core_dir, core_name, exe_name)
Expand Down Expand Up @@ -187,22 +188,23 @@ def _create_stacktrace(self, core_dir, core_name, exe_name):
stack_trace_file = os.path.join(core_dir, f"'{core_name}.stacktrace'")

self.log.debug("Generating a stacktrace from the %s core file from %s", core_full, host)
run_local(self.log, f"ls -l '{core_full}'")
if not run_local(self.log, f"ls -l '{core_full}'").passed:
raise RunException(f"Error listing {core_full}")

command = (
f"gdb -cd='{core_dir}' -ex 'set pagination off' -ex 'thread apply all bt full' -ex "
f"detach -ex quit '{exe_name}' '{core_name}'")
result = run_local(self.log, command, verbose=False)
if not result.passed:
raise RunException(f"Error creating {stack_trace_file}")

try:
output = run_local(self.log, command, check=False, verbose=False)
with open(stack_trace_file, "w", encoding="utf-8") as stack_trace:
stack_trace.writelines(output.stdout)
stack_trace.write(result.joined_stdout)

except IOError as error:
raise RunException(f"Error writing {stack_trace_file}") from error

except RunException as error:
raise RunException(f"Error creating {stack_trace_file}") from error

def _get_exe_name(self, core_file):
"""Get the executable name from the core file.
Expand All @@ -219,7 +221,7 @@ def _get_exe_name(self, core_file):
self.log.debug("Extracting the executable name from '%s'", core_file)
command = f"gdb -c '{core_file}' -ex 'info proc exe' -ex quit"
result = run_local(self.log, command, verbose=False)
last_line = result.stdout.splitlines()[-1]
last_line = result.joined_stdout.splitlines()[-1]
self.log.debug(" last line: %s", last_line)
cmd = last_line[7:]
self.log.debug(" last_line[7:-1]: %s", cmd)
Expand Down Expand Up @@ -277,7 +279,7 @@ def install_debuginfo_packages(self):
cmds.append(["sudo", "rm", "-f", path])

if self.USE_DEBUGINFO_INSTALL:
dnf_args = ["--exclude", "ompi-debuginfo"]
dnf_args = ["--nobest", "--exclude", "ompi-debuginfo"]
if os.getenv("TEST_RPMS", 'false') == 'true':
if "suse" in self.distro_info.name.lower():
dnf_args.extend(["libpmemobj1", "python3", "openmpi3"])
Expand All @@ -291,9 +293,8 @@ def install_debuginfo_packages(self):
else:
raise RunException(f"Unsupported distro: {self.distro_info}")
cmds.append(["sudo", "dnf", "-y", "install"] + dnf_args)
output = run_local(
self.log, " ".join(["rpm", "-q", "--qf", "'%{evr}'", "daos"]), check=False)
rpm_version = output.stdout
result = run_local(self.log, " ".join(["rpm", "-q", "--qf", "'%{evr}'", "daos"]))
rpm_version = result.joined_stdout
cmds.append(
["sudo", "dnf", "debuginfo-install", "-y"] + dnf_args
+ ["daos-" + rpm_version, "daos-*-" + rpm_version])
Expand Down Expand Up @@ -324,9 +325,7 @@ def install_debuginfo_packages(self):

retry = False
for cmd in cmds:
try:
run_local(self.log, " ".join(cmd), check=True)
except RunException:
if not run_local(self.log, " ".join(cmd)).passed:
# got an error, so abort this list of commands and re-run
# it with a dnf clean, makecache first
retry = True
Expand All @@ -339,9 +338,7 @@ def install_debuginfo_packages(self):
cmds.insert(0, cmd_prefix + ["clean", "all"])
cmds.insert(1, cmd_prefix + ["makecache"])
for cmd in cmds:
try:
run_local(self.log, " ".join(cmd))
except RunException:
if not run_local(self.log, " ".join(cmd)).passed:
break

def is_el(self):
Expand Down Expand Up @@ -380,14 +377,11 @@ def resolve_debuginfo(self, pkg):
"""
package_info = None
try:
# Eventually use python libraries for this rather than exec()ing out to rpm
output = run_local(
self.log,
" ".join(
["rpm", "-q", "--qf", "'%{name} %{version} %{release} %{epoch}'", pkg]),
check=False)
name, version, release, epoch = output.stdout.split()
# Eventually use python libraries for this rather than exec()ing out to rpm
command = f"rpm -q --qf '%{{name}} %{{version}} %{{release}} %{{epoch}}' {pkg}"
result = run_local(self.log, command)
if result.passed:
name, version, release, epoch = result.joined_stdout.split()

debuginfo_map = {"glibc": "glibc-debuginfo-common"}
try:
Expand All @@ -400,7 +394,7 @@ def resolve_debuginfo(self, pkg):
"release": release,
"epoch": epoch
}
except ValueError:
else:
self.log.debug("Package %s not installed, skipping debuginfo", pkg)

return package_info
Expand All @@ -413,20 +407,16 @@ def delete_gdb_core_files(self):
"""
self.log.debug("Checking core files generated by core file processing")
try:
results = run_local(self.log, "cat /proc/sys/kernel/core_pattern", check=True)
except RunException:
result = run_local(self.log, "cat /proc/sys/kernel/core_pattern")
if not result.passed:
self.log.error("Unable to find local core file pattern")
self.log.debug("Stacktrace", exc_info=True)
return 1
core_path = os.path.split(results.stdout.splitlines()[-1])[0]
core_path = os.path.split(result.joined_stdout.splitlines()[-1])[0]

self.log.debug("Deleting core.gdb.*.* core files located in %s", core_path)
other = ["-printf '%M %n %-12u %-12g %12k %t %p\n' -delete"]
try:
run_local(
self.log, find_command(core_path, "core.gdb.*.*", 1, other), check=True)
except RunException:
if not run_local(self.log, find_command(core_path, "core.gdb.*.*", 1, other)).passed:
self.log.debug("core.gdb.*.* files could not be removed")
return 1
return 0
Expand Down
5 changes: 3 additions & 2 deletions src/tests/ftest/server/multiengine_persocket.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2020-2023 Intel Corporation.
(C) Copyright 2020-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -181,7 +181,8 @@ def check_pmem(self, hosts, count):

def storage_format(self):
"""Perform storage format."""
run_local(self.log, "dmg storage format")
if not run_local(self.log, "dmg storage format").passed:
self.fail("dmg storage format failed")

def cleanup(self):
"""Servers clean up after test complete."""
Expand Down
4 changes: 2 additions & 2 deletions src/tests/ftest/slurm_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def _create_epilog_script(self, script):
"""
self.log.debug('Creating the slurm epilog script to run after each job.')
try:
with open(script, 'w') as script_file:
with open(script, 'w', encoding='utf-8') as script_file:
script_file.write('#!/bin/bash\n#\n')
script_file.write('/usr/bin/bash -c \'pkill --signal 9 dfuse\'\n')
script_file.write(
Expand Down Expand Up @@ -364,7 +364,7 @@ def _append_config_file(self, echo_command):
echo_command (str): command adding contents to the config file
Returns:
RemoteCommandResult: the result from the echo | tee command
CommandResult: the result from the echo | tee command
"""
tee_command = command_as_user(f'tee -a {self.SLURM_CONF}', self.root)
return run_remote(self.log, self.all_nodes, f'{echo_command} | {tee_command}')
Expand Down
6 changes: 2 additions & 4 deletions src/tests/ftest/util/agent_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2019-2023 Intel Corporation.
(C) Copyright 2019-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -294,9 +294,7 @@ def support_collect_log(self, **kwargs):
CommandFailure: if the daos_agent command fails.
Returns:
RemoteCommandResult: a grouping of the command results from
the same hosts with the same return status
CommandResult: groups of command results from the same hosts with the same return status
"""
cmd = DaosAgentCommand(self.manager.job.command_path)
cmd.sudo = True
Expand Down
32 changes: 13 additions & 19 deletions src/tests/ftest/util/collection_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# pylint: disable=import-error,no-name-in-module
from util.environment_utils import TestEnvironment
from util.host_utils import get_local_host
from util.run_utils import RunException, find_command, run_local, run_remote, stop_processes
from util.run_utils import find_command, run_local, run_remote, stop_processes
from util.user_utils import get_chown_command
from util.yaml_utils import get_test_category

Expand Down Expand Up @@ -562,20 +562,17 @@ def move_files(logger, hosts, source, pattern, destination, depth, timeout, test
# Clush -rcopy the temporary remote directory to this host
command = ["clush", "-w", str(hosts), "-pv", "--rcopy", f"'{tmp_copy_dir}'", "--dest",
f"'{rcopy_dest}'"]
try:
run_local(logger, " ".join(command), check=True, timeout=timeout)
except RunException:
if not run_local(logger, " ".join(command), timeout=timeout).passed:
message = f"Error copying remote files to {destination}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return_code = 16

finally:
# Remove the temporary remote directory on each host
command = f"{sudo_command}rm -fr '{tmp_copy_dir}'"
if not run_remote(logger, hosts, command).passed:
message = f"Error removing temporary remote copy directory '{tmp_copy_dir}'"
test_result.fail_test(logger, "Process", message)
return_code = 16
# Remove the temporary remote directory on each host
command = f"{sudo_command}rm -fr '{tmp_copy_dir}'"
if not run_remote(logger, hosts, command).passed:
message = f"Error removing temporary remote copy directory '{tmp_copy_dir}'"
test_result.fail_test(logger, "Process", message)
return_code = 16

return return_code

Expand Down Expand Up @@ -648,14 +645,13 @@ def create_steps_log(logger, job_results_dir, test_result):
job_log = os.path.join(test_logs_dir, 'job.log')
step_log = os.path.join(test_logs_dir, 'steps.log')
command = rf"grep -E '(INFO |ERROR)\| (==> Step|START|PASS|FAIL|ERROR)' {job_log}"
try:
result = run_local(logger, command)
with open(step_log, 'w', encoding="utf-8") as file:
file.write(result.stdout)
except Exception: # pylint: disable=broad-except
result = run_local(logger, command)
if not result.passed:
message = f"Error creating {step_log}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return 8192
with open(step_log, 'w', encoding="utf-8") as file:
file.write(result.joined_stdout)
return 0


Expand Down Expand Up @@ -713,9 +709,7 @@ def rename_avocado_test_dir(logger, test, job_results_dir, test_result, jenkins_
return 1024

# Remove latest symlink directory to avoid inclusion in the Jenkins build artifacts
try:
run_local(logger, f"rm -fr '{test_logs_lnk}'")
except RunException:
if not run_local(logger, f"rm -fr '{test_logs_lnk}'").passed:
message = f"Error removing {test_logs_lnk}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return 1024
Expand Down
14 changes: 5 additions & 9 deletions src/tests/ftest/util/dfuse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ def _run_as_owner(self, hosts, command, timeout=120):
Defaults to 120 seconds.
Returns:
RemoteCommandResult: result of the command
CommandResult: result of the command
"""
return run_remote(
self.log, hosts, command_as_user(command, self.run_user), timeout=timeout)
Expand Down Expand Up @@ -233,7 +232,7 @@ def run(self, check=True, mount_callback=None):
Args:
check (bool): Check if dfuse mounted properly after mount is executed.
mount_callback (method, optional): method to pass RemoteCommandResult to
mount_callback (method, optional): method to pass CommandResult to
after mount. Default simply raises an exception on failure.
Raises:
Expand Down Expand Up @@ -504,7 +503,7 @@ def __init__(self, hosts, namespace="/run/verify_perms/*"):

# run options
self.hosts = hosts.copy()
self.timeout = 120
self.timeout = 240

# Most usage requires root permission
self.run_user = 'root'
Expand All @@ -517,8 +516,7 @@ def run(self):
CommandFailure: If the command fails
Returns:
RemoteCommandResult: result from run_remote
CommandResult: result from run_remote
"""
self.log.info('Running verify_perms.py on %s', str(self.hosts))
result = run_remote(self.log, self.hosts, self.with_exports, timeout=self.timeout)
Expand Down Expand Up @@ -568,9 +566,7 @@ def _run_process(self, raise_exception=None):
CommandFailure: if there is an error running the command
Returns:
RemoteCommandResult: a grouping of the command results from the same host with the
same return status
CommandResult: groups of command results from the same hosts with the same return status
"""
if raise_exception is None:
raise_exception = self.exit_status_exception
Expand Down
4 changes: 1 addition & 3 deletions src/tests/ftest/util/fio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,7 @@ def _run_process(self, raise_exception=None):
CommandFailure: if there is an error running the command
Returns:
RemoteCommandResult: a grouping of the command results from the same hosts with the
same return status
CommandResult: groups of command results from the same hosts with the same return status
"""
if not self._hosts:
raise CommandFailure('No hosts specified for fio command')
Expand Down
Loading

0 comments on commit ed96c9d

Please sign in to comment.