Skip to content

Commit

Permalink
DAOS-16217 test: Update run_local(). (#14748)
Browse files Browse the repository at this point in the history
Update the current run_local() command to return an object similar to
run_remote() to allow them to be used interchangeably.

Increase the verify_perms.py timeout.

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
  • Loading branch information
phender authored Jul 30, 2024
1 parent 18a7878 commit ed96c9d
Show file tree
Hide file tree
Showing 20 changed files with 488 additions and 415 deletions.
2 changes: 1 addition & 1 deletion src/tests/ftest/dfuse/pil4dfs_fio.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _get_bandwidth(self, fio_result, rw):
"""Returns FIO bandwidth of a given I/O pattern
Args:
fio_result (RemoteCommandResult): results of a FIO command.
fio_result (CommandResult): results of a FIO command.
rw (str): Type of I/O pattern.
Returns:
Expand Down
11 changes: 5 additions & 6 deletions src/tests/ftest/harness/core_files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2021-2023 Intel Corporation.
(C) Copyright 2021-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -9,7 +9,7 @@

from apricot import TestWithServers
from ClusterShell.NodeSet import NodeSet
from run_utils import RunException, run_local, run_remote
from run_utils import run_local, run_remote


class HarnessCoreFilesTest(TestWithServers):
Expand Down Expand Up @@ -40,11 +40,10 @@ def test_core_files(self):
"""
# create a core.gdb file
self.log.debug("Create a core.gdb.harness.advanced file in core_pattern dir.")
try:
results = run_local(self.log, "cat /proc/sys/kernel/core_pattern", check=True)
except RunException:
result = run_local(self.log, "cat /proc/sys/kernel/core_pattern")
if not result.passed:
self.fail("Unable to find local core file pattern")
core_path = os.path.split(results.stdout.splitlines()[-1])[0]
core_path = os.path.split(result.joined_stdout.splitlines()[-1])[0]
core_file = "{}/core.gdb.harness.advanced".format(core_path)

self.log.debug("Creating %s", core_file)
Expand Down
261 changes: 219 additions & 42 deletions src/tests/ftest/harness/unit.py

Large diffs are not rendered by default.

62 changes: 26 additions & 36 deletions src/tests/ftest/process_core_files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2022-2023 Intel Corporation.
(C) Copyright 2022-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -141,7 +141,8 @@ def process_core_files(self, directory, delete, test=None):
if os.path.splitext(core_name)[-1] == ".bz2":
# Decompress the file
command = f"lbzip2 -d -v '{os.path.join(core_dir, core_name)}'"
run_local(self.log, command)
if not run_local(self.log, command).passed:
raise CoreFileException(f"Error decompressing {core_name}")
core_name = os.path.splitext(core_name)[0]
exe_name = self._get_exe_name(os.path.join(core_dir, core_name))
self._create_stacktrace(core_dir, core_name, exe_name)
Expand Down Expand Up @@ -187,22 +188,23 @@ def _create_stacktrace(self, core_dir, core_name, exe_name):
stack_trace_file = os.path.join(core_dir, f"'{core_name}.stacktrace'")

self.log.debug("Generating a stacktrace from the %s core file from %s", core_full, host)
run_local(self.log, f"ls -l '{core_full}'")
if not run_local(self.log, f"ls -l '{core_full}'").passed:
raise RunException(f"Error listing {core_full}")

command = (
f"gdb -cd='{core_dir}' -ex 'set pagination off' -ex 'thread apply all bt full' -ex "
f"detach -ex quit '{exe_name}' '{core_name}'")
result = run_local(self.log, command, verbose=False)
if not result.passed:
raise RunException(f"Error creating {stack_trace_file}")

try:
output = run_local(self.log, command, check=False, verbose=False)
with open(stack_trace_file, "w", encoding="utf-8") as stack_trace:
stack_trace.writelines(output.stdout)
stack_trace.write(result.joined_stdout)

except IOError as error:
raise RunException(f"Error writing {stack_trace_file}") from error

except RunException as error:
raise RunException(f"Error creating {stack_trace_file}") from error

def _get_exe_name(self, core_file):
"""Get the executable name from the core file.
Expand All @@ -219,7 +221,7 @@ def _get_exe_name(self, core_file):
self.log.debug("Extracting the executable name from '%s'", core_file)
command = f"gdb -c '{core_file}' -ex 'info proc exe' -ex quit"
result = run_local(self.log, command, verbose=False)
last_line = result.stdout.splitlines()[-1]
last_line = result.joined_stdout.splitlines()[-1]
self.log.debug(" last line: %s", last_line)
cmd = last_line[7:]
self.log.debug(" last_line[7:-1]: %s", cmd)
Expand Down Expand Up @@ -277,7 +279,7 @@ def install_debuginfo_packages(self):
cmds.append(["sudo", "rm", "-f", path])

if self.USE_DEBUGINFO_INSTALL:
dnf_args = ["--exclude", "ompi-debuginfo"]
dnf_args = ["--nobest", "--exclude", "ompi-debuginfo"]
if os.getenv("TEST_RPMS", 'false') == 'true':
if "suse" in self.distro_info.name.lower():
dnf_args.extend(["libpmemobj1", "python3", "openmpi3"])
Expand All @@ -291,9 +293,8 @@ def install_debuginfo_packages(self):
else:
raise RunException(f"Unsupported distro: {self.distro_info}")
cmds.append(["sudo", "dnf", "-y", "install"] + dnf_args)
output = run_local(
self.log, " ".join(["rpm", "-q", "--qf", "'%{evr}'", "daos"]), check=False)
rpm_version = output.stdout
result = run_local(self.log, " ".join(["rpm", "-q", "--qf", "'%{evr}'", "daos"]))
rpm_version = result.joined_stdout
cmds.append(
["sudo", "dnf", "debuginfo-install", "-y"] + dnf_args
+ ["daos-" + rpm_version, "daos-*-" + rpm_version])
Expand Down Expand Up @@ -324,9 +325,7 @@ def install_debuginfo_packages(self):

retry = False
for cmd in cmds:
try:
run_local(self.log, " ".join(cmd), check=True)
except RunException:
if not run_local(self.log, " ".join(cmd)).passed:
# got an error, so abort this list of commands and re-run
# it with a dnf clean, makecache first
retry = True
Expand All @@ -339,9 +338,7 @@ def install_debuginfo_packages(self):
cmds.insert(0, cmd_prefix + ["clean", "all"])
cmds.insert(1, cmd_prefix + ["makecache"])
for cmd in cmds:
try:
run_local(self.log, " ".join(cmd))
except RunException:
if not run_local(self.log, " ".join(cmd)).passed:
break

def is_el(self):
Expand Down Expand Up @@ -380,14 +377,11 @@ def resolve_debuginfo(self, pkg):
"""
package_info = None
try:
# Eventually use python libraries for this rather than exec()ing out to rpm
output = run_local(
self.log,
" ".join(
["rpm", "-q", "--qf", "'%{name} %{version} %{release} %{epoch}'", pkg]),
check=False)
name, version, release, epoch = output.stdout.split()
# Eventually use python libraries for this rather than exec()ing out to rpm
command = f"rpm -q --qf '%{{name}} %{{version}} %{{release}} %{{epoch}}' {pkg}"
result = run_local(self.log, command)
if result.passed:
name, version, release, epoch = result.joined_stdout.split()

debuginfo_map = {"glibc": "glibc-debuginfo-common"}
try:
Expand All @@ -400,7 +394,7 @@ def resolve_debuginfo(self, pkg):
"release": release,
"epoch": epoch
}
except ValueError:
else:
self.log.debug("Package %s not installed, skipping debuginfo", pkg)

return package_info
Expand All @@ -413,20 +407,16 @@ def delete_gdb_core_files(self):
"""
self.log.debug("Checking core files generated by core file processing")
try:
results = run_local(self.log, "cat /proc/sys/kernel/core_pattern", check=True)
except RunException:
result = run_local(self.log, "cat /proc/sys/kernel/core_pattern")
if not result.passed:
self.log.error("Unable to find local core file pattern")
self.log.debug("Stacktrace", exc_info=True)
return 1
core_path = os.path.split(results.stdout.splitlines()[-1])[0]
core_path = os.path.split(result.joined_stdout.splitlines()[-1])[0]

self.log.debug("Deleting core.gdb.*.* core files located in %s", core_path)
other = ["-printf '%M %n %-12u %-12g %12k %t %p\n' -delete"]
try:
run_local(
self.log, find_command(core_path, "core.gdb.*.*", 1, other), check=True)
except RunException:
if not run_local(self.log, find_command(core_path, "core.gdb.*.*", 1, other)).passed:
self.log.debug("core.gdb.*.* files could not be removed")
return 1
return 0
Expand Down
5 changes: 3 additions & 2 deletions src/tests/ftest/server/multiengine_persocket.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2020-2023 Intel Corporation.
(C) Copyright 2020-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -181,7 +181,8 @@ def check_pmem(self, hosts, count):

def storage_format(self):
"""Perform storage format."""
run_local(self.log, "dmg storage format")
if not run_local(self.log, "dmg storage format").passed:
self.fail("dmg storage format failed")

def cleanup(self):
"""Servers clean up after test complete."""
Expand Down
4 changes: 2 additions & 2 deletions src/tests/ftest/slurm_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def _create_epilog_script(self, script):
"""
self.log.debug('Creating the slurm epilog script to run after each job.')
try:
with open(script, 'w') as script_file:
with open(script, 'w', encoding='utf-8') as script_file:
script_file.write('#!/bin/bash\n#\n')
script_file.write('/usr/bin/bash -c \'pkill --signal 9 dfuse\'\n')
script_file.write(
Expand Down Expand Up @@ -364,7 +364,7 @@ def _append_config_file(self, echo_command):
echo_command (str): command adding contents to the config file
Returns:
RemoteCommandResult: the result from the echo | tee command
CommandResult: the result from the echo | tee command
"""
tee_command = command_as_user(f'tee -a {self.SLURM_CONF}', self.root)
return run_remote(self.log, self.all_nodes, f'{echo_command} | {tee_command}')
Expand Down
6 changes: 2 additions & 4 deletions src/tests/ftest/util/agent_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2019-2023 Intel Corporation.
(C) Copyright 2019-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -294,9 +294,7 @@ def support_collect_log(self, **kwargs):
CommandFailure: if the daos_agent command fails.
Returns:
RemoteCommandResult: a grouping of the command results from
the same hosts with the same return status
CommandResult: groups of command results from the same hosts with the same return status
"""
cmd = DaosAgentCommand(self.manager.job.command_path)
cmd.sudo = True
Expand Down
32 changes: 13 additions & 19 deletions src/tests/ftest/util/collection_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# pylint: disable=import-error,no-name-in-module
from util.environment_utils import TestEnvironment
from util.host_utils import get_local_host
from util.run_utils import RunException, find_command, run_local, run_remote, stop_processes
from util.run_utils import find_command, run_local, run_remote, stop_processes
from util.user_utils import get_chown_command
from util.yaml_utils import get_test_category

Expand Down Expand Up @@ -562,20 +562,17 @@ def move_files(logger, hosts, source, pattern, destination, depth, timeout, test
# Clush -rcopy the temporary remote directory to this host
command = ["clush", "-w", str(hosts), "-pv", "--rcopy", f"'{tmp_copy_dir}'", "--dest",
f"'{rcopy_dest}'"]
try:
run_local(logger, " ".join(command), check=True, timeout=timeout)
except RunException:
if not run_local(logger, " ".join(command), timeout=timeout).passed:
message = f"Error copying remote files to {destination}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return_code = 16

finally:
# Remove the temporary remote directory on each host
command = f"{sudo_command}rm -fr '{tmp_copy_dir}'"
if not run_remote(logger, hosts, command).passed:
message = f"Error removing temporary remote copy directory '{tmp_copy_dir}'"
test_result.fail_test(logger, "Process", message)
return_code = 16
# Remove the temporary remote directory on each host
command = f"{sudo_command}rm -fr '{tmp_copy_dir}'"
if not run_remote(logger, hosts, command).passed:
message = f"Error removing temporary remote copy directory '{tmp_copy_dir}'"
test_result.fail_test(logger, "Process", message)
return_code = 16

return return_code

Expand Down Expand Up @@ -648,14 +645,13 @@ def create_steps_log(logger, job_results_dir, test_result):
job_log = os.path.join(test_logs_dir, 'job.log')
step_log = os.path.join(test_logs_dir, 'steps.log')
command = rf"grep -E '(INFO |ERROR)\| (==> Step|START|PASS|FAIL|ERROR)' {job_log}"
try:
result = run_local(logger, command)
with open(step_log, 'w', encoding="utf-8") as file:
file.write(result.stdout)
except Exception: # pylint: disable=broad-except
result = run_local(logger, command)
if not result.passed:
message = f"Error creating {step_log}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return 8192
with open(step_log, 'w', encoding="utf-8") as file:
file.write(result.joined_stdout)
return 0


Expand Down Expand Up @@ -713,9 +709,7 @@ def rename_avocado_test_dir(logger, test, job_results_dir, test_result, jenkins_
return 1024

# Remove latest symlink directory to avoid inclusion in the Jenkins build artifacts
try:
run_local(logger, f"rm -fr '{test_logs_lnk}'")
except RunException:
if not run_local(logger, f"rm -fr '{test_logs_lnk}'").passed:
message = f"Error removing {test_logs_lnk}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return 1024
Expand Down
14 changes: 5 additions & 9 deletions src/tests/ftest/util/dfuse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ def _run_as_owner(self, hosts, command, timeout=120):
Defaults to 120 seconds.
Returns:
RemoteCommandResult: result of the command
CommandResult: result of the command
"""
return run_remote(
self.log, hosts, command_as_user(command, self.run_user), timeout=timeout)
Expand Down Expand Up @@ -233,7 +232,7 @@ def run(self, check=True, mount_callback=None):
Args:
check (bool): Check if dfuse mounted properly after mount is executed.
mount_callback (method, optional): method to pass RemoteCommandResult to
mount_callback (method, optional): method to pass CommandResult to
after mount. Default simply raises an exception on failure.
Raises:
Expand Down Expand Up @@ -504,7 +503,7 @@ def __init__(self, hosts, namespace="/run/verify_perms/*"):

# run options
self.hosts = hosts.copy()
self.timeout = 120
self.timeout = 240

# Most usage requires root permission
self.run_user = 'root'
Expand All @@ -517,8 +516,7 @@ def run(self):
CommandFailure: If the command fails
Returns:
RemoteCommandResult: result from run_remote
CommandResult: result from run_remote
"""
self.log.info('Running verify_perms.py on %s', str(self.hosts))
result = run_remote(self.log, self.hosts, self.with_exports, timeout=self.timeout)
Expand Down Expand Up @@ -568,9 +566,7 @@ def _run_process(self, raise_exception=None):
CommandFailure: if there is an error running the command
Returns:
RemoteCommandResult: a grouping of the command results from the same host with the
same return status
CommandResult: groups of command results from the same hosts with the same return status
"""
if raise_exception is None:
raise_exception = self.exit_status_exception
Expand Down
4 changes: 1 addition & 3 deletions src/tests/ftest/util/fio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,7 @@ def _run_process(self, raise_exception=None):
CommandFailure: if there is an error running the command
Returns:
RemoteCommandResult: a grouping of the command results from the same hosts with the
same return status
CommandResult: groups of command results from the same hosts with the same return status
"""
if not self._hosts:
raise CommandFailure('No hosts specified for fio command')
Expand Down
Loading

0 comments on commit ed96c9d

Please sign in to comment.