Skip to content

Commit

Permalink
DAOS-16265 test: Fix erasurecode/rebuild_fio.py out of space (#15020) (#15340)
Browse files: browse the repository at this point in the history

Prevent accumulating large server log files caused by temporarily
enabling the DEBUG log mask while creating or destroying pools.

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
  • Loading branch information
phender authored Oct 21, 2024
1 parent c821379 commit b913d3e
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/tests/ftest/erasurecode/multiple_failure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ server_config:
storage: auto
pool:
size: 93%
set_logmasks: False
container:
type: POSIX
control_method: daos
Expand Down
1 change: 1 addition & 0 deletions src/tests/ftest/erasurecode/rebuild_fio.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pool:
aggregation:
threshold: 50000000
aggr_timeout: 180
set_logmasks: False
container:
type: POSIX
control_method: daos
Expand Down
22 changes: 18 additions & 4 deletions src/tests/ftest/util/apricot/apricot/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,7 @@ def __init__(self, *args, **kwargs):
self.setup_start_agents = True
self.slurm_exclude_servers = False
self.slurm_exclude_nodes = NodeSet()
self.max_test_dir_usage_check = 90
self.host_info = HostInfo()
self.hostlist_servers = NodeSet()
self.hostlist_clients = NodeSet()
Expand Down Expand Up @@ -693,6 +694,11 @@ def setUp(self):
self.slurm_exclude_servers = self.params.get(
"slurm_exclude_servers", "/run/setup/*", self.slurm_exclude_servers)

# Max test directory usage percentage - when exceeded will display sizes of files in the
# test directory
self.max_test_dir_usage_check = self.params.get(
"max_test_dir_usage_check", "/run/setup/*", self.max_test_dir_usage_check)

# The server config name should be obtained from each ServerManager
# object, but some tests still use this TestWithServers attribute.
self.server_group = self.params.get("name", "/run/server_config/*", "daos_server")
Expand Down Expand Up @@ -765,12 +771,20 @@ def setUp(self):

# List common test directory contents before running the test
self.log.info("-" * 100)
self.log.debug("Common test directory (%s) contents:", os.path.dirname(self.test_dir))
self.log.debug(
"Common test directory (%s) contents (check > %s%%):",
os.path.dirname(self.test_dir), self.max_test_dir_usage_check)
all_hosts = include_local_host(self.host_info.all_hosts)
test_dir_parent = os.path.dirname(self.test_dir)
result = run_remote(self.log, all_hosts, f"df -h {test_dir_parent}")
if int(max(re.findall(r" ([\d+])% ", result.joined_stdout) + ["0"])) > 90:
run_remote(self.log, all_hosts, f"du -sh {test_dir_parent}/*")
_result = run_remote(self.log, all_hosts, f"df -h {test_dir_parent}")
_details = NodeSet()
for _host, _stdout in _result.all_stdout.items():
_test_dir_usage = re.findall(r"\s+([\d]+)%\s+", _stdout)
_test_dir_usage_int = int(max(_test_dir_usage + ["0"]))
if _test_dir_usage_int > self.max_test_dir_usage_check:
_details.add(_host)
if _details:
run_remote(self.log, _details, f"du -sh {test_dir_parent}/*")
self.log.info("-" * 100)

if not self.start_servers_once or self.name.uid == 1:
Expand Down

0 comments on commit b913d3e

Please sign in to comment.