From 810d6a7c2968e68a5a0874d65f417d97776fc716 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 22 Apr 2022 14:51:54 -0700 Subject: [PATCH] frontera: config options for tcp and sockets (#127) - parse provider in get_results.py - add placeholder tcp and sockets env vars Signed-off-by: Dalton Bohning --- frontera/daos_server.yml | 5 +++++ frontera/env_daos | 16 +++++++++++++--- frontera/get_results.py | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/frontera/daos_server.yml b/frontera/daos_server.yml index a05997f..0b032f2 100644 --- a/frontera/daos_server.yml +++ b/frontera/daos_server.yml @@ -6,6 +6,8 @@ name: daos_server nr_hugepages: 4096 port: 10001 provider: ofi+verbs;ofi_rxm +#provider: ofi+tcp;ofi_rxm +#provider: ofi+sockets crt_timeout: 180 crt_ctx_share_addr: 0 engines: @@ -25,6 +27,9 @@ engines: - SWIM_PROTOCOL_PERIOD_LEN=2000 - SWIM_SUSPECT_TIMEOUT=19000 - SWIM_PING_TIMEOUT=1900 + # For sockets + #- FI_SOCKETS_MAX_CONN_RETRY=1 + #- FI_SOCKETS_CONN_TIMEOUT=2000 fabric_iface: ib0 fabric_iface_port: 31416 first_core: 0 diff --git a/frontera/env_daos b/frontera/env_daos index fbc4a65..393f8b0 100644 --- a/frontera/env_daos +++ b/frontera/env_daos @@ -3,11 +3,21 @@ export FI_OFI_RXM_USE_SRX=1 export FI_MR_CACHE_MONITOR=disabled export D_LOG_FILE=/tmp/daos_logs/daos_client.log export D_LOG_MASK=ERR -export CRT_PHY_ADDR_STR="ofi+verbs;ofi_rxm" -export OFI_INTERFACE=ib0 -export OFI_DOMAIN=mlx5_0 export DAOS_AGENT_DRPC_DIR=/tmp/daos_agent export DAOS_DISABLE_REQ_FWD=1 +export OFI_INTERFACE=ib0 + +# For verbs +export CRT_PHY_ADDR_STR="ofi+verbs;ofi_rxm" +export OFI_DOMAIN=mlx5_0 + +# For tcp +#export CRT_PHY_ADDR_STR="ofi+tcp;ofi_rxm" +#export OFI_DOMAIN=ib0 + +# For sockets +#export CRT_PHY_ADDR_STR="ofi+sockets" +#export OFI_DOMAIN=ib0 LOCATION=$1 diff --git a/frontera/get_results.py b/frontera/get_results.py index f4ef97e..9d3c42d 100755 --- a/frontera/get_results.py +++ b/frontera/get_results.py @@ 
-187,6 +187,32 @@ def get_num_targets(output_file_path, slurm_job_id): return None return match.group(1) +def get_provider(output_file_path, slurm_job_id): + """Get the provider from the server config. + + Args: + output_file_path (str): path to the log output. + slurm_job_id (str): the slurm job id. + + Returns: + str: the provider + None on failure. + + """ + if not slurm_job_id: + return None + + dir_name = dirname(output_file_path) + + config_path = join(dir_name, "daos_server.yml") + config = read_file(config_path) + if not config: + return None + match = re.search("^provider: (.*)$", config, re.MULTILINE) + if not match: + return None + return match.group(1) + def get_mdtest_metric_max(metric, output): """Get the "max" for an mdtest metric. @@ -500,6 +526,7 @@ def __init__(self, csv_file_path, output_style="full"): "start_time": "Date", "end_time": "End", "daos_commit": "Commit", + "provider": "Provider", "oclass": "Oclass", "num_servers": "Num_Servers", "num_targets": "Num Targets", @@ -518,7 +545,7 @@ def __init__(self, csv_file_path, output_style="full"): "write_gib": "Write (GiB/sec)", "read_gib": "Read (GiB/sec)" } - row_order = ["test_case", "start_time", "daos_commit", "oclass", + row_order = ["test_case", "start_time", "daos_commit", "provider", "oclass", "num_servers", "num_clients", "ppc", "write_gib", "read_gib", "notes", "status"] @@ -553,6 +580,7 @@ def process_result_file(self, file_path): row["daos_commit"] = get_daos_commit(file_path, row["slurm_job_id"]) row["num_targets"] = get_num_targets(file_path, row["slurm_job_id"]) + row["provider"] = get_provider(file_path, row["slurm_job_id"]) row["write_gib"] = format_float(wr_gib) row["read_gib"] = format_float(rd_gib) row["status"] = status.get_status_str() @@ -576,6 +604,7 @@ def __init__(self, csv_file_path, output_style="full"): "start_time": "Date", "end_time": "End", "daos_commit": "Commit", + "provider": "Provider", "oclass": "Oclass", "dir_oclass": "Dir Oclass", "num_servers": "Num 
Servers", @@ -594,7 +623,7 @@ def __init__(self, csv_file_path, output_style="full"): "read_kops": "read(Kops/sec)", "remove_kops": "remove(Kops/sec)" } - row_order = ["test_case", "start_time", "daos_commit", "oclass", + row_order = ["test_case", "start_time", "daos_commit", "provider", "oclass", "num_servers", "num_clients", "ppc", "create_kops", "stat_kops", "read_kops", "remove_kops", "notes", "status"] @@ -649,6 +678,7 @@ def process_result_file(self, file_path): row["daos_commit"] = get_daos_commit(file_path, row["slurm_job_id"]) row["num_targets"] = get_num_targets(file_path, row["slurm_job_id"]) + row["provider"] = get_provider(file_path, row["slurm_job_id"]) row["status"] = status.get_status_str() row["notes"] = status.get_notes_str()