From ba718a15d0520b981844adbfeaf8a6ae0d28ea7e Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Sun, 5 Mar 2023 17:41:21 -0600 Subject: [PATCH 01/22] fix: correct subfile name at open --- src/H5VL_log_filei.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/H5VL_log_filei.cpp b/src/H5VL_log_filei.cpp index 07bdc6ef..7221a41d 100644 --- a/src/H5VL_log_filei.cpp +++ b/src/H5VL_log_filei.cpp @@ -929,7 +929,8 @@ void H5VL_log_filei_open_subfile (H5VL_log_file_t *fp, err = H5Pset_fapl_mpio (fapl_id, fp->group_comm, MPI_INFO_NULL); CHECK_ERR H5VL_LOGI_PROFILING_TIMER_START; - fp->subname = fp->name + ".subfiles/" + fp->name + "." + std::to_string (fp->group_id); + fp->subname = fp->name + ".subfiles/" + std::string (basename ((char *)(fp->name.c_str ()))) + + "." + std::to_string (fp->group_id); fp->sfp = H5VLfile_open (fp->subname.c_str (), flags, fapl_id, dxpl_id, NULL); CHECK_PTR (fp->sfp) H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLFILE_CREATE); From f7049e4345e770dbeaa466869628013f69a266bb Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Sun, 5 Mar 2023 17:57:32 -0600 Subject: [PATCH 02/22] temporarily disable subfile read --- src/H5VL_logi_nb.cpp | 70 +++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/src/H5VL_logi_nb.cpp b/src/H5VL_logi_nb.cpp index 7061ca1c..537e9f0d 100644 --- a/src/H5VL_logi_nb.cpp +++ b/src/H5VL_logi_nb.cpp @@ -547,40 +547,42 @@ void H5VL_log_nb_flush_read_reqs (void *file, std::vector &re (fp->config & H5VL_FILEI_CONFIG_SINGLE_SUBFILE_READ)) { H5VL_log_nb_perform_read (fp, reqs, dxplid); } else { - group_id = fp->group_id; // Backup group ID - // Process our own subfile last so wew don't need to reopen it - for (i = 1; i <= fp->ngroup; i++) { - H5VL_LOGI_PROFILING_TIMER_START; - // Close the log group - err = H5VLgroup_close (fp->lgp, fp->uvlid, fp->dxplid, NULL); - CHECK_ERR - // Close previous subfile with MPI - mpierr = MPI_File_close (&(fp->fh)); - 
CHECK_MPIERR - // Close previous subfile - err = H5VLfile_close (fp->sfp, fp->uvlid, H5P_DATASET_XFER_DEFAULT, NULL); - CHECK_ERR - // Erase the index table of previous subfile - fp->idx->clear (); - fp->idxvalid = false; - - // Open the current subfile - fp->group_id = (group_id + i) % fp->ngroup; - H5VL_log_filei_open_subfile (fp, fp->flag, fp->ufaplid, fp->dxplid); - - // Open the LOG group - loc.obj_type = H5I_FILE; - loc.type = H5VL_OBJECT_BY_SELF; - fp->lgp = H5VLgroup_open (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, - H5P_GROUP_ACCESS_DEFAULT, fp->dxplid, NULL); - CHECK_PTR (fp->lgp) - // Open the file with MPI - mpierr = MPI_File_open (fp->group_comm, fp->subname.c_str (), MPI_MODE_RDWR, fp->info, - &(fp->fh)); - CHECK_MPIERR - - H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_NB_FLUSH_READ_REQS_SWITCH_SUBFILE); - } + // TODO: Fix subfile read + + // group_id = fp->group_id; // Backup group ID + // // Process our own subfile last so wew don't need to reopen it + // for (i = 1; i <= fp->ngroup; i++) { + // H5VL_LOGI_PROFILING_TIMER_START; + // // Close the log group + // err = H5VLgroup_close (fp->lgp, fp->uvlid, fp->dxplid, NULL); + // CHECK_ERR + // // Close previous subfile with MPI + // mpierr = MPI_File_close (&(fp->fh)); + // CHECK_MPIERR + // // Close previous subfile + // err = H5VLfile_close (fp->sfp, fp->uvlid, H5P_DATASET_XFER_DEFAULT, NULL); + // CHECK_ERR + // // Erase the index table of previous subfile + // fp->idx->clear (); + // fp->idxvalid = false; + + // // Open the current subfile + // fp->group_id = (group_id + i) % fp->ngroup; + // H5VL_log_filei_open_subfile (fp, fp->flag, fp->ufaplid, fp->dxplid); + + // // Open the LOG group + // loc.obj_type = H5I_FILE; + // loc.type = H5VL_OBJECT_BY_SELF; + // fp->lgp = H5VLgroup_open (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, + // H5P_GROUP_ACCESS_DEFAULT, fp->dxplid, NULL); + // CHECK_PTR (fp->lgp) + // // Open the file with MPI + // mpierr = MPI_File_open (fp->group_comm, 
fp->subname.c_str (), MPI_MODE_RDWR, fp->info, + // &(fp->fh)); + // CHECK_MPIERR + + // H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_NB_FLUSH_READ_REQS_SWITCH_SUBFILE); + // } } // Clear the request queue From d905666791f89a8d9fc94e36d7a8cdf69b255c4e Mon Sep 17 00:00:00 2001 From: wkliao Date: Tue, 7 Mar 2023 14:57:33 -0600 Subject: [PATCH 03/22] check return error --- src/H5VL_log_file.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/H5VL_log_file.cpp b/src/H5VL_log_file.cpp index 3882bed3..b4917c79 100644 --- a/src/H5VL_log_file.cpp +++ b/src/H5VL_log_file.cpp @@ -450,6 +450,7 @@ herr_t H5VL_log_file_specific (void *file, H5VL_log_filei_flush(fp, dxpl_id); } else { err = H5VLfile_specific (fp->uo, fp->uvlid, args, dxpl_id, req); + CHECK_ERR } H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_SPECIFIC); } break; @@ -458,6 +459,7 @@ herr_t H5VL_log_file_specific (void *file, ERR_OUT ("Unsupported args->op_type") } else { err = H5VLfile_specific (fp->uo, fp->uvlid, args, dxpl_id, req); + CHECK_ERR } } From 493ea356c67915af3041e5bb376233d120a3acf4 Mon Sep 17 00:00:00 2001 From: wkliao Date: Tue, 7 Mar 2023 14:57:57 -0600 Subject: [PATCH 04/22] check env H5VL_LOG_NSUBFILES --- tests/common/testutils.cpp | 7 ++++++- tests/common/testutils.hpp | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/common/testutils.cpp b/tests/common/testutils.cpp index 3f65b7d5..214f5324 100644 --- a/tests/common/testutils.cpp +++ b/tests/common/testutils.cpp @@ -3,7 +3,7 @@ * See COPYRIGHT notice in top-level directory. 
*/ -#include /* getenv() */ +#include /* getenv(), atoi() */ #include /* strcasestr() */ #include "testutils.hpp" @@ -17,6 +17,7 @@ void check_env(vol_env *env) { env->cache_env = 0; env->async_env = 0; env->passthru = 0; + env->nsubfiles = -1; env_str = getenv("TEST_NATIVE_VOL_ONLY"); if (env_str != NULL && env_str[0] == '1') { @@ -24,6 +25,10 @@ void check_env(vol_env *env) { return; } + env_str = getenv("H5VL_LOG_NSUBFILES"); + if (env_str != NULL) + env->nsubfiles = atoi(env_str); + env_str = getenv("H5VL_LOG_PASSTHRU"); if (env_str != NULL && env_str[0] == '1') env->passthru = 1; diff --git a/tests/common/testutils.hpp b/tests/common/testutils.hpp index e70546cd..de2ea039 100644 --- a/tests/common/testutils.hpp +++ b/tests/common/testutils.hpp @@ -108,6 +108,7 @@ typedef struct { int cache_env; int async_env; int passthru; + int nsubfiles; } vol_env; extern void check_env(vol_env *env); From 63963d0224dc67e946db49f7e62049fc01c32147 Mon Sep 17 00:00:00 2001 From: wkliao Date: Tue, 7 Mar 2023 14:58:36 -0600 Subject: [PATCH 05/22] add test program subfile_dwrite.cpp --- tests/basic/Makefile.am | 3 +- tests/basic/subfile_dwrite.cpp | 117 +++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 tests/basic/subfile_dwrite.cpp diff --git a/tests/basic/Makefile.am b/tests/basic/Makefile.am index 5b8f8320..44bcdd4e 100644 --- a/tests/basic/Makefile.am +++ b/tests/basic/Makefile.am @@ -31,7 +31,8 @@ check_PROGRAMS = attr \ group \ memsel \ multiblockselection \ - multipointselection + multipointselection \ + subfile_dwrite EXTRA_DIST = seq_runs.sh parallel_run.sh diff --git a/tests/basic/subfile_dwrite.cpp b/tests/basic/subfile_dwrite.cpp new file mode 100644 index 00000000..7758a862 --- /dev/null +++ b/tests/basic/subfile_dwrite.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2022, Northwestern University and Argonne National Laboratory + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include +#include /* getenv() */ +#include /* strcpy() */ +#include /* basename() */ +#include +#include + +#include "H5VL_log.h" +#include "testutils.hpp" + +#define N 10 + +int main(int argc, char **argv) { + herr_t err = 0; + int i, rank, np, nerrs=0, mpi_required, nsubfiles, buf[N]; + char file_name[256], *env_str; + hid_t fid=-1, did=-1, sid=-1, msid=-1; + hid_t fapl_id=-1, fcpl_id=H5P_DEFAULT, log_vlid=H5I_INVALID_HID; + hsize_t dims[2] = {0, N}, start[2], count[2]; + vol_env env; + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_required); + MPI_Comm_size(MPI_COMM_WORLD, &np); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + sprintf(file_name, "%s.h5", basename(argv[0])); + if (argc > 2) { + if (!rank) printf("Usage: %s [filename]\n", argv[0]); + MPI_Finalize(); + return 1; + } + else if (argc > 1) + strcpy(file_name, argv[1]); + + /* check VOL related environment variables */ + check_env(&env); + SHOW_TEST_INFO("subfileing write") + + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + CHECK_ERR(fapl_id) + // MPI and collective metadata is required by LOG VOL + err = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL); + CHECK_ERR(err) + err = H5Pset_all_coll_metadata_ops(fapl_id, 1); + CHECK_ERR(err) + + if (env.native_only == 0 && env.connector == 0) { + // Register LOG VOL plugin + log_vlid = H5VLregister_connector(&H5VL_log_g, H5P_DEFAULT); + CHECK_ERR(log_vlid) + err = H5Pset_vol(fapl_id, log_vlid, NULL); + CHECK_ERR(err) + } + + env_str = getenv("H5VL_LOG_NSUBFILES"); + if (env_str == NULL) { + /* set the number of subfiles */ + fcpl_id = H5Pcreate(H5P_FILE_CREATE); + CHECK_ERR(fcpl_id) + nsubfiles = np / 2; + if (nsubfiles == 0) nsubfiles = -1; + err = H5Pset_subfiling(fcpl_id, nsubfiles); + CHECK_ERR(err) + } + + // Create file + fid = H5Fcreate(file_name, H5F_ACC_TRUNC, fcpl_id, fapl_id); + CHECK_ERR(fid) + + // Create a dataset of 2D array of size np x N + dims[0] = np; + sid = H5Screate_simple(2, dims, dims); + CHECK_ERR(sid); + did = 
H5Dcreate2(fid, "D", H5T_STD_I32LE, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + CHECK_ERR(did) + + for (i = 0; i < N; i++) { buf[i] = rank + i; } + + /* create a hyperslab of 1 x N */ + start[0] = rank; + start[1] = 0; + count[0] = 1; + count[1] = N; + err = H5Sselect_hyperslab(sid, H5S_SELECT_SET, start, NULL, count, NULL); + CHECK_ERR(err) + + msid = H5Screate_simple(1, dims + 1, dims + 1); + CHECK_ERR(msid); + + // Write to dataset in parallel + err = H5Dwrite(did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); + CHECK_ERR(err) + + err = H5Fflush(fid, H5F_SCOPE_LOCAL); + CHECK_ERR(err) + +err_out:; + if (msid >= 0) H5Sclose(msid); + if (sid >= 0) H5Sclose(sid); + if (did >= 0) H5Dclose(did); + if (fapl_id >= 0) H5Pclose(fapl_id); + if (fcpl_id != H5P_DEFAULT) H5Pclose(fcpl_id); + if (log_vlid != H5I_INVALID_HID) H5VLclose(log_vlid); + if (fid >= 0) H5Fclose(fid); + + SHOW_TEST_RESULT + + MPI_Finalize(); + + return (nerrs > 0); +} + From 5d1fed5841578d37335caf14c9fa138302952503 Mon Sep 17 00:00:00 2001 From: wkliao Date: Tue, 7 Mar 2023 15:29:58 -0600 Subject: [PATCH 06/22] test subfiling by setting env H5VL_LOG_NSUBFILES --- tests/dynamic/parallel_run.sh | 6 ++++++ tests/dynamic/seq_runs.sh | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/tests/dynamic/parallel_run.sh b/tests/dynamic/parallel_run.sh index 0ffcd8af..c46733f8 100755 --- a/tests/dynamic/parallel_run.sh +++ b/tests/dynamic/parallel_run.sh @@ -17,3 +17,9 @@ for p in ${check_PROGRAMS} ; do test_func ./$p done +# test subfiling feature by setting env variable H5VL_LOG_NSUBFILES +export H5VL_LOG_NSUBFILES=-1 +for p in ${check_PROGRAMS} ; do + test_func ./$p +done + diff --git a/tests/dynamic/seq_runs.sh b/tests/dynamic/seq_runs.sh index 03e69a4b..de4e1197 100755 --- a/tests/dynamic/seq_runs.sh +++ b/tests/dynamic/seq_runs.sh @@ -13,3 +13,7 @@ log_vol_file_only=0 test_func $1 +# test subfiling feature by setting env variable H5VL_LOG_NSUBFILES +export H5VL_LOG_NSUBFILES=-1 +test_func $1 + From 
ebd19c21c11c10072590244f23cbe735b60a0771 Mon Sep 17 00:00:00 2001 From: wkliao Date: Tue, 7 Mar 2023 15:54:23 -0600 Subject: [PATCH 07/22] clean test output subfile folders --- tests/basic/Makefile.am | 6 +++++- tests/dynamic/Makefile.am | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/basic/Makefile.am b/tests/basic/Makefile.am index 44bcdd4e..b829eb54 100644 --- a/tests/basic/Makefile.am +++ b/tests/basic/Makefile.am @@ -73,5 +73,9 @@ ptest: $(check_PROGRAMS) # build check targets but not invoke tests-local: all $(check_PROGRAMS) -.PHONY: ptest test +clean-local: clean-local-check +clean-local-check: + -rm -rf $(TESTOUTDIR)/*.h5.subfiles + +.PHONY: ptest test clean-local-check diff --git a/tests/dynamic/Makefile.am b/tests/dynamic/Makefile.am index ada54f2a..32c4aa47 100644 --- a/tests/dynamic/Makefile.am +++ b/tests/dynamic/Makefile.am @@ -68,5 +68,9 @@ ptest: $(check_PROGRAMS) # build check targets but not invoke tests-local: all $(check_PROGRAMS) -.PHONY: ptest tests +clean-local: clean-local-check +clean-local-check: + -rm -rf $(TESTOUTDIR)/*.h5.subfiles + +.PHONY: ptest tests clean-local-check From 0e11f4ff6a1b10d75c7909dedc8ec01aa107b975 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Sun, 23 Apr 2023 23:34:13 -0500 Subject: [PATCH 08/22] change single_subfile_read to fapl --- src/H5VL_log.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/H5VL_log.cpp b/src/H5VL_log.cpp index d0ebb6dd..a94b07b8 100644 --- a/src/H5VL_log.cpp +++ b/src/H5VL_log.cpp @@ -657,9 +657,9 @@ herr_t H5Pset_single_subfile_read (hid_t plist, hbool_t single_subfile_read) { htri_t pexist; try { - isfapl = H5Pisa_class (plist, H5P_FILE_CREATE); + isfapl = H5Pisa_class (plist, H5P_FILE_ACCESS); CHECK_ID (isfapl) - if (isfapl == 0) ERR_OUT ("Not fcplid") + if (isfapl == 0) ERR_OUT ("Not faplid") pexist = H5Pexist (plist, SINGLE_SUBFILE_READ_PROPERTY_NAME); CHECK_ID (pexist) @@ -684,7 +684,7 @@ herr_t H5Pget_single_subfile_read 
(hid_t plist, hbool_t *single_subfile_read) { htri_t isfapl, pexist; try { - isfapl = H5Pisa_class (plist, H5P_FILE_CREATE); + isfapl = H5Pisa_class (plist, H5P_FILE_ACCESS); CHECK_ID (isfapl) if (isfapl == 0) *single_subfile_read = false; // Default property will not pass class check From d21c041c4b0314bf8d43c9dd2951ea2ba38fb88e Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Sun, 23 Apr 2023 23:36:06 -0500 Subject: [PATCH 09/22] subfile create/open: use fp->info --- src/H5VL_log_filei.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/H5VL_log_filei.cpp b/src/H5VL_log_filei.cpp index 7221a41d..c1c4b4bb 100644 --- a/src/H5VL_log_filei.cpp +++ b/src/H5VL_log_filei.cpp @@ -883,7 +883,7 @@ void H5VL_log_filei_create_subfile (H5VL_log_file_t *fp, if (stat != 0) { RET_ERR ("Cannot create subfile dir") } // Create the subfiles with underlying VOL - err = H5Pset_fapl_mpio (fapl_id, fp->group_comm, MPI_INFO_NULL); + err = H5Pset_fapl_mpio (fapl_id, fp->group_comm, fp->info); CHECK_ERR H5VL_LOGI_PROFILING_TIMER_START; fp->subname = fp->name + ".subfiles/" + std::string (basename ((char *)(fp->name.c_str ()))) + @@ -926,7 +926,7 @@ void H5VL_log_filei_open_subfile (H5VL_log_file_t *fp, if (stat != 0) { RET_ERR ("Cannot open subfile dir") } // Create the subfiles with underlying VOL - err = H5Pset_fapl_mpio (fapl_id, fp->group_comm, MPI_INFO_NULL); + err = H5Pset_fapl_mpio (fapl_id, fp->group_comm, fp->info); CHECK_ERR H5VL_LOGI_PROFILING_TIMER_START; fp->subname = fp->name + ".subfiles/" + std::string (basename ((char *)(fp->name.c_str ()))) + From eeca154e5268f6510c56bc5d2723906f1148a8df Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 24 Apr 2023 00:30:47 -0500 Subject: [PATCH 10/22] fix: correct nldset and nmdset for subfiles When opening a subfile, fp->nldset and fp->nmdset are not read from the subfile. Instead, they are read from the master file. This commit fixes this issue and reads them from the subfile correctly.
--- src/H5VL_log_filei.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/H5VL_log_filei.cpp b/src/H5VL_log_filei.cpp index c1c4b4bb..91893dfe 100644 --- a/src/H5VL_log_filei.cpp +++ b/src/H5VL_log_filei.cpp @@ -935,9 +935,8 @@ void H5VL_log_filei_open_subfile (H5VL_log_file_t *fp, CHECK_PTR (fp->sfp) H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLFILE_CREATE); - // Att - H5VL_logi_get_att (fp, H5VL_LOG_FILEI_ATTR, H5T_NATIVE_INT32, attbuf, fp->dxplid); - + // Update nldset and nmdset + H5VL_logi_get_att (fp->sfp, fp->uvlid, fp->type, H5VL_LOG_FILEI_ATTR, H5T_NATIVE_INT32, attbuf, fp->dxplid); fp->nldset = attbuf[1]; fp->nmdset = attbuf[2]; } From 9436082a13c2bf9741b7dbdb23a7b0c0014364de Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 24 Apr 2023 02:43:01 -0500 Subject: [PATCH 11/22] subfile read: support read after open --- src/H5VL_log_file.hpp | 10 ++++ src/H5VL_log_filei.cpp | 127 +++++++++++++++++++++++++++++------------ src/H5VL_logi_nb.cpp | 75 +++++++++++------------- src/H5VL_logi_nb.hpp | 1 + 4 files changed, 135 insertions(+), 78 deletions(-) diff --git a/src/H5VL_log_file.hpp b/src/H5VL_log_file.hpp index e2c650b6..bde13d53 100644 --- a/src/H5VL_log_file.hpp +++ b/src/H5VL_log_file.hpp @@ -33,6 +33,14 @@ typedef struct H5VL_log_cord_t { MPI_Offset cord[H5S_MAX_RANK]; } H5VL_log_cord_t; +typedef struct H5VL_log_subfile_record_t { + MPI_File fh; // MPI file handle to the subfile + void* uo; // Under VOL object of the subfile + int nldset; // the number of log data datasets, of the subfile + int nmdset; // the number of log metadata datasets , of the subfile + void* lgp; // log group of the subfile +} H5VL_log_subfile_record_t; + using stcrtstat = struct stat; /* The log VOL file object */ @@ -118,6 +126,8 @@ typedef struct H5VL_log_file_t : H5VL_log_obj_t { bool is_log_based_file; // indicate if a file is a regular file (false) or a log-based file // (false) + H5VL_log_subfile_record_t *subfile_records; // records of all 
subfiles, only used for reading + #ifdef ENABLE_PROFILING #ifndef REPLAY_BUILD //#pragma message ( "C Preprocessor got here!" ) diff --git a/src/H5VL_log_filei.cpp b/src/H5VL_log_filei.cpp index 91893dfe..d156aeaa 100644 --- a/src/H5VL_log_filei.cpp +++ b/src/H5VL_log_filei.cpp @@ -131,7 +131,7 @@ void H5VL_log_filei_post_open (H5VL_log_file_t *fp) { fp->nldset = attbuf[1]; fp->nmdset = attbuf[2]; fp->config = attbuf[3]; - fp->ngroup = attbuf[3]; + fp->ngroup = attbuf[4]; fp->mreqs.resize (fp->ndset, NULL); // Merge write reqeusts fp->dsets_info.resize (fp->ndset, NULL); // Dataset info fp->group_rank = fp->rank; @@ -170,20 +170,28 @@ void H5VL_log_filei_post_open (H5VL_log_file_t *fp) { // Open the LOG group loc.obj_type = H5I_FILE; loc.type = H5VL_OBJECT_BY_SELF; - H5VL_LOGI_PROFILING_TIMER_START - fp->lgp = H5VLgroup_open (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, - H5P_GROUP_ACCESS_DEFAULT, fp->dxplid, NULL); + if (fp->subfile_records) { + fp->lgp = fp->subfile_records[fp->group_id].lgp; + } else { + H5VL_LOGI_PROFILING_TIMER_START + fp->lgp = H5VLgroup_open (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, + H5P_GROUP_ACCESS_DEFAULT, fp->dxplid, NULL); - CHECK_PTR (fp->lgp) - H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLGROUP_OPEN); + CHECK_PTR (fp->lgp) + H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLGROUP_OPEN); + } // Open the file with MPI - H5VL_LOGI_PROFILING_TIMER_START; - mpierr = - MPI_File_open (fp->group_comm, fp->subname.c_str (), MPI_MODE_RDWR, fp->info, &(fp->fh)); + if (fp->subfile_records) { + fp->fh = fp->subfile_records[fp->group_id].fh; + } else { + H5VL_LOGI_PROFILING_TIMER_START; + mpierr = + MPI_File_open (fp->group_comm, fp->subname.c_str (), MPI_MODE_RDWR, fp->info, &(fp->fh)); - H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CREATE_FH); - CHECK_MPIERR + H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CREATE_FH); + CHECK_MPIERR + } // Visit all dataasets for info args.op_type = H5VL_OBJECT_VISIT; @@ -710,7 
+718,7 @@ static inline void print_info (MPI_Info *info_used) { void H5VL_log_filei_close (H5VL_log_file_t *fp) { herr_t err = 0; - int mpierr; + int mpierr, i; int attbuf[5]; void *lib_state = NULL; H5VL_logi_err_finally finally ( @@ -769,10 +777,23 @@ void H5VL_log_filei_close (H5VL_log_file_t *fp) { } // Close the log group - H5VL_LOGI_PROFILING_TIMER_START - err = H5VLgroup_close (fp->lgp, fp->uvlid, fp->dxplid, NULL); - CHECK_ERR - H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLGROUP_CLOSE); + if (fp->subfile_records) { + for (i = 0; i < fp->ngroup; i++) { + if (fp->subfile_records[i].lgp) { + H5VL_LOGI_PROFILING_TIMER_START + err = H5VLgroup_close (fp->subfile_records[i].lgp, fp->uvlid, fp->dxplid, NULL); + CHECK_ERR + H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLGROUP_CLOSE); + } + } + } else { + if (fp->lgp) { + H5VL_LOGI_PROFILING_TIMER_START + err = H5VLgroup_close (fp->lgp, fp->uvlid, fp->dxplid, NULL); + CHECK_ERR + H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLGROUP_CLOSE); + } + } H5VL_logi_restore_lib_stat (lib_state); @@ -791,8 +812,19 @@ void H5VL_log_filei_close (H5VL_log_file_t *fp) { #endif // Close the file with MPI - mpierr = MPI_File_close (&(fp->fh)); - CHECK_MPIERR + if (fp->subfile_records) { + for (i = 0; i < fp->ngroup; i++) { + if (fp->subfile_records[i].fh != MPI_FILE_NULL) { + mpierr = MPI_File_close (&(fp->subfile_records[i].fh)); + CHECK_MPIERR + } + } + } else { + if (fp->fh != MPI_FILE_NULL) { + mpierr = MPI_File_close (&(fp->fh)); + CHECK_MPIERR + } + } // Close the file with posix if (fp->config & H5VL_FILEI_CONFIG_DATA_ALIGN) { close (fp->fd); } @@ -815,13 +847,19 @@ void H5VL_log_filei_close (H5VL_log_file_t *fp) { H5VL_LOGI_PROFILING_TIMER_START; err = H5VLfile_close (fp->uo, fp->uvlid, H5P_DATASET_XFER_DEFAULT, NULL); CHECK_ERR - if (fp->sfp && (fp->sfp != fp->uo)) { - err = H5VLfile_close (fp->sfp, fp->uvlid, H5P_DATASET_XFER_DEFAULT, NULL); - CHECK_ERR + if (fp->sfp && (fp->sfp != fp->uo)) { // if subfiling is enabled + if 
(fp->subfile_records) { // this means subfiles are opened not created, e.g. for read. + for (i = 0; i < fp->ngroup; i++) { + fp->sfp = fp->subfile_records[i].uo; + err = H5VLfile_close (fp->sfp, fp->uvlid, H5P_DATASET_XFER_DEFAULT, NULL); + CHECK_ERR + } + free (fp->subfile_records); + } else { + err = H5VLfile_close (fp->sfp, fp->uvlid, H5P_DATASET_XFER_DEFAULT, NULL); + CHECK_ERR + } } - H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLFILE_CLOSE); - - H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CLOSE); #ifdef ENABLE_PROFILING { @@ -909,7 +947,8 @@ void H5VL_log_filei_open_subfile (H5VL_log_file_t *fp, hid_t dxpl_id) { herr_t err = 0; int attbuf[5]; - int stat; + int stat, subfile_id, mpierr; + H5VL_loc_params_t loc; // Open subfile dir if (fp->rank == 0) { @@ -925,20 +964,38 @@ void H5VL_log_filei_open_subfile (H5VL_log_file_t *fp, MPI_Bcast (&stat, 1, MPI_INT, 0, fp->comm); if (stat != 0) { RET_ERR ("Cannot open subfile dir") } - // Create the subfiles with underlying VOL + loc.obj_type = H5I_FILE; + loc.type = H5VL_OBJECT_BY_SELF; + + fp->subfile_records = (H5VL_log_subfile_record_t *) malloc (fp->ngroup * sizeof (H5VL_log_subfile_record_t)); + err = H5Pset_fapl_mpio (fapl_id, fp->group_comm, fp->info); CHECK_ERR - H5VL_LOGI_PROFILING_TIMER_START; + for (subfile_id = 0; subfile_id < fp->ngroup; subfile_id ++) { + fp->subname = fp->name + ".subfiles/" + std::string (basename ((char *)(fp->name.c_str ()))) + + "." 
+ std::to_string (subfile_id); + fp->sfp = H5VLfile_open (fp->subname.c_str (), flags, fapl_id, dxpl_id, NULL); + CHECK_PTR ((fp->sfp)) + + // Update nldset and nmdset + H5VL_logi_get_att (fp->sfp, fp->uvlid, fp->type, H5VL_LOG_FILEI_ATTR, H5T_NATIVE_INT32, attbuf, fp->dxplid); + fp->subfile_records[subfile_id].nldset = attbuf[1]; + fp->subfile_records[subfile_id].nmdset = attbuf[2]; + + fp->subfile_records[subfile_id].uo = fp->sfp; + + mpierr = MPI_File_open (fp->group_comm, fp->subname.c_str (), MPI_MODE_RDONLY, fp->info, &(fp->subfile_records[subfile_id].fh)); + CHECK_MPIERR + + fp->subfile_records[subfile_id].lgp = H5VLgroup_open (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, + H5P_GROUP_ACCESS_DEFAULT, fp->dxplid, NULL); + } + fp->subname = fp->name + ".subfiles/" + std::string (basename ((char *)(fp->name.c_str ()))) + "." + std::to_string (fp->group_id); - fp->sfp = H5VLfile_open (fp->subname.c_str (), flags, fapl_id, dxpl_id, NULL); - CHECK_PTR (fp->sfp) - H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLFILE_CREATE); - - // Update nldset and nmdset - H5VL_logi_get_att (fp->sfp, fp->uvlid, fp->type, H5VL_LOG_FILEI_ATTR, H5T_NATIVE_INT32, attbuf, fp->dxplid); - fp->nldset = attbuf[1]; - fp->nmdset = attbuf[2]; + fp->sfp = fp->subfile_records[fp->group_id].uo; + fp->nldset = fp->subfile_records[fp->group_id].nldset; + fp->nmdset = fp->subfile_records[fp->group_id].nmdset; } void H5VL_log_filei_calc_node_rank (H5VL_log_file_t *fp) { diff --git a/src/H5VL_logi_nb.cpp b/src/H5VL_logi_nb.cpp index 537e9f0d..bde6acf6 100644 --- a/src/H5VL_logi_nb.cpp +++ b/src/H5VL_logi_nb.cpp @@ -482,19 +482,23 @@ void H5VL_log_nb_perform_read (H5VL_log_file_t *fp, } } + H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_NB_PERFORM_READ); +} + +void H5VL_log_nb_perform_read_post_process(H5VL_log_file_t *fp, std::vector &reqs, hid_t dxplid) { + int i; + herr_t err; + hsize_t esize; + // Post processing for (auto &r : reqs) { // Type convertion if (r->dtype != r->mtype) { - // 
void *bg = NULL; - esize = H5Tget_size (r->mtype); CHECK_ID (esize) - // if (H5Tget_class (r->mtype) == H5T_COMPOUND) bg = malloc (r->rsize * esize); err = H5Tconvert (r->dtype, r->mtype, r->rsize, r->xbuf, NULL, dxplid); CHECK_ERR - // free (bg); H5Tclose (r->dtype); H5Tclose (r->mtype); @@ -515,10 +519,7 @@ void H5VL_log_nb_perform_read (H5VL_log_file_t *fp, H5VL_log_filei_bfree (fp, r->xbuf); } } - - H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_NB_PERFORM_READ); } - void H5VL_log_nb_flush_read_reqs (void *file, std::vector &reqs, hid_t dxplid) { herr_t err = 0; int mpierr; @@ -546,45 +547,33 @@ void H5VL_log_nb_flush_read_reqs (void *file, std::vector &re if ((!(fp->config & H5VL_FILEI_CONFIG_SUBFILING)) || (fp->config & H5VL_FILEI_CONFIG_SINGLE_SUBFILE_READ)) { H5VL_log_nb_perform_read (fp, reqs, dxplid); + } else if (fp->subfile_records) { + group_id = fp->group_id; // Backup group ID + + for (i = 1; i <= fp->ngroup; i++) { + fp->group_id = (group_id + i) % fp->ngroup; + fp->idx->clear (); + fp->idxvalid = false; + fp->fh = fp->subfile_records[fp->group_id].fh; + fp->sfp = fp->subfile_records[fp->group_id].uo; + fp->nldset = fp->subfile_records[fp->group_id].nldset; + fp->nmdset = fp->subfile_records[fp->group_id].nmdset; + fp->lgp = fp->subfile_records[fp->group_id].lgp; + + H5VL_log_nb_perform_read (fp, reqs, dxplid); + } + + // no need to restore group_id, fp->lgp, etc + // as the iteratation above ends with group_id = fp->group_id } else { - // TODO: Fix subfile read - - // group_id = fp->group_id; // Backup group ID - // // Process our own subfile last so wew don't need to reopen it - // for (i = 1; i <= fp->ngroup; i++) { - // H5VL_LOGI_PROFILING_TIMER_START; - // // Close the log group - // err = H5VLgroup_close (fp->lgp, fp->uvlid, fp->dxplid, NULL); - // CHECK_ERR - // // Close previous subfile with MPI - // mpierr = MPI_File_close (&(fp->fh)); - // CHECK_MPIERR - // // Close previous subfile - // err = H5VLfile_close (fp->sfp, fp->uvlid, 
H5P_DATASET_XFER_DEFAULT, NULL); - // CHECK_ERR - // // Erase the index table of previous subfile - // fp->idx->clear (); - // fp->idxvalid = false; - - // // Open the current subfile - // fp->group_id = (group_id + i) % fp->ngroup; - // H5VL_log_filei_open_subfile (fp, fp->flag, fp->ufaplid, fp->dxplid); - - // // Open the LOG group - // loc.obj_type = H5I_FILE; - // loc.type = H5VL_OBJECT_BY_SELF; - // fp->lgp = H5VLgroup_open (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, - // H5P_GROUP_ACCESS_DEFAULT, fp->dxplid, NULL); - // CHECK_PTR (fp->lgp) - // // Open the file with MPI - // mpierr = MPI_File_open (fp->group_comm, fp->subname.c_str (), MPI_MODE_RDWR, fp->info, - // &(fp->fh)); - // CHECK_MPIERR - - // H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_NB_FLUSH_READ_REQS_SWITCH_SUBFILE); - // } + // This means we are trying to read something in create mode. + // Currently this is not supported/implemented. + // Only reading an existing file (using file open mode) is supported. + // Do nothing.
} + H5VL_log_nb_perform_read_post_process (fp, reqs, dxplid); + // Clear the request queue for (auto rp : reqs) { delete rp; } reqs.clear (); diff --git a/src/H5VL_logi_nb.hpp b/src/H5VL_logi_nb.hpp index cadd57a8..f1516bd1 100644 --- a/src/H5VL_logi_nb.hpp +++ b/src/H5VL_logi_nb.hpp @@ -115,6 +115,7 @@ void H5VL_log_nb_flush_read_reqs (void *file, std::vector &re void H5VL_log_nb_perform_read (H5VL_log_file_t *fp, std::vector &reqs, hid_t dxplid); +void H5VL_log_nb_perform_read_post_process(H5VL_log_file_t *fp, std::vector &reqs, hid_t dxplid); void H5VL_log_nb_flush_write_reqs (void *file); void H5VL_log_nb_ost_write (void *file, off_t doff, off_t off, int cnt, int *mlens, off_t *moffs); void H5VL_log_nb_flush_write_reqs_align (void *file, hid_t dxplid); From 3601bbcf1902594cb407d615f0df97ba6b184db0 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 24 Apr 2023 03:16:15 -0500 Subject: [PATCH 12/22] add subfile read testcases --- tests/basic/Makefile.am | 1 + tests/basic/subfile_dread.cpp | 216 ++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 tests/basic/subfile_dread.cpp diff --git a/tests/basic/Makefile.am b/tests/basic/Makefile.am index b829eb54..81f58c86 100644 --- a/tests/basic/Makefile.am +++ b/tests/basic/Makefile.am @@ -32,6 +32,7 @@ check_PROGRAMS = attr \ memsel \ multiblockselection \ multipointselection \ + subfile_dread \ subfile_dwrite EXTRA_DIST = seq_runs.sh parallel_run.sh diff --git a/tests/basic/subfile_dread.cpp b/tests/basic/subfile_dread.cpp new file mode 100644 index 00000000..3ddbae48 --- /dev/null +++ b/tests/basic/subfile_dread.cpp @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2022, Northwestern University and Argonne National Laboratory + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include +#include /* getenv() */ +#include /* strcpy() */ +#include /* basename() */ +#include +#include + +#include "H5VL_log.h" +#include "testutils.hpp" + +#define N 10 + +int create_subfile(const char* file_name, int rank, int np, vol_env* env_ptr); +int read_subfile(const char* file_name, int rank, int np, vol_env* env_ptr); +int expected_buf_val(int rank, int np, int i, int is_write); + +int main(int argc, char **argv) { + herr_t err = 0; + int i, rank, np, nerrs=0, mpi_required; + char file_name[256], *env_str; + vol_env env; + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_required); + MPI_Comm_size(MPI_COMM_WORLD, &np); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + sprintf(file_name, "%s.h5", basename(argv[0])); + if (argc > 2) { + if (!rank) printf("Usage: %s [filename]\n", argv[0]); + MPI_Finalize(); + return 1; + } + else if (argc > 1) + strcpy(file_name, argv[1]); + + /* check VOL related environment variables */ + check_env(&env); + SHOW_TEST_INFO("subfileing read") + + // pre-process: create a file with a dataset + err = create_subfile(file_name, rank, np, &env); + CHECK_ERR(err) + + // start testing subfile read + err = read_subfile(file_name, rank, np, &env); + CHECK_ERR(err) + + SHOW_TEST_RESULT + +err_out:; + MPI_Finalize(); + + return (nerrs > 0); +} + +int expected_buf_val(int rank, int np, int i, int is_write) { + if (is_write) return rank * 100 + i; + + // below: is_read + return (np - rank - 1) * 100 + i; +} + +int read_subfile(const char* file_name, int rank, int np, vol_env* env_ptr) { + herr_t err = 0; + int i,nerrs=0, nsubfiles, buf[N]; + hid_t fid=-1, did=-1, sid=-1, msid=-1; + hid_t fapl_id=-1, fcpl_id=H5P_DEFAULT, log_vlid=H5I_INVALID_HID; + hsize_t dims[2] = {0, N}, start[2], count[2]; + + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + CHECK_ERR(fapl_id) + // MPI and collective metadata is required by LOG VOL + err = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL); + CHECK_ERR(err) + err = 
H5Pset_all_coll_metadata_ops(fapl_id, 1); + CHECK_ERR(err) + + if (env_ptr->native_only == 0 && env_ptr->connector == 0) { + // Register LOG VOL plugin + log_vlid = H5VLregister_connector(&H5VL_log_g, H5P_DEFAULT); + CHECK_ERR(log_vlid) + err = H5Pset_vol(fapl_id, log_vlid, NULL); + CHECK_ERR(err) + } + + // Open file + fid = H5Fopen(file_name, H5F_ACC_RDONLY, fapl_id); + CHECK_ERR(fid) + + // Open a dataset of 2D array of size np x N + dims[0] = np; + sid = H5Screate_simple(2, dims, dims); + CHECK_ERR(sid); + did = H5Dopen2(fid, "D", H5P_DEFAULT); + CHECK_ERR(did) + + // reset buffer + for (i = 0; i < N; i++) { buf[i] = 0; } + + /* create a hyperslab of 1 x N */ + start[0] = np - rank - 1; + start[1] = 0; + count[0] = 1; + count[1] = N; + err = H5Sselect_hyperslab(sid, H5S_SELECT_SET, start, NULL, count, NULL); + CHECK_ERR(err) + + msid = H5Screate_simple(1, dims + 1, dims + 1); + CHECK_ERR(msid); + + // read from dataset + err = H5Dread(did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); + CHECK_ERR(err) + + for (i = 0; i < N; i++) { + if (buf[i] != expected_buf_val(rank, np, i, 0)) { + nerrs++; + } + } + MPI_Barrier(MPI_COMM_WORLD); + if (nerrs > 0) { + printf("Rank %d: Error: %d errors found\n", rank, nerrs); + } + MPI_Barrier(MPI_COMM_WORLD); + +err_out:; + if (msid >= 0) H5Sclose(msid); + if (sid >= 0) H5Sclose(sid); + if (did >= 0) H5Dclose(did); + if (fapl_id >= 0) H5Pclose(fapl_id); + if (fcpl_id != H5P_DEFAULT) H5Pclose(fcpl_id); + if (log_vlid != H5I_INVALID_HID) H5VLclose(log_vlid); + if (fid >= 0) H5Fclose(fid); + + return nerrs > 0; +} + +int create_subfile(const char* file_name, int rank, int np, vol_env* env_ptr) { + herr_t err = 0; + int i,nerrs=0, nsubfiles, buf[N]; + hid_t fid=-1, did=-1, sid=-1, msid=-1; + hid_t fapl_id=-1, fcpl_id=H5P_DEFAULT, log_vlid=H5I_INVALID_HID; + hsize_t dims[2] = {0, N}, start[2], count[2]; + char *env_str; + + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + CHECK_ERR(fapl_id) + // MPI and collective metadata is required by LOG 
VOL + err = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL); + CHECK_ERR(err) + err = H5Pset_all_coll_metadata_ops(fapl_id, 1); + CHECK_ERR(err) + + if (env_ptr->native_only == 0 && env_ptr->connector == 0) { + // Register LOG VOL plugin + log_vlid = H5VLregister_connector(&H5VL_log_g, H5P_DEFAULT); + CHECK_ERR(log_vlid) + err = H5Pset_vol(fapl_id, log_vlid, NULL); + CHECK_ERR(err) + } + + env_str = getenv("H5VL_LOG_NSUBFILES"); + if (env_str == NULL) { + /* set the number of subfiles */ + fcpl_id = H5Pcreate(H5P_FILE_CREATE); + CHECK_ERR(fcpl_id) + nsubfiles = np / 2; + if (nsubfiles == 0) nsubfiles = -1; + err = H5Pset_subfiling(fcpl_id, nsubfiles); + CHECK_ERR(err) + } + + // Create file + fid = H5Fcreate(file_name, H5F_ACC_TRUNC, fcpl_id, fapl_id); + CHECK_ERR(fid) + + // Create a dataset of 2D array of size np x N + dims[0] = np; + sid = H5Screate_simple(2, dims, dims); + CHECK_ERR(sid); + did = H5Dcreate2(fid, "D", H5T_STD_I32LE, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + CHECK_ERR(did) + + for (i = 0; i < N; i++) { buf[i] = expected_buf_val(rank, np, i, 1); } + + /* create a hyperslab of 1 x N */ + start[0] = rank; + start[1] = 0; + count[0] = 1; + count[1] = N; + err = H5Sselect_hyperslab(sid, H5S_SELECT_SET, start, NULL, count, NULL); + CHECK_ERR(err) + + msid = H5Screate_simple(1, dims + 1, dims + 1); + CHECK_ERR(msid); + + // Write to dataset in parallel + err = H5Dwrite(did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); + CHECK_ERR(err) + +err_out:; + if (msid >= 0) H5Sclose(msid); + if (sid >= 0) H5Sclose(sid); + if (did >= 0) H5Dclose(did); + if (fapl_id >= 0) H5Pclose(fapl_id); + if (fcpl_id != H5P_DEFAULT) H5Pclose(fcpl_id); + if (log_vlid != H5I_INVALID_HID) H5VLclose(log_vlid); + if (fid >= 0) H5Fclose(fid); + + return nerrs > 0; +} \ No newline at end of file From b21a80c044224fca2acc209800aa6a4d5d908c15 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 24 Apr 2023 12:17:44 -0500 Subject: [PATCH 13/22] init fp->subfilerecords to 
NULL --- src/H5VL_log_file.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/H5VL_log_file.cpp b/src/H5VL_log_file.cpp index b4917c79..f9ae49f4 100644 --- a/src/H5VL_log_file.cpp +++ b/src/H5VL_log_file.cpp @@ -117,6 +117,7 @@ void *H5VL_log_file_create ( fp->zbuf = NULL; fp->is_log_based_file = true; fp->is_new = true; + fp->subfile_records = NULL; mpierr = MPI_Comm_dup (comm, &(fp->comm)); CHECK_MPIERR if (mpiinfo != MPI_INFO_NULL) { @@ -266,6 +267,7 @@ void *H5VL_log_file_open ( fp->zbuf = NULL; fp->is_log_based_file = true; fp->is_new = false; + fp->subfile_records = NULL; mpierr = MPI_Comm_dup (comm, &(fp->comm)); CHECK_MPIERR if (mpiinfo != MPI_INFO_NULL) { From c32690c2f6f2d0eba647491d52945ac48588ce4f Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Wed, 26 Apr 2023 14:27:28 -0500 Subject: [PATCH 14/22] fix: subfile read: mismatch of nsubfiles and nproc It is possible that, for example, a file is created with 8 subfiles. But when opening and reading the file, we only use 4 processes. In the original implementation before this fix, the info of 8 subfiles is not saved. Only the first fp->ngroup subfiles will be opened for read, where fp->ngroup is a number bounded by the number of processes (i.e. <= 4 in this case). In this fix, we use fp->nsubfiles to store the number of subfiles for an opened file. All fp->nsubfiles subfiles will be opened for read.
--- src/H5VL_log_file.cpp | 2 ++ src/H5VL_log_file.hpp | 4 ++++ src/H5VL_log_filei.cpp | 26 +++++++++++++------------- src/H5VL_logi_nb.cpp | 4 ++-- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/H5VL_log_file.cpp b/src/H5VL_log_file.cpp index f9ae49f4..ac431954 100644 --- a/src/H5VL_log_file.cpp +++ b/src/H5VL_log_file.cpp @@ -118,6 +118,7 @@ void *H5VL_log_file_create ( fp->is_log_based_file = true; fp->is_new = true; fp->subfile_records = NULL; + fp->nsubfiles = 0; mpierr = MPI_Comm_dup (comm, &(fp->comm)); CHECK_MPIERR if (mpiinfo != MPI_INFO_NULL) { @@ -268,6 +269,7 @@ void *H5VL_log_file_open ( fp->is_log_based_file = true; fp->is_new = false; fp->subfile_records = NULL; + fp->nsubfiles = 0; mpierr = MPI_Comm_dup (comm, &(fp->comm)); CHECK_MPIERR if (mpiinfo != MPI_INFO_NULL) { diff --git a/src/H5VL_log_file.hpp b/src/H5VL_log_file.hpp index bde13d53..5840289a 100644 --- a/src/H5VL_log_file.hpp +++ b/src/H5VL_log_file.hpp @@ -58,6 +58,10 @@ typedef struct H5VL_log_file_t : H5VL_log_obj_t { int group_np; // Number of processes in the group int ngroup; // Number of groups. NOTE: This value is only valid when // H5VL_FILEI_CONFIG_SUBFILING is set in config + int nsubfiles; // Number of subfiles. NOTE: This value is only valid when + // H5VL_FILEI_CONFIG_SUBFILING is set in config. This is + // used in case of H5Fopen, where ngroup might be different + // from nsubfiles. 
int prev_rank; // We only start writing after prev_rank finishes writing int next_rank; // We have to notify next_rank to start writing after we finish int target_ost; // What OST should we write to in aligned data layout diff --git a/src/H5VL_log_filei.cpp b/src/H5VL_log_filei.cpp index d156aeaa..d40fb878 100644 --- a/src/H5VL_log_filei.cpp +++ b/src/H5VL_log_filei.cpp @@ -132,6 +132,7 @@ void H5VL_log_filei_post_open (H5VL_log_file_t *fp) { fp->nmdset = attbuf[2]; fp->config = attbuf[3]; fp->ngroup = attbuf[4]; + fp->nsubfiles = attbuf[4]; fp->mreqs.resize (fp->ndset, NULL); // Merge write reqeusts fp->dsets_info.resize (fp->ndset, NULL); // Dataset info fp->group_rank = fp->rank; @@ -778,7 +779,7 @@ void H5VL_log_filei_close (H5VL_log_file_t *fp) { // Close the log group if (fp->subfile_records) { - for (i = 0; i < fp->ngroup; i++) { + for (i = 0; i < fp->nsubfiles; i++) { if (fp->subfile_records[i].lgp) { H5VL_LOGI_PROFILING_TIMER_START err = H5VLgroup_close (fp->subfile_records[i].lgp, fp->uvlid, fp->dxplid, NULL); @@ -813,7 +814,7 @@ void H5VL_log_filei_close (H5VL_log_file_t *fp) { // Close the file with MPI if (fp->subfile_records) { - for (i = 0; i < fp->ngroup; i++) { + for (i = 0; i < fp->nsubfiles; i++) { if (fp->subfile_records[i].fh != MPI_FILE_NULL) { mpierr = MPI_File_close (&(fp->subfile_records[i].fh)); CHECK_MPIERR @@ -849,7 +850,7 @@ void H5VL_log_filei_close (H5VL_log_file_t *fp) { CHECK_ERR if (fp->sfp && (fp->sfp != fp->uo)) { // if subfiling is enabled if (fp->subfile_records) { // this means subfiles are opened not created, e.g. for read. 
- for (i = 0; i < fp->ngroup; i++) { + for (i = 0; i < fp->nsubfiles; i++) { fp->sfp = fp->subfile_records[i].uo; err = H5VLfile_close (fp->sfp, fp->uvlid, H5P_DATASET_XFER_DEFAULT, NULL); CHECK_ERR @@ -947,7 +948,7 @@ void H5VL_log_filei_open_subfile (H5VL_log_file_t *fp, hid_t dxpl_id) { herr_t err = 0; int attbuf[5]; - int stat, subfile_id, mpierr; + int stat, i, mpierr; H5VL_loc_params_t loc; // Open subfile dir @@ -967,27 +968,26 @@ void H5VL_log_filei_open_subfile (H5VL_log_file_t *fp, loc.obj_type = H5I_FILE; loc.type = H5VL_OBJECT_BY_SELF; - fp->subfile_records = (H5VL_log_subfile_record_t *) malloc (fp->ngroup * sizeof (H5VL_log_subfile_record_t)); - + fp->subfile_records = (H5VL_log_subfile_record_t *) malloc (fp->nsubfiles * sizeof (H5VL_log_subfile_record_t)); err = H5Pset_fapl_mpio (fapl_id, fp->group_comm, fp->info); CHECK_ERR - for (subfile_id = 0; subfile_id < fp->ngroup; subfile_id ++) { + for (i = 0; i < fp->nsubfiles; i ++) { fp->subname = fp->name + ".subfiles/" + std::string (basename ((char *)(fp->name.c_str ()))) + - "." + std::to_string (subfile_id); + "." 
+ std::to_string (i); fp->sfp = H5VLfile_open (fp->subname.c_str (), flags, fapl_id, dxpl_id, NULL); CHECK_PTR ((fp->sfp)) // Update nldset and nmdset H5VL_logi_get_att (fp->sfp, fp->uvlid, fp->type, H5VL_LOG_FILEI_ATTR, H5T_NATIVE_INT32, attbuf, fp->dxplid); - fp->subfile_records[subfile_id].nldset = attbuf[1]; - fp->subfile_records[subfile_id].nmdset = attbuf[2]; + fp->subfile_records[i].nldset = attbuf[1]; + fp->subfile_records[i].nmdset = attbuf[2]; - fp->subfile_records[subfile_id].uo = fp->sfp; + fp->subfile_records[i].uo = fp->sfp; - mpierr = MPI_File_open (fp->group_comm, fp->subname.c_str (), MPI_MODE_RDONLY, fp->info, &(fp->subfile_records[subfile_id].fh)); + mpierr = MPI_File_open (fp->group_comm, fp->subname.c_str (), MPI_MODE_RDONLY, fp->info, &(fp->subfile_records[i].fh)); CHECK_MPIERR - fp->subfile_records[subfile_id].lgp = H5VLgroup_open (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, + fp->subfile_records[i].lgp = H5VLgroup_open (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, H5P_GROUP_ACCESS_DEFAULT, fp->dxplid, NULL); } diff --git a/src/H5VL_logi_nb.cpp b/src/H5VL_logi_nb.cpp index bde6acf6..48a11703 100644 --- a/src/H5VL_logi_nb.cpp +++ b/src/H5VL_logi_nb.cpp @@ -550,8 +550,8 @@ void H5VL_log_nb_flush_read_reqs (void *file, std::vector &re } else if (fp->subfile_records) { group_id = fp->group_id; // Backup group ID - for (i = 1; i <= fp->ngroup; i++) { - fp->group_id = (group_id + i) % fp->ngroup; + for (i = 1; i <= fp->nsubfiles; i++) { + fp->group_id = (group_id + i) % fp->nsubfiles; fp->idx->clear (); fp->idxvalid = false; fp->fh = fp->subfile_records[fp->group_id].fh; From 6c0a05317f749d3483f712d3e33a0af345dad324 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Wed, 26 Apr 2023 16:47:20 -0500 Subject: [PATCH 15/22] fix: possible hangs during dataset read. MPI_File_set_view is a collective call. Before this fix, not all processes call this function during dataset read, introducing possible hangs.
A subfile read test case may trigger this issue more easily. This commit fixes this issue. --- src/H5VL_logi_nb.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/H5VL_logi_nb.cpp b/src/H5VL_logi_nb.cpp index 48a11703..6a2d520a 100644 --- a/src/H5VL_logi_nb.cpp +++ b/src/H5VL_logi_nb.cpp @@ -417,8 +417,16 @@ void H5VL_log_nb_perform_read (H5VL_log_file_t *fp, mpierr = MPI_File_set_view (fp->fh, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); CHECK_MPIERR } else { + // MPI_File_set_view is collective call, so we need to call it even if there is no data + mpierr = MPI_File_set_view (fp->fh, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); + CHECK_MPIERR + + // read 0 bytes mpierr = MPI_File_read_at_all (fp->fh, 0, MPI_BOTTOM, 0, MPI_BYTE, &stat); CHECK_MPIERR + + mpierr = MPI_File_set_view (fp->fh, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); + CHECK_MPIERR } } From 817fd11c51908daf6b142e885d9019714c840aa4 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Wed, 26 Apr 2023 18:05:19 -0500 Subject: [PATCH 16/22] testcase: subfile_dread tests multiple scenarios Test the following: 1. nsubfile > nproc 2. nsubfile == nproc 3. nsubfile < nproc For each of the above, test: 1. read pattern same as write pattern (row wise) 2. read pattern is row wise, but each process reads a different row than it writes. (read from one subfile that the process is not responsible for) 3. read pattern is column wise (read from several subfiles) 4. read the entire dataset. This means a total of 12 scenarios are tested. Also, we test each scenario using 1 to 12 processes. This makes sure Log VOL also works for an odd number of processes.
--- tests/basic/parallel_run.sh | 9 +- tests/basic/subfile_dread.cpp | 365 ++++++++++++++++++++++------------ 2 files changed, 243 insertions(+), 131 deletions(-) diff --git a/tests/basic/parallel_run.sh b/tests/basic/parallel_run.sh index 5924dd29..2adbcdb7 100755 --- a/tests/basic/parallel_run.sh +++ b/tests/basic/parallel_run.sh @@ -13,6 +13,13 @@ RUN_CMD=`echo ${TESTMPIRUN} | ${SED} -e "s/NP/$1/g"` log_vol_file_only=1 for p in ${check_PROGRAMS} ; do - test_func ./$p + if [ "x$p" != "xsubfile_dread" ]; then + test_func ./$p + fi done +for nproc in {1..12} ; do + RUN_CMD=`echo ${TESTMPIRUN} | ${SED} -e "s/NP/$nproc/g"` + test_func ./subfile_dread +done +exit 0 diff --git a/tests/basic/subfile_dread.cpp b/tests/basic/subfile_dread.cpp index 3ddbae48..26804e13 100644 --- a/tests/basic/subfile_dread.cpp +++ b/tests/basic/subfile_dread.cpp @@ -3,214 +3,319 @@ * See COPYRIGHT notice in top-level directory. */ +#include +#include /* basename() */ +#include #include #include /* getenv() */ #include /* strcpy() */ -#include /* basename() */ -#include -#include #include "H5VL_log.h" #include "testutils.hpp" -#define N 10 +#define INDEP_CHECK_ERR(A, COMM) \ + { \ + err_macro = (A); \ + MPI_Allreduce (MPI_IN_PLACE, &err_macro, 1, MPI_INT, MPI_MIN, COMM); \ + if (err_macro < 0) { \ + nerrs++; \ + printf ("Error at line %d in %s:\n", __LINE__, __FILE__); \ + goto err_out; \ + } \ + } -int create_subfile(const char* file_name, int rank, int np, vol_env* env_ptr); -int read_subfile(const char* file_name, int rank, int np, vol_env* env_ptr); -int expected_buf_val(int rank, int np, int i, int is_write); +int create_subfile (const char *file_name, int rank, int np, vol_env *env_ptr); +int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI_Comm comm); +int expected_buf_val (int rank, int np, int i, int is_write, int is_columwise); -int main(int argc, char **argv) { - herr_t err = 0; - int i, rank, np, nerrs=0, mpi_required; +int main (int argc, char 
**argv) { + int err_macro = 0; + herr_t err = 0; + int i, rank, np, nerrs = 0, mpi_required; char file_name[256], *env_str; vol_env env; + MPI_Comm comm1 = MPI_COMM_WORLD, comm2 = MPI_COMM_WORLD; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_required); - MPI_Comm_size(MPI_COMM_WORLD, &np); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_required); + MPI_Comm_size (MPI_COMM_WORLD, &np); + MPI_Comm_rank (MPI_COMM_WORLD, &rank); - sprintf(file_name, "%s.h5", basename(argv[0])); + sprintf (file_name, "%s.h5", basename (argv[0])); if (argc > 2) { - if (!rank) printf("Usage: %s [filename]\n", argv[0]); - MPI_Finalize(); + if (!rank) printf ("Usage: %s [filename]\n", argv[0]); + MPI_Finalize (); return 1; - } - else if (argc > 1) - strcpy(file_name, argv[1]); + } else if (argc > 1) + strcpy (file_name, argv[1]); - /* check VOL related environment variables */ - check_env(&env); - SHOW_TEST_INFO("subfileing read") + // check VOL related environment variables + check_env (&env); + SHOW_TEST_INFO ("subfileing read") - // pre-process: create a file with a dataset - err = create_subfile(file_name, rank, np, &env); - CHECK_ERR(err) + // pre-process: create a file with (np / 2) subfiles + err = create_subfile (file_name, rank, np, &env); + INDEP_CHECK_ERR (err, MPI_COMM_WORLD) - // start testing subfile read - err = read_subfile(file_name, rank, np, &env); - CHECK_ERR(err) + { // test reading with np processes (i.e. nproc > nsubfiles) + MPI_Barrier (MPI_COMM_WORLD); + err = read_subfile (file_name, rank, np, &env, MPI_COMM_WORLD); + INDEP_CHECK_ERR (err, MPI_COMM_WORLD) + } - SHOW_TEST_RESULT + { // test reading with np / 2 processes (i.e. 
nproc == nsubfiles) + MPI_Barrier (MPI_COMM_WORLD); + MPI_Comm_split (MPI_COMM_WORLD, rank * 2 / np, rank, &comm1); + if (rank < (np + 1) / 2) { err = read_subfile (file_name, rank, np, &env, comm1); } + INDEP_CHECK_ERR (err, MPI_COMM_WORLD) + } + + { // test reading with np / 4 processes (i.e. nproc < nsubfiles) + MPI_Barrier (MPI_COMM_WORLD); + MPI_Comm_split (MPI_COMM_WORLD, rank * 4 / np, rank, &comm2); + if (rank < (np + 3) / 4) { err = read_subfile (file_name, rank, np, &env, comm2); } + INDEP_CHECK_ERR (err, MPI_COMM_WORLD) + } + SHOW_TEST_RESULT err_out:; - MPI_Finalize(); + if (comm1 != MPI_COMM_WORLD) MPI_Comm_free (&comm1); + if (comm2 != MPI_COMM_WORLD) MPI_Comm_free (&comm2); + MPI_Finalize (); return (nerrs > 0); } -int expected_buf_val(int rank, int np, int i, int is_write) { +int expected_buf_val (int rank, int np, int i, int is_write, int is_columwise) { if (is_write) return rank * 100 + i; - // below: is_read + if (is_columwise) return i * 100 + (np - rank - 1); return (np - rank - 1) * 100 + i; } -int read_subfile(const char* file_name, int rank, int np, vol_env* env_ptr) { +int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI_Comm comm) { herr_t err = 0; - int i,nerrs=0, nsubfiles, buf[N]; - hid_t fid=-1, did=-1, sid=-1, msid=-1; - hid_t fapl_id=-1, fcpl_id=H5P_DEFAULT, log_vlid=H5I_INVALID_HID; - hsize_t dims[2] = {0, N}, start[2], count[2]; - - fapl_id = H5Pcreate(H5P_FILE_ACCESS); - CHECK_ERR(fapl_id) + int i, nerrs = 0, nsubfiles; + int err_macro = 0; + hid_t fid = -1, did = -1, sid = -1, msid = -1, msid2 = -1; + hid_t fapl_id = -1, fcpl_id = H5P_DEFAULT, log_vlid = H5I_INVALID_HID; + hsize_t dims[2] = {0, 0}, start[2], count[2]; + int *buf = NULL; + int ii = 0; + + CHECK_ERR (np); + buf = (int *)malloc (sizeof (int) * np * np); + + fapl_id = H5Pcreate (H5P_FILE_ACCESS); + CHECK_ERR (fapl_id) // MPI and collective metadata is required by LOG VOL - err = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL); - 
CHECK_ERR(err) - err = H5Pset_all_coll_metadata_ops(fapl_id, 1); - CHECK_ERR(err) + err = H5Pset_fapl_mpio (fapl_id, comm, MPI_INFO_NULL); + CHECK_ERR (err) + err = H5Pset_all_coll_metadata_ops (fapl_id, 1); + CHECK_ERR (err) if (env_ptr->native_only == 0 && env_ptr->connector == 0) { // Register LOG VOL plugin - log_vlid = H5VLregister_connector(&H5VL_log_g, H5P_DEFAULT); - CHECK_ERR(log_vlid) - err = H5Pset_vol(fapl_id, log_vlid, NULL); - CHECK_ERR(err) + log_vlid = H5VLregister_connector (&H5VL_log_g, H5P_DEFAULT); + CHECK_ERR (log_vlid) + err = H5Pset_vol (fapl_id, log_vlid, NULL); + CHECK_ERR (err) } // Open file - fid = H5Fopen(file_name, H5F_ACC_RDONLY, fapl_id); - CHECK_ERR(fid) + fid = H5Fopen (file_name, H5F_ACC_RDONLY, fapl_id); + CHECK_ERR (fid) - // Open a dataset of 2D array of size np x N + // Open a dataset of 2D array of size np x np dims[0] = np; - sid = H5Screate_simple(2, dims, dims); - CHECK_ERR(sid); - did = H5Dopen2(fid, "D", H5P_DEFAULT); - CHECK_ERR(did) + dims[1] = np; + sid = H5Screate_simple (2, dims, dims); + CHECK_ERR (sid); + did = H5Dopen2 (fid, "D", H5P_DEFAULT); + CHECK_ERR (did) + + msid = H5Screate_simple (1, dims + 1, dims + 1); + CHECK_ERR (msid); + + { // test1: read pattern same as write pattern + // reset buffer + for (i = 0; i < np * np; i++) { buf[i] = 0; } + + // create a hyperslab of 1 x np + start[0] = rank; + start[1] = 0; + count[0] = 1; + count[1] = np; + err = H5Sselect_hyperslab (sid, H5S_SELECT_SET, start, NULL, count, NULL); + CHECK_ERR (err) + + // read from dataset + err = H5Dread (did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); + CHECK_ERR (err) + + for (i = 0; i < np; i++) { + if (buf[i] != expected_buf_val (rank, np, i, 1, 0)) { nerrs++; } + } + INDEP_CHECK_ERR ((-nerrs), comm); + } - // reset buffer - for (i = 0; i < N; i++) { buf[i] = 0; } + { // test2: read pattern different from write pattern, but still row-wise. 
+ // reset buffer + for (i = 0; i < np * np; i++) { buf[i] = 0; } - /* create a hyperslab of 1 x N */ - start[0] = np - rank - 1; - start[1] = 0; - count[0] = 1; - count[1] = N; - err = H5Sselect_hyperslab(sid, H5S_SELECT_SET, start, NULL, count, NULL); - CHECK_ERR(err) + /* create a hyperslab of 1 x np */ + start[0] = np - rank - 1; + start[1] = 0; + count[0] = 1; + count[1] = np; + err = H5Sselect_hyperslab (sid, H5S_SELECT_SET, start, NULL, count, NULL); + CHECK_ERR (err) + + // read from dataset + err = H5Dread (did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); + CHECK_ERR (err) + + for (i = 0; i < np; i++) { + if (buf[i] != expected_buf_val (rank, np, i, 0, 0)) { nerrs++; } + } + + INDEP_CHECK_ERR ((-nerrs), comm); + } - msid = H5Screate_simple(1, dims + 1, dims + 1); - CHECK_ERR(msid); + { // test3: read pattern different from write pattern, read column-wise. + // reset buffer + for (i = 0; i < np * np; i++) { buf[i] = 0; } - // read from dataset - err = H5Dread(did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); - CHECK_ERR(err) + // create a hyperslab of np x 1 + start[0] = 0; + start[1] = np - rank - 1; + count[0] = np; + count[1] = 1; + err = H5Sselect_hyperslab (sid, H5S_SELECT_SET, start, NULL, count, NULL); + CHECK_ERR (err) - for (i = 0; i < N; i++) { - if (buf[i] != expected_buf_val(rank, np, i, 0)) { - nerrs++; + // read from dataset + err = H5Dread (did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); + CHECK_ERR (err) + + for (i = 0; i < np; i++) { + if (buf[i] != expected_buf_val (rank, np, i, 0, 1)) { nerrs++; } } + INDEP_CHECK_ERR ((-nerrs), comm); } - MPI_Barrier(MPI_COMM_WORLD); - if (nerrs > 0) { - printf("Rank %d: Error: %d errors found\n", rank, nerrs); + + { // test4: each process read entire dataset + // reset buffer + for (i = 0; i < np * np; i++) { buf[i] = 0; } + + // create the entire region as hyperslab + err = H5Sselect_all (sid); + CHECK_ERR (err) + + dims[1] = np * np; + msid2 = H5Screate_simple (1, dims + 1, dims + 1); + CHECK_ERR 
(msid); + + // read from dataset + err = H5Dread (did, H5T_NATIVE_INT, msid2, sid, H5P_DEFAULT, buf); + CHECK_ERR (err) + + for (i = 0; i < np * np; i++) { + if (buf[i] != ((i / np) * 100 + (i % np))) { nerrs++; } + } + INDEP_CHECK_ERR ((-nerrs), comm); } - MPI_Barrier(MPI_COMM_WORLD); err_out:; - if (msid >= 0) H5Sclose(msid); - if (sid >= 0) H5Sclose(sid); - if (did >= 0) H5Dclose(did); - if (fapl_id >= 0) H5Pclose(fapl_id); - if (fcpl_id != H5P_DEFAULT) H5Pclose(fcpl_id); - if (log_vlid != H5I_INVALID_HID) H5VLclose(log_vlid); - if (fid >= 0) H5Fclose(fid); - - return nerrs > 0; + if (buf) free (buf); + if (msid >= 0) H5Sclose (msid); + if (msid2 >= 0) H5Sclose (msid2); + if (sid >= 0) H5Sclose (sid); + if (did >= 0) H5Dclose (did); + if (fapl_id >= 0) H5Pclose (fapl_id); + if (fcpl_id != H5P_DEFAULT) H5Pclose (fcpl_id); + if (log_vlid != H5I_INVALID_HID) H5VLclose (log_vlid); + if (fid >= 0) H5Fclose (fid); + + return -nerrs; } -int create_subfile(const char* file_name, int rank, int np, vol_env* env_ptr) { +int create_subfile (const char *file_name, int rank, int np, vol_env *env_ptr) { herr_t err = 0; - int i,nerrs=0, nsubfiles, buf[N]; - hid_t fid=-1, did=-1, sid=-1, msid=-1; - hid_t fapl_id=-1, fcpl_id=H5P_DEFAULT, log_vlid=H5I_INVALID_HID; - hsize_t dims[2] = {0, N}, start[2], count[2]; + int i, nerrs = 0, nsubfiles; + hid_t fid = -1, did = -1, sid = -1, msid = -1; + hid_t fapl_id = -1, fcpl_id = H5P_DEFAULT, log_vlid = H5I_INVALID_HID; + hsize_t dims[2] = {0, 0}, start[2], count[2]; + int *buf = NULL; char *env_str; - fapl_id = H5Pcreate(H5P_FILE_ACCESS); - CHECK_ERR(fapl_id) + CHECK_ERR (np); + buf = (int *)malloc (sizeof (int) * np); + + fapl_id = H5Pcreate (H5P_FILE_ACCESS); + CHECK_ERR (fapl_id) // MPI and collective metadata is required by LOG VOL - err = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL); - CHECK_ERR(err) - err = H5Pset_all_coll_metadata_ops(fapl_id, 1); - CHECK_ERR(err) + err = H5Pset_fapl_mpio (fapl_id, MPI_COMM_WORLD, 
MPI_INFO_NULL); + CHECK_ERR (err) + err = H5Pset_all_coll_metadata_ops (fapl_id, 1); + CHECK_ERR (err) if (env_ptr->native_only == 0 && env_ptr->connector == 0) { // Register LOG VOL plugin - log_vlid = H5VLregister_connector(&H5VL_log_g, H5P_DEFAULT); - CHECK_ERR(log_vlid) - err = H5Pset_vol(fapl_id, log_vlid, NULL); - CHECK_ERR(err) + log_vlid = H5VLregister_connector (&H5VL_log_g, H5P_DEFAULT); + CHECK_ERR (log_vlid) + err = H5Pset_vol (fapl_id, log_vlid, NULL); + CHECK_ERR (err) } - env_str = getenv("H5VL_LOG_NSUBFILES"); + env_str = getenv ("H5VL_LOG_NSUBFILES"); if (env_str == NULL) { /* set the number of subfiles */ - fcpl_id = H5Pcreate(H5P_FILE_CREATE); - CHECK_ERR(fcpl_id) + fcpl_id = H5Pcreate (H5P_FILE_CREATE); + CHECK_ERR (fcpl_id) nsubfiles = np / 2; if (nsubfiles == 0) nsubfiles = -1; - err = H5Pset_subfiling(fcpl_id, nsubfiles); - CHECK_ERR(err) + err = H5Pset_subfiling (fcpl_id, nsubfiles); + CHECK_ERR (err) } // Create file - fid = H5Fcreate(file_name, H5F_ACC_TRUNC, fcpl_id, fapl_id); - CHECK_ERR(fid) + fid = H5Fcreate (file_name, H5F_ACC_TRUNC, fcpl_id, fapl_id); + CHECK_ERR (fid) - // Create a dataset of 2D array of size np x N + // Create a dataset of 2D array of size np x np dims[0] = np; - sid = H5Screate_simple(2, dims, dims); - CHECK_ERR(sid); - did = H5Dcreate2(fid, "D", H5T_STD_I32LE, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK_ERR(did) + dims[1] = np; + sid = H5Screate_simple (2, dims, dims); + CHECK_ERR (sid); + did = H5Dcreate2 (fid, "D", H5T_STD_I32LE, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + CHECK_ERR (did) - for (i = 0; i < N; i++) { buf[i] = expected_buf_val(rank, np, i, 1); } + for (i = 0; i < np; i++) { buf[i] = expected_buf_val (rank, np, i, 1, 0); } - /* create a hyperslab of 1 x N */ + // create a hyperslab of 1 x N start[0] = rank; start[1] = 0; count[0] = 1; - count[1] = N; - err = H5Sselect_hyperslab(sid, H5S_SELECT_SET, start, NULL, count, NULL); - CHECK_ERR(err) + count[1] = np; + err = H5Sselect_hyperslab 
(sid, H5S_SELECT_SET, start, NULL, count, NULL); + CHECK_ERR (err) - msid = H5Screate_simple(1, dims + 1, dims + 1); - CHECK_ERR(msid); + msid = H5Screate_simple (1, dims + 1, dims + 1); + CHECK_ERR (msid); // Write to dataset in parallel - err = H5Dwrite(did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); - CHECK_ERR(err) + err = H5Dwrite (did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); + CHECK_ERR (err) err_out:; - if (msid >= 0) H5Sclose(msid); - if (sid >= 0) H5Sclose(sid); - if (did >= 0) H5Dclose(did); - if (fapl_id >= 0) H5Pclose(fapl_id); - if (fcpl_id != H5P_DEFAULT) H5Pclose(fcpl_id); - if (log_vlid != H5I_INVALID_HID) H5VLclose(log_vlid); - if (fid >= 0) H5Fclose(fid); - - return nerrs > 0; + if (buf) free (buf); + if (msid >= 0) H5Sclose (msid); + if (sid >= 0) H5Sclose (sid); + if (did >= 0) H5Dclose (did); + if (fapl_id >= 0) H5Pclose (fapl_id); + if (fcpl_id != H5P_DEFAULT) H5Pclose (fcpl_id); + if (log_vlid != H5I_INVALID_HID) H5VLclose (log_vlid); + if (fid >= 0) H5Fclose (fid); + + return -nerrs; } \ No newline at end of file From c15c3606c3b92a2bf8d5c82ad924d0f853855e13 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Tue, 30 May 2023 11:46:06 -0500 Subject: [PATCH 17/22] subfile_read test: fewer processes cases --- tests/basic/parallel_run.sh | 2 +- tests/basic/subfile_dread.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/basic/parallel_run.sh b/tests/basic/parallel_run.sh index 2adbcdb7..6d483a3a 100755 --- a/tests/basic/parallel_run.sh +++ b/tests/basic/parallel_run.sh @@ -18,7 +18,7 @@ for p in ${check_PROGRAMS} ; do fi done -for nproc in {1..12} ; do +for nproc in 4 8 3 5 ; do RUN_CMD=`echo ${TESTMPIRUN} | ${SED} -e "s/NP/$nproc/g"` test_func ./subfile_dread done diff --git a/tests/basic/subfile_dread.cpp b/tests/basic/subfile_dread.cpp index 26804e13..9922778e 100644 --- a/tests/basic/subfile_dread.cpp +++ b/tests/basic/subfile_dread.cpp @@ -292,7 +292,7 @@ int create_subfile (const char *file_name, int 
rank, int np, vol_env *env_ptr) { for (i = 0; i < np; i++) { buf[i] = expected_buf_val (rank, np, i, 1, 0); } - // create a hyperslab of 1 x N + // create a hyperslab of 1 x np start[0] = rank; start[1] = 0; count[0] = 1; From 3b3a0a2ce14e4ef5b4917e9a6b1af416aa1f9e1d Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 5 Jun 2023 07:38:41 -0500 Subject: [PATCH 18/22] DEBUG PRINT --- tests/basic/subfile_dread.cpp | 45 ++++++++++++++++++++++++++++++++-- tests/basic/subfile_dwrite.cpp | 23 ++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/tests/basic/subfile_dread.cpp b/tests/basic/subfile_dread.cpp index 9922778e..95aef7c8 100644 --- a/tests/basic/subfile_dread.cpp +++ b/tests/basic/subfile_dread.cpp @@ -27,7 +27,7 @@ int create_subfile (const char *file_name, int rank, int np, vol_env *env_ptr); int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI_Comm comm); int expected_buf_val (int rank, int np, int i, int is_write, int is_columwise); - +#define DEBUG_PRINT {printf("DEBUG: %s:%d\n", __FILE__, __LINE__);} int main (int argc, char **argv) { int err_macro = 0; herr_t err = 0; @@ -39,6 +39,7 @@ int main (int argc, char **argv) { MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_required); MPI_Comm_size (MPI_COMM_WORLD, &np); MPI_Comm_rank (MPI_COMM_WORLD, &rank); + DEBUG_PRINT sprintf (file_name, "%s.h5", basename (argv[0])); if (argc > 2) { @@ -47,19 +48,22 @@ int main (int argc, char **argv) { return 1; } else if (argc > 1) strcpy (file_name, argv[1]); - + DEBUG_PRINT // check VOL related environment variables check_env (&env); SHOW_TEST_INFO ("subfileing read") + DEBUG_PRINT // pre-process: create a file with (np / 2) subfiles err = create_subfile (file_name, rank, np, &env); INDEP_CHECK_ERR (err, MPI_COMM_WORLD) + DEBUG_PRINT { // test reading with np processes (i.e. 
nproc > nsubfiles) MPI_Barrier (MPI_COMM_WORLD); err = read_subfile (file_name, rank, np, &env, MPI_COMM_WORLD); INDEP_CHECK_ERR (err, MPI_COMM_WORLD) + DEBUG_PRINT } { // test reading with np / 2 processes (i.e. nproc == nsubfiles) @@ -67,6 +71,7 @@ int main (int argc, char **argv) { MPI_Comm_split (MPI_COMM_WORLD, rank * 2 / np, rank, &comm1); if (rank < (np + 1) / 2) { err = read_subfile (file_name, rank, np, &env, comm1); } INDEP_CHECK_ERR (err, MPI_COMM_WORLD) + DEBUG_PRINT } { // test reading with np / 4 processes (i.e. nproc < nsubfiles) @@ -74,6 +79,7 @@ int main (int argc, char **argv) { MPI_Comm_split (MPI_COMM_WORLD, rank * 4 / np, rank, &comm2); if (rank < (np + 3) / 4) { err = read_subfile (file_name, rank, np, &env, comm2); } INDEP_CHECK_ERR (err, MPI_COMM_WORLD) + DEBUG_PRINT } SHOW_TEST_RESULT @@ -104,14 +110,17 @@ int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI CHECK_ERR (np); buf = (int *)malloc (sizeof (int) * np * np); + DEBUG_PRINT fapl_id = H5Pcreate (H5P_FILE_ACCESS); CHECK_ERR (fapl_id) + DEBUG_PRINT // MPI and collective metadata is required by LOG VOL err = H5Pset_fapl_mpio (fapl_id, comm, MPI_INFO_NULL); CHECK_ERR (err) err = H5Pset_all_coll_metadata_ops (fapl_id, 1); CHECK_ERR (err) + DEBUG_PRINT if (env_ptr->native_only == 0 && env_ptr->connector == 0) { // Register LOG VOL plugin @@ -120,21 +129,26 @@ int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI err = H5Pset_vol (fapl_id, log_vlid, NULL); CHECK_ERR (err) } + DEBUG_PRINT // Open file fid = H5Fopen (file_name, H5F_ACC_RDONLY, fapl_id); CHECK_ERR (fid) + DEBUG_PRINT // Open a dataset of 2D array of size np x np dims[0] = np; dims[1] = np; sid = H5Screate_simple (2, dims, dims); CHECK_ERR (sid); + DEBUG_PRINT did = H5Dopen2 (fid, "D", H5P_DEFAULT); CHECK_ERR (did) + DEBUG_PRINT msid = H5Screate_simple (1, dims + 1, dims + 1); CHECK_ERR (msid); + DEBUG_PRINT { // test1: read pattern same as write pattern // reset buffer @@ 
-147,10 +161,12 @@ int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI count[1] = np; err = H5Sselect_hyperslab (sid, H5S_SELECT_SET, start, NULL, count, NULL); CHECK_ERR (err) + DEBUG_PRINT // read from dataset err = H5Dread (did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); CHECK_ERR (err) + DEBUG_PRINT for (i = 0; i < np; i++) { if (buf[i] != expected_buf_val (rank, np, i, 1, 0)) { nerrs++; } @@ -169,10 +185,12 @@ int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI count[1] = np; err = H5Sselect_hyperslab (sid, H5S_SELECT_SET, start, NULL, count, NULL); CHECK_ERR (err) + DEBUG_PRINT // read from dataset err = H5Dread (did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); CHECK_ERR (err) + DEBUG_PRINT for (i = 0; i < np; i++) { if (buf[i] != expected_buf_val (rank, np, i, 0, 0)) { nerrs++; } @@ -192,10 +210,12 @@ int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI count[1] = 1; err = H5Sselect_hyperslab (sid, H5S_SELECT_SET, start, NULL, count, NULL); CHECK_ERR (err) + DEBUG_PRINT // read from dataset err = H5Dread (did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); CHECK_ERR (err) + DEBUG_PRINT for (i = 0; i < np; i++) { if (buf[i] != expected_buf_val (rank, np, i, 0, 1)) { nerrs++; } @@ -214,10 +234,12 @@ int read_subfile (const char *file_name, int rank, int np, vol_env *env_ptr, MPI dims[1] = np * np; msid2 = H5Screate_simple (1, dims + 1, dims + 1); CHECK_ERR (msid); + DEBUG_PRINT // read from dataset err = H5Dread (did, H5T_NATIVE_INT, msid2, sid, H5P_DEFAULT, buf); CHECK_ERR (err) + DEBUG_PRINT for (i = 0; i < np * np; i++) { if (buf[i] != ((i / np) * 100 + (i % np))) { nerrs++; } @@ -230,11 +252,16 @@ err_out:; if (msid >= 0) H5Sclose (msid); if (msid2 >= 0) H5Sclose (msid2); if (sid >= 0) H5Sclose (sid); + DEBUG_PRINT if (did >= 0) H5Dclose (did); + DEBUG_PRINT if (fapl_id >= 0) H5Pclose (fapl_id); if (fcpl_id != H5P_DEFAULT) H5Pclose (fcpl_id); + DEBUG_PRINT if 
(log_vlid != H5I_INVALID_HID) H5VLclose (log_vlid); + DEBUG_PRINT if (fid >= 0) H5Fclose (fid); + DEBUG_PRINT return -nerrs; } @@ -258,6 +285,7 @@ int create_subfile (const char *file_name, int rank, int np, vol_env *env_ptr) { CHECK_ERR (err) err = H5Pset_all_coll_metadata_ops (fapl_id, 1); CHECK_ERR (err) + DEBUG_PRINT if (env_ptr->native_only == 0 && env_ptr->connector == 0) { // Register LOG VOL plugin @@ -266,6 +294,7 @@ int create_subfile (const char *file_name, int rank, int np, vol_env *env_ptr) { err = H5Pset_vol (fapl_id, log_vlid, NULL); CHECK_ERR (err) } + DEBUG_PRINT env_str = getenv ("H5VL_LOG_NSUBFILES"); if (env_str == NULL) { @@ -277,18 +306,22 @@ int create_subfile (const char *file_name, int rank, int np, vol_env *env_ptr) { err = H5Pset_subfiling (fcpl_id, nsubfiles); CHECK_ERR (err) } + DEBUG_PRINT // Create file fid = H5Fcreate (file_name, H5F_ACC_TRUNC, fcpl_id, fapl_id); CHECK_ERR (fid) + DEBUG_PRINT // Create a dataset of 2D array of size np x np dims[0] = np; dims[1] = np; sid = H5Screate_simple (2, dims, dims); CHECK_ERR (sid); + DEBUG_PRINT did = H5Dcreate2 (fid, "D", H5T_STD_I32LE, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); CHECK_ERR (did) + DEBUG_PRINT for (i = 0; i < np; i++) { buf[i] = expected_buf_val (rank, np, i, 1, 0); } @@ -299,23 +332,31 @@ int create_subfile (const char *file_name, int rank, int np, vol_env *env_ptr) { count[1] = np; err = H5Sselect_hyperslab (sid, H5S_SELECT_SET, start, NULL, count, NULL); CHECK_ERR (err) + DEBUG_PRINT msid = H5Screate_simple (1, dims + 1, dims + 1); CHECK_ERR (msid); + DEBUG_PRINT // Write to dataset in parallel err = H5Dwrite (did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); CHECK_ERR (err) + DEBUG_PRINT err_out:; if (buf) free (buf); if (msid >= 0) H5Sclose (msid); if (sid >= 0) H5Sclose (sid); + DEBUG_PRINT if (did >= 0) H5Dclose (did); + DEBUG_PRINT if (fapl_id >= 0) H5Pclose (fapl_id); if (fcpl_id != H5P_DEFAULT) H5Pclose (fcpl_id); + DEBUG_PRINT if (log_vlid != H5I_INVALID_HID) 
H5VLclose (log_vlid); + DEBUG_PRINT if (fid >= 0) H5Fclose (fid); + DEBUG_PRINT return -nerrs; } \ No newline at end of file diff --git a/tests/basic/subfile_dwrite.cpp b/tests/basic/subfile_dwrite.cpp index 7758a862..33125f8e 100644 --- a/tests/basic/subfile_dwrite.cpp +++ b/tests/basic/subfile_dwrite.cpp @@ -14,6 +14,7 @@ #include "testutils.hpp" #define N 10 +#define DEBUG_PRINT {printf("DEBUG: %s:%d\n", __FILE__, __LINE__);} int main(int argc, char **argv) { herr_t err = 0; @@ -27,6 +28,7 @@ int main(int argc, char **argv) { MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_required); MPI_Comm_size(MPI_COMM_WORLD, &np); MPI_Comm_rank(MPI_COMM_WORLD, &rank); + DEBUG_PRINT sprintf(file_name, "%s.h5", basename(argv[0])); if (argc > 2) { @@ -37,16 +39,21 @@ int main(int argc, char **argv) { else if (argc > 1) strcpy(file_name, argv[1]); + DEBUG_PRINT /* check VOL related environment variables */ check_env(&env); SHOW_TEST_INFO("subfileing write") + DEBUG_PRINT fapl_id = H5Pcreate(H5P_FILE_ACCESS); CHECK_ERR(fapl_id) + DEBUG_PRINT // MPI and collective metadata is required by LOG VOL err = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL); + DEBUG_PRINT CHECK_ERR(err) err = H5Pset_all_coll_metadata_ops(fapl_id, 1); + DEBUG_PRINT CHECK_ERR(err) if (env.native_only == 0 && env.connector == 0) { @@ -56,27 +63,32 @@ int main(int argc, char **argv) { err = H5Pset_vol(fapl_id, log_vlid, NULL); CHECK_ERR(err) } - + DEBUG_PRINT env_str = getenv("H5VL_LOG_NSUBFILES"); if (env_str == NULL) { /* set the number of subfiles */ fcpl_id = H5Pcreate(H5P_FILE_CREATE); + DEBUG_PRINT CHECK_ERR(fcpl_id) nsubfiles = np / 2; if (nsubfiles == 0) nsubfiles = -1; err = H5Pset_subfiling(fcpl_id, nsubfiles); CHECK_ERR(err) } + DEBUG_PRINT // Create file fid = H5Fcreate(file_name, H5F_ACC_TRUNC, fcpl_id, fapl_id); CHECK_ERR(fid) + DEBUG_PRINT // Create a dataset of 2D array of size np x N dims[0] = np; sid = H5Screate_simple(2, dims, dims); + DEBUG_PRINT CHECK_ERR(sid); did = 
H5Dcreate2(fid, "D", H5T_STD_I32LE, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + DEBUG_PRINT CHECK_ERR(did) for (i = 0; i < N; i++) { buf[i] = rank + i; } @@ -88,25 +100,34 @@ int main(int argc, char **argv) { count[1] = N; err = H5Sselect_hyperslab(sid, H5S_SELECT_SET, start, NULL, count, NULL); CHECK_ERR(err) + DEBUG_PRINT msid = H5Screate_simple(1, dims + 1, dims + 1); CHECK_ERR(msid); + DEBUG_PRINT // Write to dataset in parallel err = H5Dwrite(did, H5T_NATIVE_INT, msid, sid, H5P_DEFAULT, buf); CHECK_ERR(err) + DEBUG_PRINT err = H5Fflush(fid, H5F_SCOPE_LOCAL); CHECK_ERR(err) + DEBUG_PRINT err_out:; if (msid >= 0) H5Sclose(msid); if (sid >= 0) H5Sclose(sid); if (did >= 0) H5Dclose(did); + DEBUG_PRINT if (fapl_id >= 0) H5Pclose(fapl_id); + DEBUG_PRINT if (fcpl_id != H5P_DEFAULT) H5Pclose(fcpl_id); + DEBUG_PRINT if (log_vlid != H5I_INVALID_HID) H5VLclose(log_vlid); + DEBUG_PRINT if (fid >= 0) H5Fclose(fid); + DEBUG_PRINT SHOW_TEST_RESULT From 4e61d1838348b61e0606764f0088396003c1aa34 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 5 Jun 2023 08:19:36 -0500 Subject: [PATCH 19/22] add print internally --- src/H5VL_log_file.cpp | 27 ++++++++++++++++++++++++--- src/H5VL_log_filei.cpp | 19 +++++++++++++++++-- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/src/H5VL_log_file.cpp b/src/H5VL_log_file.cpp index ac431954..81bfccae 100644 --- a/src/H5VL_log_file.cpp +++ b/src/H5VL_log_file.cpp @@ -20,6 +20,8 @@ #include "H5VL_logi.hpp" #include "H5VL_logi_util.hpp" +#define DEBUG_PRINT {printf("\tDEBUG: %s:%d\n", __FILE__, __LINE__);} + /********************* */ /* Function prototypes */ /********************* */ @@ -69,8 +71,9 @@ void *H5VL_log_file_create ( req); } #endif - + DEBUG_PRINT fp = H5VL_log_filei_search (name); + DEBUG_PRINT if (fp) { fp = NULL; RET_ERR ( @@ -80,6 +83,7 @@ void *H5VL_log_file_create ( H5VL_LOGI_PROFILING_TIMER_START; // Try get info about under VOL H5Pget_vol_info (fapl_id, (void **)&info); + DEBUG_PRINT if (info) { uvlid 
= info->uvlid; @@ -92,6 +96,7 @@ void *H5VL_log_file_create ( CHECK_ID (uvlid) under_vol_info = NULL; } + DEBUG_PRINT // Make sure we have mpi enabled fdid = H5Pget_driver (fapl_id); @@ -104,6 +109,7 @@ void *H5VL_log_file_create ( comm = MPI_COMM_SELF; mpiinfo = MPI_INFO_NULL; } + DEBUG_PRINT // Init file obj fp = new H5VL_log_file_t (uvlid); @@ -127,6 +133,7 @@ void *H5VL_log_file_create ( } else { fp->info = MPI_INFO_NULL; } + DEBUG_PRINT mpierr = MPI_Comm_rank (comm, &(fp->rank)); CHECK_MPIERR mpierr = MPI_Comm_size (comm, &(fp->np)); @@ -135,42 +142,56 @@ void *H5VL_log_file_create ( fp->name = std::string (name); err = H5Pget_nb_buffer_size (fapl_id, &(fp->bsize)); CHECK_ERR + DEBUG_PRINT H5VL_log_filei_parse_fapl (fp, fapl_id); - + DEBUG_PRINT H5VL_log_filei_parse_fcpl (fp, fcpl_id); - + DEBUG_PRINT H5VL_log_filei_init_idx (fp); + DEBUG_PRINT H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CREATE_INIT); // Create the file with underlying VOL H5VL_LOGI_PROFILING_TIMER_START; + DEBUG_PRINT fp->ufaplid = H5VL_log_filei_get_under_plist (fapl_id); + DEBUG_PRINT err = H5Pset_vol (fp->ufaplid, uvlid, under_vol_info); + DEBUG_PRINT CHECK_ERR err = H5Pset_all_coll_metadata_ops (fp->ufaplid, (hbool_t) false); + DEBUG_PRINT CHECK_ERR err = H5Pset_coll_metadata_write (fp->ufaplid, (hbool_t) true); + DEBUG_PRINT CHECK_ERR // err = H5Pset_alignment (fp->ufaplid, 4096, 4096); // CHECK_ERR ufcplid = H5VL_log_filei_get_under_plist (fcpl_id); CHECK_ID (ufcplid) + DEBUG_PRINT if (fp->config & H5VL_FILEI_CONFIG_SUBFILING) { + DEBUG_PRINT ufaplid = H5Pcreate (H5P_FILE_ACCESS); CHECK_ID (ufaplid) + DEBUG_PRINT err = H5Pset_vol (ufaplid, uvlid, under_vol_info); CHECK_ERR + DEBUG_PRINT if (fp->rank) { err = H5Pset_fapl_core (ufaplid, 16 * 1048576, false); CHECK_ERR } + DEBUG_PRINT } else { ufaplid = fp->ufaplid; } + DEBUG_PRINT H5VL_LOGI_PROFILING_TIMER_START; fp->uo = H5VLfile_create (name, flags, ufcplid, ufaplid, dxpl_id, NULL); + DEBUG_PRINT CHECK_PTR (fp->uo) 
H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VLFILE_CREATE); H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CREATE_FILE); diff --git a/src/H5VL_log_filei.cpp b/src/H5VL_log_filei.cpp index d40fb878..4f1dc80a 100644 --- a/src/H5VL_log_filei.cpp +++ b/src/H5VL_log_filei.cpp @@ -45,6 +45,8 @@ } \ } while (0) +#define DEBUG_PRINT {printf("\t\tDEBUG: %s:%d\n", __FILE__, __LINE__);} + std::map files; H5VL_log_file_t *H5VL_log_filei_search (const char *path) { int err; @@ -219,6 +221,7 @@ void H5VL_log_filei_post_create (H5VL_log_file_t *fp) { // Reset hdf5 context to allow group and attr operations within a file operation H5VL_logi_reset_lib_stat (lib_state); + DEBUG_PRINT // Figure out lustre configuration H5VL_LOGI_PROFILING_TIMER_START; @@ -235,12 +238,15 @@ void H5VL_log_filei_post_create (H5VL_log_file_t *fp) { } } } + DEBUG_PRINT H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CREATE_STRIPE); H5VL_LOGI_PROFILING_TIMER_START; if ((fp->config & H5VL_FILEI_CONFIG_DATA_ALIGN) || (fp->config & H5VL_FILEI_CONFIG_SUBFILING)) { + DEBUG_PRINT H5VL_log_filei_calc_node_rank (fp); + DEBUG_PRINT } else { fp->group_rank = fp->rank; fp->group_np = fp->np; @@ -248,14 +254,16 @@ void H5VL_log_filei_post_create (H5VL_log_file_t *fp) { fp->group_id = 0; fp->ngroup = 1; } + DEBUG_PRINT H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CREATE_GROUP_RANK); H5VL_LOGI_PROFILING_TIMER_START; if (fp->config & H5VL_FILEI_CONFIG_SUBFILING) { // Aligned write not supported in subfiles fp->config &= ~H5VL_FILEI_CONFIG_DATA_ALIGN; - + DEBUG_PRINT H5VL_log_filei_create_subfile (fp, fp->flag, fp->ufaplid, fp->dxplid); + DEBUG_PRINT } else { fp->sfp = fp->uo; fp->subname = fp->name; @@ -266,23 +274,30 @@ void H5VL_log_filei_post_create (H5VL_log_file_t *fp) { H5VL_LOGI_PROFILING_TIMER_START; loc.obj_type = H5I_FILE; loc.type = H5VL_OBJECT_BY_SELF; + DEBUG_PRINT fp->lgp = H5VLgroup_create (fp->sfp, &loc, fp->uvlid, H5VL_LOG_FILEI_GROUP_LOG, H5P_LINK_CREATE_DEFAULT, 
H5P_GROUP_CREATE_DEFAULT, H5P_GROUP_CREATE_DEFAULT, fp->dxplid, NULL); + DEBUG_PRINT CHECK_PTR (fp->lgp) H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CREATE_GROUP); if (fp->config & H5VL_FILEI_CONFIG_DATA_ALIGN) { + DEBUG_PRINT fp->fd = open (fp->name.c_str(), O_RDWR); if (fp->fd < 0) { ERR_OUT ("open fail") } + DEBUG_PRINT } else { fp->fd = -1; } + DEBUG_PRINT // Open the file with MPI H5VL_LOGI_PROFILING_TIMER_START; + DEBUG_PRINT mpierr = MPI_File_open (fp->group_comm, fp->subname.c_str (), MPI_MODE_RDWR, fp->info, &(fp->fh)); + DEBUG_PRINT H5VL_LOGI_PROFILING_TIMER_STOP (fp, TIMER_H5VL_LOG_FILE_CREATE_FH); CHECK_MPIERR @@ -936,7 +951,7 @@ void H5VL_log_filei_create_subfile (H5VL_log_file_t *fp, attbuf[0] = fp->ndset; attbuf[1] = fp->nldset; attbuf[2] = fp->nmdset; - attbuf[3] = fp->config & !(H5VL_FILEI_CONFIG_SUBFILING); // No subfiling flag in a subfile + attbuf[3] = fp->config & ~(H5VL_FILEI_CONFIG_SUBFILING); // No subfiling flag in a subfile attbuf[4] = fp->ngroup; H5VL_logi_add_att (fp->sfp, fp->uvlid, H5I_FILE, H5VL_LOG_FILEI_ATTR, H5T_STD_I32LE, H5T_NATIVE_INT32, H5VL_LOG_FILEI_NATTR, attbuf, dxpl_id, NULL); From 2f85ab9391df0328f75e94ffeb286f45ef253f0f Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 5 Jun 2023 08:57:47 -0500 Subject: [PATCH 20/22] add more print internally --- src/H5VL_log_filei.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/H5VL_log_filei.cpp b/src/H5VL_log_filei.cpp index 4f1dc80a..a6a5e6be 100644 --- a/src/H5VL_log_filei.cpp +++ b/src/H5VL_log_filei.cpp @@ -1024,8 +1024,11 @@ void H5VL_log_filei_calc_node_rank (H5VL_log_file_t *fp) { H5VL_log_free (group_ranks); }); + DEBUG_PRINT + group_ranks = (int *)malloc (sizeof (int) * fp->np); CHECK_PTR (group_ranks); + DEBUG_PRINT /* H5VL_FILEI_CONFIG_SUBFILING has been checked before entering this * subroutine in H5VL_log_filei_post_open(). Thus fp->ngroup is not 0. 
@@ -1033,31 +1036,39 @@ void H5VL_log_filei_calc_node_rank (H5VL_log_file_t *fp) { if (fp->ngroup > 0) { mpierr = MPI_Comm_split (fp->comm, fp->rank * fp->ngroup / fp->np, fp->rank, &(fp->group_comm)); + DEBUG_PRINT } else { /* fp->ngroup < 0 */ mpierr = MPI_Comm_split_type (fp->comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(fp->group_comm)); + DEBUG_PRINT } CHECK_ERR mpierr = MPI_Comm_rank (fp->group_comm, &(fp->group_rank)); + DEBUG_PRINT CHECK_MPIERR mpierr = MPI_Comm_size (fp->group_comm, &(fp->group_np)); + DEBUG_PRINT CHECK_MPIERR mpierr = MPI_Allgather (&(fp->group_rank), 1, MPI_INT, group_ranks, 1, MPI_INT, fp->comm); + DEBUG_PRINT CHECK_MPIERR // Assign group ID based on the global rank of group rank 0 fp->group_id = 0; for (i = 0; i < fp->rank; i++) { if (group_ranks[i] == 0) { fp->group_id++; } } + DEBUG_PRINT // Calculate number of groups fp->ngroup = fp->group_id; for (; i < fp->np; i++) { if (group_ranks[i] == 0) { fp->ngroup++; } } + DEBUG_PRINT mpierr = MPI_Bcast (&(fp->group_id), 1, MPI_INT, 0, fp->group_comm); CHECK_MPIERR + DEBUG_PRINT if (fp->config & H5VL_FILEI_CONFIG_DATA_ALIGN) { // What ost it should write to @@ -1087,6 +1098,7 @@ void H5VL_log_filei_calc_node_rank (H5VL_log_file_t *fp) { } } } + DEBUG_PRINT } /*------------------------------------------------------------------------- From 7f368c6f183f271811ead30427231c53e974dcf2 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 5 Jun 2023 11:24:48 -0500 Subject: [PATCH 21/22] test smaller mem --- src/H5VL_log_file.cpp | 2 +- src/H5VL_log_filei.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/H5VL_log_file.cpp b/src/H5VL_log_file.cpp index 81bfccae..23abc485 100644 --- a/src/H5VL_log_file.cpp +++ b/src/H5VL_log_file.cpp @@ -181,7 +181,7 @@ void *H5VL_log_file_create ( CHECK_ERR DEBUG_PRINT if (fp->rank) { - err = H5Pset_fapl_core (ufaplid, 16 * 1048576, false); + err = H5Pset_fapl_core (ufaplid, ((size_t)2) * ((size_t)1024), false); CHECK_ERR } DEBUG_PRINT 
diff --git a/src/H5VL_log_filei.cpp b/src/H5VL_log_filei.cpp index a6a5e6be..6e3a7a71 100644 --- a/src/H5VL_log_filei.cpp +++ b/src/H5VL_log_filei.cpp @@ -1036,11 +1036,11 @@ void H5VL_log_filei_calc_node_rank (H5VL_log_file_t *fp) { if (fp->ngroup > 0) { mpierr = MPI_Comm_split (fp->comm, fp->rank * fp->ngroup / fp->np, fp->rank, &(fp->group_comm)); - DEBUG_PRINT + DEBUG_PRINT } else { /* fp->ngroup < 0 */ mpierr = MPI_Comm_split_type (fp->comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(fp->group_comm)); - DEBUG_PRINT + DEBUG_PRINT } CHECK_ERR From 815918f4db51bec9806f8c9033d57b94e0020e44 Mon Sep 17 00:00:00 2001 From: Zanhua Huang Date: Mon, 5 Jun 2023 13:00:45 -0500 Subject: [PATCH 22/22] possible fix 1 --- src/H5VL_log_file.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/H5VL_log_file.cpp b/src/H5VL_log_file.cpp index 23abc485..29dcceaf 100644 --- a/src/H5VL_log_file.cpp +++ b/src/H5VL_log_file.cpp @@ -173,18 +173,19 @@ void *H5VL_log_file_create ( DEBUG_PRINT if (fp->config & H5VL_FILEI_CONFIG_SUBFILING) { - DEBUG_PRINT - ufaplid = H5Pcreate (H5P_FILE_ACCESS); - CHECK_ID (ufaplid) - DEBUG_PRINT - err = H5Pset_vol (ufaplid, uvlid, under_vol_info); - CHECK_ERR - DEBUG_PRINT - if (fp->rank) { - err = H5Pset_fapl_core (ufaplid, ((size_t)2) * ((size_t)1024), false); - CHECK_ERR - } - DEBUG_PRINT + // DEBUG_PRINT + // ufaplid = H5Pcreate (H5P_FILE_ACCESS); + // CHECK_ID (ufaplid) + // DEBUG_PRINT + // err = H5Pset_vol (ufaplid, uvlid, under_vol_info); + // CHECK_ERR + // DEBUG_PRINT + // if (fp->rank) { + // err = H5Pset_fapl_core (ufaplid, ((size_t)2) * ((size_t)1024), false); + // CHECK_ERR + // } + // DEBUG_PRINT + ufaplid = fp->ufaplid; } else { ufaplid = fp->ufaplid; }