From 7e22dde94fa3b0cd235243df57158187296348c7 Mon Sep 17 00:00:00 2001 From: Sergey Lebedev Date: Tue, 26 Dec 2023 16:39:55 +0100 Subject: [PATCH] TEST: improve test output (#890) --- src/utils/ucc_coll_utils.h | 25 +------ src/utils/ucc_log.h | 23 +++++++ test/mpi/main.cc | 129 +++++++++++++++++++++++++++++++------ test/mpi/test_mpi.cc | 20 +++--- test/mpi/test_mpi.h | 10 +-- 5 files changed, 150 insertions(+), 57 deletions(-) diff --git a/src/utils/ucc_coll_utils.h b/src/utils/ucc_coll_utils.h index ad7939837e..c5cb2ef392 100644 --- a/src/utils/ucc_coll_utils.h +++ b/src/utils/ucc_coll_utils.h @@ -123,29 +123,6 @@ ucc_coll_args_get_displacement(const ucc_coll_args_t *args, return ((uint32_t *)displacements)[idx]; } -static inline const char* ucc_mem_type_str(ucc_memory_type_t ct) -{ - switch((int)ct) { - case UCC_MEMORY_TYPE_HOST: - return "Host"; - case UCC_MEMORY_TYPE_CUDA: - return "Cuda"; - case UCC_MEMORY_TYPE_CUDA_MANAGED: - return "CudaManaged"; - case UCC_MEMORY_TYPE_ROCM: - return "Rocm"; - case UCC_MEMORY_TYPE_ROCM_MANAGED: - return "RocmManaged"; - case UCC_MEMORY_TYPE_ASYMMETRIC: - return "asymmetric"; - case UCC_MEMORY_TYPE_NOT_APPLY: - return "n/a"; - default: - break; - } - return "invalid"; -} - static inline size_t ucc_coll_args_get_total_count(const ucc_coll_args_t *args, const ucc_count_t *counts, ucc_rank_t size) @@ -248,7 +225,7 @@ ucc_status_t ucc_ep_map_create_nested(ucc_ep_map_t *base_map, ucc_ep_map_t *sub_map, ucc_ep_map_t *out); -ucc_status_t ucc_ep_map_is_identity(const ucc_ep_map_t *map); +int ucc_ep_map_is_identity(const ucc_ep_map_t *map); void ucc_ep_map_destroy_nested(ucc_ep_map_t *out); diff --git a/src/utils/ucc_log.h b/src/utils/ucc_log.h index 21ad88dd05..b480ee55ae 100644 --- a/src/utils/ucc_log.h +++ b/src/utils/ucc_log.h @@ -187,4 +187,27 @@ static inline const char* ucc_reduction_op_str(ucc_reduction_op_t op) } } +static inline const char* ucc_mem_type_str(ucc_memory_type_t ct) +{ + switch((int)ct) { + case UCC_MEMORY_TYPE_HOST: + return "Host"; + case UCC_MEMORY_TYPE_CUDA: + return "Cuda"; + case UCC_MEMORY_TYPE_CUDA_MANAGED: + return "CudaManaged"; + case UCC_MEMORY_TYPE_ROCM: + return "Rocm"; + case UCC_MEMORY_TYPE_ROCM_MANAGED: + return "RocmManaged"; + case UCC_MEMORY_TYPE_ASYMMETRIC: + return "asymmetric"; + case UCC_MEMORY_TYPE_NOT_APPLY: + return "n/a"; + default: + break; + } + return "invalid"; +} + #endif diff --git a/test/mpi/main.cc b/test/mpi/main.cc index 074c2fcad7..f4a571fa14 100644 --- a/test/mpi/main.cc +++ b/test/mpi/main.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include "test_mpi.h" int test_rand_seed = -1; @@ -135,6 +136,23 @@ static ucc_test_mpi_team_t team_str_to_type(std::string team) throw std::string("incorrect team type: ") + team; } +static std::string team_type_to_str(ucc_test_mpi_team_t team) +{ + switch (team) { + case TEAM_WORLD: + return "world"; + case TEAM_SPLIT_HALF: + return "half"; + case TEAM_SPLIT_ODD_EVEN: + return "odd_even"; + case TEAM_REVERSE: + return "reverse"; + default: + break; + } + throw std::string("incorrect team type: "); +} + static ucc_coll_type_t coll_str_to_type(std::string coll) { if (coll == "barrier") { @@ -395,15 +413,52 @@ int init_rand_seed(int user_seed) void print_info() { int world_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); if (world_rank) { return; } - std::cout << "\n===== UCC MPI TEST INFO =======\n" - << " seed : " << std::to_string(test_rand_seed) << "\n" - << "===============================\n" - << std::endl; + + std::cout << "===== UCC MPI TEST INFO =======" << std::endl; + std::cout <<"seed: " << std::to_string(test_rand_seed) << std::endl; + std::cout <<"collectives: "; + for (const auto &c : colls) { + std::cout << ucc_coll_type_str(c); + if (c != colls.back()) { + std::cout << ", "; + } else { + std::cout << std::endl; + } + } + std::cout <<"data types: "; + for (const auto &d : dtypes) { + std::cout << ucc_datatype_str(d); + if (d != dtypes.back()) { + std::cout << ", "; + } else { + std::cout << std::endl; + } + } + + std::cout <<"memory types: "; + for (const auto &m : mtypes) { + std::cout << ucc_mem_type_str(m); + if (m != mtypes.back()) { + std::cout << ", "; + } else { + std::cout << std::endl; + } + } + + std::cout <<"teams: "; + for (const auto &t : teams) { + std::cout << team_type_to_str(t); + if (t != teams.back()) { + std::cout << ", "; + } else { + std::cout << std::endl; + } + } } void ProcessArgs(int argc, char** argv) @@ -519,8 +574,8 @@ void ProcessArgs(int argc, char** argv) int main(int argc, char *argv[]) { - int failed = 0; - int total_done_skipped_failed[4] = {0}; + int failed = 0; + int total_done_skipped_failed[ucc_ilog2(UCC_COLL_TYPE_LAST) + 1][4] = {0}; std::chrono::steady_clock::time_point begin; int size, required, provided, completed, rank; UccTestMpi *test; @@ -623,19 +678,20 @@ int main(int argc, char *argv[]) } std::cout << std::flush; - total_done_skipped_failed[0] = test->results.size(); for (auto s : test->results) { - switch(s) { + int coll_num = ucc_ilog2(std::get<0>(s)); + switch(std::get<1>(s)) { case UCC_OK: - total_done_skipped_failed[1]++; + total_done_skipped_failed[coll_num][1]++; break; case UCC_ERR_NOT_IMPLEMENTED: case UCC_ERR_LAST: - total_done_skipped_failed[2]++; + total_done_skipped_failed[coll_num][2]++; break; default: - total_done_skipped_failed[3]++; + total_done_skipped_failed[coll_num][3]++; } + total_done_skipped_failed[coll_num][0]++; } MPI_Iallreduce(MPI_IN_PLACE, total_done_skipped_failed, sizeof(total_done_skipped_failed)/sizeof(int), @@ -648,23 +704,58 @@ int main(int argc, char *argv[]) if (0 == rank) { std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + ucc_coll_type_t coll_type; + int num_all = 0, num_skipped = 0, num_done =0, num_failed = 0; + std::ios iostate(nullptr); + + iostate.copyfmt(std::cout); std::cout << "\n===== UCC MPI TEST REPORT =====\n" << - " total tests : " << total_done_skipped_failed[0] << "\n" << - " passed : " << total_done_skipped_failed[1] << "\n" << - " skipped : " << total_done_skipped_failed[2] << "\n" << - " failed : " << total_done_skipped_failed[3] << "\n" << - " elapsed : " << + std::setw(22) << std::left << "collective" << + std::setw(10) << std::right << "tests" << + std::setw(10) << std::right << "passed" << + std::setw(10) << std::right << "failed" << + std::setw(10) << std::right << "skipped" << std::endl; + + for (coll_type = (ucc_coll_type_t)1; + coll_type < UCC_COLL_TYPE_LAST; + coll_type = (ucc_coll_type_t)(coll_type << 1)) + { + int coll_num = ucc_ilog2(coll_type); + if (total_done_skipped_failed[coll_num][0] == 0) { + continue; + } + num_all += total_done_skipped_failed[coll_num][0]; + num_done += total_done_skipped_failed[coll_num][1]; + num_skipped += total_done_skipped_failed[coll_num][2]; + num_failed += total_done_skipped_failed[coll_num][3]; + std::cout << + std::setw(22) << std::left << ucc_coll_type_str(coll_type) << + std::setw(10) << std::right << total_done_skipped_failed[coll_num][0] << + std::setw(10) << std::right << total_done_skipped_failed[coll_num][1] << + std::setw(10) << std::right << total_done_skipped_failed[coll_num][3] << + std::setw(10) << std::right << total_done_skipped_failed[coll_num][2] << + std::endl; + + } + std::cout << + " \n===== UCC MPI TEST SUMMARY =====\n" << + "total tests: " << num_all << "\n" << + "passed: " << num_done << "\n" << + "skipped: " << num_skipped << "\n" << + "failed: " << num_failed << "\n" << + "elapsed: " << std::chrono::duration_cast(end - begin).count() << "s" << std::endl; + std::cout.copyfmt(iostate); /* check if all tests have been skipped */ - if (total_done_skipped_failed[0] == total_done_skipped_failed[2]) { + if (num_all == num_skipped) { std::cout << "\n All tests have been skipped, indicating most likely " "a problem\n"; failed = 1; } - if (total_done_skipped_failed[3] != 0) { + if (num_failed != 0) { failed = 1; } } diff --git a/test/mpi/test_mpi.cc b/test/mpi/test_mpi.cc index 870dd71617..147ce1fd7d 100644 --- a/test/mpi/test_mpi.cc +++ b/test/mpi/test_mpi.cc @@ -474,7 +474,7 @@ void set_gpu_device(test_set_gpu_device_t set_device) #endif -std::vector UccTestMpi::exec_tests( +std::vector UccTestMpi::exec_tests( std::vector> tcs, bool triggered, bool persistent) { @@ -483,7 +483,7 @@ std::vector UccTestMpi::exec_tests( ucc_status_t status; MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - std::vector rst; + std::vector rst; for (i = 0; i < n_persistent; i++) { for (auto tc: tcs) { @@ -501,7 +501,7 @@ std::vector UccTestMpi::exec_tests( std::cout << "SKIPPED: " << skip_str(tc->test_skip) << ": " << tc->str() << " " << std::endl; } - rst.push_back(UCC_ERR_LAST); + rst.push_back(std::make_tuple(tc->args.coll_type, UCC_ERR_LAST)); return rst; } } @@ -528,14 +528,14 @@ std::vector UccTestMpi::exec_tests( if (UCC_OK != status) { std::cerr << "FAILURE in: " << tc->str() << std::endl; } - rst.push_back(status); + rst.push_back(std::make_tuple(tc->args.coll_type, status)); } } return rst; } void UccTestMpi::run_all_at_team(ucc_test_team_t &team, - std::vector &rst) + std::vector &rst) { TestCaseParams params; @@ -586,7 +586,7 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team, for (auto r : roots) { for (auto mt: test_memtypes) { if (triggered && !ucc_coll_triggered_supported(mt)) { - rst.push_back(UCC_ERR_NOT_IMPLEMENTED); + rst.push_back(std::make_tuple(c, UCC_ERR_NOT_IMPLEMENTED)); continue; } @@ -642,10 +642,10 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team, } typedef struct ucc_test_thread { - pthread_t thread; - int id; - UccTestMpi * test; - std::vector rst; + pthread_t thread; + int id; + UccTestMpi * test; + std::vector rst; } ucc_test_thread_t; static void *thread_start(void *arg) diff --git a/test/mpi/test_mpi.h b/test/mpi/test_mpi.h index 32f81f5e55..391cb21996 100644 --- a/test/mpi/test_mpi.h +++ b/test/mpi/test_mpi.h @@ -267,7 +267,6 @@ class TestCase { size_t msgsize; bool inplace; bool persistent; - ucc_coll_args_t args; ucc_coll_req_h req; ucc_mc_buffer_header_t *sbuf_mc_header, *rbuf_mc_header; void *sbuf; @@ -279,6 +278,7 @@ class TestCase { ucc_datatype_t dt; int iter_persistent; public: + ucc_coll_args_t args; void mpi_progress(void); test_skip_cause_t test_skip; static std::shared_ptr init_single( @@ -304,6 +304,7 @@ class TestCase { MPI_Comm comm); }; +typedef std::tuple ucc_test_mpi_result_t; class UccTestMpi { ucc_thread_mode_t tm; ucc_context_h ctx; @@ -331,14 +332,15 @@ class UccTestMpi { std::vector gen_roots(ucc_test_team_t &team); std::vector counts_vsize; std::vector displs_vsize; - std::vector exec_tests( + std::vector exec_tests( std::vector> tcs, bool triggered, bool persistent); public: std::vector teams; std::vector onesided_teams; - void run_all_at_team(ucc_test_team_t &team, std::vector &rst); - std::vector results; + void run_all_at_team(ucc_test_team_t &team, + std::vector &rst); + std::vector results; UccTestMpi(int argc, char *argv[], ucc_thread_mode_t tm, int is_local, bool with_onesided); ~UccTestMpi();