Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TEST: improve test output #890

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 1 addition & 24 deletions src/utils/ucc_coll_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,29 +123,6 @@ ucc_coll_args_get_displacement(const ucc_coll_args_t *args,
return ((uint32_t *)displacements)[idx];
}

/**
 * Maps a memory type to a constant, human-readable name.
 *
 * @param ct  memory type to describe
 * @return pointer to a string literal naming @a ct, or "invalid" when the
 *         value matches none of the memory types handled below
 */
static inline const char* ucc_mem_type_str(ucc_memory_type_t ct)
{
    /* fallback used for every value without a dedicated case */
    const char *name = "invalid";

    switch ((int)ct) {
    case UCC_MEMORY_TYPE_HOST:
        name = "Host";
        break;
    case UCC_MEMORY_TYPE_CUDA:
        name = "Cuda";
        break;
    case UCC_MEMORY_TYPE_CUDA_MANAGED:
        name = "CudaManaged";
        break;
    case UCC_MEMORY_TYPE_ROCM:
        name = "Rocm";
        break;
    case UCC_MEMORY_TYPE_ROCM_MANAGED:
        name = "RocmManaged";
        break;
    case UCC_MEMORY_TYPE_ASYMMETRIC:
        name = "asymmetric";
        break;
    case UCC_MEMORY_TYPE_NOT_APPLY:
        name = "n/a";
        break;
    default:
        break;
    }
    return name;
}

static inline size_t
ucc_coll_args_get_total_count(const ucc_coll_args_t *args,
const ucc_count_t *counts, ucc_rank_t size)
Expand Down Expand Up @@ -248,7 +225,7 @@ ucc_status_t ucc_ep_map_create_nested(ucc_ep_map_t *base_map,
ucc_ep_map_t *sub_map,
ucc_ep_map_t *out);

ucc_status_t ucc_ep_map_is_identity(const ucc_ep_map_t *map);
int ucc_ep_map_is_identity(const ucc_ep_map_t *map);

void ucc_ep_map_destroy_nested(ucc_ep_map_t *out);

Expand Down
23 changes: 23 additions & 0 deletions src/utils/ucc_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,27 @@ static inline const char* ucc_reduction_op_str(ucc_reduction_op_t op)
}
}

/**
 * Maps a memory type to a constant, human-readable name.
 *
 * @param ct  memory type to describe
 * @return pointer to a string literal naming @a ct, or "invalid" when the
 *         value matches none of the memory types handled below
 */
static inline const char* ucc_mem_type_str(ucc_memory_type_t ct)
{
    /* fallback used for every value without a dedicated case */
    const char *name = "invalid";

    switch ((int)ct) {
    case UCC_MEMORY_TYPE_HOST:
        name = "Host";
        break;
    case UCC_MEMORY_TYPE_CUDA:
        name = "Cuda";
        break;
    case UCC_MEMORY_TYPE_CUDA_MANAGED:
        name = "CudaManaged";
        break;
    case UCC_MEMORY_TYPE_ROCM:
        name = "Rocm";
        break;
    case UCC_MEMORY_TYPE_ROCM_MANAGED:
        name = "RocmManaged";
        break;
    case UCC_MEMORY_TYPE_ASYMMETRIC:
        name = "asymmetric";
        break;
    case UCC_MEMORY_TYPE_NOT_APPLY:
        name = "n/a";
        break;
    default:
        break;
    }
    return name;
}

#endif
129 changes: 110 additions & 19 deletions test/mpi/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <sstream>
#include <algorithm>
#include <chrono>
#include <iomanip>
#include "test_mpi.h"

int test_rand_seed = -1;
Expand Down Expand Up @@ -135,6 +136,23 @@ static ucc_test_mpi_team_t team_str_to_type(std::string team)
throw std::string("incorrect team type: ") + team;
}

/**
 * Converts a test team type to its textual name.
 *
 * @param team  team type to convert
 * @return "world", "half", "odd_even" or "reverse"
 * @throws std::string when @a team is none of the team types handled below;
 *         the message carries the numeric value of @a team so the offending
 *         input can be identified
 */
static std::string team_type_to_str(ucc_test_mpi_team_t team)
{
    switch (team) {
    case TEAM_WORLD:
        return "world";
    case TEAM_SPLIT_HALF:
        return "half";
    case TEAM_SPLIT_ODD_EVEN:
        return "odd_even";
    case TEAM_REVERSE:
        return "reverse";
    default:
        break;
    }
    /* Append the raw value: the message previously ended in a bare colon. */
    throw std::string("incorrect team type: ") +
        std::to_string(static_cast<int>(team));
}

static ucc_coll_type_t coll_str_to_type(std::string coll)
{
if (coll == "barrier") {
Expand Down Expand Up @@ -395,15 +413,52 @@ int init_rand_seed(int user_seed)
/**
 * Writes one line to std::cout: @a label, then every element of @a items
 * converted with @a to_str and joined with ", ".
 *
 * The separator is chosen by position rather than by comparing an element
 * with items.back(), so repeated values are joined correctly, and the line is
 * always terminated, even when @a items is empty.
 */
template <typename Container, typename ToStr>
static void print_info_list(const char *label, const Container &items,
                            ToStr to_str)
{
    const char *sep = "";

    std::cout << label;
    for (const auto &item : items) {
        std::cout << sep << to_str(item);
        sep = ", ";
    }
    std::cout << std::endl;
}

/**
 * Prints the test configuration: the random seed followed by the lists of
 * collectives, data types, memory types and teams.
 * Only the process with rank 0 in MPI_COMM_WORLD prints; all others return
 * immediately.
 */
void print_info()
{
    int world_rank;

    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    if (world_rank) {
        return;
    }

    std::cout << "===== UCC MPI TEST INFO =======" << std::endl;
    std::cout << "seed: " << test_rand_seed << std::endl;
    print_info_list("collectives: ", colls, ucc_coll_type_str);
    print_info_list("data types: ", dtypes, ucc_datatype_str);
    print_info_list("memory types: ", mtypes, ucc_mem_type_str);
    print_info_list("teams: ", teams, team_type_to_str);
}

void ProcessArgs(int argc, char** argv)
Expand Down Expand Up @@ -519,8 +574,8 @@ void ProcessArgs(int argc, char** argv)

int main(int argc, char *argv[])
{
int failed = 0;
int total_done_skipped_failed[4] = {0};
int failed = 0;
int total_done_skipped_failed[ucc_ilog2(UCC_COLL_TYPE_LAST) + 1][4] = {0};
std::chrono::steady_clock::time_point begin;
int size, required, provided, completed, rank;
UccTestMpi *test;
Expand Down Expand Up @@ -623,19 +678,20 @@ int main(int argc, char *argv[])
}
std::cout << std::flush;

total_done_skipped_failed[0] = test->results.size();
for (auto s : test->results) {
switch(s) {
int coll_num = ucc_ilog2(std::get<0>(s));
switch(std::get<1>(s)) {
case UCC_OK:
total_done_skipped_failed[1]++;
total_done_skipped_failed[coll_num][1]++;
break;
case UCC_ERR_NOT_IMPLEMENTED:
case UCC_ERR_LAST:
total_done_skipped_failed[2]++;
total_done_skipped_failed[coll_num][2]++;
break;
default:
total_done_skipped_failed[3]++;
total_done_skipped_failed[coll_num][3]++;
}
total_done_skipped_failed[coll_num][0]++;
}
MPI_Iallreduce(MPI_IN_PLACE, total_done_skipped_failed,
sizeof(total_done_skipped_failed)/sizeof(int),
Expand All @@ -648,23 +704,58 @@ int main(int argc, char *argv[])
if (0 == rank) {
std::chrono::steady_clock::time_point end =
std::chrono::steady_clock::now();
ucc_coll_type_t coll_type;
int num_all = 0, num_skipped = 0, num_done =0, num_failed = 0;
std::ios iostate(nullptr);

iostate.copyfmt(std::cout);
std::cout << "\n===== UCC MPI TEST REPORT =====\n" <<
" total tests : " << total_done_skipped_failed[0] << "\n" <<
" passed : " << total_done_skipped_failed[1] << "\n" <<
" skipped : " << total_done_skipped_failed[2] << "\n" <<
" failed : " << total_done_skipped_failed[3] << "\n" <<
" elapsed : " <<
std::setw(22) << std::left << "collective" <<
std::setw(10) << std::right << "tests" <<
std::setw(10) << std::right << "passed" <<
std::setw(10) << std::right << "failed" <<
std::setw(10) << std::right << "skipped" << std::endl;

for (coll_type = (ucc_coll_type_t)1;
coll_type < UCC_COLL_TYPE_LAST;
coll_type = (ucc_coll_type_t)(coll_type << 1))
{
int coll_num = ucc_ilog2(coll_type);
if (total_done_skipped_failed[coll_num][0] == 0) {
continue;
}
num_all += total_done_skipped_failed[coll_num][0];
num_done += total_done_skipped_failed[coll_num][1];
num_skipped += total_done_skipped_failed[coll_num][2];
num_failed += total_done_skipped_failed[coll_num][3];
std::cout <<
std::setw(22) << std::left << ucc_coll_type_str(coll_type) <<
std::setw(10) << std::right << total_done_skipped_failed[coll_num][0] <<
std::setw(10) << std::right << total_done_skipped_failed[coll_num][1] <<
std::setw(10) << std::right << total_done_skipped_failed[coll_num][3] <<
std::setw(10) << std::right << total_done_skipped_failed[coll_num][2] <<
std::endl;

}
std::cout <<
" \n===== UCC MPI TEST SUMMARY =====\n" <<
"total tests: " << num_all << "\n" <<
"passed: " << num_done << "\n" <<
"skipped: " << num_skipped << "\n" <<
"failed: " << num_failed << "\n" <<
"elapsed: " <<
std::chrono::duration_cast<std::chrono::seconds>(end - begin).count()
<< "s" << std::endl;
std::cout.copyfmt(iostate);

/* check if all tests have been skipped */
if (total_done_skipped_failed[0] == total_done_skipped_failed[2]) {
if (num_all == num_skipped) {
std::cout << "\n All tests have been skipped, indicating most likely "
"a problem\n";
failed = 1;
}

if (total_done_skipped_failed[3] != 0) {
if (num_failed != 0) {
failed = 1;
}
}
Expand Down
20 changes: 10 additions & 10 deletions test/mpi/test_mpi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ void set_gpu_device(test_set_gpu_device_t set_device)

#endif

std::vector<ucc_status_t> UccTestMpi::exec_tests(
std::vector<ucc_test_mpi_result_t> UccTestMpi::exec_tests(
std::vector<std::shared_ptr<TestCase>> tcs, bool triggered,
bool persistent)
{
Expand All @@ -483,7 +483,7 @@ std::vector<ucc_status_t> UccTestMpi::exec_tests(
ucc_status_t status;

MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
std::vector<ucc_status_t> rst;
std::vector<ucc_test_mpi_result_t> rst;

for (i = 0; i < n_persistent; i++) {
for (auto tc: tcs) {
Expand All @@ -501,7 +501,7 @@ std::vector<ucc_status_t> UccTestMpi::exec_tests(
std::cout << "SKIPPED: " << skip_str(tc->test_skip) << ": "
<< tc->str() << " " << std::endl;
}
rst.push_back(UCC_ERR_LAST);
rst.push_back(std::make_tuple(tc->args.coll_type, UCC_ERR_LAST));
return rst;
}
}
Expand All @@ -528,14 +528,14 @@ std::vector<ucc_status_t> UccTestMpi::exec_tests(
if (UCC_OK != status) {
std::cerr << "FAILURE in: " << tc->str() << std::endl;
}
rst.push_back(status);
rst.push_back(std::make_tuple(tc->args.coll_type, status));
}
}
return rst;
}

void UccTestMpi::run_all_at_team(ucc_test_team_t &team,
std::vector<ucc_status_t> &rst)
std::vector<ucc_test_mpi_result_t> &rst)
{
TestCaseParams params;

Expand Down Expand Up @@ -586,7 +586,7 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team,
for (auto r : roots) {
for (auto mt: test_memtypes) {
if (triggered && !ucc_coll_triggered_supported(mt)) {
rst.push_back(UCC_ERR_NOT_IMPLEMENTED);
rst.push_back(std::make_tuple(c, UCC_ERR_NOT_IMPLEMENTED));
continue;
}

Expand Down Expand Up @@ -642,10 +642,10 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team,
}

/*
 * State kept for one test thread: the pthread handle, an integer id, the
 * UccTestMpi instance it refers to, and a vector of per-test results.
 * Each member is declared exactly once; a repeated declaration of the same
 * member name does not compile.
 * NOTE(review): presumably filled in and consumed by thread_start() - confirm.
 */
typedef struct ucc_test_thread {
    pthread_t                          thread;
    int                                id;
    UccTestMpi *                       test;
    std::vector<ucc_test_mpi_result_t> rst;
} ucc_test_thread_t;

static void *thread_start(void *arg)
Expand Down
10 changes: 6 additions & 4 deletions test/mpi/test_mpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,6 @@ class TestCase {
size_t msgsize;
bool inplace;
bool persistent;
ucc_coll_args_t args;
ucc_coll_req_h req;
ucc_mc_buffer_header_t *sbuf_mc_header, *rbuf_mc_header;
void *sbuf;
Expand All @@ -279,6 +278,7 @@ class TestCase {
ucc_datatype_t dt;
int iter_persistent;
public:
ucc_coll_args_t args;
void mpi_progress(void);
test_skip_cause_t test_skip;
static std::shared_ptr<TestCase> init_single(
Expand All @@ -304,6 +304,7 @@ class TestCase {
MPI_Comm comm);
};

typedef std::tuple<ucc_coll_type_t, ucc_status_t> ucc_test_mpi_result_t;
class UccTestMpi {
ucc_thread_mode_t tm;
ucc_context_h ctx;
Expand Down Expand Up @@ -331,14 +332,15 @@ class UccTestMpi {
std::vector<int> gen_roots(ucc_test_team_t &team);
std::vector<ucc_test_vsize_flag_t> counts_vsize;
std::vector<ucc_test_vsize_flag_t> displs_vsize;
std::vector<ucc_status_t> exec_tests(
std::vector<ucc_test_mpi_result_t> exec_tests(
std::vector<std::shared_ptr<TestCase>> tcs,
bool triggered, bool persistent);
public:
std::vector<ucc_test_team_t> teams;
std::vector<ucc_test_team_t> onesided_teams;
void run_all_at_team(ucc_test_team_t &team, std::vector<ucc_status_t> &rst);
std::vector<ucc_status_t> results;
void run_all_at_team(ucc_test_team_t &team,
std::vector<ucc_test_mpi_result_t> &rst);
std::vector<ucc_test_mpi_result_t> results;
UccTestMpi(int argc, char *argv[], ucc_thread_mode_t tm, int is_local,
bool with_onesided);
~UccTestMpi();
Expand Down
Loading