Skip to content

Commit

Permalink
CORE: improve logging
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev committed Mar 5, 2023
1 parent d816307 commit c7c4375
Show file tree
Hide file tree
Showing 54 changed files with 458 additions and 247 deletions.
10 changes: 5 additions & 5 deletions src/components/cl/basic/cl_basic_context.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -35,8 +35,8 @@ UCC_CLASS_INIT_FUNC(ucc_cl_basic_context_t,
status = ucc_tl_context_get(params->context, tls->names[i],
&self->super.tl_ctxs[self->super.n_tl_ctxs]);
if (UCC_OK != status) {
cl_info(cl_config->cl_lib,
"TL %s context is not available, skipping", tls->names[i]);
cl_debug(cl_config->cl_lib,
"TL %s context is not available, skipping", tls->names[i]);
} else {
self->super.n_tl_ctxs++;
}
Expand All @@ -47,14 +47,14 @@ UCC_CLASS_INIT_FUNC(ucc_cl_basic_context_t,
self->super.tl_ctxs = NULL;
return UCC_ERR_NOT_FOUND;
}
cl_info(cl_config->cl_lib, "initialized cl context: %p", self);
cl_debug(cl_config->cl_lib, "initialized cl context: %p", self);
return UCC_OK;
}

UCC_CLASS_CLEANUP_FUNC(ucc_cl_basic_context_t)
{
int i;
cl_info(self->super.super.lib, "finalizing cl context: %p", self);
cl_debug(self->super.super.lib, "finalizing cl context: %p", self);
for (i = 0; i < self->super.n_tl_ctxs; i++) {
ucc_tl_context_put(self->super.tl_ctxs[i]);
}
Expand Down
4 changes: 2 additions & 2 deletions src/components/cl/basic/cl_basic_lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ UCC_CLASS_INIT_FUNC(ucc_cl_basic_lib_t, const ucc_base_lib_params_t *params,
const ucc_cl_lib_config_t *cl_config =
ucc_derived_of(config, ucc_cl_lib_config_t);
UCC_CLASS_CALL_SUPER_INIT(ucc_cl_lib_t, &ucc_cl_basic.super, cl_config);
cl_info(&self->super, "initialized lib object: %p", self);
cl_debug(&self->super, "initialized lib object: %p", self);
return UCC_OK;
}

/*
 * Cleanup function for ucc_cl_basic_lib_t.
 *
 * The visible body does nothing except log the address of the lib object
 * being finalized; no resources are released here.
 *
 * NOTE(review): this is a diff rendering - the cl_info line appears to be the
 * removed (pre-commit) statement and the cl_debug line its replacement, so
 * only one of the two is expected to exist in the real file. Confirm against
 * the repository.
 */
UCC_CLASS_CLEANUP_FUNC(ucc_cl_basic_lib_t)
{
/* presumably the old statement: message emitted via cl_info */
cl_info(&self->super, "finalizing lib object: %p", self);
/* presumably the new statement: identical message emitted via cl_debug */
cl_debug(&self->super, "finalizing lib object: %p", self);
}

UCC_CLASS_DEFINE(ucc_cl_basic_lib_t, ucc_cl_lib_t);
Expand Down
18 changes: 9 additions & 9 deletions src/components/cl/basic/cl_basic_team.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -49,7 +49,7 @@ UCC_CLASS_INIT_FUNC(ucc_cl_basic_team_t, ucc_base_context_t *cl_context,
status);
goto err;
}
cl_info(cl_context->lib, "posted cl team: %p", self);
cl_debug(cl_context->lib, "posted cl team: %p", self);
return UCC_OK;
err:
ucc_free(self->tl_teams);
Expand All @@ -58,7 +58,7 @@ UCC_CLASS_INIT_FUNC(ucc_cl_basic_team_t, ucc_base_context_t *cl_context,

/*
 * Cleanup function for ucc_cl_basic_team_t.
 *
 * The visible body only logs the address of the team being finalized, using
 * the lib handle reached through self->super.super.context->lib.
 *
 * NOTE(review): this is a diff rendering - the cl_info line appears to be the
 * removed (pre-commit) statement and the cl_debug line its replacement, so
 * only one of the two is expected to exist in the real file. Confirm against
 * the repository.
 */
UCC_CLASS_CLEANUP_FUNC(ucc_cl_basic_team_t)
{
/* presumably the old statement: message emitted via cl_info */
cl_info(self->super.super.context->lib, "finalizing cl team: %p", self);
/* presumably the new statement: identical message emitted via cl_debug */
cl_debug(self->super.super.context->lib, "finalizing cl team: %p", self);
}

UCC_CLASS_DEFINE_DELETE_FUNC(ucc_cl_basic_team_t, ucc_base_team_t);
Expand Down Expand Up @@ -117,13 +117,13 @@ ucc_status_t ucc_cl_basic_team_create_test(ucc_base_team_t *cl_team)
if (team->team_create_req->descs[i].status == UCC_OK) {
team->tl_teams[team->n_tl_teams++] =
team->team_create_req->descs[i].team;
cl_info(ctx->super.super.lib, "initialized tl %s team",
UCC_TL_CTX_IFACE(team->team_create_req->descs[i].ctx)->
super.name);
cl_debug(ctx->super.super.lib, "initialized tl %s team",
UCC_TL_CTX_IFACE(team->team_create_req->descs[i].ctx)->
super.name);
} else {
cl_info(ctx->super.super.lib, "failed to create tl %s team: (%d)",
UCC_TL_CTX_IFACE(team->team_create_req->descs[i].ctx)->
super.name, team->team_create_req->descs[i].status);
cl_debug(ctx->super.super.lib, "failed to create tl %s team: (%d)",
UCC_TL_CTX_IFACE(team->team_create_req->descs[i].ctx)->
super.name, team->team_create_req->descs[i].status);
}
}
ucc_team_multiple_req_free(team->team_create_req);
Expand Down
4 changes: 2 additions & 2 deletions src/components/cl/hier/allreduce/allreduce_split_rail.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -264,7 +264,7 @@ ucc_cl_hier_split_rail_allreduce_start(ucc_coll_task_t *task)
ucc_schedule_pipelined_t *schedule =
ucc_derived_of(task, ucc_schedule_pipelined_t);

cl_info(task->team->context->lib,
cl_debug(task->team->context->lib,
"posting split_rail ar, sbuf %p, rbuf %p, count %zd, dt %s, op %s, "
"inplace %d, pdepth %d, frags_total %d",
task->bargs.args.src.info.buffer, task->bargs.args.dst.info.buffer,
Expand Down
10 changes: 5 additions & 5 deletions src/components/cl/hier/cl_hier_context.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -39,8 +39,8 @@ UCC_CLASS_INIT_FUNC(ucc_cl_hier_context_t,
status = ucc_tl_context_get(params->context, tls->names[i],
&self->super.tl_ctxs[self->super.n_tl_ctxs]);
if (UCC_OK != status) {
cl_info(cl_config->cl_lib,
"TL %s context is not available, skipping", tls->names[i]);
cl_debug(cl_config->cl_lib,
"TL %s context is not available, skipping", tls->names[i]);
} else {
self->super.n_tl_ctxs++;
}
Expand All @@ -61,7 +61,7 @@ UCC_CLASS_INIT_FUNC(ucc_cl_hier_context_t,
goto out;
}

cl_info(cl_config->cl_lib, "initialized cl context: %p", self);
cl_debug(cl_config->cl_lib, "initialized cl context: %p", self);
return UCC_OK;

out:
Expand All @@ -72,7 +72,7 @@ UCC_CLASS_INIT_FUNC(ucc_cl_hier_context_t,
UCC_CLASS_CLEANUP_FUNC(ucc_cl_hier_context_t)
{
int i;
cl_info(self->super.super.lib, "finalizing cl context: %p", self);
cl_debug(self->super.super.lib, "finalizing cl context: %p", self);

ucc_mpool_cleanup(&self->sched_mp, 1);
for (i = 0; i < self->super.n_tl_ctxs; i++) {
Expand Down
4 changes: 2 additions & 2 deletions src/components/cl/hier/cl_hier_lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,15 @@ UCC_CLASS_INIT_FUNC(ucc_cl_hier_lib_t, const ucc_base_lib_params_t *params,
self->tls.array.count = requested_sbgp_tls.count;
self->tls.array.names = requested_sbgp_tls.names;

cl_info(&self->super, "initialized lib object: %p", self);
cl_debug(&self->super, "initialized lib object: %p", self);
return status;
}

UCC_CLASS_CLEANUP_FUNC(ucc_cl_hier_lib_t)
{
int i;

cl_info(&self->super, "finalizing lib object: %p", self);
cl_debug(&self->super, "finalizing lib object: %p", self);
for (i = 0; i < UCC_HIER_SBGP_LAST; i++) {
ucc_config_names_array_free(&self->cfg.sbgp_tls[i].array);
}
Expand Down
14 changes: 7 additions & 7 deletions src/components/cl/hier/cl_hier_team.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -42,7 +42,7 @@ UCC_CLASS_INIT_FUNC(ucc_cl_hier_team_t, ucc_base_context_t *cl_context,
ucc_subset_t subset;
struct ucc_team_team_desc *d;
if (!params->team->topo) {
cl_info(cl_context->lib,
cl_debug(cl_context->lib,
"can't create hier team without topology data");
return UCC_ERR_INVALID_PARAM;
}
Expand Down Expand Up @@ -139,7 +139,7 @@ UCC_CLASS_INIT_FUNC(ucc_cl_hier_team_t, ucc_base_context_t *cl_context,
cl_error(cl_context->lib, "failed to post tl team create (%d)", status);
goto err;
}
cl_info(cl_context->lib, "posted cl team: %p", self);
cl_debug(cl_context->lib, "posted cl team: %p", self);
return UCC_OK;
err:
ucc_team_multiple_req_free(self->team_create_req);
Expand All @@ -148,7 +148,7 @@ UCC_CLASS_INIT_FUNC(ucc_cl_hier_team_t, ucc_base_context_t *cl_context,

/*
 * Cleanup function for ucc_cl_hier_team_t.
 *
 * The visible body only logs the address of the team being finalized, using
 * the lib handle reached through self->super.super.context->lib.
 *
 * NOTE(review): this is a diff rendering - the cl_info line appears to be the
 * removed (pre-commit) statement and the cl_debug line its replacement, so
 * only one of the two is expected to exist in the real file. Confirm against
 * the repository.
 */
UCC_CLASS_CLEANUP_FUNC(ucc_cl_hier_team_t)
{
/* presumably the old statement: message emitted via cl_info */
cl_info(self->super.super.context->lib, "finalizing cl team: %p", self);
/* presumably the new statement: identical message emitted via cl_debug */
cl_debug(self->super.super.context->lib, "finalizing cl team: %p", self);
}

UCC_CLASS_DEFINE_DELETE_FUNC(ucc_cl_hier_team_t, ucc_base_team_t);
Expand Down Expand Up @@ -256,9 +256,9 @@ ucc_status_t ucc_cl_hier_team_create_test(ucc_base_team_t *cl_team)
hs->score = score_merge;
}
}
cl_info(ctx->super.super.lib, "initialized tl %s team for sbgp %s",
UCC_TL_CTX_IFACE(d->ctx)->super.name,
ucc_sbgp_str(hs->sbgp_type));
cl_debug(ctx->super.super.lib, "initialized tl %s team for sbgp %s",
UCC_TL_CTX_IFACE(d->ctx)->super.name,
ucc_sbgp_str(hs->sbgp_type));
} else {
cl_debug(ctx->super.super.lib, "failed to create tl %s team",
UCC_TL_CTX_IFACE(d->ctx)->super.name);
Expand Down
8 changes: 4 additions & 4 deletions src/components/ec/cuda/ec_cuda.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -185,7 +185,7 @@ static ucc_status_t ucc_ec_cuda_init(const ucc_ec_params_t *ec_params)
ucc_ec_cuda.thread_mode = ec_params->thread_mode;
cuda_st = cudaGetDeviceCount(&num_devices);
if ((cuda_st != cudaSuccess) || (num_devices == 0)) {
ec_info(&ucc_ec_cuda.super, "CUDA devices are not found");
ec_debug(&ucc_ec_cuda.super, "CUDA devices are not found");
return UCC_ERR_NO_RESOURCE;
}
CUDA_CHECK(cudaGetDevice(&device));
Expand Down Expand Up @@ -283,8 +283,8 @@ static ucc_status_t ucc_ec_cuda_init(const ucc_ec_params_t *ec_params)

if (cfg->strm_task_mode == UCC_EC_CUDA_TASK_AUTO) {
if (attr == 0) {
ec_info(&ucc_ec_cuda.super,
"CUDA MEM OPS are not supported or disabled");
ec_debug(&ucc_ec_cuda.super,
"CUDA MEM OPS are not supported or disabled");
ucc_ec_cuda.strm_task_mode = UCC_EC_CUDA_TASK_KERNEL;
}
} else if (attr == 0) {
Expand Down
4 changes: 2 additions & 2 deletions src/components/ec/rocm/ec_rocm.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (C) Advanced Micro Devices, Inc. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
Expand Down Expand Up @@ -156,7 +156,7 @@ static ucc_status_t ucc_ec_rocm_init(const ucc_ec_params_t *ec_params)
ucc_ec_rocm.thread_mode = ec_params->thread_mode;
rocm_st = hipGetDeviceCount(&num_devices);
if ((rocm_st != hipSuccess) || (num_devices == 0)) {
ec_info(&ucc_ec_rocm.super, "rocm devices are not found");
ec_debug(&ucc_ec_rocm.super, "rocm devices are not found");
return UCC_ERR_NO_RESOURCE;
}
ROCMCHECK(hipGetDevice(&device));
Expand Down
10 changes: 5 additions & 5 deletions src/components/ec/ucc_ec.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -43,15 +43,15 @@ ucc_status_t ucc_ec_init(const ucc_ec_params_t *ec_params)
status = ucc_config_parser_fill_opts(
ec->config, &ec->config_table, "UCC_", 1);
if (UCC_OK != status) {
ucc_info("failed to parse config for EC component: %s (%d)",
ec->super.name, status);
ucc_debug("failed to parse config for EC component: %s (%d)",
ec->super.name, status);
ucc_free(ec->config);
continue;
}
status = ec->init(ec_params);
if (UCC_OK != status) {
ucc_info("ec_init failed for component: %s, skipping (%d)",
ec->super.name, status);
ucc_debug("ec_init failed for component: %s, skipping (%d)",
ec->super.name, status);
ucc_config_parser_release_opts(ec->config,
ec->config_table.table);
ucc_free(ec->config);
Expand Down
4 changes: 2 additions & 2 deletions src/components/mc/cuda/mc_cuda.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -58,7 +58,7 @@ static ucc_status_t ucc_mc_cuda_init(const ucc_mc_params_t *mc_params)
ucc_mc_cuda.thread_mode = mc_params->thread_mode;
cuda_st = cudaGetDeviceCount(&num_devices);
if ((cuda_st != cudaSuccess) || (num_devices == 0)) {
mc_info(&ucc_mc_cuda.super, "cuda devices are not found");
mc_debug(&ucc_mc_cuda.super, "cuda devices are not found");
return UCC_ERR_NO_RESOURCE;
}
CUDADRV_FUNC(cuDriverGetVersion(&driver_ver));
Expand Down
4 changes: 2 additions & 2 deletions src/components/mc/rocm/mc_rocm.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (C) Advanced Micro Devices, Inc. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
Expand Down Expand Up @@ -40,7 +40,7 @@ static ucc_status_t ucc_mc_rocm_init(const ucc_mc_params_t *mc_params)
ucc_mc_rocm.thread_mode = mc_params->thread_mode;
rocm_st = hipGetDeviceCount(&num_devices);
if ((rocm_st != hipSuccess) || (num_devices == 0)) {
mc_info(&ucc_mc_rocm.super, "rocm devices are not found");
mc_debug(&ucc_mc_rocm.super, "rocm devices are not found");
return hip_error_to_ucc_status(rocm_st);
}

Expand Down
10 changes: 5 additions & 5 deletions src/components/mc/ucc_mc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -50,15 +50,15 @@ ucc_status_t ucc_mc_init(const ucc_mc_params_t *mc_params)
status = ucc_config_parser_fill_opts(
mc->config, &mc->config_table, "UCC_", 1);
if (UCC_OK != status) {
ucc_info("failed to parse config for mc: %s (%d)",
mc->super.name, status);
ucc_debug("failed to parse config for mc: %s (%d)",
mc->super.name, status);
ucc_free(mc->config);
continue;
}
status = mc->init(mc_params);
if (UCC_OK != status) {
ucc_info("mc_init failed for component: %s, skipping (%d)",
mc->super.name, status);
ucc_debug("mc_init failed for component: %s, skipping (%d)",
mc->super.name, status);
ucc_config_parser_release_opts(mc->config,
mc->config_table.table);
ucc_free(mc->config);
Expand Down
18 changes: 9 additions & 9 deletions src/components/tl/cuda/tl_cuda_context.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -29,18 +29,18 @@ UCC_CLASS_INIT_FUNC(ucc_tl_cuda_context_t,

cuda_st = cudaGetDeviceCount(&num_devices);
if (cuda_st != cudaSuccess) {
tl_info(self->super.super.lib, "failed to get number of GPU devices"
"%d %s", cuda_st, cudaGetErrorName(cuda_st));
return UCC_ERR_NO_MESSAGE;
tl_debug(self->super.super.lib, "failed to get number of GPU devices"
"%d %s", cuda_st, cudaGetErrorName(cuda_st));
return UCC_ERR_NO_RESOURCE;
} else if (num_devices == 0) {
tl_info(self->super.super.lib, "no GPU devices found");
tl_debug(self->super.super.lib, "no GPU devices found");
return UCC_ERR_NO_RESOURCE;
}

cu_st = cuCtxGetCurrent(&cu_ctx);
if (cu_ctx == NULL || cu_st != CUDA_SUCCESS) {
tl_info(self->super.super.lib,
"cannot create CUDA TL context without active CUDA context");
tl_debug(self->super.super.lib,
"cannot create CUDA TL context without active CUDA context");
return UCC_ERR_NO_RESOURCE;
}

Expand Down Expand Up @@ -68,7 +68,7 @@ UCC_CLASS_INIT_FUNC(ucc_tl_cuda_context_t,
}

self->ipc_cache = kh_init(tl_cuda_ep_hash);
tl_info(self->super.super.lib, "initialized tl context: %p", self);
tl_debug(self->super.super.lib, "initialized tl context: %p", self);
return UCC_OK;

free_mpool:
Expand All @@ -78,7 +78,7 @@ UCC_CLASS_INIT_FUNC(ucc_tl_cuda_context_t,

UCC_CLASS_CLEANUP_FUNC(ucc_tl_cuda_context_t)
{
tl_info(self->super.super.lib, "finalizing tl context: %p", self);
tl_debug(self->super.super.lib, "finalizing tl context: %p", self);
ucc_tl_cuda_topo_destroy(self->topo);
ucc_mpool_cleanup(&self->req_mp, 1);
}
Expand Down
Loading

0 comments on commit c7c4375

Please sign in to comment.