Skip to content

Commit

Permalink
CORE: add post and finalize to debug log
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev committed Feb 2, 2023
1 parent d0484cc commit 4c90c34
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 26 deletions.
89 changes: 69 additions & 20 deletions src/core/ucc_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,22 @@ UCC_CORE_PROFILE_FUNC(ucc_status_t, ucc_collective_post, (request),
{
ucc_coll_task_t *task = ucc_derived_of(request, ucc_coll_task_t);
ucc_status_t status;
ucc_debug("coll_post: req %p, seq_num %u", task, task->seq_num);

if (ucc_global_config.coll_trace.log_level >= UCC_LOG_LEVEL_DEBUG) {
ucc_rank_t rank = task->team->params.team->rank;
if (ucc_global_config.coll_trace.log_level == UCC_LOG_LEVEL_DEBUG) {
if (rank == 0) {
ucc_log_component_collective_trace(
ucc_global_config.coll_trace.log_level,
"coll post: req %p, seq_num %u", task, task->seq_num);
}
} else {
ucc_log_component_collective_trace(
ucc_global_config.coll_trace.log_level,
"coll post: rank %d req %p, seq_num %u", rank, task,
task->seq_num);
}
}

COLL_POST_STATUS_CHECK(task);
if (UCC_COLL_TIMEOUT_REQUIRED(task)) {
Expand All @@ -312,6 +327,34 @@ UCC_CORE_PROFILE_FUNC(ucc_status_t, ucc_collective_post, (request),
return task->post(task);
}

/*
 * Post the collective referenced by an execution-engine event.
 *
 * ee - execution engine handle, forwarded untouched to the task's
 *      triggered_post callback.
 * ev - event whose req field holds the collective request to post.
 *
 * When collective tracing is configured at exactly DEBUG level only team
 * rank 0 emits the trace line; at any higher level every rank emits it and
 * the message additionally carries the rank. Returns whatever the task's
 * triggered_post callback returns (COLL_POST_STATUS_CHECK is defined
 * elsewhere and presumably may return earlier on a bad task status).
 */
ucc_status_t ucc_collective_triggered_post(ucc_ee_h ee, ucc_ev_t *ev)
{
    ucc_coll_task_t *task = ucc_derived_of(ev->req, ucc_coll_task_t);

    if (ucc_global_config.coll_trace.log_level >= UCC_LOG_LEVEL_DEBUG) {
        const ucc_rank_t rank = task->team->params.team->rank;

        if (ucc_global_config.coll_trace.log_level != UCC_LOG_LEVEL_DEBUG) {
            /* more verbose than DEBUG: report from every rank */
            ucc_log_component_collective_trace(
                ucc_global_config.coll_trace.log_level,
                "coll triggered_post: rank %d req %p, seq_num %u", rank, task,
                task->seq_num);
        } else if (rank == 0) {
            /* plain DEBUG: a single line from rank 0 is enough */
            ucc_log_component_collective_trace(
                ucc_global_config.coll_trace.log_level,
                "coll triggered_post: req %p, seq_num %u", task,
                task->seq_num);
        }
    }

    COLL_POST_STATUS_CHECK(task);

    /* stamp the start time only for tasks that need timeout tracking */
    if (UCC_COLL_TIMEOUT_REQUIRED(task)) {
        task->start_time = ucc_get_time();
    }

    return task->triggered_post(ee, ev, task);
}

UCC_CORE_PROFILE_FUNC(ucc_status_t, ucc_collective_init_and_post,
(coll_args, request, team), ucc_coll_args_t *coll_args, //NOLINT
ucc_coll_req_h *request, ucc_team_h team) //NOLINT
Expand All @@ -321,13 +364,10 @@ UCC_CORE_PROFILE_FUNC(ucc_status_t, ucc_collective_init_and_post,
return UCC_ERR_NOT_IMPLEMENTED;
}

UCC_CORE_PROFILE_FUNC(ucc_status_t, ucc_collective_finalize, (request),
ucc_coll_req_h request)
ucc_status_t ucc_collective_finalize_internal(ucc_coll_task_t *task)
{
ucc_coll_task_t *task = ucc_derived_of(request, ucc_coll_task_t);
ucc_status_t st;

ucc_debug("coll_finalize: req %p, seq_num %u", task, task->seq_num);
if (ucc_unlikely(task->super.status == UCC_INPROGRESS)) {
ucc_error("attempt to finalize task with status UCC_INPROGRESS");
return UCC_ERR_INVALID_PARAM;
Expand All @@ -336,13 +376,35 @@ UCC_CORE_PROFILE_FUNC(ucc_status_t, ucc_collective_finalize, (request),
if (task->executor) {
st = ucc_ee_executor_finalize(task->executor);
if (ucc_unlikely(st != UCC_OK)) {
ucc_error("executor finalize error: %s",
ucc_status_string(st));
ucc_error("executor finalize error: %s", ucc_status_string(st));
}
}
return task->finalize(task);
}

/*
 * Public finalize entry point for a collective request.
 *
 * request - collective request handle; it is converted to the enclosing
 *           ucc_coll_task_t before use.
 *
 * Emits the "coll finalize" trace line (rank 0 only when collective tracing
 * is at exactly DEBUG level, every rank with its rank id at higher levels)
 * and then hands the task to ucc_collective_finalize_internal(), whose
 * status is returned unchanged.
 */
UCC_CORE_PROFILE_FUNC(ucc_status_t, ucc_collective_finalize, (request),
                      ucc_coll_req_h request)
{
    ucc_coll_task_t *task = ucc_derived_of(request, ucc_coll_task_t);

    if (ucc_global_config.coll_trace.log_level >= UCC_LOG_LEVEL_DEBUG) {
        const ucc_rank_t rank = task->team->params.team->rank;

        if (ucc_global_config.coll_trace.log_level != UCC_LOG_LEVEL_DEBUG) {
            /* more verbose than DEBUG: report from every rank */
            ucc_log_component_collective_trace(
                ucc_global_config.coll_trace.log_level,
                "coll finalize: rank %d req %p, seq_num %u", rank, task,
                task->seq_num);
        } else if (rank == 0) {
            /* plain DEBUG: a single line from rank 0 is enough */
            ucc_log_component_collective_trace(
                ucc_global_config.coll_trace.log_level,
                "coll finalize: req %p, seq_num %u", task, task->seq_num);
        }
    }

    return ucc_collective_finalize_internal(task);
}

static ucc_status_t ucc_triggered_task_finalize(ucc_coll_task_t *task)
{
ucc_trace("finalizing triggered ev task %p", task);
Expand Down Expand Up @@ -492,16 +554,3 @@ ucc_status_t ucc_triggered_post(ucc_ee_h ee, ucc_ev_t *ev,

return ucc_progress_queue_enqueue(UCC_TASK_CORE_CTX(ev_task)->pq, ev_task);
}

ucc_status_t ucc_collective_triggered_post(ucc_ee_h ee, ucc_ev_t *ev)
{
ucc_coll_task_t *task = ucc_derived_of(ev->req, ucc_coll_task_t);

ucc_debug("triggered_post: task %p, seq_num %u", task, task->seq_num);

COLL_POST_STATUS_CHECK(task);
if (UCC_COLL_TIMEOUT_REQUIRED(task)) {
task->start_time = ucc_get_time();
}
return task->triggered_post(ee, ev, task);
}
4 changes: 3 additions & 1 deletion src/core/ucc_service_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,13 @@ ucc_status_t ucc_service_coll_test(ucc_service_coll_req_t *req)
return status;
}

ucc_status_t ucc_collective_finalize_internal(ucc_coll_task_t *task);

ucc_status_t ucc_service_coll_finalize(ucc_service_coll_req_t *req)
{
ucc_status_t status;

status = ucc_collective_finalize(&req->task->super);
status = ucc_collective_finalize_internal(req->task);
ucc_free(req);
return status;
}
Expand Down
18 changes: 13 additions & 5 deletions src/utils/ucc_coll_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ void ucc_coll_str(const ucc_coll_task_t *task, char *str, size_t len,
int verbosity)
{
ucc_team_t *team = task->bargs.team;
int rc;

if (verbosity >= UCC_LOG_LEVEL_DIAG) {
ucc_coll_args_str(&task->bargs.args, team->rank, team->size, str, len);
Expand All @@ -550,17 +551,24 @@ void ucc_coll_str(const ucc_coll_task_t *task, char *str, size_t len,

if (task->team->context->lib->log_component.name[0] == 'C') {
/* it's not CL BASIC task */
strncpy(cl_info, task->team->context->lib->log_component.name, 16);
strncpy(cl_info, task->team->context->lib->log_component.name,
sizeof(cl_info));
ucc_coll_task_components_str(task, tl_info, &tl_info_len);
} else {
strncpy(cl_info, "CL_BASIC", 16);
strncpy(tl_info , task->team->context->lib->log_component.name, 16);
strncpy(cl_info, "CL_BASIC", sizeof(cl_info));
strncpy(tl_info , task->team->context->lib->log_component.name,
sizeof(tl_info));
}
ucc_coll_args_str(&task->bargs.args, team->rank, team->size, str, len);
ucc_snprintf_safe(task_info, sizeof(task_info), "; %s {%s}, team_id %d",
cl_info, tl_info, team->id);
rc = ucc_snprintf_safe(task_info, sizeof(task_info),
"; %s {%s}, team_id %d",
cl_info, tl_info, team->id);
if (rc < 0) {
return;
}
strncat(str, task_info, len - strlen(str));
}

if (verbosity >= UCC_LOG_LEVEL_DEBUG) {
char task_info[64];
ucc_snprintf_safe(task_info, sizeof(task_info),
Expand Down

0 comments on commit 4c90c34

Please sign in to comment.