Skip to content

Commit

Permalink
REVIEW: fix review comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
Sergei-Lebedev committed Jan 3, 2024
1 parent 3c0dad3 commit c2aa99f
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/components/tl/nccl/tl_nccl.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ static ucs_config_field_t ucc_tl_nccl_context_config_table[] = {
},

{"BLOCKING", "yes",
"If set to yes will use non-blocking mode communicator behavior, "
"if set to no will use blocking mode",
"If set to no will use non-blocking mode communicator behavior, "
"if set to yes will use blocking mode",
ucs_offsetof(ucc_tl_nccl_context_config_t, nccl_cfg_blocking),
UCS_CONFIG_TYPE_BOOL},

Expand Down
8 changes: 5 additions & 3 deletions src/components/tl/nccl/tl_nccl_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,11 @@ ucc_status_t ucc_tl_nccl_init_task(ucc_base_coll_args_t *coll_args,
return UCC_ERR_NOT_SUPPORTED;
}

status = ucc_tl_nccl_comm_init(nccl_team);
if (ucc_unlikely(status != UCC_OK)) {
return status;
if (ucc_unlikely(nccl_team->comm_state != TL_NCCL_COMM_STATE_READY)) {
status = ucc_tl_nccl_comm_init(nccl_team);
if (ucc_unlikely(status != UCC_OK)) {
return status;
}
}

task = ucc_mpool_get(&nccl_ctx->req_mp);
Expand Down
7 changes: 4 additions & 3 deletions src/components/tl/nccl/tl_nccl_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ ucc_status_t ucc_tl_nccl_comm_init(ucc_tl_nccl_team_t *team)
ucc_rank_t trank = UCC_TL_TEAM_RANK(team);
ucc_status_t status;
ncclResult_t nccl_status;
#if NCCL_USE_NON_BLOCKING
ncclConfig_t nccl_cfg = NCCL_CONFIG_INITIALIZER;
ncclResult_t async_status;
#endif

if (team->comm_state == TL_NCCL_COMM_STATE_READY) {
return UCC_OK;
Expand All @@ -138,9 +142,6 @@ ucc_status_t ucc_tl_nccl_comm_init(ucc_tl_nccl_team_t *team)
cudaStreamNonBlocking),
exit_err, status);
#if NCCL_USE_NON_BLOCKING
ncclConfig_t nccl_cfg = NCCL_CONFIG_INITIALIZER;
ncclResult_t async_status;

/*
* If the NCCL comm is initialized during the first call to collective init (a.k.a. lazy init),
* we need to use blocking init to correctly fall back to another TL in case of error
Expand Down

0 comments on commit c2aa99f

Please sign in to comment.