diff --git a/src/components/mc/cuda/mc_cuda.c b/src/components/mc/cuda/mc_cuda.c index 97f4ef2276..27590c5c1b 100644 --- a/src/components/mc/cuda/mc_cuda.c +++ b/src/components/mc/cuda/mc_cuda.c @@ -226,7 +226,7 @@ static ucc_status_t ucc_mc_cuda_mem_free(ucc_mc_buffer_header_t *h_ptr) mc_error(&ucc_mc_cuda.super, "failed to free mem at %p, " "cuda error %d(%s)", - h_ptr->addr, st, cudaGetErrorString(st)); + h_ptr, st, cudaGetErrorString(st)); return UCC_ERR_NO_MESSAGE; } ucc_free(h_ptr); diff --git a/src/components/tl/cuda/allgatherv/allgatherv_linear.c b/src/components/tl/cuda/allgatherv/allgatherv_linear.c index 9d2814d4f4..0fca5c6af6 100644 --- a/src/components/tl/cuda/allgatherv/allgatherv_linear.c +++ b/src/components/tl/cuda/allgatherv/allgatherv_linear.c @@ -131,7 +131,7 @@ static inline ucc_status_t ecopy(void *dst, void *src, size_t size, ucc_ee_executor_t * exec, ucc_ee_executor_task_t **etask) { - ucc_ee_executor_task_args_t exec_args; + ucc_ee_executor_task_args_t exec_args = {0}; exec_args.task_type = UCC_EE_EXECUTOR_TASK_COPY; exec_args.copy.dst = dst; diff --git a/src/components/tl/cuda/alltoallv/alltoallv_ce.c b/src/components/tl/cuda/alltoallv/alltoallv_ce.c index e701913dbb..62ca511bb6 100644 --- a/src/components/tl/cuda/alltoallv/alltoallv_ce.c +++ b/src/components/tl/cuda/alltoallv/alltoallv_ce.c @@ -146,9 +146,10 @@ ucc_status_t ucc_tl_cuda_alltoallv_setup_test(ucc_tl_cuda_task_t *task) ucc_status_t ucc_tl_cuda_alltoallv_ce_post_copies(ucc_tl_cuda_task_t *task) { - ucc_tl_cuda_team_t *team = TASK_TEAM(task); - ucc_rank_t rank = UCC_TL_TEAM_RANK(team); - ucc_tl_cuda_sync_t *sync = TASK_SYNC(task, rank); + ucc_tl_cuda_team_t *team = TASK_TEAM(task); + ucc_rank_t rank = UCC_TL_TEAM_RANK(team); + ucc_tl_cuda_sync_t *sync = TASK_SYNC(task, rank); + ucc_ee_executor_task_args_t exec_args = {0}; ucc_tl_cuda_sync_t *peer_sync; ucc_ee_executor_t *exec; void *src, *dst; @@ -156,7 +157,6 @@ ucc_status_t ucc_tl_cuda_alltoallv_ce_post_copies(ucc_tl_cuda_task_t *task) size_t data_size, data_displ; ucc_rank_t i, peer, psrc, pdst; ucc_status_t status; - ucc_ee_executor_task_args_t exec_args; task->alltoallv_ce.num_posted = 0; status = ucc_coll_task_get_executor(&task->super, &exec); diff --git a/src/components/tl/cuda/tl_cuda_cache.c b/src/components/tl/cuda/tl_cuda_cache.c index 977bf90442..6270210f05 100644 --- a/src/components/tl/cuda/tl_cuda_cache.c +++ b/src/components/tl/cuda/tl_cuda_cache.c @@ -280,6 +280,7 @@ ucc_tl_cuda_map_memhandle(const void *d_ptr, size_t size, err: pthread_rwlock_unlock(&cache->lock); + // coverity[leaked_storage:FALSE] return status; diff --git a/src/components/tl/mlx5/alltoall/alltoall.c b/src/components/tl/mlx5/alltoall/alltoall.c index f875b4dc52..2550445dfe 100644 --- a/src/components/tl/mlx5/alltoall/alltoall.c +++ b/src/components/tl/mlx5/alltoall/alltoall.c @@ -523,6 +523,7 @@ ucc_tl_mlx5_team_alltoall_init_progress(ucc_tl_mlx5_team_t *tl_team) tl_error(UCC_TL_TEAM_LIB(tl_team), "failure during service coll exchange: %s", ucc_status_string(status)); + ucc_service_coll_finalize(tl_team->scoll_req); goto err_service_allgather_progress; } if (UCC_INPROGRESS == status) { @@ -652,7 +653,6 @@ ucc_tl_mlx5_team_alltoall_init_progress(ucc_tl_mlx5_team_t *tl_team) ucc_free(a2a->net.rank_map); err_rank_map: err_service_allgather_progress: - ucc_service_coll_finalize(tl_team->scoll_req); err_service_allgather_post: if (!a2a->is_dc) { err_create_rc_qps: diff --git a/src/components/tl/mlx5/alltoall/alltoall_mkeys.c b/src/components/tl/mlx5/alltoall/alltoall_mkeys.c index c4af73c50f..7dd90d49b8 100644 --- a/src/components/tl/mlx5/alltoall/alltoall_mkeys.c +++ b/src/components/tl/mlx5/alltoall/alltoall_mkeys.c @@ -151,7 +151,7 @@ static ucc_status_t create_and_populate_recv_team_mkey(ucc_tl_mlx5_team_t *team, ucc_tl_mlx5_alltoall_t *a2a = team->a2a; ucc_tl_mlx5_alltoall_node_t *node = &a2a->node; int mnc = a2a->max_num_of_columns; - ucc_status_t status; + ucc_status_t status = UCC_OK; int i, j, index; status = create_master_key(MAX_OUTSTANDING_OPS * mnc, a2a->pd, @@ -162,6 +162,12 @@ static ucc_status_t create_and_populate_recv_team_mkey(ucc_tl_mlx5_team_t *team, struct ibv_sge *team_mkey_klm_entries = (struct ibv_sge *)calloc( MAX_OUTSTANDING_OPS * mnc, sizeof(struct ibv_sge)); + if (!team_mkey_klm_entries) { + tl_error(lib, "failed to allocate team_mkey_klm_entries"); + status = UCC_ERR_NO_MEMORY; + goto err_calloc; + } + for (i = 0; i < MAX_OUTSTANDING_OPS; i++) { for (j = 0; j < mnc; j++) { index = i * mnc + j; @@ -179,13 +185,18 @@ static ucc_status_t create_and_populate_recv_team_mkey(ucc_tl_mlx5_team_t *team, team_mkey_klm_entries, MAX_OUTSTANDING_OPS * mnc, lib); if (status != UCC_OK) { tl_error(a2a, "failed to populate team mkey"); - if (mlx5dv_destroy_mkey(node->team_recv_mkey)) { - tl_error(lib, "mkey destroy failed(errno=%d)", errno); - } - return status; + goto err_mkey; } ucc_free(team_mkey_klm_entries); - return UCC_OK; + return status; + +err_mkey: + ucc_free(team_mkey_klm_entries); +err_calloc: + if (mlx5dv_destroy_mkey(node->team_recv_mkey)) { + tl_error(lib, "mkey destroy failed(errno=%d)", errno); + } + return status; } /** @@ -237,7 +248,7 @@ ucc_status_t ucc_tl_mlx5_init_mkeys(ucc_tl_mlx5_team_t *team, status = create_and_populate_recv_team_mkey(team, lib); if (status != UCC_OK) { tl_error(lib, "failed to create recv top masterkey"); - goto err_create_mkey; + goto err_malloc; } return UCC_OK; diff --git a/src/components/tl/mlx5/tl_mlx5_context.c b/src/components/tl/mlx5/tl_mlx5_context.c index 2407708980..240ec85b71 100644 --- a/src/components/tl/mlx5/tl_mlx5_context.c +++ b/src/components/tl/mlx5/tl_mlx5_context.c @@ -175,6 +175,7 @@ ucc_status_t ucc_tl_mlx5_context_create_epilog(ucc_base_context_t *context) return UCC_ERR_NO_MEMORY; } + memset(&s.map, 0, sizeof(ucc_ep_map_t)); s.map.type = UCC_EP_MAP_FULL; s.map.ep_num = core_ctx->params.oob.n_oob_eps; s.myrank = core_ctx->rank; @@ -266,12 +267,13 @@ ucc_status_t ucc_tl_mlx5_context_create_epilog(ucc_base_context_t *context) ucc_free(sbcast_data); ucc_topo_cleanup(topo); - + close(sock); return UCC_OK; err: ucc_tl_mlx5_remove_shared_ctx_pd(ctx); ucc_topo_cleanup(topo); + close(sock); err_topo: ucc_free(sbcast_data); return status; diff --git a/src/components/tl/self/tl_self_coll.c b/src/components/tl/self/tl_self_coll.c index f7abeb1fff..c86e2b8dab 100644 --- a/src/components/tl/self/tl_self_coll.c +++ b/src/components/tl/self/tl_self_coll.c @@ -70,10 +70,11 @@ void ucc_tl_self_copy_progress(ucc_coll_task_t *coll_task) ucc_status_t ucc_tl_self_copy_start(ucc_coll_task_t *coll_task) { - ucc_tl_self_task_t *task = ucc_derived_of(coll_task, ucc_tl_self_task_t); - ucc_ee_executor_t *exec; - ucc_ee_executor_task_args_t exec_args; - ucc_status_t status; + ucc_tl_self_task_t *task = ucc_derived_of(coll_task, + ucc_tl_self_task_t); + ucc_ee_executor_task_args_t exec_args = {0}; + ucc_ee_executor_t *exec; + ucc_status_t status; status = ucc_coll_task_get_executor(&task->super, &exec); if (ucc_unlikely(status != UCC_OK)) { diff --git a/src/components/tl/ucp/allgather/allgather_knomial.c b/src/components/tl/ucp/allgather/allgather_knomial.c index c131619d7d..94531945e9 100644 --- a/src/components/tl/ucp/allgather/allgather_knomial.c +++ b/src/components/tl/ucp/allgather/allgather_knomial.c @@ -167,19 +167,20 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) { - ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); - ucc_coll_args_t *args = &TASK_ARGS(task); - ucc_tl_ucp_team_t *team = TASK_TEAM(task); - ucc_coll_type_t ct = args->coll_type; - ucc_rank_t size = UCC_TL_TEAM_SIZE(team); - ucc_kn_radix_t radix = task->allgather_kn.p.radix; - ucc_knomial_pattern_t *p = &task->allgather_kn.p; - ucc_rank_t rank = VRANK(UCC_TL_TEAM_RANK(team), - ct == UCC_COLL_TYPE_BCAST ? args->root : 0, - size); + ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, + ucc_tl_ucp_task_t); + ucc_coll_args_t *args = &TASK_ARGS(task); + ucc_tl_ucp_team_t *team = TASK_TEAM(task); + ucc_coll_type_t ct = args->coll_type; + ucc_rank_t size = UCC_TL_TEAM_SIZE(team); + ucc_kn_radix_t radix = task->allgather_kn.p.radix; + ucc_knomial_pattern_t *p = &task->allgather_kn.p; + ucc_rank_t rank = VRANK(UCC_TL_TEAM_RANK(team), + ct == UCC_COLL_TYPE_BCAST ? + args->root : 0, size); + ucc_ee_executor_task_args_t eargs = {0}; ucc_status_t status; ptrdiff_t offset; - ucc_ee_executor_task_args_t eargs; ucc_ee_executor_t *exec; UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_kn_start", 0); diff --git a/src/components/tl/ucp/reduce_scatter/reduce_scatter_knomial.c b/src/components/tl/ucp/reduce_scatter/reduce_scatter_knomial.c index c7c6ec02bd..28b5c40e23 100644 --- a/src/components/tl/ucp/reduce_scatter/reduce_scatter_knomial.c +++ b/src/components/tl/ucp/reduce_scatter/reduce_scatter_knomial.c @@ -17,33 +17,35 @@ void ucc_tl_ucp_reduce_scatter_knomial_progress(ucc_coll_task_t *coll_task) { - ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); - ucc_coll_args_t *args = &TASK_ARGS(task); - ucc_tl_ucp_team_t *team = TASK_TEAM(task); - ucc_kn_radix_t radix = task->reduce_scatter_kn.p.radix; - int avg_pre_op = + ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, + ucc_tl_ucp_task_t); + ucc_coll_args_t *args = &TASK_ARGS(task); + ucc_tl_ucp_team_t *team = TASK_TEAM(task); + ucc_kn_radix_t radix = task->reduce_scatter_kn.p.radix; + int avg_pre_op = UCC_TL_UCP_TEAM_LIB(team)->cfg.reduce_avg_pre_op; - uint8_t node_type = task->reduce_scatter_kn.p.node_type; - ucc_knomial_pattern_t *p = &task->reduce_scatter_kn.p; - void *scratch = task->reduce_scatter_kn.scratch; - void *rbuf = args->dst.info.buffer; - ucc_memory_type_t mem_type = args->dst.info.mem_type; - size_t count = args->dst.info.count; - ucc_datatype_t dt = args->dst.info.datatype; - void *sbuf = UCC_IS_INPLACE(*args) ? + uint8_t node_type = + task->reduce_scatter_kn.p.node_type; + ucc_knomial_pattern_t *p = &task->reduce_scatter_kn.p; + void *scratch = task->reduce_scatter_kn.scratch; + void *rbuf = args->dst.info.buffer; + ucc_memory_type_t mem_type = args->dst.info.mem_type; + size_t count = args->dst.info.count; + ucc_datatype_t dt = args->dst.info.datatype; + void *sbuf = UCC_IS_INPLACE(*args) ? rbuf : args->src.info.buffer; - size_t dt_size = ucc_dt_size(dt); - size_t data_size = count * dt_size; - ucc_rank_t rank = UCC_TL_TEAM_RANK(team); - ucc_rank_t size = UCC_TL_TEAM_SIZE(team); - ptrdiff_t peer_seg_offset, local_seg_offset, offset; - ucc_rank_t peer, step_radix, peer_seg_index, local_seg_index; - ucc_status_t status; - ucc_kn_radix_t loop_step; - size_t block_count, peer_seg_count, local_seg_count; - void *reduce_data, *local_data; - int is_avg; - ucc_ee_executor_task_args_t eargs; + size_t dt_size = ucc_dt_size(dt); + size_t data_size = count * dt_size; + ucc_rank_t rank = UCC_TL_TEAM_RANK(team); + ucc_rank_t size = UCC_TL_TEAM_SIZE(team); + ucc_ee_executor_task_args_t eargs = {0}; + ptrdiff_t peer_seg_offset, local_seg_offset, offset; + ucc_rank_t peer, step_radix, peer_seg_index, local_seg_index; + ucc_status_t status; + ucc_kn_radix_t loop_step; + size_t block_count, peer_seg_count, local_seg_count; + void *reduce_data, *local_data; + int is_avg; local_seg_count = 0; block_count = ucc_sra_kn_compute_block_count(count, rank, p);