diff --git a/src/components/tl/mlx5/tl_mlx5.h b/src/components/tl/mlx5/tl_mlx5.h index a85e7d4372..a8676c5a9a 100644 --- a/src/components/tl/mlx5/tl_mlx5.h +++ b/src/components/tl/mlx5/tl_mlx5.h @@ -97,7 +97,7 @@ typedef struct ucc_tl_mlx5_team { UCC_CLASS_DECLARE(ucc_tl_mlx5_team_t, ucc_base_context_t *, const ucc_base_team_params_t *); -ucc_status_t tl_mlx5_create_rcache(ucc_tl_mlx5_context_t *ctx); +ucc_status_t tl_mlx5_rcache_create(ucc_tl_mlx5_context_t *ctx); typedef struct ucc_tl_mlx5_reg { struct ibv_mr * mr; ucs_rcache_region_t *region; @@ -106,8 +106,7 @@ typedef struct ucc_tl_mlx5_reg { static inline ucc_tl_mlx5_reg_t * ucc_tl_mlx5_get_rcache_reg_data(ucc_rcache_region_t *region) { - return (ucc_tl_mlx5_reg_t *)((ptrdiff_t)region + - sizeof(ucc_rcache_region_t)); + return (ucc_tl_mlx5_reg_t *)PTR_OFFSET(region, sizeof(ucc_rcache_region_t)); } #define UCC_TL_MLX5_SUPPORTED_COLLS (UCC_COLL_TYPE_ALLTOALL) diff --git a/src/components/tl/mlx5/tl_mlx5_context.c b/src/components/tl/mlx5/tl_mlx5_context.c index 43bbf8215f..f9cabb99ba 100644 --- a/src/components/tl/mlx5/tl_mlx5_context.c +++ b/src/components/tl/mlx5/tl_mlx5_context.c @@ -33,8 +33,8 @@ UCC_CLASS_INIT_FUNC(ucc_tl_mlx5_context_t, status = ucc_mpool_init( &self->req_mp, 0, ucc_max(sizeof(ucc_tl_mlx5_task_t), sizeof(ucc_tl_mlx5_schedule_t)), 0, - UCC_CACHE_LINE_SIZE, 8, UINT_MAX, NULL, params->thread_mode, - "tl_mlx5_req_mp"); + UCC_CACHE_LINE_SIZE, 8, UINT_MAX, &ucc_coll_task_mpool_ops, + params->thread_mode, "tl_mlx5_req_mp"); if (UCC_OK != status) { tl_error(self->super.super.lib, "failed to initialize tl_mlx5_req mpool"); @@ -48,6 +48,9 @@ UCC_CLASS_INIT_FUNC(ucc_tl_mlx5_context_t, UCC_CLASS_CLEANUP_FUNC(ucc_tl_mlx5_context_t) { tl_debug(self->super.super.lib, "finalizing tl context: %p", self); + if (self->rcache) { + ucc_rcache_destroy(self->rcache); + } if (ucc_tl_mlx5_remove_shared_ctx_pd(self) != UCC_OK) { tl_error(self->super.super.lib, "failed to free ib ctx and pd"); @@ -245,7 +248,7 @@ ucc_status_t ucc_tl_mlx5_context_create_epilog(ucc_base_context_t *context) goto err; } - status = tl_mlx5_create_rcache(ctx); + status = tl_mlx5_rcache_create(ctx); if (UCC_OK != status) { tl_error(context->lib, "failed to create rcache"); goto err; diff --git a/src/components/tl/mlx5/tl_mlx5_rcache.c b/src/components/tl/mlx5/tl_mlx5_rcache.c index c35b774381..d72b8b7747 100644 --- a/src/components/tl/mlx5/tl_mlx5_rcache.c +++ b/src/components/tl/mlx5/tl_mlx5_rcache.c @@ -14,8 +14,9 @@ rcache_reg_mr(void *context, ucs_rcache_t *rcache, //NOLINT: rcache is unused ucc_tl_mlx5_context_t *ctx = (ucc_tl_mlx5_context_t *)context; void * addr = (void *)rregion->super.start; ucc_tl_mlx5_reg_t * mlx5_reg = ucc_tl_mlx5_get_rcache_reg_data(rregion); + size_t length = (size_t)(rregion->super.end + - rregion->super.start); int * change_flag = (int *)arg; - size_t length = (size_t)(rregion->super.end - rregion->super.start); mlx5_reg->region = rregion; *change_flag = 1; @@ -39,7 +40,7 @@ static void rcache_dereg_mr(void * context, //NOLINT: context is unused mlx5_reg->mr = NULL; } -ucc_status_t tl_mlx5_create_rcache(ucc_tl_mlx5_context_t *ctx) +ucc_status_t tl_mlx5_rcache_create(ucc_tl_mlx5_context_t *ctx) { static ucc_rcache_ops_t rcache_ucc_ops = {.mem_reg = rcache_reg_mr, .mem_dereg = rcache_dereg_mr, @@ -54,13 +55,14 @@ ucc_status_t tl_mlx5_create_rcache(ucc_tl_mlx5_context_t *ctx) rcache_params.ucm_event_priority = 1000; rcache_params.context = (void *)ctx; rcache_params.ops = &rcache_ucc_ops; - rcache_params.ucm_events = UCM_EVENT_VM_UNMAPPED | UCM_EVENT_MEM_TYPE_FREE; + rcache_params.ucm_events = UCM_EVENT_VM_UNMAPPED + | UCM_EVENT_MEM_TYPE_FREE; status = ucc_rcache_create(&rcache_params, "reg cache", &ctx->rcache); if (status != UCC_OK) { tl_error(ctx->super.super.lib, "Failed to create reg cache"); - return UCC_ERR_NO_MESSAGE; + return status; } return UCC_OK; }