From efee2e6145e7d07af7385382c41633db34465f87 Mon Sep 17 00:00:00 2001
From: Valentin Petrov
Date: Fri, 14 Oct 2022 13:01:18 +0300
Subject: [PATCH] TL/MLX5: rcache

---
 src/components/tl/mlx5/Makefile.am       |   3 +-
 src/components/tl/mlx5/tl_mlx5.h         |  19 ++++
 src/components/tl/mlx5/tl_mlx5_context.c |   9 ++
 src/components/tl/mlx5/tl_mlx5_rcache.c  | 115 +++++++++++++++++++++++
 4 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 src/components/tl/mlx5/tl_mlx5_rcache.c

diff --git a/src/components/tl/mlx5/Makefile.am b/src/components/tl/mlx5/Makefile.am
index 8a55c5866e..b406feac41 100644
--- a/src/components/tl/mlx5/Makefile.am
+++ b/src/components/tl/mlx5/Makefile.am
@@ -16,7 +16,8 @@ sources = \
 	tl_mlx5_wqe.h \
 	tl_mlx5_wqe.c \
 	tl_mlx5_pd.h \
-	tl_mlx5_pd.c
+	tl_mlx5_pd.c \
+	tl_mlx5_rcache.c
 
 module_LTLIBRARIES = libucc_tl_mlx5.la
 libucc_tl_mlx5_la_SOURCES = $(sources)
diff --git a/src/components/tl/mlx5/tl_mlx5.h b/src/components/tl/mlx5/tl_mlx5.h
index e129f28626..a85e7d4372 100644
--- a/src/components/tl/mlx5/tl_mlx5.h
+++ b/src/components/tl/mlx5/tl_mlx5.h
@@ -97,6 +97,25 @@ typedef struct ucc_tl_mlx5_team {
 UCC_CLASS_DECLARE(ucc_tl_mlx5_team_t, ucc_base_context_t *,
                   const ucc_base_team_params_t *);
 
+/* Creates the registration cache of @ctx (result stored in ctx->rcache). */
+ucc_status_t tl_mlx5_create_rcache(ucc_tl_mlx5_context_t *ctx);
+
+/* Per-region private data kept right after each rcache region header. */
+typedef struct ucc_tl_mlx5_reg {
+    struct ibv_mr       *mr;     /* MR returned by ibv_reg_mr() */
+    ucc_rcache_region_t *region; /* back-pointer to the owning region */
+} ucc_tl_mlx5_reg_t;
+
+/*
+ * Returns the TL/MLX5 private data of @region, i.e. the bytes that
+ * immediately follow the generic region header.
+ */
+static inline ucc_tl_mlx5_reg_t *
+ucc_tl_mlx5_get_rcache_reg_data(ucc_rcache_region_t *region)
+{
+    return (ucc_tl_mlx5_reg_t *)(region + 1);
+}
+
 #define UCC_TL_MLX5_SUPPORTED_COLLS (UCC_COLL_TYPE_ALLTOALL)
 
 #define UCC_TL_MLX5_TEAM_LIB(_team) \
diff --git a/src/components/tl/mlx5/tl_mlx5_context.c b/src/components/tl/mlx5/tl_mlx5_context.c
index df33210ea8..41aa6efe56 100644
--- a/src/components/tl/mlx5/tl_mlx5_context.c
+++ b/src/components/tl/mlx5/tl_mlx5_context.c
@@ -245,8 +245,17 @@ ucc_status_t ucc_tl_mlx5_context_create_epilog(ucc_base_context_t *context)
         goto err;
     }
 
+    status = tl_mlx5_create_rcache(ctx);
+    if (UCC_OK != status) {
+        tl_error(context->lib, "failed to create rcache");
+        /* NOTE(review): the failure path just above jumps to 'err'; confirm
+         * that an 'out' label exists and releases the same resources. */
+        goto out;
+    }
+
     ucc_free(sbcast_data);
     ucc_topo_cleanup(topo);
+
     return UCC_OK;
 
 err:
diff --git a/src/components/tl/mlx5/tl_mlx5_rcache.c b/src/components/tl/mlx5/tl_mlx5_rcache.c
new file mode 100644
index 0000000000..30f8ad27a2
--- /dev/null
+++ b/src/components/tl/mlx5/tl_mlx5_rcache.c
@@ -0,0 +1,115 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#include "tl_mlx5.h"
+#include <stdio.h>
+
+/*
+ * Registration callback of the cache: registers the address range of
+ * @rregion with ibv_reg_mr() on the shared PD of the context and stores the
+ * resulting MR in the private data of the region.
+ *
+ * @context  The ucc_tl_mlx5_context_t set as rcache_params.context.
+ * @rcache   Unused.
+ * @arg      Optional pointer to an int that is set to 1 whenever this
+ *           callback runs; may be NULL.
+ * @rregion  Region to register.
+ * @flags    Unused.
+ *
+ * Returns UCS_OK on success, UCS_ERR_NO_MESSAGE if ibv_reg_mr() fails.
+ */
+static ucs_status_t rcache_reg_mr(void *context, ucc_rcache_t *rcache,
+                                  void *arg, ucc_rcache_region_t *rregion,
+                                  uint16_t flags)
+{
+    ucc_tl_mlx5_context_t *ctx = (ucc_tl_mlx5_context_t *)context;
+    void *addr = (void *)rregion->super.start;
+    size_t length = (size_t)(rregion->super.end - rregion->super.start);
+    ucc_tl_mlx5_reg_t *mlx5_reg = ucc_tl_mlx5_get_rcache_reg_data(rregion);
+    int *change_flag = (int *)arg;
+
+    mlx5_reg->region = rregion;
+    /* The caller may pass no flag at all; only report when one was given. */
+    if (change_flag != NULL) {
+        *change_flag = 1;
+    }
+    mlx5_reg->mr = ibv_reg_mr(ctx->shared_pd, addr, length,
+                              IBV_ACCESS_LOCAL_WRITE |
+                                  IBV_ACCESS_REMOTE_WRITE);
+    if (!mlx5_reg->mr) {
+        tl_error(ctx->super.super.lib, "failed to register memory");
+        return UCS_ERR_NO_MESSAGE;
+    }
+    return UCS_OK;
+}
+
+/*
+ * Deregistration callback of the cache: releases the MR stored in the
+ * private data of @rregion. @context and @rcache are unused.
+ */
+static void rcache_dereg_mr(void *context, ucc_rcache_t *rcache,
+                            ucc_rcache_region_t *rregion)
+{
+    ucc_tl_mlx5_reg_t *mlx5_reg = ucc_tl_mlx5_get_rcache_reg_data(rregion);
+
+    ucc_assert(mlx5_reg->region == rregion);
+    ibv_dereg_mr(mlx5_reg->mr);
+    mlx5_reg->mr = NULL;
+}
+
+/*
+ * Dump callback of the cache: writes a short description of @rregion (the
+ * address of its MR) into @buf, using at most @max bytes. @context and
+ * @rcache are unused.
+ */
+static void rcache_dump_region(void *context, ucc_rcache_t *rcache,
+                               ucc_rcache_region_t *rregion, char *buf,
+                               size_t max)
+{
+    ucc_tl_mlx5_reg_t *mlx5_reg = ucc_tl_mlx5_get_rcache_reg_data(rregion);
+
+    snprintf(buf, max, "mr %p", (void *)mlx5_reg->mr);
+}
+
+/*
+ * Creates the memory registration cache of @ctx and stores it in
+ * ctx->rcache.
+ *
+ * Returns UCC_OK on success, otherwise the status reported by
+ * ucc_rcache_create().
+ */
+ucc_status_t tl_mlx5_create_rcache(ucc_tl_mlx5_context_t *ctx)
+{
+    static ucc_rcache_ops_t rcache_ucc_ops = {
+        .mem_reg     = rcache_reg_mr,
+        .mem_dereg   = rcache_dereg_mr,
+        .dump_region = rcache_dump_region
+    };
+    /*
+     * Zero-initialized so that members which are not assigned below never
+     * hold indeterminate values.
+     * NOTE(review): if the UCX version in use provides a helper that fills
+     * in library defaults for this structure, prefer it - confirm.
+     */
+    ucc_rcache_params_t rcache_params = {0};
+    ucc_status_t        status;
+
+    rcache_params.region_struct_size =
+        sizeof(ucc_rcache_region_t) + sizeof(ucc_tl_mlx5_reg_t);
+    rcache_params.alignment          = UCS_PGT_ADDR_ALIGN;
+    rcache_params.max_alignment      = getpagesize();
+    rcache_params.ucm_events         = UCM_EVENT_VM_UNMAPPED |
+                                       UCM_EVENT_MEM_TYPE_FREE;
+    rcache_params.ucm_event_priority = 1000;
+    rcache_params.context            = (void *)ctx;
+    rcache_params.ops                = &rcache_ucc_ops;
+
+    status = ucc_rcache_create(&rcache_params, "reg cache", &ctx->rcache);
+    if (status != UCC_OK) {
+        tl_error(ctx->super.super.lib, "Failed to create reg cache");
+    }
+    return status;
+}