Skip to content

Commit

Permalink
TL/MLX5: fix
Browse files Browse the repository at this point in the history
  • Loading branch information
samnordmann committed Jun 19, 2023
1 parent 46bb6ca commit 5445058
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/alltoall/alltoall.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ typedef struct ucc_tl_mlx5_alltoall_node {
ucc_sbgp_t *sbgp;
void *storage;
ucc_tl_mlx5_alltoall_op_t ops[MAX_OUTSTANDING_OPS];
struct mlx5dv_mkey team_recv_mkey;
struct mlx5dv_mkey *team_recv_mkey;
void *umr_entries_buf;
struct ibv_mr *umr_entries_mr;
} ucc_tl_mlx5_alltoall_node_t;
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/alltoall/alltoall_mkeys.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ static ucc_status_t populate_strided_mkey(ucc_tl_mlx5_alltoall_t *a2a,
}

static ucc_status_t create_and_populate_recv_team_mkey(ucc_tl_mlx5_team_t *team,
ucc_base_lib_t * lib)
ucc_base_lib_t *lib)
{
int team_size = UCC_TL_TEAM_SIZE(team);
ucc_tl_mlx5_alltoall_t *a2a = team->a2a;
Expand Down
4 changes: 2 additions & 2 deletions src/components/tl/mlx5/mcast/tl_mlx5_mcast_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
#include "tl_mlx5_mcast.h"
#include "utils/arch/cpu.h"
#include <ucs/sys/string.h>
#include "src/core/ucc_service_coll.h"
#include "core/ucc_service_coll.h"
#include "tl_mlx5.h"

/*
 * Stub initializer for the mcast context: the body uses neither argument
 * (presumably why both carry NOLINT markers) and only returns a status code.
 *
 * NOTE(review): this text is a rendered diff without +/- markers. The hunk
 * summary for this file reports 2 additions and 2 deletions, so the first
 * return below (UCC_ERR_NOT_SUPPORTED) is presumably the removed line and
 * the second (UCC_OK) the added one -- confirm against the repository.
 * Read literally as C, the second return would be unreachable.
 */
ucc_status_t ucc_tl_mlx5_mcast_context_init(ucc_tl_mlx5_mcast_context_t *context, /* NOLINT */
ucc_tl_mlx5_mcast_ctx_params_t *mcast_ctx_conf /* NOLINT */)
{
return UCC_ERR_NOT_SUPPORTED;
return UCC_OK;
}
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/mcast/tl_mlx5_mcast_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ ucc_status_t ucc_tl_mlx5_mcast_team_init(ucc_base_context_t *base_cont
const ucc_base_team_params_t *params, /* NOLINT */
mcast_coll_comm_init_spec_t *mcast_conf /* NOLINT */)
{
return UCC_ERR_NOT_SUPPORTED;
return UCC_OK;
}

13 changes: 7 additions & 6 deletions src/components/tl/mlx5/tl_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ typedef struct ucc_tl_mlx5_ib_qp_conf {

typedef struct ucc_tl_mlx5_lib_config {
ucc_tl_lib_config_t super;
int asr_barrier;
int block_size;
int num_dci_qps;
int dc_threshold;
Expand Down Expand Up @@ -78,16 +79,16 @@ UCC_CLASS_DECLARE(ucc_tl_mlx5_lib_t, const ucc_base_lib_params_t *,
/*
 * Definition of ucc_tl_mlx5_context_t as it appears in this diff hunk.
 *
 * NOTE(review): this text is a rendered diff without +/- markers. The
 * members shared_ctx, shared_pd and rcache each appear twice; the two copies
 * differ only in the spacing around '*', so the first copy of each is presumably the
 * removed line and the second the added line -- confirm against the repository.
 * Read literally as C, the repeated names would be duplicate member
 * declarations.
 */
typedef struct ucc_tl_mlx5_context {
ucc_tl_context_t super;
ucc_tl_mlx5_context_config_t cfg;
struct ibv_context * shared_ctx;
struct ibv_pd * shared_pd;
ucc_rcache_t * rcache;
struct ibv_context *shared_ctx;
struct ibv_pd *shared_pd;
ucc_rcache_t *rcache;
int is_imported;
int ib_port;
ucc_mpool_t req_mp;
ucc_tl_mlx5_mcast_context_t mcast;
} ucc_tl_mlx5_context_t;
UCC_CLASS_DECLARE(ucc_tl_mlx5_context_t, const ucc_base_context_params_t *,
const ucc_base_config_t *);
UCC_CLASS_DECLARE(ucc_tl_mlx5_context_t, const ucc_base_context_params_t*,
const ucc_base_config_t*);

typedef struct ucc_tl_mlx5_task ucc_tl_mlx5_task_t;
typedef struct ucc_tl_mlx5_schedule ucc_tl_mlx5_schedule_t;
Expand Down Expand Up @@ -116,7 +117,7 @@ typedef struct ucc_tl_mlx5_team {
ucc_mpool_t dm_pool;
struct ibv_dm *dm_ptr;
struct ibv_mr *dm_mr;
ucc_tl_mlx5_a2a_t *a2a;
ucc_tl_mlx5_alltoall_t *a2a;
ucc_topo_t *topo;
ucc_ep_map_t ctx_map;
ucc_tl_mlx5_mcast_team_t *mcast;
Expand Down
15 changes: 8 additions & 7 deletions src/components/tl/mlx5/tl_mlx5_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,21 @@ UCC_CLASS_INIT_FUNC(ucc_tl_mlx5_team_t, ucc_base_context_t *tl_context,
return status;
}

if (ucc_topo_get_sbgp(self->topo, UCC_SBGP_NODE)->group_rank == 0) {
status = ucc_tl_mlx5_dm_init(self);
if (UCC_OK != status) {
tl_error(UCC_TL_TEAM_LIB(self), "failed to init device memory");
}
}

self->mcast = NULL;
status = ucc_tl_mlx5_mcast_team_init(tl_context, &(self->mcast), &(ctx->mcast), params,
&(UCC_TL_MLX5_TEAM_LIB(self)->cfg.mcast_conf));
if (ucc_unlikely(UCC_OK != status)) {
tl_error(UCC_TL_TEAM_LIB(self), "failed to init team for mcast");
return status;
}

if (ucc_topo_get_sbgp(self->topo, UCC_SBGP_NODE)->group_rank == 0) {
status = ucc_tl_mlx5_dm_init(self);
if (UCC_OK != status) {
tl_error(UCC_TL_TEAM_LIB(self), "failed to init device memory");
}
}

self->status[0] = status;
self->state = TL_MLX5_TEAM_STATE_INIT;

Expand Down

0 comments on commit 5445058

Please sign in to comment.