From 9562dc50bac80009d5d64030725c71f748afc75d Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Sat, 13 Apr 2024 06:22:08 +0000 Subject: [PATCH] [v1.21.x] prov/efa: Use srx lock from domain directly Currently, efa_rdm_cq_readfrom accesses srx_lock via util_domain->srx->peer_srx.ep_fid.fid.context. However, srx is destroyed during ep close and may not be accessible if a cq read is called after ep close. This patch fixes this issue by accessing the srx lock via efa_domain directly, as the lock is created by efa domain and cannot be destroyed before domain close. The same issue applies to efa cntr. Signed-off-by: Shi Jin (cherry picked from commit 0bb72fd08737c2b02e1c62c831a4b4d24a7291f9) --- prov/efa/src/efa_cntr.c | 32 +++++++++++++------------------- prov/efa/src/efa_cntr.h | 15 --------------- prov/efa/src/rdm/efa_rdm_cq.c | 8 ++++---- 3 files changed, 17 insertions(+), 38 deletions(-) diff --git a/prov/efa/src/efa_cntr.c b/prov/efa/src/efa_cntr.c index 15ff242be..efff358fd 100644 --- a/prov/efa/src/efa_cntr.c +++ b/prov/efa/src/efa_cntr.c @@ -15,14 +15,13 @@ static int efa_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int time int numtry = 5; int tryid = 0; int waitim = 1; - struct util_srx_ctx *srx_ctx; + struct efa_domain *domain; - srx_ctx = efa_cntr_get_srx_ctx(cntr_fid); + cntr = container_of(cntr_fid, struct util_cntr, cntr_fid); + domain = container_of(cntr->domain, struct efa_domain, util_domain); - if (srx_ctx) - ofi_genlock_lock(srx_ctx->lock); + ofi_genlock_lock(&domain->srx_lock); - cntr = container_of(cntr_fid, struct util_cntr, cntr_fid); assert(cntr->wait); errcnt = ofi_atomic_get64(&cntr->err); start = (timeout >= 0) ? 
ofi_gettime_ms() : 0; @@ -55,52 +54,47 @@ static int efa_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int time } unlock: - if (srx_ctx) - ofi_genlock_unlock(srx_ctx->lock); + ofi_genlock_unlock(&domain->srx_lock); return ret; } static uint64_t efa_cntr_read(struct fid_cntr *cntr_fid) { - struct util_srx_ctx *srx_ctx; + struct efa_domain *domain; struct efa_cntr *efa_cntr; uint64_t ret; efa_cntr = container_of(cntr_fid, struct efa_cntr, util_cntr.cntr_fid); - srx_ctx = efa_cntr_get_srx_ctx(cntr_fid); + domain = container_of(efa_cntr->util_cntr.domain, struct efa_domain, util_domain); - if (srx_ctx) - ofi_genlock_lock(srx_ctx->lock); + ofi_genlock_lock(&domain->srx_lock); if (efa_cntr->shm_cntr) fi_cntr_read(efa_cntr->shm_cntr); ret = ofi_cntr_read(cntr_fid); - if (srx_ctx) - ofi_genlock_unlock(srx_ctx->lock); + ofi_genlock_unlock(&domain->srx_lock); return ret; } static uint64_t efa_cntr_readerr(struct fid_cntr *cntr_fid) { - struct util_srx_ctx *srx_ctx; + struct efa_domain *domain; struct efa_cntr *efa_cntr; uint64_t ret; efa_cntr = container_of(cntr_fid, struct efa_cntr, util_cntr.cntr_fid); - srx_ctx = efa_cntr_get_srx_ctx(cntr_fid); + domain = container_of(efa_cntr->util_cntr.domain, struct efa_domain, util_domain); - if (srx_ctx) - ofi_genlock_lock(srx_ctx->lock); + ofi_genlock_lock(&domain->srx_lock); if (efa_cntr->shm_cntr) fi_cntr_read(efa_cntr->shm_cntr); ret = ofi_cntr_readerr(cntr_fid); - if (srx_ctx) - ofi_genlock_unlock(srx_ctx->lock); + ofi_genlock_unlock(&domain->srx_lock); return ret; } diff --git a/prov/efa/src/efa_cntr.h b/prov/efa/src/efa_cntr.h index c89ead481..a4e1bb269 100644 --- a/prov/efa/src/efa_cntr.h +++ b/prov/efa/src/efa_cntr.h @@ -23,20 +23,5 @@ void efa_cntr_report_rx_completion(struct util_ep *ep, uint64_t flags); void efa_cntr_report_error(struct util_ep *ep, uint64_t flags); -static inline -void *efa_cntr_get_srx_ctx(struct fid_cntr *cntr_fid) -{ - struct efa_cntr *efa_cntr; - struct fid_peer_srx *srx = NULL; - - 
efa_cntr = container_of(cntr_fid, struct efa_cntr, util_cntr.cntr_fid); - - srx = efa_cntr->util_cntr.domain->srx; - if (!srx) - return NULL; - - return srx->ep_fid.fid.context; -} - #endif diff --git a/prov/efa/src/rdm/efa_rdm_cq.c b/prov/efa/src/rdm/efa_rdm_cq.c index a70552f3f..ec9ea88b8 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.c +++ b/prov/efa/src/rdm/efa_rdm_cq.c @@ -401,13 +401,13 @@ static ssize_t efa_rdm_cq_readfrom(struct fid_cq *cq_fid, void *buf, size_t coun { struct efa_rdm_cq *cq; ssize_t ret; - struct util_srx_ctx *srx_ctx; + struct efa_domain *domain; cq = container_of(cq_fid, struct efa_rdm_cq, util_cq.cq_fid.fid); - srx_ctx = cq->util_cq.domain->srx->ep_fid.fid.context; + domain = container_of(cq->util_cq.domain, struct efa_domain, util_domain); - ofi_genlock_lock(srx_ctx->lock); + ofi_genlock_lock(&domain->srx_lock); if (cq->shm_cq) { fi_cq_read(cq->shm_cq, NULL, 0); @@ -426,7 +426,7 @@ static ssize_t efa_rdm_cq_readfrom(struct fid_cq *cq_fid, void *buf, size_t coun ret = ofi_cq_readfrom(&cq->util_cq.cq_fid, buf, count, src_addr); out: - ofi_genlock_unlock(srx_ctx->lock); + ofi_genlock_unlock(&domain->srx_lock); return ret; }