Skip to content

Commit

Permalink
prov/efa: reuse protection domain in same process address space
Browse files Browse the repository at this point in the history
We currently are limited in the number of protection domains for EFA,
which causes issues with newer versions of Open MPI that open a
Libfabric domain in both the OFI MTL and BTL for a single rank.

Although we plan to increase this limit in the future, we have to add a
workaround to reuse a protection domain if possible in the meantime. Add
a list of protection domains and reuse it during domain open.

Signed-off-by: Robert Wespetal <[email protected]>
  • Loading branch information
rwespetal committed May 5, 2020
1 parent 049f3cf commit 4f88228
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 11 deletions.
10 changes: 10 additions & 0 deletions prov/efa/src/efa.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,19 @@ struct efa_cq {

/*
 * Per-device context. efa_device_init() stores one of these per device
 * in ctx_list, as returned by efa_device_open().
 */
struct efa_context {
/* Verbs device context; passed to ibv_alloc_pd() when a domain needs a PD. */
struct ibv_context *ibv_ctx;
/*
 * Position of this context in ctx_list, assigned in efa_device_init().
 * The same index selects this device's entry in pd_list.
 */
int dev_idx;
/* NOTE(review): the fields below look like cached device limits/capabilities -- confirm against efa_device_open(). */
uint64_t max_mr_size;
uint16_t inline_buf_size;
uint16_t max_wr_rdma_sge;
uint32_t max_rdma_size;
uint32_t device_caps;
};

/*
 * Reference-counted protection domain shared by the domains opened on one
 * device. Entries live in pd_list (one per device) and are updated with
 * pd_list_lock held in the domain open/close paths.
 */
struct efa_pd {
/* Shared verbs PD; NULL until a domain allocates it and again after the last user deallocates it. */
struct ibv_pd *ibv_pd;
/*
 * Number of domains currently using ibv_pd. Incremented when a domain
 * allocates or reuses the PD, decremented on domain close; the PD is
 * deallocated when the closing domain is the only remaining user.
 */
int use_cnt;
};

struct efa_qp {
struct ibv_qp *ibv_qp;
struct ibv_qp_ex *ibv_qp_ex;
Expand Down Expand Up @@ -334,6 +340,10 @@ extern struct fi_ops_cm efa_ep_cm_ops;
extern struct fi_ops_msg efa_ep_msg_ops;
extern struct fi_ops_rma efa_ep_rma_ops;

extern fastlock_t pd_list_lock;
// This list has the same indices as ctx_list.
extern struct efa_pd *pd_list;

int efa_device_init(void);
void efa_device_free(void);

Expand Down
17 changes: 16 additions & 1 deletion prov/efa/src/efa_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ int efa_device_init(void)
int ctx_idx;
int ret;

fastlock_init(&pd_list_lock);

device_list = ibv_get_device_list(&dev_cnt);
if (dev_cnt <= 0)
return -ENODEV;
Expand All @@ -97,12 +99,19 @@ int efa_device_init(void)
goto err_free_dev_list;
}

pd_list = calloc(dev_cnt, sizeof(*pd_list));
if (!pd_list) {
ret = -ENOMEM;
goto err_free_ctx_list;
}

for (ctx_idx = 0; ctx_idx < dev_cnt; ctx_idx++) {
ctx_list[ctx_idx] = efa_device_open(device_list[ctx_idx]);
if (!ctx_list[ctx_idx]) {
ret = -ENODEV;
goto err_close_devs;
}
ctx_list[ctx_idx]->dev_idx = ctx_idx;
}

ibv_free_device_list(device_list);
Expand All @@ -112,6 +121,8 @@ int efa_device_init(void)
err_close_devs:
for (ctx_idx--; ctx_idx >= 0; ctx_idx--)
efa_device_close(ctx_list[ctx_idx]);
free(pd_list);
err_free_ctx_list:
free(ctx_list);
err_free_dev_list:
ibv_free_device_list(device_list);
Expand All @@ -123,11 +134,15 @@ void efa_device_free(void)
{
int i;

for (i = 0; i < dev_cnt; i++)
for (i = 0; i < dev_cnt; i++) {
assert(pd_list[i].use_cnt == 0);
efa_device_close(ctx_list[i]);
}

free(pd_list);
free(ctx_list);
dev_cnt = 0;
fastlock_destroy(&pd_list_lock);
}

struct efa_context **efa_device_get_context_list(int *num_ctx)
Expand Down
46 changes: 36 additions & 10 deletions prov/efa/src/efa_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,13 @@
#include "efa.h"
#include "rxr_cntr.h"

/* Serializes updates to pd_list entries (ibv_pd and use_cnt) during domain open and close. */
fastlock_t pd_list_lock;
/*
 * Per-device table of shared protection domains. Allocated (zeroed) in
 * efa_device_init() with one entry per device and freed in
 * efa_device_free(); indexed the same way as ctx_list.
 */
struct efa_pd *pd_list = NULL;

static int efa_domain_close(fid_t fid)
{
struct efa_domain *domain;
struct efa_pd *efa_pd;
int ret;

domain = container_of(fid, struct efa_domain,
Expand All @@ -49,12 +53,21 @@ static int efa_domain_close(fid_t fid)
ofi_mr_cache_cleanup(&domain->cache);

if (domain->ibv_pd) {
ret = -ibv_dealloc_pd(domain->ibv_pd);
if (ret) {
EFA_INFO_ERRNO(FI_LOG_DOMAIN, "ibv_dealloc_pd", ret);
return ret;
fastlock_acquire(&pd_list_lock);
efa_pd = &pd_list[domain->ctx->dev_idx];
if (efa_pd->use_cnt == 1) {
ret = -ibv_dealloc_pd(domain->ibv_pd);
if (ret) {
fastlock_release(&pd_list_lock);
EFA_INFO_ERRNO(FI_LOG_DOMAIN, "ibv_dealloc_pd",
ret);
return ret;
}
efa_pd->ibv_pd = NULL;
}
efa_pd->use_cnt--;
domain->ibv_pd = NULL;
fastlock_release(&pd_list_lock);
}

ret = ofi_domain_close(&domain->util_domain);
Expand Down Expand Up @@ -94,6 +107,25 @@ static int efa_open_device_by_name(struct efa_domain *domain, const char *name)
}
}

/*
* Check if a PD has already been allocated for this device and reuse
* it if this is the case.
*/
fastlock_acquire(&pd_list_lock);
if (pd_list[i].ibv_pd) {
domain->ibv_pd = pd_list[i].ibv_pd;
pd_list[i].use_cnt++;
} else {
domain->ibv_pd = ibv_alloc_pd(domain->ctx->ibv_ctx);
if (!domain->ibv_pd) {
ret = -errno;
} else {
pd_list[i].ibv_pd = domain->ibv_pd;
pd_list[i].use_cnt++;
}
}
fastlock_release(&pd_list_lock);

efa_device_free_context_list(ctx_list);
return ret;
}
Expand Down Expand Up @@ -172,12 +204,6 @@ int efa_domain_open(struct fid_fabric *fabric_fid, struct fi_info *info,
if (ret)
goto err_free_info;

domain->ibv_pd = ibv_alloc_pd(domain->ctx->ibv_ctx);
if (!domain->ibv_pd) {
ret = -errno;
goto err_free_info;
}

domain->util_domain.domain_fid.fid.ops = &efa_fid_ops;
domain->util_domain.domain_fid.ops = &efa_domain_ops;
/* RMA mr_modes are being removed, since EFA layer
Expand Down

0 comments on commit 4f88228

Please sign in to comment.