Skip to content

Commit

Permalink
Merge pull request ofiwg#5916 from rwespetal/efa-reuse-pd
Browse files Browse the repository at this point in the history
prov/efa: reuse protection domain in same process address space
  • Loading branch information
rajachan authored May 6, 2020
2 parents 461440d + 4f88228 commit be535bd
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 11 deletions.
10 changes: 10 additions & 0 deletions prov/efa/src/efa.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,19 @@ struct efa_cq {

/*
 * Per-device context. efa_device_init() stores one of these per device in
 * ctx_list (the value returned by efa_device_open()).
 */
struct efa_context {
/* Verbs context of the device; passed to ibv_alloc_pd() when a PD is allocated. */
struct ibv_context *ibv_ctx;
/*
 * Position of this context in ctx_list, assigned by efa_device_init().
 * Also used as the index of the device's entry in pd_list.
 */
int dev_idx;
/*
 * NOTE(review): the fields below look like device limits/capabilities;
 * where they are populated is not visible here -- confirm.
 */
uint64_t max_mr_size;
uint16_t inline_buf_size;
uint16_t max_wr_rdma_sge;
uint32_t max_rdma_size;
uint32_t device_caps;
};

/*
 * Protection domain shared by every domain opened on one device.
 * Entries live in pd_list and are read and modified under pd_list_lock.
 */
struct efa_pd {
/* The shared PD, or NULL while no PD is allocated for the device. */
struct ibv_pd *ibv_pd;
/*
 * Number of domains currently using ibv_pd. The PD is deallocated when
 * the last user closes its domain.
 */
int use_cnt;
};

struct efa_qp {
struct ibv_qp *ibv_qp;
struct ibv_qp_ex *ibv_qp_ex;
Expand Down Expand Up @@ -334,6 +340,10 @@ extern struct fi_ops_cm efa_ep_cm_ops;
extern struct fi_ops_msg efa_ep_msg_ops;
extern struct fi_ops_rma efa_ep_rma_ops;

/* Held while pd_list entries are looked up, allocated, or released. */
extern fastlock_t pd_list_lock;
/*
 * Per-device protection domains. This list has the same indices as
 * ctx_list: a device's entry is found with its context's dev_idx.
 */
extern struct efa_pd *pd_list;

/*
 * Open every device returned by ibv_get_device_list() and allocate pd_list
 * with one entry per device. Returns -ENODEV when no device is found or a
 * device cannot be opened, and -ENOMEM when pd_list cannot be allocated.
 */
int efa_device_init(void);
/*
 * Close the devices opened by efa_device_init() and free ctx_list and
 * pd_list. Asserts that no protection domain is still in use.
 */
void efa_device_free(void);

Expand Down
17 changes: 16 additions & 1 deletion prov/efa/src/efa_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ int efa_device_init(void)
int ctx_idx;
int ret;

fastlock_init(&pd_list_lock);

device_list = ibv_get_device_list(&dev_cnt);
if (dev_cnt <= 0)
return -ENODEV;
Expand All @@ -97,12 +99,19 @@ int efa_device_init(void)
goto err_free_dev_list;
}

pd_list = calloc(dev_cnt, sizeof(*pd_list));
if (!pd_list) {
ret = -ENOMEM;
goto err_free_ctx_list;
}

for (ctx_idx = 0; ctx_idx < dev_cnt; ctx_idx++) {
ctx_list[ctx_idx] = efa_device_open(device_list[ctx_idx]);
if (!ctx_list[ctx_idx]) {
ret = -ENODEV;
goto err_close_devs;
}
ctx_list[ctx_idx]->dev_idx = ctx_idx;
}

ibv_free_device_list(device_list);
Expand All @@ -112,6 +121,8 @@ int efa_device_init(void)
err_close_devs:
for (ctx_idx--; ctx_idx >= 0; ctx_idx--)
efa_device_close(ctx_list[ctx_idx]);
free(pd_list);
err_free_ctx_list:
free(ctx_list);
err_free_dev_list:
ibv_free_device_list(device_list);
Expand All @@ -123,11 +134,15 @@ void efa_device_free(void)
{
	int i;

	/*
	 * Tear down everything efa_device_init() set up: close each device,
	 * then release the per-device arrays and the PD list lock.
	 *
	 * A single pass over the devices is sufficient; the duplicated loop
	 * header that preceded this one only re-ran the same index range.
	 */
	for (i = 0; i < dev_cnt; i++) {
		/* Every domain must have dropped its PD reference by now. */
		assert(pd_list[i].use_cnt == 0);
		efa_device_close(ctx_list[i]);
	}

	free(pd_list);
	/* pd_list is a global; clear it so later users cannot touch freed memory. */
	pd_list = NULL;
	free(ctx_list);
	dev_cnt = 0;
	fastlock_destroy(&pd_list_lock);
}

struct efa_context **efa_device_get_context_list(int *num_ctx)
Expand Down
46 changes: 36 additions & 10 deletions prov/efa/src/efa_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,13 @@
#include "efa.h"
#include "rxr_cntr.h"

/*
 * Serializes access to pd_list. Initialized in efa_device_init() and
 * destroyed in efa_device_free().
 */
fastlock_t pd_list_lock;
/* One entry per device, allocated in efa_device_init(); NULL until then. */
struct efa_pd *pd_list = NULL;

static int efa_domain_close(fid_t fid)
{
struct efa_domain *domain;
struct efa_pd *efa_pd;
int ret;

domain = container_of(fid, struct efa_domain,
Expand All @@ -49,12 +53,21 @@ static int efa_domain_close(fid_t fid)
ofi_mr_cache_cleanup(&domain->cache);

if (domain->ibv_pd) {
ret = -ibv_dealloc_pd(domain->ibv_pd);
if (ret) {
EFA_INFO_ERRNO(FI_LOG_DOMAIN, "ibv_dealloc_pd", ret);
return ret;
fastlock_acquire(&pd_list_lock);
efa_pd = &pd_list[domain->ctx->dev_idx];
if (efa_pd->use_cnt == 1) {
ret = -ibv_dealloc_pd(domain->ibv_pd);
if (ret) {
fastlock_release(&pd_list_lock);
EFA_INFO_ERRNO(FI_LOG_DOMAIN, "ibv_dealloc_pd",
ret);
return ret;
}
efa_pd->ibv_pd = NULL;
}
efa_pd->use_cnt--;
domain->ibv_pd = NULL;
fastlock_release(&pd_list_lock);
}

ret = ofi_domain_close(&domain->util_domain);
Expand Down Expand Up @@ -94,6 +107,25 @@ static int efa_open_device_by_name(struct efa_domain *domain, const char *name)
}
}

/*
* Check if a PD has already been allocated for this device and reuse
* it if this is the case.
*/
fastlock_acquire(&pd_list_lock);
if (pd_list[i].ibv_pd) {
domain->ibv_pd = pd_list[i].ibv_pd;
pd_list[i].use_cnt++;
} else {
domain->ibv_pd = ibv_alloc_pd(domain->ctx->ibv_ctx);
if (!domain->ibv_pd) {
ret = -errno;
} else {
pd_list[i].ibv_pd = domain->ibv_pd;
pd_list[i].use_cnt++;
}
}
fastlock_release(&pd_list_lock);

efa_device_free_context_list(ctx_list);
return ret;
}
Expand Down Expand Up @@ -172,12 +204,6 @@ int efa_domain_open(struct fid_fabric *fabric_fid, struct fi_info *info,
if (ret)
goto err_free_info;

domain->ibv_pd = ibv_alloc_pd(domain->ctx->ibv_ctx);
if (!domain->ibv_pd) {
ret = -errno;
goto err_free_info;
}

domain->util_domain.domain_fid.fid.ops = &efa_fid_ops;
domain->util_domain.domain_fid.ops = &efa_domain_ops;
/* RMA mr_modes are being removed, since EFA layer
Expand Down

0 comments on commit be535bd

Please sign in to comment.