Skip to content

Commit

Permalink
prov/lnx: Fix various issues with initial commit
Browse files Browse the repository at this point in the history
1. Must pass an ep fid to fi_srx_context() to comply with API semantics
2. Don't overwrite attribute flags passed in fi_av_open() by application
3. Enforce the use of FI_AV_TABLE type as the other types have been
   deprecated
4. Check for NULL entry before calling ofi_mr_cache_delete()

Signed-off-by: Amir Shehata <[email protected]>
  • Loading branch information
amirshehataornl authored and aingerson committed Nov 21, 2024
1 parent afbff48 commit 52e3437
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 22 deletions.
1 change: 1 addition & 0 deletions prov/lnx/include/lnx.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ struct local_prov_ep {
struct fid_ep **lpe_txc;
struct fid_ep **lpe_rxc;
struct fid_av *lpe_av;
struct fid_ep *lpe_srx_ep;
struct lnx_peer_cq lpe_cq;
struct fi_info *lpe_fi_info;
struct fid_peer_srx lpe_srx;
Expand Down
6 changes: 1 addition & 5 deletions prov/lnx/src/lnx_av.c
Original file line number Diff line number Diff line change
Expand Up @@ -630,11 +630,7 @@ int lnx_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
if (attr->name)
return -FI_ENOSYS;

if (attr->type != FI_AV_UNSPEC &&
attr->type != FI_AV_TABLE)
return -FI_ENOSYS;

if (attr->type == FI_AV_UNSPEC)
if (attr->type != FI_AV_TABLE)
attr->type = FI_AV_TABLE;

peer_tbl = calloc(sizeof(*peer_tbl), 1);
Expand Down
5 changes: 3 additions & 2 deletions prov/lnx/src/lnx_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,12 @@ static int lnx_cq_open_core_prov(struct lnx_cq *cq, struct fi_cq_attr *attr)
int rc;
struct local_prov_ep *ep;
struct local_prov *entry;
struct fi_cq_attr peer_attr = {0};
struct dlist_entry *prov_table =
&cq->lnx_domain->ld_fabric->local_prov_table;

/* tell the core providers to import my CQ */
attr->flags |= FI_PEER;
peer_attr.flags |= FI_PEER;

/* create all the core provider completion queues */
dlist_foreach_container(prov_table, struct local_prov,
Expand All @@ -181,7 +182,7 @@ static int lnx_cq_open_core_prov(struct lnx_cq *cq, struct fi_cq_attr *attr)
cq_ctxt.cq = &ep->lpe_cq.lpc_cq;

/* pass my CQ into the open and get back the core's cq */
rc = fi_cq_open(ep->lpe_domain, attr, &core_cq, &cq_ctxt);
rc = fi_cq_open(ep->lpe_domain, &peer_attr, &core_cq, &cq_ctxt);
if (rc)
return rc;

Expand Down
8 changes: 7 additions & 1 deletion prov/lnx/src/lnx_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ static int lnx_cleanup_domains(struct local_prov *prov)
struct local_prov_ep, ep, entry) {
if (!ep->lpe_domain)
continue;

rc = fi_close(&ep->lpe_srx_ep->fid);
if (rc)
frc = rc;

rc = fi_close(&ep->lpe_domain->fid);
if (rc)
frc = rc;
Expand Down Expand Up @@ -463,7 +468,8 @@ static int lnx_open_core_domains(struct local_prov *prov,
if (!rc && srq_support) {
ep->lpe_srx.owner_ops = &lnx_srx_ops;
peer_srx.srx = &ep->lpe_srx;
rc = fi_srx_context(ep->lpe_domain, &attr, NULL, &peer_srx);
rc = fi_srx_context(ep->lpe_domain, &attr,
&ep->lpe_srx_ep, &peer_srx);
}

/* if one of the constituent endpoints doesn't support shared
Expand Down
2 changes: 1 addition & 1 deletion prov/lnx/src/lnx_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ static int lnx_enable_core_eps(struct lnx_ep *lep)
struct local_prov_ep, ep, entry) {
if (srq_support) {
rc = fi_ep_bind(ep->lpe_ep,
&ep->lpe_srx.ep_fid.fid, 0);
&ep->lpe_srx_ep->fid, 0);
if (rc) {
FI_INFO(&lnx_prov, FI_LOG_CORE,
"%s doesn't support SRX (%d)\n",
Expand Down
5 changes: 3 additions & 2 deletions prov/lnx/src/lnx_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ struct fi_domain_attr lnx_domain_attr = {
.control_progress = FI_PROGRESS_AUTO,
.data_progress = FI_PROGRESS_AUTO,
.resource_mgmt = FI_RM_ENABLED,
.av_type = FI_AV_UNSPEC,
.av_type = FI_AV_TABLE,
.mr_mode = FI_MR_RAW,
.mr_key_size = SIZE_MAX,
.cq_data_size = SIZE_MAX,
Expand Down Expand Up @@ -410,6 +410,7 @@ static int lnx_form_info(struct fi_info *fi, struct fi_info **out)
rc = -FI_ENOMEM;
goto fail;
}
r->domain_attr->av_type = FI_AV_TABLE;
meta->lnx_rep = r;
meta->lnx_link = fi;
if (r->tx_attr)
Expand Down Expand Up @@ -531,7 +532,7 @@ int lnx_getinfo_helper(uint32_t version, char *prov, struct fi_info *lnx_hints)
lnx_hints->domain_attr->mr_mode |= (FI_MR_VIRT_ADDR | FI_MR_HMEM
| FI_MR_PROV_KEY);
}
rc = fi_getinfo(version, NULL, NULL, OFI_GETINFO_INTERNAL,
rc = fi_getinfo(version, NULL, NULL, OFI_GETINFO_HIDDEN,
lnx_hints, &core_info);

lnx_hints->fabric_attr->prov_name = orig_prov_name;
Expand Down
33 changes: 22 additions & 11 deletions prov/lnx/src/lnx_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,8 @@ ssize_t lnx_tsend(struct fid_ep *ep, const void *buf, size_t len, void *desc,

rc = fi_tsend(cep->lpe_ep, buf, len, mem_desc, core_addr, tag, context);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);

return rc;
}
Expand Down Expand Up @@ -607,7 +608,8 @@ ssize_t lnx_tsendv(struct fid_ep *ep, const struct iovec *iov, void **desc,

rc = fi_tsendv(cep->lpe_ep, iov, &mem_desc, count, core_addr, tag, context);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);

return rc;
}
Expand Down Expand Up @@ -648,7 +650,8 @@ ssize_t lnx_tsendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg,

rc = fi_tsendmsg(cep->lpe_ep, &core_msg, flags);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);

return rc;
}
Expand Down Expand Up @@ -681,7 +684,8 @@ ssize_t lnx_tinject(struct fid_ep *ep, const void *buf, size_t len,

rc = fi_tinject(cep->lpe_ep, buf, len, core_addr, tag);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);

return rc;
}
Expand Down Expand Up @@ -717,7 +721,8 @@ ssize_t lnx_tsenddata(struct fid_ep *ep, const void *buf, size_t len, void *desc
rc = fi_tsenddata(cep->lpe_ep, buf, len, mem_desc,
data, core_addr, tag, context);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);

return rc;
}
Expand Down Expand Up @@ -750,7 +755,8 @@ ssize_t lnx_tinjectdata(struct fid_ep *ep, const void *buf, size_t len,

rc = fi_tinjectdata(cep->lpe_ep, buf, len, data, core_addr, tag);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);

return rc;
}
Expand Down Expand Up @@ -792,7 +798,8 @@ lnx_rma_read(struct fid_ep *ep, void *buf, size_t len, void *desc,
rc = fi_read(core_ep, buf, len, mem_desc,
core_addr, addr, key, context);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
out:
return rc;
}
Expand Down Expand Up @@ -834,7 +841,8 @@ lnx_rma_write(struct fid_ep *ep, const void *buf, size_t len, void *desc,
rc = fi_write(core_ep, buf, len, mem_desc,
core_addr, addr, key, context);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
out:
return rc;
}
Expand Down Expand Up @@ -878,7 +886,8 @@ lnx_atomic_write(struct fid_ep *ep,
rc = fi_atomic(core_ep, buf, count, mem_desc,
core_addr, addr, key, datatype, op, context);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
out:
return rc;
}
Expand Down Expand Up @@ -924,7 +933,8 @@ lnx_atomic_readwrite(struct fid_ep *ep,
result, mem_desc, core_addr, addr, key,
datatype, op, context);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
out:
return rc;
}
Expand Down Expand Up @@ -971,7 +981,8 @@ lnx_atomic_compwrite(struct fid_ep *ep,
compare, compare_desc, result, mem_desc,
core_addr, addr, key, datatype, op, context);

ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);
if (mre)
ofi_mr_cache_delete(&lep->le_domain->ld_mr_cache, mre);

out:
return rc;
Expand Down

0 comments on commit 52e3437

Please sign in to comment.