Skip to content

Commit

Permalink
prov/efa: Fix CQ error reporting and error-detection logic
Browse files Browse the repository at this point in the history
This patch fixes a bug where efa_ep_progress_internal checked for
FI_EAVAIL instead of -FI_EAVAIL, and corrects the error-detection logic in
that function. EQ error entries are now written when the CQ cannot be
read or when a CQ error entry cannot be read.

Signed-off-by: Nikola Dancejic <[email protected]>
  • Loading branch information
dancejic committed Mar 3, 2020
1 parent fa08835 commit c8d350c
Showing 1 changed file with 24 additions and 17 deletions.
41 changes: 24 additions & 17 deletions prov/efa/src/efa_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,35 +425,43 @@ static struct fi_ops efa_ep_ops = {
.ops_open = fi_no_ops_open,
};

static void efa_ep_progress_internal(struct efa_cq *efa_cq, uint64_t flags)
static void efa_ep_progress_internal(struct efa_ep *ep, struct efa_cq *efa_cq)
{
struct util_cq *cq = &efa_cq->util_cq;
int i;
ssize_t ret;
struct util_cq *cq;
struct fi_cq_tagged_entry cq_entry[EFA_CQ_PROGRESS_ENTRIES];
struct fi_cq_tagged_entry *temp_cq_entry;
struct fi_cq_err_entry cq_err_entry;
fi_addr_t src_addr[EFA_CQ_PROGRESS_ENTRIES];
uint64_t flags;
int i;
ssize_t ret, err;

cq = &efa_cq->util_cq;
flags = ep->util_ep.caps;

VALGRIND_MAKE_MEM_DEFINED(&cq_entry, sizeof(cq_entry));

ret = efa_cq_readfrom(&cq->cq_fid, cq_entry, EFA_CQ_PROGRESS_ENTRIES,
(flags & FI_SOURCE) ? src_addr : NULL);
if (ret == -FI_EAGAIN)
goto err_cq;
return;

if (OFI_UNLIKELY(ret < 0)) {
ret = (ret == FI_EAVAIL) ?
efa_cq_readerr(&cq->cq_fid, &cq_err_entry, flags) :
-FI_EAVAIL;
if (OFI_UNLIKELY(ret < 0)) {
if (OFI_UNLIKELY(ret != -FI_EAGAIN))
EFA_WARN(FI_LOG_CQ,
"failed to read cq error: %ld\n", ret);
goto err_cq;
if (OFI_UNLIKELY(ret != -FI_EAVAIL)) {
EFA_WARN(FI_LOG_CQ, "no error available errno: %ld\n", ret);
efa_eq_write_error(&ep->util_ep, FI_EOTHER, ret);
return;
}

err = efa_cq_readerr(&cq->cq_fid, &cq_err_entry, flags);
if (OFI_UNLIKELY(err < 0)) {
EFA_WARN(FI_LOG_CQ, "unable to read error entry errno: %ld\n", err);
efa_eq_write_error(&ep->util_ep, FI_EOTHER, err);
return;
}

ofi_cq_write_error(cq, &cq_err_entry);
goto err_cq;
return;
}

temp_cq_entry = (struct fi_cq_tagged_entry *)cq_entry;
Expand All @@ -476,7 +484,6 @@ static void efa_ep_progress_internal(struct efa_cq *efa_cq, uint64_t flags)
temp_cq_entry = (struct fi_cq_tagged_entry *)
((uint8_t *)temp_cq_entry + efa_cq->entry_size);
}
err_cq:
return;
}

Expand All @@ -493,10 +500,10 @@ void efa_ep_progress(struct util_ep *ep)
fastlock_acquire(&ep->lock);

if (rcq)
efa_ep_progress_internal(rcq, ep->caps);
efa_ep_progress_internal(efa_ep, rcq);

if (scq && scq != rcq)
efa_ep_progress_internal(scq, ep->caps);
efa_ep_progress_internal(efa_ep, scq);

fastlock_release(&ep->lock);
}
Expand Down

0 comments on commit c8d350c

Please sign in to comment.