Skip to content

Commit

Permalink
prov/efa: Propagate errnos from core functions untouched
Browse files: browse the repository at this point in the history
This is a best-effort attempt at propagating core Libfabric error codes
upwards wherever possible.

Signed-off-by: Darryl Abbate <[email protected]>
  • Loading branch information
darrylabbate authored and j-xiong committed Mar 21, 2024
1 parent 87a1006 commit b266f14
Show file tree
Hide file tree
Showing 11 changed files with 23 additions and 24 deletions.
2 changes: 1 addition & 1 deletion prov/efa/src/dgram/efa_dgram_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ static void efa_dgram_ep_progress_internal(struct efa_dgram_ep *ep, struct efa_d
if (OFI_UNLIKELY(ret < 0)) {
if (OFI_UNLIKELY(ret != -FI_EAVAIL)) {
EFA_WARN(FI_LOG_CQ, "no error available errno: %ld\n", ret);
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_DGRAM_CQ_READ);
efa_base_ep_write_eq_error(&ep->base_ep, -ret, FI_EFA_ERR_DGRAM_CQ_READ);
return;
}

Expand Down
5 changes: 2 additions & 3 deletions prov/efa/src/rdm/efa_rdm_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void efa_rdm_cq_proc_ibv_recv_rdma_with_imm_completion(
EFA_WARN(FI_LOG_CQ,
"Unable to write a cq entry for remote for RECV_RDMA operation: %s\n",
fi_strerror(-ret));
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_WRITE_SHM_CQ_ENTRY);
efa_base_ep_write_eq_error(&ep->base_ep, -ret, FI_EFA_ERR_WRITE_SHM_CQ_ENTRY);
}

efa_cntr_report_rx_completion(&ep->base_ep.util_ep, flags);
Expand Down Expand Up @@ -507,8 +507,7 @@ int efa_rdm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,

ret = efa_cq_ibv_cq_ex_open(attr, efa_domain->device->ibv_ctx, &cq->ibv_cq.ibv_cq_ex, &cq->ibv_cq.ibv_cq_ex_type);
if (ret) {
EFA_WARN(FI_LOG_CQ, "Unable to create extended CQ: %d\n", ret);
ret = -FI_EINVAL;
EFA_WARN(FI_LOG_CQ, "Unable to create extended CQ: %s\n", fi_strerror(ret));
goto close_util_cq;
}

Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/rdm/efa_rdm_ep_progress.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ void efa_rdm_ep_progress_internal(struct efa_rdm_ep *ep)
EFA_WARN(FI_LOG_EP_CTRL,
"Failed to post HANDSHAKE to peer %ld: %s\n",
peer->efa_fiaddr, fi_strerror(-ret));
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PEER_HANDSHAKE);
efa_base_ep_write_eq_error(&ep->base_ep, -ret, FI_EFA_ERR_PEER_HANDSHAKE);
return;
}

Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/rdm/efa_rdm_ep_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ void efa_rdm_ep_post_handshake_or_queue(struct efa_rdm_ep *ep, struct efa_rdm_pe
EFA_WARN(FI_LOG_EP_CTRL,
"Failed to post HANDSHAKE to peer %ld: %s\n",
peer->efa_fiaddr, fi_strerror(-err));
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PEER_HANDSHAKE);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_PEER_HANDSHAKE);
return;
}

Expand Down
4 changes: 2 additions & 2 deletions prov/efa/src/rdm/efa_rdm_pke_cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ fi_addr_t efa_rdm_pke_insert_addr(struct efa_rdm_pke *pkt_entry, void *raw_addr)
ret = efa_av_insert_one(ep->base_ep.av, (struct efa_ep_addr *)raw_addr,
&rdm_addr, 0, NULL, false);
if (OFI_UNLIKELY(ret != 0)) {
efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_AV_INSERT);
efa_base_ep_write_eq_error(&ep->base_ep, ret, FI_EFA_ERR_AV_INSERT);
return -1;
}

Expand Down Expand Up @@ -910,7 +910,7 @@ void efa_rdm_pke_handle_recv_completion(struct efa_rdm_pke *pkt_entry)
"Peer %d is requesting feature %d, which this EP does not support.\n",
(int)pkt_entry->addr, base_hdr->type);

assert(0 && "invalid REQ packe type");
assert(0 && "invalid REQ packet type");
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_INVALID_PKT_TYPE);
efa_rdm_pke_release_rx(pkt_entry);
return;
Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/rdm/efa_rdm_pke_nonreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ void efa_rdm_pke_handle_atomrsp_recv(struct efa_rdm_pke *pkt_entry)
txe->atomic_ex.resp_iov_count, atomrsp_pkt->data,
atomrsp_hdr->seg_length);
if (OFI_UNLIKELY(ret < 0)) {
efa_base_ep_write_eq_error(&pkt_entry->ep->base_ep, FI_EMSGSIZE, EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH);
efa_base_ep_write_eq_error(&pkt_entry->ep->base_ep, -ret, EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH);
return;
}

Expand Down
6 changes: 3 additions & 3 deletions prov/efa/src/rdm/efa_rdm_pke_rta.c
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ int efa_rdm_pke_proc_compare_rta(struct efa_rdm_pke *pkt_entry)
dt = rxe->atomic_hdr.datatype;
dtsize = ofi_datatype_size(rxe->atomic_hdr.datatype);
if (OFI_UNLIKELY(!dtsize)) {
efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_INVALID_DATATYPE);
efa_base_ep_write_eq_error(&ep->base_ep, errno, FI_EFA_ERR_INVALID_DATATYPE);
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
return -errno;
Expand Down Expand Up @@ -551,7 +551,7 @@ int efa_rdm_pke_proc_compare_rta(struct efa_rdm_pke *pkt_entry)

err = efa_rdm_ope_post_send_or_queue(rxe, EFA_RDM_ATOMRSP_PKT);
if (OFI_UNLIKELY(err)) {
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PKT_POST);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_PKT_POST);
ofi_buf_free(rxe->atomrsp_data);
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
Expand All @@ -560,4 +560,4 @@ int efa_rdm_pke_proc_compare_rta(struct efa_rdm_pke *pkt_entry)

efa_rdm_pke_release_rx(pkt_entry);
return 0;
}
}
4 changes: 2 additions & 2 deletions prov/efa/src/rdm/efa_rdm_pke_rtm.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ void efa_rdm_pke_handle_rtm_rta_recv(struct efa_rdm_pke *pkt_entry)
"Invalid msg_id: %" PRIu32
" robuf->exp_msg_id: %" PRIu32 "\n",
msg_id, peer->robuf.exp_msg_id);
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PKT_ALREADY_PROCESSED);
efa_base_ep_write_eq_error(&ep->base_ep, ret, FI_EFA_ERR_PKT_ALREADY_PROCESSED);
efa_rdm_pke_release_rx(pkt_entry);
return;
}
Expand All @@ -489,7 +489,7 @@ void efa_rdm_pke_handle_rtm_rta_recv(struct efa_rdm_pke *pkt_entry)
EFA_WARN(FI_LOG_EP_CTRL,
"Unknown error %d processing REQ packet msg_id: %"
PRIu32 "\n", ret, msg_id);
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_OTHER);
efa_base_ep_write_eq_error(&ep->base_ep, ret, FI_EFA_ERR_OTHER);
return;
}

Expand Down
6 changes: 3 additions & 3 deletions prov/efa/src/rdm/efa_rdm_pke_rtr.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ void efa_rdm_pke_handle_rtr_recv(struct efa_rdm_pke *pkt_entry)
FI_REMOTE_READ, rxe->iov, rxe->desc);
if (OFI_UNLIKELY(err)) {
EFA_WARN(FI_LOG_CQ, "RMA address verification failed!\n");
efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_RMA_ADDR);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RMA_ADDR);
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
return;
Expand All @@ -116,11 +116,11 @@ void efa_rdm_pke_handle_rtr_recv(struct efa_rdm_pke *pkt_entry)
err = efa_rdm_ope_post_send_or_queue(rxe, EFA_RDM_READRSP_PKT);
if (OFI_UNLIKELY(err)) {
EFA_WARN(FI_LOG_CQ, "Posting of readrsp packet failed! err=%ld\n", err);
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PKT_POST);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_PKT_POST);
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
return;
}

efa_rdm_pke_release_rx(pkt_entry);
}
}
12 changes: 6 additions & 6 deletions prov/efa/src/rdm/efa_rdm_pke_rtw.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ void efa_rdm_pke_proc_eager_rtw(struct efa_rdm_pke *pkt_entry,

if (OFI_UNLIKELY(err)) {
EFA_WARN(FI_LOG_CQ, "RMA address verify failed!\n");
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_RMA_ADDR);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RMA_ADDR);
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
return;
Expand All @@ -169,7 +169,7 @@ void efa_rdm_pke_proc_eager_rtw(struct efa_rdm_pke *pkt_entry,
} else {
err = efa_rdm_pke_copy_payload_to_ope(pkt_entry, rxe);
if (OFI_UNLIKELY(err)) {
efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_RXE_COPY);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RXE_COPY);
efa_rdm_pke_release_rx(pkt_entry);
efa_rdm_rxe_release(rxe);
}
Expand Down Expand Up @@ -391,7 +391,7 @@ void efa_rdm_pke_handle_longcts_rtw_recv(struct efa_rdm_pke *pkt_entry)
FI_REMOTE_WRITE, rxe->iov, rxe->desc);
if (OFI_UNLIKELY(err)) {
EFA_WARN(FI_LOG_CQ, "RMA address verify failed!\n");
efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_RMA_ADDR);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RMA_ADDR);
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
return;
Expand All @@ -414,7 +414,7 @@ void efa_rdm_pke_handle_longcts_rtw_recv(struct efa_rdm_pke *pkt_entry)
} else {
err = efa_rdm_pke_copy_payload_to_ope(pkt_entry, rxe);
if (OFI_UNLIKELY(err)) {
efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_RXE_COPY);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RXE_COPY);
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
return;
Expand Down Expand Up @@ -539,7 +539,7 @@ void efa_rdm_pke_handle_longread_rtw_recv(struct efa_rdm_pke *pkt_entry)
FI_REMOTE_WRITE, rxe->iov, rxe->desc);
if (OFI_UNLIKELY(err)) {
EFA_WARN(FI_LOG_CQ, "RMA address verify failed!\n");
efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_RMA_ADDR);
efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RMA_ADDR);
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
return;
Expand Down Expand Up @@ -567,4 +567,4 @@ void efa_rdm_pke_handle_longread_rtw_recv(struct efa_rdm_pke *pkt_entry)
efa_rdm_rxe_release(rxe);
efa_rdm_pke_release_rx(pkt_entry);
}
}
}
2 changes: 1 addition & 1 deletion prov/efa/src/rdm/efa_rdm_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ int efa_rdm_rma_verified_copy_iov(struct efa_rdm_ep *ep, struct efa_rma_iov *rma
EFA_WARN(FI_LOG_EP_CTRL,
"MR verification failed (%s), addr: %lx key: %ld\n",
fi_strerror(-ret), rma[i].addr, rma[i].key);
return -FI_EACCES;
return ret;
}

iov[i].iov_base = (void *)rma[i].addr;
Expand Down

0 comments on commit b266f14

Please sign in to comment.