From b266f141193521bfae31c9b92f9c3aa53752b85d Mon Sep 17 00:00:00 2001 From: Darryl Abbate Date: Tue, 19 Mar 2024 11:16:12 -0700 Subject: [PATCH] prov/efa: Propagate errnos from core functions untouched This is a best-effort attempt at propagating core Libfabric error codes upwards wherever possible. Signed-off-by: Darryl Abbate --- prov/efa/src/dgram/efa_dgram_ep.c | 2 +- prov/efa/src/rdm/efa_rdm_cq.c | 5 ++--- prov/efa/src/rdm/efa_rdm_ep_progress.c | 2 +- prov/efa/src/rdm/efa_rdm_ep_utils.c | 2 +- prov/efa/src/rdm/efa_rdm_pke_cmd.c | 4 ++-- prov/efa/src/rdm/efa_rdm_pke_nonreq.c | 2 +- prov/efa/src/rdm/efa_rdm_pke_rta.c | 6 +++--- prov/efa/src/rdm/efa_rdm_pke_rtm.c | 4 ++-- prov/efa/src/rdm/efa_rdm_pke_rtr.c | 6 +++--- prov/efa/src/rdm/efa_rdm_pke_rtw.c | 12 ++++++------ prov/efa/src/rdm/efa_rdm_rma.c | 2 +- 11 files changed, 23 insertions(+), 24 deletions(-) diff --git a/prov/efa/src/dgram/efa_dgram_ep.c b/prov/efa/src/dgram/efa_dgram_ep.c index 8af66cb719c..c02bf3556ae 100644 --- a/prov/efa/src/dgram/efa_dgram_ep.c +++ b/prov/efa/src/dgram/efa_dgram_ep.c @@ -303,7 +303,7 @@ static void efa_dgram_ep_progress_internal(struct efa_dgram_ep *ep, struct efa_d if (OFI_UNLIKELY(ret < 0)) { if (OFI_UNLIKELY(ret != -FI_EAVAIL)) { EFA_WARN(FI_LOG_CQ, "no error available errno: %ld\n", ret); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_DGRAM_CQ_READ); + efa_base_ep_write_eq_error(&ep->base_ep, -ret, FI_EFA_ERR_DGRAM_CQ_READ); return; } diff --git a/prov/efa/src/rdm/efa_rdm_cq.c b/prov/efa/src/rdm/efa_rdm_cq.c index 588d6de26a2..436e52061ef 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.c +++ b/prov/efa/src/rdm/efa_rdm_cq.c @@ -115,7 +115,7 @@ void efa_rdm_cq_proc_ibv_recv_rdma_with_imm_completion( EFA_WARN(FI_LOG_CQ, "Unable to write a cq entry for remote for RECV_RDMA operation: %s\n", fi_strerror(-ret)); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_WRITE_SHM_CQ_ENTRY); + efa_base_ep_write_eq_error(&ep->base_ep, -ret, FI_EFA_ERR_WRITE_SHM_CQ_ENTRY); } efa_cntr_report_rx_completion(&ep->base_ep.util_ep, flags); @@ -507,8 +507,7 @@ int efa_rdm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, ret = efa_cq_ibv_cq_ex_open(attr, efa_domain->device->ibv_ctx, &cq->ibv_cq.ibv_cq_ex, &cq->ibv_cq.ibv_cq_ex_type); if (ret) { - EFA_WARN(FI_LOG_CQ, "Unable to create extended CQ: %d\n", ret); - ret = -FI_EINVAL; + EFA_WARN(FI_LOG_CQ, "Unable to create extended CQ: %s\n", fi_strerror(ret)); goto close_util_cq; } diff --git a/prov/efa/src/rdm/efa_rdm_ep_progress.c b/prov/efa/src/rdm/efa_rdm_ep_progress.c index 850c67464bf..ec0323418c8 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_progress.c +++ b/prov/efa/src/rdm/efa_rdm_ep_progress.c @@ -344,7 +344,7 @@ void efa_rdm_ep_progress_internal(struct efa_rdm_ep *ep) EFA_WARN(FI_LOG_EP_CTRL, "Failed to post HANDSHAKE to peer %ld: %s\n", peer->efa_fiaddr, fi_strerror(-ret)); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PEER_HANDSHAKE); + efa_base_ep_write_eq_error(&ep->base_ep, -ret, FI_EFA_ERR_PEER_HANDSHAKE); return; } diff --git a/prov/efa/src/rdm/efa_rdm_ep_utils.c b/prov/efa/src/rdm/efa_rdm_ep_utils.c index 2304babd71d..32d494c7c34 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_utils.c +++ b/prov/efa/src/rdm/efa_rdm_ep_utils.c @@ -653,7 +653,7 @@ void efa_rdm_ep_post_handshake_or_queue(struct efa_rdm_ep *ep, struct efa_rdm_pe EFA_WARN(FI_LOG_EP_CTRL, "Failed to post HANDSHAKE to peer %ld: %s\n", peer->efa_fiaddr, fi_strerror(-err)); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PEER_HANDSHAKE); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_PEER_HANDSHAKE); return; } diff --git a/prov/efa/src/rdm/efa_rdm_pke_cmd.c b/prov/efa/src/rdm/efa_rdm_pke_cmd.c index fa1fdf1f155..9a3a4d10164 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_cmd.c +++ b/prov/efa/src/rdm/efa_rdm_pke_cmd.c @@ -745,7 +745,7 @@ fi_addr_t efa_rdm_pke_insert_addr(struct efa_rdm_pke *pkt_entry, void *raw_addr) ret = efa_av_insert_one(ep->base_ep.av, (struct efa_ep_addr *)raw_addr, &rdm_addr, 0, NULL, false); if (OFI_UNLIKELY(ret != 0)) { - efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_AV_INSERT); + efa_base_ep_write_eq_error(&ep->base_ep, ret, FI_EFA_ERR_AV_INSERT); return -1; } @@ -910,7 +910,7 @@ void efa_rdm_pke_handle_recv_completion(struct efa_rdm_pke *pkt_entry) "Peer %d is requesting feature %d, which this EP does not support.\n", (int)pkt_entry->addr, base_hdr->type); - assert(0 && "invalid REQ packe type"); + assert(0 && "invalid REQ packet type"); efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_INVALID_PKT_TYPE); efa_rdm_pke_release_rx(pkt_entry); return; diff --git a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c index 33b520174f4..d0b7d9bd6cc 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c +++ b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c @@ -806,7 +806,7 @@ void efa_rdm_pke_handle_atomrsp_recv(struct efa_rdm_pke *pkt_entry) txe->atomic_ex.resp_iov_count, atomrsp_pkt->data, atomrsp_hdr->seg_length); if (OFI_UNLIKELY(ret < 0)) { - efa_base_ep_write_eq_error(&pkt_entry->ep->base_ep, FI_EMSGSIZE, EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH); + efa_base_ep_write_eq_error(&pkt_entry->ep->base_ep, -ret, EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH); return; } diff --git a/prov/efa/src/rdm/efa_rdm_pke_rta.c b/prov/efa/src/rdm/efa_rdm_pke_rta.c index 3b72e02fb25..3fe95ab52f3 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_rta.c +++ b/prov/efa/src/rdm/efa_rdm_pke_rta.c @@ -520,7 +520,7 @@ int efa_rdm_pke_proc_compare_rta(struct efa_rdm_pke *pkt_entry) dt = rxe->atomic_hdr.datatype; dtsize = ofi_datatype_size(rxe->atomic_hdr.datatype); if (OFI_UNLIKELY(!dtsize)) { - efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_INVALID_DATATYPE); + efa_base_ep_write_eq_error(&ep->base_ep, errno, FI_EFA_ERR_INVALID_DATATYPE); efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); return -errno; @@ -551,7 +551,7 @@ int efa_rdm_pke_proc_compare_rta(struct efa_rdm_pke *pkt_entry) err = efa_rdm_ope_post_send_or_queue(rxe, EFA_RDM_ATOMRSP_PKT); if (OFI_UNLIKELY(err)) { - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PKT_POST); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_PKT_POST); ofi_buf_free(rxe->atomrsp_data); efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); @@ -560,4 +560,4 @@ int efa_rdm_pke_proc_compare_rta(struct efa_rdm_pke *pkt_entry) efa_rdm_pke_release_rx(pkt_entry); return 0; -} \ No newline at end of file +} diff --git a/prov/efa/src/rdm/efa_rdm_pke_rtm.c b/prov/efa/src/rdm/efa_rdm_pke_rtm.c index 2c88cbef027..a4ed52e6264 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_rtm.c +++ b/prov/efa/src/rdm/efa_rdm_pke_rtm.c @@ -475,7 +475,7 @@ void efa_rdm_pke_handle_rtm_rta_recv(struct efa_rdm_pke *pkt_entry) "Invalid msg_id: %" PRIu32 " robuf->exp_msg_id: %" PRIu32 "\n", msg_id, peer->robuf.exp_msg_id); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PKT_ALREADY_PROCESSED); + efa_base_ep_write_eq_error(&ep->base_ep, ret, FI_EFA_ERR_PKT_ALREADY_PROCESSED); efa_rdm_pke_release_rx(pkt_entry); return; } @@ -489,7 +489,7 @@ void efa_rdm_pke_handle_rtm_rta_recv(struct efa_rdm_pke *pkt_entry) EFA_WARN(FI_LOG_EP_CTRL, "Unknown error %d processing REQ packet msg_id: %" PRIu32 "\n", ret, msg_id); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_OTHER); + efa_base_ep_write_eq_error(&ep->base_ep, ret, FI_EFA_ERR_OTHER); return; } diff --git a/prov/efa/src/rdm/efa_rdm_pke_rtr.c b/prov/efa/src/rdm/efa_rdm_pke_rtr.c index a8d6847df21..2ad5718865d 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_rtr.c +++ b/prov/efa/src/rdm/efa_rdm_pke_rtr.c @@ -102,7 +102,7 @@ void efa_rdm_pke_handle_rtr_recv(struct efa_rdm_pke *pkt_entry) FI_REMOTE_READ, rxe->iov, rxe->desc); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "RMA address verification failed!\n"); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_RMA_ADDR); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RMA_ADDR); efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); return; @@ -116,11 +116,11 @@ void efa_rdm_pke_handle_rtr_recv(struct efa_rdm_pke *pkt_entry) err = efa_rdm_ope_post_send_or_queue(rxe, EFA_RDM_READRSP_PKT); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "Posting of readrsp packet failed! err=%ld\n", err); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_PKT_POST); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_PKT_POST); efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); return; } efa_rdm_pke_release_rx(pkt_entry); -} \ No newline at end of file +} diff --git a/prov/efa/src/rdm/efa_rdm_pke_rtw.c b/prov/efa/src/rdm/efa_rdm_pke_rtw.c index 7b31cb461c0..c7dc43f2490 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_rtw.c +++ b/prov/efa/src/rdm/efa_rdm_pke_rtw.c @@ -147,7 +147,7 @@ void efa_rdm_pke_proc_eager_rtw(struct efa_rdm_pke *pkt_entry, if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "RMA address verify failed!\n"); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_RMA_ADDR); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RMA_ADDR); efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); return; @@ -169,7 +169,7 @@ void efa_rdm_pke_proc_eager_rtw(struct efa_rdm_pke *pkt_entry, } else { err = efa_rdm_pke_copy_payload_to_ope(pkt_entry, rxe); if (OFI_UNLIKELY(err)) { - efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_RXE_COPY); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RXE_COPY); efa_rdm_pke_release_rx(pkt_entry); efa_rdm_rxe_release(rxe); } @@ -391,7 +391,7 @@ void efa_rdm_pke_handle_longcts_rtw_recv(struct efa_rdm_pke *pkt_entry) FI_REMOTE_WRITE, rxe->iov, rxe->desc); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "RMA address verify failed!\n"); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EIO, FI_EFA_ERR_RMA_ADDR); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RMA_ADDR); efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); return; @@ -414,7 +414,7 @@ void efa_rdm_pke_handle_longcts_rtw_recv(struct efa_rdm_pke *pkt_entry) } else { err = efa_rdm_pke_copy_payload_to_ope(pkt_entry, rxe); if (OFI_UNLIKELY(err)) { - efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_RXE_COPY); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RXE_COPY); efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); return; @@ -539,7 +539,7 @@ void efa_rdm_pke_handle_longread_rtw_recv(struct efa_rdm_pke *pkt_entry) FI_REMOTE_WRITE, rxe->iov, rxe->desc); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "RMA address verify failed!\n"); - efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_RMA_ADDR); + efa_base_ep_write_eq_error(&ep->base_ep, err, FI_EFA_ERR_RMA_ADDR); efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); return; @@ -567,4 +567,4 @@ void efa_rdm_pke_handle_longread_rtw_recv(struct efa_rdm_pke *pkt_entry) efa_rdm_rxe_release(rxe); efa_rdm_pke_release_rx(pkt_entry); } -} \ No newline at end of file +} diff --git a/prov/efa/src/rdm/efa_rdm_rma.c b/prov/efa/src/rdm/efa_rdm_rma.c index b1be5c5d182..c4328e33a9e 100644 --- a/prov/efa/src/rdm/efa_rdm_rma.c +++ b/prov/efa/src/rdm/efa_rdm_rma.c @@ -33,7 +33,7 @@ int efa_rdm_rma_verified_copy_iov(struct efa_rdm_ep *ep, struct efa_rma_iov *rma EFA_WARN(FI_LOG_EP_CTRL, "MR verification failed (%s), addr: %lx key: %ld\n", fi_strerror(-ret), rma[i].addr, rma[i].key); - return -FI_EACCES; + return ret; } iov[i].iov_base = (void *)rma[i].addr;