Skip to content

Commit

Permalink
prov/efa: Squash completion for ctrl pkts when RNR
Browse files Browse the repository at this point in the history
When an RNR error is hit and resource management is turned off, the efa
provider currently writes a cq error for any non-handshake pkt type.
However, the efa provider still has other pkts that serve ctrl purposes,
like the rtw pkt used to trigger a handshake. In this case libfabric
should still queue the pkt instead of writing a cq error.

Signed-off-by: Shi Jin <[email protected]>
  • Loading branch information
shijin-aws committed Nov 13, 2024
1 parent f61cb8a commit 062b935
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 1 deletion.
2 changes: 2 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ep_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ struct efa_rdm_ope *efa_rdm_ep_alloc_txe(struct efa_rdm_ep *efa_rdm_ep,
txe->tag = tag;
}

txe->internal_flags |= EFA_RDM_OPE_CTRL;
dlist_insert_tail(&txe->ep_entry, &efa_rdm_ep->txe_list);
return txe;
}
Expand Down Expand Up @@ -601,6 +602,7 @@ ssize_t efa_rdm_ep_post_handshake(struct efa_rdm_ep *ep, struct efa_rdm_peer *pe
* reset to desired flags (remove things like FI_DELIVERY_COMPLETE, and FI_COMPLETION)
*/
txe->fi_flags = EFA_RDM_TXE_NO_COMPLETION | EFA_RDM_TXE_NO_COUNTER;
txe->internal_flags |= EFA_RDM_OPE_CTRL;

pkt_entry = efa_rdm_pke_alloc(ep, ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL);
if (OFI_UNLIKELY(!pkt_entry)) {
Expand Down
6 changes: 6 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ope.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ void efa_rdm_rxe_release_internal(struct efa_rdm_ope *rxe);
*/
#define EFA_RDM_OPE_QUEUED_BEFORE_HANDSHAKE BIT_ULL(14)

/**
 * @brief Flag indicating that the ope was created internally to post
 * control packets and is not associated with an application request.
 *
 * An ope carrying this flag is excluded from the branch that reports an
 * RNR error to the application when resource management is
 * FI_RM_DISABLED (see efa_rdm_pke_handle_tx_error()).
 */
#define EFA_RDM_OPE_CTRL BIT_ULL(15)

/**
 * @brief Mask combining every EFA_RDM_OPE_QUEUED_* flag (RNR, CTRL, READ
 * and BEFORE_HANDSHAKE), so all of them can be tested or cleared at once.
 */
#define EFA_RDM_OPE_QUEUED_FLAGS (EFA_RDM_OPE_QUEUED_RNR | EFA_RDM_OPE_QUEUED_CTRL | EFA_RDM_OPE_QUEUED_READ | EFA_RDM_OPE_QUEUED_BEFORE_HANDSHAKE)

void efa_rdm_ope_try_fill_desc(struct efa_rdm_ope *ope, int mr_iov_start, uint64_t access);
Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/rdm/efa_rdm_pke_cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ void efa_rdm_pke_handle_tx_error(struct efa_rdm_pke *pkt_entry, int prov_errno)
}

if (prov_errno == EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR) {
if (ep->handle_resource_management == FI_RM_DISABLED) {
if (ep->handle_resource_management == FI_RM_DISABLED && !(txe->internal_flags & EFA_RDM_OPE_CTRL)) {
/*
* Write an error to the application for RNR when resource
* management is disabled.
Expand Down

0 comments on commit 062b935

Please sign in to comment.