From 2fa798c06546ffec23e0773916a2a86ec86ea42b Mon Sep 17 00:00:00 2001 From: Sai Sunku Date: Fri, 17 Mar 2023 03:53:30 +0000 Subject: [PATCH 1/3] prov/efa: Directly call fi_readmsg and fi_writemsg for SHM Signed-off-by: Sai Sunku --- prov/efa/src/rdm/rxr_rma.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/prov/efa/src/rdm/rxr_rma.c b/prov/efa/src/rdm/rxr_rma.c index d36b298ea7c..36a3ba9b994 100644 --- a/prov/efa/src/rdm/rxr_rma.c +++ b/prov/efa/src/rdm/rxr_rma.c @@ -194,6 +194,8 @@ ssize_t rxr_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_ struct rxr_ep *rxr_ep; struct efa_rdm_peer *peer; struct rxr_op_entry *tx_entry = NULL; + fi_addr_t tmp_addr; + struct fi_msg_rma *msg_clone; bool use_lower_ep_read; EFA_DBG(FI_LOG_EP_DATA, @@ -219,6 +221,16 @@ ssize_t rxr_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_ err = -FI_EAGAIN; goto out; } + + if (peer->is_local && rxr_ep->use_shm_for_tx) { + tmp_addr = msg->addr; + msg_clone = (struct fi_msg_rma *)msg; + msg_clone->addr = peer->shm_fiaddr; + err = fi_readmsg(rxr_ep->shm_ep, msg_clone, flags); + msg_clone->addr = tmp_addr; + goto out; + } + tx_entry = rxr_rma_alloc_tx_entry(rxr_ep, msg, ofi_op_read_req, flags); if (OFI_UNLIKELY(!tx_entry)) { rxr_ep_progress_internal(rxr_ep); @@ -227,9 +239,7 @@ ssize_t rxr_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_ } use_lower_ep_read = false; - if (peer->is_local && rxr_ep->use_shm_for_tx) { - use_lower_ep_read = true; - } else if (efa_both_support_rdma_read(rxr_ep, peer)) { + if (efa_both_support_rdma_read(rxr_ep, peer)) { /* efa_both_support_rdma_read also check domain.use_device_rdma, * so we do not check it here */ @@ -444,6 +454,8 @@ ssize_t rxr_rma_writemsg(struct fid_ep *ep, struct efa_rdm_peer *peer; struct rxr_ep *rxr_ep; struct rxr_op_entry *tx_entry; + fi_addr_t tmp_addr; + struct fi_msg_rma *msg_clone; EFA_DBG(FI_LOG_EP_DATA, "write iov_len %lu flags: 
%lx\n", @@ -464,6 +476,15 @@ ssize_t rxr_rma_writemsg(struct fid_ep *ep, goto out; } + if (peer->is_local && rxr_ep->use_shm_for_tx) { + tmp_addr = msg->addr; + msg_clone = (struct fi_msg_rma *)msg; + msg_clone->addr = peer->shm_fiaddr; + err = fi_writemsg(rxr_ep->shm_ep, msg, flags); + msg_clone->addr = tmp_addr; + goto out; + } + tx_entry = rxr_rma_alloc_tx_entry(rxr_ep, msg, ofi_op_write, flags); if (OFI_UNLIKELY(!tx_entry)) { rxr_ep_progress_internal(rxr_ep); From 8ab13d22ae6866d455a972097071a0ecd2dc9289 Mon Sep 17 00:00:00 2001 From: Sai Sunku Date: Fri, 17 Mar 2023 15:40:55 +0000 Subject: [PATCH 2/3] prov/efa: Clean up unused SHM code part 1 Signed-off-by: Sai Sunku --- prov/efa/src/rdm/rxr_ep.c | 9 +-- prov/efa/src/rdm/rxr_op_entry.c | 71 +++-------------- prov/efa/src/rdm/rxr_pkt_cmd.c | 6 +- prov/efa/src/rdm/rxr_pkt_entry.c | 114 ++++++++++++--------------- prov/efa/src/rdm/rxr_pkt_type_misc.c | 5 +- prov/efa/src/rdm/rxr_rma.c | 56 ------------- 6 files changed, 69 insertions(+), 192 deletions(-) diff --git a/prov/efa/src/rdm/rxr_ep.c b/prov/efa/src/rdm/rxr_ep.c index ce27e73bed0..21944a3ab65 100644 --- a/prov/efa/src/rdm/rxr_ep.c +++ b/prov/efa/src/rdm/rxr_ep.c @@ -782,12 +782,13 @@ void rxr_ep_set_use_shm_for_tx(struct rxr_ep *ep) /* App provided hints supercede environmental variables. * - * Using the shm provider comes with some overheads, particularly in the - * progress engine when polling an empty completion queue, so avoid + * Using the shm provider comes with some overheads, so avoid * initializing the provider if the app provides a hint that it does not * require node-local communication. We can still loopback over the EFA * device in cases where the app violates the hint and continues * communicating with node-local peers. + * + * aws-ofi-nccl relies on this feature. 
*/ if (ep->user_info /* If the app requires explicitly remote communication */ @@ -2067,9 +2068,7 @@ void rxr_ep_progress_internal(struct rxr_ep *ep) * The core's TX queue is full so we can't do any * additional work. */ - bool use_shm = peer->is_local && ep->use_shm_for_tx; - - if (!use_shm && ep->efa_outstanding_tx_ops == ep->efa_max_outstanding_tx_ops) + if (ep->efa_outstanding_tx_ops == ep->efa_max_outstanding_tx_ops) goto out; ret = rxr_op_entry_post_remote_read(op_entry); diff --git a/prov/efa/src/rdm/rxr_op_entry.c b/prov/efa/src/rdm/rxr_op_entry.c index f3db8eb1559..96158963c8f 100644 --- a/prov/efa/src/rdm/rxr_op_entry.c +++ b/prov/efa/src/rdm/rxr_op_entry.c @@ -225,10 +225,10 @@ void rxr_rx_entry_release(struct rxr_op_entry *rx_entry) * user's data buffer is on host memory (Though user can register * its buffer, and provide its descriptor as an optimization). * - * However, there are a few occations that EFA device and shm + * However, there are a few occasions that EFA device * require memory to be register with them: * - * First, when EFA device is used to send data: + * When EFA device is used to send data: * * If a non-read based protocol (such as eager, meidum, longcts) * is used, the send buffer must be registered with EFA device. @@ -236,14 +236,6 @@ void rxr_rx_entry_release(struct rxr_op_entry *rx_entry) * If a read based protocol is used, both send buffer * and receive buffer must be registered with EFA device. * - * Second, when shm is used: - * If eager protocol is used, no registration is needed (because - * shm does not require registration for local buffer) - * - * If a read based protocol is used, the send buffer must - * be registered with shm, because send buffer is used as - * remote buffer in a read based protocol. - * * Therefore, when user did not provide descriptors for the buffer(s), * EFA provider need to bridge the gap.
* @@ -258,9 +250,7 @@ void rxr_rx_entry_release(struct rxr_op_entry *rx_entry) * Because of the high cost of memory registration, this happens * only when MR cache is available, which is checked by the caller * of this function on sender side. (this happens when - * - * 1. EFA device is used with non-eager protocols and - * 2. SHM is used with long read protocol + * EFA device is used with non-eager protocols) * * This function is not guaranteed to fill all descriptors (which * is why the function name has try). When memory registration fail due @@ -287,36 +277,15 @@ void rxr_rx_entry_release(struct rxr_op_entry *rx_entry) void rxr_op_entry_try_fill_desc(struct rxr_op_entry *op_entry, int mr_iov_start, uint64_t access) { int i, err; - struct efa_rdm_peer *peer; - - peer = rxr_ep_get_peer(op_entry->ep, op_entry->addr); for (i = mr_iov_start; i < op_entry->iov_count; ++i) { if (op_entry->desc[i]) continue; - - if (peer->is_local && op_entry->ep->use_shm_for_tx) { - if (access == FI_REMOTE_READ) { - /* this happens when longread protocol message protocl was used - * with shm. The send buffer is going to be read by receiver, - * therefore must be registered with shm provider.
- */ - assert(op_entry->type == RXR_TX_ENTRY); - err = efa_mr_reg_shm(&rxr_ep_domain(op_entry->ep)->util_domain.domain_fid, - op_entry->iov + i, - access, &op_entry->mr[i]); - } else { - assert(access == FI_SEND || access == FI_RECV); - /* shm does not require registration for send and recv */ - err = 0; - } - } else { - err = fi_mr_regv(&rxr_ep_domain(op_entry->ep)->util_domain.domain_fid, - op_entry->iov + i, 1, - access, - 0, 0, 0, &op_entry->mr[i], NULL); - } + err = fi_mr_regv( + &rxr_ep_domain(op_entry->ep)->util_domain.domain_fid, + op_entry->iov + i, 1, access, 0, 0, 0, &op_entry->mr[i], + NULL); if (err) { EFA_WARN(FI_LOG_EP_CTRL, @@ -441,10 +410,6 @@ size_t rxr_tx_entry_max_req_data_capacity(struct rxr_ep *ep, struct rxr_op_entry peer = rxr_ep_get_peer(ep, tx_entry->addr); assert(peer); - if (peer->is_local && ep->use_shm_for_tx) { - return rxr_env.shm_max_medium_size; - } - if (efa_rdm_peer_need_raw_addr_hdr(peer)) header_flags |= RXR_REQ_OPT_RAW_ADDR_HDR; else if (efa_rdm_peer_need_connid(peer)) @@ -1254,17 +1219,13 @@ int rxr_op_entry_post_remote_read(struct rxr_op_entry *op_entry) int iov_idx = 0, rma_iov_idx = 0; size_t iov_offset = 0, rma_iov_offset = 0; size_t read_once_len, max_read_once_len; - bool use_shm; struct rxr_ep *ep; - struct efa_rdm_peer *peer; struct rxr_pkt_entry *pkt_entry; assert(op_entry->iov_count > 0); assert(op_entry->rma_iov_count > 0); ep = op_entry->ep; - peer = rxr_ep_get_peer(ep, op_entry->addr); - use_shm = peer->is_local && ep->use_shm_for_tx; if (op_entry->bytes_read_total_len == 0) { @@ -1275,10 +1236,7 @@ int rxr_op_entry_post_remote_read(struct rxr_op_entry *op_entry) * Note that because send operation used a pkt_entry as wr_id, * we had to use a pkt_entry as context for read too. 
*/ - if (use_shm) - pkt_entry = rxr_pkt_entry_alloc(ep, ep->shm_tx_pkt_pool, RXR_PKT_FROM_SHM_TX_POOL); - else - pkt_entry = rxr_pkt_entry_alloc(ep, ep->efa_tx_pkt_pool, RXR_PKT_FROM_EFA_TX_POOL); + pkt_entry = rxr_pkt_entry_alloc(ep, ep->efa_tx_pkt_pool, RXR_PKT_FROM_EFA_TX_POOL); if (OFI_UNLIKELY(!pkt_entry)) return -FI_EAGAIN; @@ -1297,11 +1255,9 @@ int rxr_op_entry_post_remote_read(struct rxr_op_entry *op_entry) assert(op_entry->bytes_read_submitted < op_entry->bytes_read_total_len); - if (!use_shm) { - rxr_op_entry_try_fill_desc(op_entry, 0, FI_RECV); - } + rxr_op_entry_try_fill_desc(op_entry, 0, FI_RECV); - max_read_once_len = use_shm ? SIZE_MAX : MIN(rxr_env.efa_read_segment_size, rxr_ep_domain(ep)->device->max_rdma_size); + max_read_once_len = MIN(rxr_env.efa_read_segment_size, rxr_ep_domain(ep)->device->max_rdma_size); assert(max_read_once_len > 0); err = rxr_locate_iov_pos(op_entry->iov, op_entry->iov_count, @@ -1327,7 +1283,6 @@ int rxr_op_entry_post_remote_read(struct rxr_op_entry *op_entry) assert(rma_iov_idx < op_entry->rma_iov_count); assert(rma_iov_offset < op_entry->rma_iov[rma_iov_idx].len); - if (!use_shm) { if (ep->efa_outstanding_tx_ops == ep->efa_max_outstanding_tx_ops) return -FI_EAGAIN; @@ -1340,12 +1295,8 @@ int rxr_op_entry_post_remote_read(struct rxr_op_entry *op_entry) */ return -FI_EAGAIN; } - } - if (use_shm) - pkt_entry = rxr_pkt_entry_alloc(ep, ep->shm_tx_pkt_pool, RXR_PKT_FROM_SHM_TX_POOL); - else - pkt_entry = rxr_pkt_entry_alloc(ep, ep->efa_tx_pkt_pool, RXR_PKT_FROM_EFA_TX_POOL); + pkt_entry = rxr_pkt_entry_alloc(ep, ep->efa_tx_pkt_pool, RXR_PKT_FROM_EFA_TX_POOL); if (OFI_UNLIKELY(!pkt_entry)) return -FI_EAGAIN; diff --git a/prov/efa/src/rdm/rxr_pkt_cmd.c b/prov/efa/src/rdm/rxr_pkt_cmd.c index 01c3fad73c9..c57cf634db3 100644 --- a/prov/efa/src/rdm/rxr_pkt_cmd.c +++ b/prov/efa/src/rdm/rxr_pkt_cmd.c @@ -280,11 +280,7 @@ ssize_t rxr_pkt_post_one(struct rxr_ep *rxr_ep, struct rxr_op_entry *op_entry, addr = op_entry->addr; peer = 
rxr_ep_get_peer(rxr_ep, addr); assert(peer); - if (peer->is_local && rxr_ep->use_shm_for_tx) { - pkt_entry = rxr_pkt_entry_alloc(rxr_ep, rxr_ep->shm_tx_pkt_pool, RXR_PKT_FROM_SHM_TX_POOL); - } else { - pkt_entry = rxr_pkt_entry_alloc(rxr_ep, rxr_ep->efa_tx_pkt_pool, RXR_PKT_FROM_EFA_TX_POOL); - } + pkt_entry = rxr_pkt_entry_alloc(rxr_ep, rxr_ep->efa_tx_pkt_pool, RXR_PKT_FROM_EFA_TX_POOL); if (!pkt_entry) return -FI_EAGAIN; diff --git a/prov/efa/src/rdm/rxr_pkt_entry.c b/prov/efa/src/rdm/rxr_pkt_entry.c index bf093a45aa5..93da3c5ecff 100644 --- a/prov/efa/src/rdm/rxr_pkt_entry.c +++ b/prov/efa/src/rdm/rxr_pkt_entry.c @@ -464,40 +464,34 @@ int rxr_pkt_entry_read(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry, struct efa_qp *qp; struct efa_conn *conn; struct ibv_sge sge; - bool self_comm; int err = 0; peer = rxr_ep_get_peer(ep, pkt_entry->addr); - if (peer && peer->is_local && ep->use_shm_for_tx) { - err = fi_read(ep->shm_ep, local_buf, len, efa_mr_get_shm_desc(desc), peer->shm_fiaddr, remote_buf, remote_key, pkt_entry); + if (peer == NULL) + pkt_entry->flags |= RXR_PKT_ENTRY_LOCAL_READ; + + qp = ep->base_ep.qp; + ibv_wr_start(qp->ibv_qp_ex); + qp->ibv_qp_ex->wr_id = (uintptr_t)pkt_entry; + ibv_wr_rdma_read(qp->ibv_qp_ex, remote_key, remote_buf); + + sge.addr = (uint64_t)local_buf; + sge.length = len; + sge.lkey = ((struct efa_mr *)desc)->ibv_mr->lkey; + + ibv_wr_set_sge_list(qp->ibv_qp_ex, 1, &sge); + if (peer == NULL) { + ibv_wr_set_ud_addr(qp->ibv_qp_ex, ep->base_ep.self_ah, + qp->qp_num, qp->qkey); } else { - self_comm = (peer == NULL); - if (self_comm) - pkt_entry->flags |= RXR_PKT_ENTRY_LOCAL_READ; - - qp = ep->base_ep.qp; - ibv_wr_start(qp->ibv_qp_ex); - qp->ibv_qp_ex->wr_id = (uintptr_t)pkt_entry; - ibv_wr_rdma_read(qp->ibv_qp_ex, remote_key, remote_buf); - - sge.addr = (uint64_t)local_buf; - sge.length = len; - sge.lkey = ((struct efa_mr *)desc)->ibv_mr->lkey; - - ibv_wr_set_sge_list(qp->ibv_qp_ex, 1, &sge); - if (self_comm) { - 
ibv_wr_set_ud_addr(qp->ibv_qp_ex, ep->base_ep.self_ah, - qp->qp_num, qp->qkey); - } else { - conn = efa_av_addr_to_conn(ep->base_ep.av, pkt_entry->addr); - assert(conn && conn->ep_addr); - ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, - conn->ep_addr->qpn, conn->ep_addr->qkey); - } - - err = ibv_wr_complete(qp->ibv_qp_ex); + conn = efa_av_addr_to_conn(ep->base_ep.av, pkt_entry->addr); + assert(conn && conn->ep_addr); + ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, + conn->ep_addr->qpn, conn->ep_addr->qkey); } + err = ibv_wr_complete(qp->ibv_qp_ex); + if (OFI_UNLIKELY(err)) return err; @@ -539,50 +533,46 @@ int rxr_pkt_entry_write(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry, rma_context_pkt = (struct rxr_rma_context_pkt *)pkt_entry->wiredata; rma_context_pkt->seg_size = len; - if (peer && peer->is_local && ep->use_shm_for_tx) { - err = fi_write(ep->shm_ep, local_buf, len, efa_mr_get_shm_desc(desc), peer->shm_fiaddr, remote_buf, remote_key, pkt_entry); - } else { - assert(((struct efa_mr *)desc)->ibv_mr); + assert(((struct efa_mr *)desc)->ibv_mr); - self_comm = (peer == NULL); - if (self_comm) - pkt_entry->flags |= RXR_PKT_ENTRY_LOCAL_WRITE; + self_comm = (peer == NULL); + if (self_comm) + pkt_entry->flags |= RXR_PKT_ENTRY_LOCAL_WRITE; - qp = ep->base_ep.qp; - ibv_wr_start(qp->ibv_qp_ex); - qp->ibv_qp_ex->wr_id = (uintptr_t)pkt_entry; + qp = ep->base_ep.qp; + ibv_wr_start(qp->ibv_qp_ex); + qp->ibv_qp_ex->wr_id = (uintptr_t)pkt_entry; - if (tx_entry->fi_flags & FI_REMOTE_CQ_DATA) { - /* assert that we are sending the entire buffer as a + if (tx_entry->fi_flags & FI_REMOTE_CQ_DATA) { + /* assert that we are sending the entire buffer as a single IOV when immediate data is also included. 
*/ - assert( len == tx_entry->bytes_write_total_len ); - ibv_wr_rdma_write_imm(qp->ibv_qp_ex, remote_key, - remote_buf, tx_entry->cq_entry.data); - } else { - ibv_wr_rdma_write(qp->ibv_qp_ex, remote_key, remote_buf); - } + assert(len == tx_entry->bytes_write_total_len); + ibv_wr_rdma_write_imm(qp->ibv_qp_ex, remote_key, remote_buf, + tx_entry->cq_entry.data); + } else { + ibv_wr_rdma_write(qp->ibv_qp_ex, remote_key, remote_buf); + } - sge.addr = (uint64_t)local_buf; - sge.length = len; - sge.lkey = ((struct efa_mr *)desc)->ibv_mr->lkey; + sge.addr = (uint64_t)local_buf; + sge.length = len; + sge.lkey = ((struct efa_mr *)desc)->ibv_mr->lkey; - /* As an optimization, we should consider implementing multiple- + /* As an optimization, we should consider implementing multiple- iov writes using an IBV wr with multiple sge entries. For now, each WR contains only one sge. */ - ibv_wr_set_sge_list(qp->ibv_qp_ex, 1, &sge); - if (self_comm) { - ibv_wr_set_ud_addr(qp->ibv_qp_ex, ep->base_ep.self_ah, - qp->qp_num, qp->qkey); - } else { - conn = efa_av_addr_to_conn(ep->base_ep.av, pkt_entry->addr); - assert(conn && conn->ep_addr); - ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, - conn->ep_addr->qpn, conn->ep_addr->qkey); - } - - err = ibv_wr_complete(qp->ibv_qp_ex); + ibv_wr_set_sge_list(qp->ibv_qp_ex, 1, &sge); + if (self_comm) { + ibv_wr_set_ud_addr(qp->ibv_qp_ex, ep->base_ep.self_ah, + qp->qp_num, qp->qkey); + } else { + conn = efa_av_addr_to_conn(ep->base_ep.av, pkt_entry->addr); + assert(conn && conn->ep_addr); + ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, + conn->ep_addr->qpn, conn->ep_addr->qkey); } + err = ibv_wr_complete(qp->ibv_qp_ex); + if (OFI_UNLIKELY(err)) return err; diff --git a/prov/efa/src/rdm/rxr_pkt_type_misc.c b/prov/efa/src/rdm/rxr_pkt_type_misc.c index 47bb6c9a966..82852b0f0d9 100644 --- a/prov/efa/src/rdm/rxr_pkt_type_misc.c +++ b/prov/efa/src/rdm/rxr_pkt_type_misc.c @@ -98,10 +98,7 @@ ssize_t rxr_pkt_post_handshake(struct rxr_ep *ep, struct 
efa_rdm_peer *peer) ssize_t ret; addr = peer->efa_fiaddr; - if (peer->is_local && ep->use_shm_for_tx) - pkt_entry = rxr_pkt_entry_alloc(ep, ep->shm_tx_pkt_pool, RXR_PKT_FROM_SHM_TX_POOL); - else - pkt_entry = rxr_pkt_entry_alloc(ep, ep->efa_tx_pkt_pool, RXR_PKT_FROM_EFA_TX_POOL); + pkt_entry = rxr_pkt_entry_alloc(ep, ep->efa_tx_pkt_pool, RXR_PKT_FROM_EFA_TX_POOL); if (OFI_UNLIKELY(!pkt_entry)) return -FI_EAGAIN; diff --git a/prov/efa/src/rdm/rxr_rma.c b/prov/efa/src/rdm/rxr_rma.c index 36a3ba9b994..6c241fed603 100644 --- a/prov/efa/src/rdm/rxr_rma.c +++ b/prov/efa/src/rdm/rxr_rma.c @@ -107,57 +107,6 @@ rxr_rma_alloc_tx_entry(struct rxr_ep *rxr_ep, return tx_entry; } -size_t rxr_rma_post_shm_write(struct rxr_ep *rxr_ep, struct rxr_op_entry *tx_entry) -{ - struct rxr_pkt_entry *pkt_entry; - struct fi_msg_rma msg; - struct efa_rdm_peer *peer; - struct rxr_rma_context_pkt *rma_context_pkt; - int i, err; - - assert(tx_entry->op == ofi_op_write); - peer = rxr_ep_get_peer(rxr_ep, tx_entry->addr); - assert(peer); - - pkt_entry = rxr_pkt_entry_alloc(rxr_ep, rxr_ep->shm_tx_pkt_pool, RXR_PKT_FROM_SHM_TX_POOL); - if (OFI_UNLIKELY(!pkt_entry)) - return -FI_EAGAIN; - - rxr_pkt_init_write_context(tx_entry, pkt_entry); - rma_context_pkt = (struct rxr_rma_context_pkt *)pkt_entry->wiredata; - rma_context_pkt->seg_size = tx_entry->bytes_write_total_len; - - /* If no FI_MR_VIRT_ADDR being set, have to use 0-based offset */ - if (!(rxr_ep_domain(rxr_ep)->shm_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR)) { - for (i = 0; i < tx_entry->iov_count; i++) - tx_entry->rma_iov[i].addr = 0; - } - - msg.msg_iov = tx_entry->iov; - msg.iov_count = tx_entry->iov_count; - msg.addr = peer->shm_fiaddr; - msg.rma_iov = tx_entry->rma_iov; - msg.rma_iov_count = tx_entry->rma_iov_count; - msg.context = pkt_entry; - msg.data = tx_entry->cq_entry.data; - msg.desc = tx_entry->desc; - rxr_convert_desc_for_shm(msg.iov_count, tx_entry->desc); - - err = fi_writemsg(rxr_ep->shm_ep, &msg, tx_entry->fi_flags); - if 
(err) { - rxr_pkt_entry_release_tx(rxr_ep, pkt_entry); - return err; - } - - tx_entry->bytes_write_submitted = tx_entry->bytes_write_total_len; - -#if ENABLE_DEBUG - dlist_insert_tail(&pkt_entry->dbg_entry, &rxr_ep->tx_pkt_list); -#endif - rxr_ep_record_tx_op_submitted(rxr_ep, pkt_entry); - return 0; -} - /* rma_read functions */ ssize_t rxr_rma_post_efa_emulated_read(struct rxr_ep *ep, struct rxr_op_entry *tx_entry) { @@ -380,11 +329,6 @@ ssize_t rxr_rma_post_write(struct rxr_ep *ep, struct rxr_op_entry *tx_entry) peer = rxr_ep_get_peer(ep, tx_entry->addr); assert(peer); - if (peer->is_local && ep->use_shm_for_tx) { - rxr_op_entry_prepare_to_post_write(tx_entry); - return rxr_rma_post_shm_write(ep, tx_entry); - } - if (rxr_rma_should_write_using_rdma(ep, tx_entry, peer)) { rxr_op_entry_prepare_to_post_write(tx_entry); return rxr_op_entry_post_remote_write(tx_entry); From c1deeec18bbbac8259561611e6eceeff39dad003 Mon Sep 17 00:00:00 2001 From: Sai Sunku Date: Fri, 17 Mar 2023 16:55:42 +0000 Subject: [PATCH 3/3] prov/efa: Remove unused SHM code part 2: remove inject Signed-off-by: Sai Sunku --- prov/efa/src/rdm/rxr_atomic.c | 2 -- prov/efa/src/rdm/rxr_ep.c | 5 ++- prov/efa/src/rdm/rxr_msg.c | 4 +-- prov/efa/src/rdm/rxr_op_entry.c | 12 +------ prov/efa/src/rdm/rxr_op_entry.h | 7 +--- prov/efa/src/rdm/rxr_pkt_cmd.c | 54 ++++++++-------------------- prov/efa/src/rdm/rxr_pkt_cmd.h | 6 ++-- prov/efa/src/rdm/rxr_pkt_entry.c | 22 ------------ prov/efa/src/rdm/rxr_pkt_entry.h | 4 --- prov/efa/src/rdm/rxr_pkt_type_data.c | 2 +- prov/efa/src/rdm/rxr_pkt_type_misc.c | 2 +- prov/efa/src/rdm/rxr_pkt_type_req.c | 12 +++---- prov/efa/src/rdm/rxr_rma.c | 10 +++--- 13 files changed, 36 insertions(+), 106 deletions(-) diff --git a/prov/efa/src/rdm/rxr_atomic.c b/prov/efa/src/rdm/rxr_atomic.c index 4f647b47884..8a856efc048 100644 --- a/prov/efa/src/rdm/rxr_atomic.c +++ b/prov/efa/src/rdm/rxr_atomic.c @@ -200,7 +200,6 @@ ssize_t rxr_atomic_generic_efa(struct rxr_ep *rxr_ep, err = 
rxr_pkt_post_req(rxr_ep, tx_entry, RXR_DC_WRITE_RTA_PKT, - 0, 0); } else { /* @@ -211,7 +210,6 @@ ssize_t rxr_atomic_generic_efa(struct rxr_ep *rxr_ep, err = rxr_pkt_post_req(rxr_ep, tx_entry, req_pkt_type_list[op], - 0, 0); } diff --git a/prov/efa/src/rdm/rxr_ep.c b/prov/efa/src/rdm/rxr_ep.c index 21944a3ab65..4cc1871f80c 100644 --- a/prov/efa/src/rdm/rxr_ep.c +++ b/prov/efa/src/rdm/rxr_ep.c @@ -1935,8 +1935,7 @@ void rxr_ep_progress_internal(struct rxr_ep *ep) continue; assert(op_entry->rxr_flags & RXR_OP_ENTRY_QUEUED_CTRL); - ret = rxr_pkt_post(ep, op_entry, op_entry->queued_ctrl.type, - op_entry->queued_ctrl.inject, 0); + ret = rxr_pkt_post(ep, op_entry, op_entry->queued_ctrl_type, 0); if (ret == -FI_EAGAIN) break; @@ -2005,7 +2004,7 @@ void rxr_ep_progress_internal(struct rxr_ep *ep) if (peer->flags & EFA_RDM_PEER_IN_BACKOFF) break; - ret = rxr_pkt_post(ep, op_entry, RXR_DATA_PKT, false, flags); + ret = rxr_pkt_post(ep, op_entry, RXR_DATA_PKT, flags); if (OFI_UNLIKELY(ret)) { if (ret == -FI_EAGAIN) goto out; diff --git a/prov/efa/src/rdm/rxr_msg.c b/prov/efa/src/rdm/rxr_msg.c index 84f78f38c44..54e9068e2cf 100644 --- a/prov/efa/src/rdm/rxr_msg.c +++ b/prov/efa/src/rdm/rxr_msg.c @@ -156,7 +156,7 @@ ssize_t rxr_msg_post_rtm(struct rxr_ep *ep, struct rxr_op_entry *tx_entry, int u if (rtm_type < RXR_EXTRA_REQ_PKT_BEGIN) { /* rtm requires only baseline feature, which peer should always support. 
*/ - return rxr_pkt_post_req(ep, tx_entry, rtm_type, 0, 0); + return rxr_pkt_post_req(ep, tx_entry, rtm_type, 0); } /* @@ -172,7 +172,7 @@ ssize_t rxr_msg_post_rtm(struct rxr_ep *ep, struct rxr_op_entry *tx_entry, int u if (!rxr_pkt_req_supported_by_peer(rtm_type, peer)) return -FI_EOPNOTSUPP; - return rxr_pkt_post_req(ep, tx_entry, rtm_type, 0, 0); + return rxr_pkt_post_req(ep, tx_entry, rtm_type, 0); } ssize_t rxr_msg_generic_send(struct fid_ep *ep, const struct fi_msg *msg, diff --git a/prov/efa/src/rdm/rxr_op_entry.c b/prov/efa/src/rdm/rxr_op_entry.c index 96158963c8f..75fc510c8e1 100644 --- a/prov/efa/src/rdm/rxr_op_entry.c +++ b/prov/efa/src/rdm/rxr_op_entry.c @@ -1006,8 +1006,6 @@ void rxr_op_entry_handle_recv_completed(struct rxr_op_entry *op_entry) { struct rxr_op_entry *tx_entry = NULL; struct rxr_op_entry *rx_entry = NULL; - struct efa_rdm_peer *peer; - bool inject; int err; /* It is important to write completion before sending ctrl packet, because the @@ -1071,19 +1069,11 @@ void rxr_op_entry_handle_recv_completed(struct rxr_op_entry *op_entry) * * Hence, the rx_entry can be safely released only when we got * the send completion of the ctrl packet. - * - * Another interesting point is that when inject was used, the - * rx_entry was released by rxr_pkt_post_or_queue(), because - * when inject was used, lower device will not provider send - * completion for the ctrl packet. */ if (op_entry->rxr_flags & RXR_TX_ENTRY_DELIVERY_COMPLETE_REQUESTED) { assert(op_entry->type == RXR_RX_ENTRY); rx_entry = op_entry; /* Intentionally assigned for easier understanding */ - peer = rxr_ep_get_peer(rx_entry->ep, rx_entry->addr); - assert(peer); - inject = peer->is_local && rx_entry->ep->use_shm_for_tx; - err = rxr_pkt_post_or_queue(rx_entry->ep, rx_entry, RXR_RECEIPT_PKT, inject); + err = rxr_pkt_post_or_queue(rx_entry->ep, rx_entry, RXR_RECEIPT_PKT); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "Posting of ctrl packet failed when complete rx! 
err=%s(%d)\n", diff --git a/prov/efa/src/rdm/rxr_op_entry.h b/prov/efa/src/rdm/rxr_op_entry.h index ccba3922bc3..d94d0020ed2 100644 --- a/prov/efa/src/rdm/rxr_op_entry.h +++ b/prov/efa/src/rdm/rxr_op_entry.h @@ -56,11 +56,6 @@ enum rxr_op_comm_type { RXR_RX_RECV, /* rx_entry large msg recv data pkts */ }; -struct rxr_queued_ctrl_info { - int type; - int inject; -}; - struct rxr_atomic_hdr { /* atomic_op is different from tx_op */ uint32_t atomic_op; @@ -116,7 +111,7 @@ struct rxr_op_entry { uint64_t total_len; enum rxr_op_comm_type state; - struct rxr_queued_ctrl_info queued_ctrl; + int queued_ctrl_type; uint64_t fi_flags; uint16_t rxr_flags; diff --git a/prov/efa/src/rdm/rxr_pkt_cmd.c b/prov/efa/src/rdm/rxr_pkt_cmd.c index c57cf634db3..922ac9530eb 100644 --- a/prov/efa/src/rdm/rxr_pkt_cmd.c +++ b/prov/efa/src/rdm/rxr_pkt_cmd.c @@ -261,7 +261,6 @@ void rxr_pkt_handle_ctrl_sent(struct rxr_ep *rxr_ep, struct rxr_pkt_entry *pkt_e * @param[in] rxr_ep endpoint * @param[in] x_entry pointer to rxr_op_entry. (either a tx_entry or an rx_entry) * @param[in] pkt_type packet type. - * @param[in] inject send control packet via inject or not. * @param[in] flags additional flags to apply for fi_sendmsg. * currently only accepted flags is FI_MORE. * @return On success return 0, otherwise return a negative libfabric error code. @@ -270,7 +269,7 @@ void rxr_pkt_handle_ctrl_sent(struct rxr_ep *rxr_ep, struct rxr_pkt_entry *pkt_e */ static inline ssize_t rxr_pkt_post_one(struct rxr_ep *rxr_ep, struct rxr_op_entry *op_entry, - int pkt_type, bool inject, uint64_t flags) + int pkt_type, uint64_t flags) { struct rxr_pkt_entry *pkt_entry; struct efa_rdm_peer *peer; @@ -294,20 +293,10 @@ ssize_t rxr_pkt_post_one(struct rxr_ep *rxr_ep, struct rxr_op_entry *op_entry, return err; } - /* If the send (or inject) succeeded, the function rxr_pkt_entry_send - * (or rxr_pkt_entry_inject) will increase the counter in rxr_ep that - * tracks number of outstanding TX ops. 
+ /* If the send succeeded, the function rxr_pkt_entry_send will increase the + * counter in rxr_ep that tracks number of outstanding TX ops. */ - if (inject) { - /* - * Currently, the only accepted flags is FI_MORE, which is not - * compatible with inject. Add an additional check here to make - * sure flags is set by the caller correctly. - */ - assert(!flags); - err = rxr_pkt_entry_inject(rxr_ep, pkt_entry, addr); - } else - err = rxr_pkt_entry_send(rxr_ep, pkt_entry, flags); + err = rxr_pkt_entry_send(rxr_ep, pkt_entry, flags); if (OFI_UNLIKELY(err)) { rxr_pkt_entry_release_tx(rxr_ep, pkt_entry); @@ -317,14 +306,6 @@ ssize_t rxr_pkt_post_one(struct rxr_ep *rxr_ep, struct rxr_op_entry *op_entry, peer->flags |= EFA_RDM_PEER_REQ_SENT; rxr_pkt_handle_ctrl_sent(rxr_ep, pkt_entry); - /* If injection succeeded, packet should be considered as sent completed. - * therefore call rxr_pkt_handle_send_completion(). - * rxr_pkt_handle_send_completion() will release pkt_entry and decrease - * the counter in rxr_ep that tracks number of outstanding TX ops. - */ - if (inject) - rxr_pkt_handle_send_completion(rxr_ep, pkt_entry); - return 0; } @@ -337,19 +318,16 @@ ssize_t rxr_pkt_post_one(struct rxr_ep *rxr_ep, struct rxr_op_entry *op_entry, * @param[in] rxr_ep endpoint * @param[in] op_entry pointer to rxr_op_entry. (either a tx_entry or an rx_entry) * @param[in] pkt_type packet type. - * @param[in] inject send control packet via inject or not. * @return On success return 0, otherwise return a negative libfabric error code. 
Possible error codes include: * -FI_EAGAIN temporarily out of resource */ -ssize_t rxr_pkt_post(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int pkt_type, bool inject, uint64_t flags) +ssize_t rxr_pkt_post(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int pkt_type, uint64_t flags) { ssize_t err; size_t num_req, i; uint64_t extra_flags; if (rxr_pkt_type_is_mulreq(pkt_type)) { - assert(!inject); - if(rxr_pkt_type_is_runt(pkt_type)) rxr_tx_entry_set_runt_size(ep, op_entry); @@ -363,7 +341,7 @@ ssize_t rxr_pkt_post(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int pkt_t for (i = 0; i < num_req; ++i) { extra_flags = (i == num_req - 1) ? 0 : FI_MORE; - err = rxr_pkt_post_one(ep, op_entry, pkt_type, 0, flags | extra_flags); + err = rxr_pkt_post_one(ep, op_entry, pkt_type, flags | extra_flags); if (OFI_UNLIKELY(err)) return err; } @@ -372,7 +350,7 @@ ssize_t rxr_pkt_post(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int pkt_t return 0; } - return rxr_pkt_post_one(ep, op_entry, pkt_type, inject, flags); + return rxr_pkt_post_one(ep, op_entry, pkt_type, flags); } /** @@ -388,19 +366,17 @@ ssize_t rxr_pkt_post(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int pkt_t * @param[in] rxr_ep endpoint * @param[in] x_entry pointer to rxr_op_entry. (either a tx_entry or an rx_entry) * @param[in] pkt_type packet type. - * @param[in] inject send control packet via inject or not. * @return On success return 0, otherwise return a negative libfabric error code. 
*/ -ssize_t rxr_pkt_post_or_queue(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int pkt_type, bool inject) +ssize_t rxr_pkt_post_or_queue(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int pkt_type) { ssize_t err; - err = rxr_pkt_post(ep, op_entry, pkt_type, inject, 0); + err = rxr_pkt_post(ep, op_entry, pkt_type, 0); if (err == -FI_EAGAIN) { assert(!(op_entry->rxr_flags & RXR_OP_ENTRY_QUEUED_RNR)); op_entry->rxr_flags |= RXR_OP_ENTRY_QUEUED_CTRL; - op_entry->queued_ctrl.type = pkt_type; - op_entry->queued_ctrl.inject = inject; + op_entry->queued_ctrl_type = pkt_type; dlist_insert_tail(&op_entry->queued_ctrl_entry, &ep->op_entry_queued_ctrl_list); err = 0; @@ -429,20 +405,18 @@ ssize_t rxr_pkt_post_or_queue(struct rxr_ep *ep, struct rxr_op_entry *op_entry, * @param[in] rxr_ep endpoint * @param[in] op_entry pointer to rxr_op_entry. (either a tx_entry or an rx_entry) * @param[in] pkt_type packet type. - * @param[in] inject send control packet via inject or not. * @return On success return 0, otherwise return a negative libfabric error code. 
*/ -ssize_t rxr_pkt_post_req(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int req_type, bool inject, uint64_t flags) +ssize_t rxr_pkt_post_req(struct rxr_ep *ep, struct rxr_op_entry *op_entry, int req_type, uint64_t flags) { assert(op_entry->type == RXR_TX_ENTRY); assert(req_type >= RXR_REQ_PKT_BEGIN); if (rxr_pkt_type_is_mulreq(req_type)) { - assert(!inject); - return rxr_pkt_post_or_queue(ep, op_entry, req_type, inject); + return rxr_pkt_post_or_queue(ep, op_entry, req_type); } - return rxr_pkt_post(ep, op_entry, req_type, inject, flags); + return rxr_pkt_post(ep, op_entry, req_type, flags); } /* @@ -503,7 +477,7 @@ ssize_t rxr_pkt_trigger_handshake(struct rxr_ep *ep, dlist_insert_tail(&tx_entry->ep_entry, &ep->tx_entry_list); - err = rxr_pkt_post(ep, tx_entry, RXR_EAGER_RTW_PKT, 0, 0); + err = rxr_pkt_post(ep, tx_entry, RXR_EAGER_RTW_PKT, 0); if (OFI_UNLIKELY(err)) return err; diff --git a/prov/efa/src/rdm/rxr_pkt_cmd.h b/prov/efa/src/rdm/rxr_pkt_cmd.h index 583504cd22e..6e233099705 100644 --- a/prov/efa/src/rdm/rxr_pkt_cmd.h +++ b/prov/efa/src/rdm/rxr_pkt_cmd.h @@ -37,13 +37,13 @@ #include "rxr.h" ssize_t rxr_pkt_post(struct rxr_ep *ep, struct rxr_op_entry *op_entry, - int pkt_type, bool inject, uint64_t flags); + int pkt_type, uint64_t flags); ssize_t rxr_pkt_post_or_queue(struct rxr_ep *ep, struct rxr_op_entry *op_entry, - int req_type, bool inject); + int req_type); ssize_t rxr_pkt_post_req(struct rxr_ep *ep, struct rxr_op_entry *tx_entry, - int req_type, bool inject, uint64_t flags); + int req_type, uint64_t flags); fi_addr_t rxr_pkt_determine_addr(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry); diff --git a/prov/efa/src/rdm/rxr_pkt_entry.c b/prov/efa/src/rdm/rxr_pkt_entry.c index 93da3c5ecff..48c619dbcf8 100644 --- a/prov/efa/src/rdm/rxr_pkt_entry.c +++ b/prov/efa/src/rdm/rxr_pkt_entry.c @@ -630,28 +630,6 @@ ssize_t rxr_pkt_entry_recv(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry, return err; } -ssize_t rxr_pkt_entry_inject(struct rxr_ep 
*ep, - struct rxr_pkt_entry *pkt_entry, - fi_addr_t addr) -{ - struct efa_rdm_peer *peer; - ssize_t ret; - - /* currently only EOR packet is injected using shm ep */ - peer = rxr_ep_get_peer(ep, addr); - assert(peer); - - assert(ep->use_shm_for_tx && peer->is_local); - ret = fi_inject(ep->shm_ep, rxr_pkt_start(pkt_entry), pkt_entry->pkt_size, - peer->shm_fiaddr); - - if (OFI_UNLIKELY(ret)) - return ret; - - rxr_ep_record_tx_op_submitted(ep, pkt_entry); - return 0; -} - /* * Functions for pkt_rx_map */ diff --git a/prov/efa/src/rdm/rxr_pkt_entry.h b/prov/efa/src/rdm/rxr_pkt_entry.h index 603c088e7b3..f09ea65f430 100644 --- a/prov/efa/src/rdm/rxr_pkt_entry.h +++ b/prov/efa/src/rdm/rxr_pkt_entry.h @@ -297,10 +297,6 @@ ssize_t rxr_pkt_entry_recv(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry, void **desc, uint64_t flags); -ssize_t rxr_pkt_entry_inject(struct rxr_ep *ep, - struct rxr_pkt_entry *pkt_entry, - fi_addr_t addr); - int rxr_pkt_entry_write(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry, void *local_buf, size_t len, void *desc, uint64_t remote_buf, size_t remote_key); diff --git a/prov/efa/src/rdm/rxr_pkt_type_data.c b/prov/efa/src/rdm/rxr_pkt_type_data.c index e1bd73f5953..3d6f302a177 100644 --- a/prov/efa/src/rdm/rxr_pkt_type_data.c +++ b/prov/efa/src/rdm/rxr_pkt_type_data.c @@ -176,7 +176,7 @@ void rxr_pkt_proc_data(struct rxr_ep *ep, return; if (!op_entry->window) { - err = rxr_pkt_post_or_queue(ep, op_entry, RXR_CTS_PKT, 0); + err = rxr_pkt_post_or_queue(ep, op_entry, RXR_CTS_PKT); if (err) { EFA_WARN(FI_LOG_CQ, "post CTS packet failed!\n"); rxr_rx_entry_handle_error(op_entry, -err, FI_EFA_ERR_PKT_POST); diff --git a/prov/efa/src/rdm/rxr_pkt_type_misc.c b/prov/efa/src/rdm/rxr_pkt_type_misc.c index 82852b0f0d9..8383be2791a 100644 --- a/prov/efa/src/rdm/rxr_pkt_type_misc.c +++ b/prov/efa/src/rdm/rxr_pkt_type_misc.c @@ -419,7 +419,7 @@ void rxr_pkt_handle_rma_read_completion(struct rxr_ep *ep, rxr_tracepoint(read_completed, rx_entry->msg_id, (size_t) 
rx_entry->cq_entry.op_context, rx_entry->total_len, (size_t) rx_entry); - err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_EOR_PKT, false); + err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_EOR_PKT); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "Posting of EOR failed! err=%s(%d)\n", diff --git a/prov/efa/src/rdm/rxr_pkt_type_req.c b/prov/efa/src/rdm/rxr_pkt_type_req.c index ab3d177facd..07c21316d3a 100644 --- a/prov/efa/src/rdm/rxr_pkt_type_req.c +++ b/prov/efa/src/rdm/rxr_pkt_type_req.c @@ -1450,7 +1450,7 @@ ssize_t rxr_pkt_proc_matched_rtm(struct rxr_ep *ep, ep->pending_recv_counter++; #endif rx_entry->state = RXR_RX_RECV; - ret = rxr_pkt_post_or_queue(ep, rx_entry, RXR_CTS_PKT, 0); + ret = rxr_pkt_post_or_queue(ep, rx_entry, RXR_CTS_PKT); return ret; } @@ -2006,7 +2006,7 @@ void rxr_pkt_handle_longcts_rtw_recv(struct rxr_ep *ep, #endif rx_entry->state = RXR_RX_RECV; rx_entry->tx_id = tx_id; - err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_CTS_PKT, 0); + err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_CTS_PKT); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "Cannot post CTS packet\n"); rxr_rx_entry_handle_error(rx_entry, -err, FI_EFA_ERR_PKT_POST); @@ -2155,7 +2155,7 @@ void rxr_pkt_handle_rtr_recv(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry) rx_entry->cq_entry.buf = rx_entry->iov[0].iov_base; rx_entry->total_len = rx_entry->cq_entry.len; - err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_READRSP_PKT, 0); + err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_READRSP_PKT); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "Posting of readrsp packet failed! err=%ld\n", err); efa_eq_write_error(&ep->base_ep.util_ep, FI_EIO, FI_EFA_ERR_PKT_POST); @@ -2428,7 +2428,7 @@ int rxr_pkt_proc_dc_write_rta(struct rxr_ep *ep, return ret; } - err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_RECEIPT_PKT, 0); + err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_RECEIPT_PKT); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, "Posting of receipt packet failed! 
err=%s\n", @@ -2512,7 +2512,7 @@ int rxr_pkt_proc_fetch_rta(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry) offset += rx_entry->iov[i].iov_len; } - err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_ATOMRSP_PKT, 0); + err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_ATOMRSP_PKT); if (OFI_UNLIKELY(err)) rxr_rx_entry_handle_error(rx_entry, -err, FI_EFA_ERR_PKT_POST); @@ -2594,7 +2594,7 @@ int rxr_pkt_proc_compare_rta(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry) } } - err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_ATOMRSP_PKT, 0); + err = rxr_pkt_post_or_queue(ep, rx_entry, RXR_ATOMRSP_PKT); if (OFI_UNLIKELY(err)) { efa_eq_write_error(&ep->base_ep.util_ep, FI_EIO, FI_EFA_ERR_PKT_POST); ofi_buf_free(rx_entry->atomrsp_data); diff --git a/prov/efa/src/rdm/rxr_rma.c b/prov/efa/src/rdm/rxr_rma.c index 6c241fed603..d404b511a9d 100644 --- a/prov/efa/src/rdm/rxr_rma.c +++ b/prov/efa/src/rdm/rxr_rma.c @@ -119,12 +119,12 @@ ssize_t rxr_rma_post_efa_emulated_read(struct rxr_ep *ep, struct rxr_op_entry *t #endif if (tx_entry->total_len < ep->mtu_size - sizeof(struct rxr_readrsp_hdr)) { - err = rxr_pkt_post_req(ep, tx_entry, RXR_SHORT_RTR_PKT, 0, 0); + err = rxr_pkt_post_req(ep, tx_entry, RXR_SHORT_RTR_PKT, 0); } else { assert(rxr_env.tx_min_credits > 0); tx_entry->window = MIN(tx_entry->total_len, rxr_env.tx_min_credits * ep->max_data_payload_size); - err = rxr_pkt_post_req(ep, tx_entry, RXR_LONGCTS_RTR_PKT, 0, 0); + err = rxr_pkt_post_req(ep, tx_entry, RXR_LONGCTS_RTR_PKT, 0); } if (OFI_UNLIKELY(err)) { @@ -372,7 +372,7 @@ ssize_t rxr_rma_post_write(struct rxr_ep *ep, struct rxr_op_entry *tx_entry) if (tx_entry->total_len >= rxr_ep_domain(ep)->hmem_info[iface].min_read_write_size && rxr_ep_determine_rdma_read_support(ep, tx_entry->addr, peer) && (tx_entry->desc[0] || efa_is_cache_available(rxr_ep_domain(ep)))) { - err = rxr_pkt_post_req(ep, tx_entry, RXR_LONGREAD_RTW_PKT, 0, 0); + err = rxr_pkt_post_req(ep, tx_entry, RXR_LONGREAD_RTW_PKT, 0); if (err != -FI_ENOMEM) return err; 
/* @@ -383,11 +383,11 @@ ssize_t rxr_rma_post_write(struct rxr_ep *ep, struct rxr_op_entry *tx_entry) if (tx_entry->total_len <= max_eager_rtw_data_size) { ctrl_type = delivery_complete_requested ? RXR_DC_EAGER_RTW_PKT : RXR_EAGER_RTW_PKT; - return rxr_pkt_post_req(ep, tx_entry, ctrl_type, 0, 0); + return rxr_pkt_post_req(ep, tx_entry, ctrl_type, 0); } ctrl_type = delivery_complete_requested ? RXR_DC_LONGCTS_RTW_PKT : RXR_LONGCTS_RTW_PKT; - return rxr_pkt_post_req(ep, tx_entry, ctrl_type, 0, 0); + return rxr_pkt_post_req(ep, tx_entry, ctrl_type, 0); } ssize_t rxr_rma_writemsg(struct fid_ep *ep,