diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 56e80bc146d..1387f6f8130 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -236,7 +236,9 @@ int efa_rdm_ep_create_buffer_pools(struct efa_rdm_ep *ep) ret = ofi_bufpool_create(&ep->user_rx_pkt_pool, sizeof(struct efa_rdm_pke), EFA_RDM_BUFPOOL_ALIGNMENT, - 0, ep->base_ep.info->rx_attr->size, 0); + ep->base_ep.info->rx_attr->size, + ep->base_ep.info->rx_attr->size, /* max count==chunk_cnt means pool is not allowed to grow */ + 0); if (ret) goto err_free; diff --git a/prov/efa/src/rdm/efa_rdm_ep_utils.c b/prov/efa/src/rdm/efa_rdm_ep_utils.c index 69812d0f90c..51e4e8c405a 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_utils.c +++ b/prov/efa/src/rdm/efa_rdm_ep_utils.c @@ -214,10 +214,8 @@ int efa_rdm_ep_post_user_recv_buf(struct efa_rdm_ep *ep, struct efa_rdm_ope *rxe assert(rxe->iov_count > 0 && rxe->iov_count <= ep->base_ep.info->rx_attr->iov_limit); assert(rxe->iov[0].iov_len >= ep->msg_prefix_size); pkt_entry = efa_rdm_pke_alloc(ep, ep->user_rx_pkt_pool, EFA_RDM_PKE_FROM_USER_RX_POOL); - if (OFI_UNLIKELY(!pkt_entry)) { - EFA_WARN(FI_LOG_EP_DATA, "Failed to allocate pkt_entry for user rx\n"); - return -FI_ENOMEM; - } + if (OFI_UNLIKELY(!pkt_entry)) + return -FI_EAGAIN; pkt_entry->ope = rxe; rxe->state = EFA_RDM_RXE_MATCHED; diff --git a/prov/efa/src/rdm/efa_rdm_msg.c b/prov/efa/src/rdm/efa_rdm_msg.c index cdbabe128c1..dc583c0255e 100644 --- a/prov/efa/src/rdm/efa_rdm_msg.c +++ b/prov/efa/src/rdm/efa_rdm_msg.c @@ -917,6 +917,9 @@ ssize_t efa_rdm_msg_generic_recv(struct efa_rdm_ep *ep, const struct fi_msg *msg } ret = efa_rdm_ep_post_user_recv_buf(ep, rxe, flags); + if (OFI_UNLIKELY(ret)) + efa_rdm_rxe_release(rxe); + ofi_genlock_unlock(srx_ctx->lock); } else if (op == ofi_op_tagged) { ret = util_srx_generic_trecv(ep->peer_srx_ep, msg->msg_iov, msg->desc, diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index f8dd2073df4..b87382c1a0f 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -1119,6 +1119,46 @@ void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state) free(recv_buff.buff); } +/** + * @brief When user posts more than rx size fi_recv, we should return eagain and make sure + * there is no rx entry leaked + */ +void test_efa_rdm_ep_zcpy_recv_eagain(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_unit_test_buff recv_buff; + int i; + struct efa_rdm_ep *efa_rdm_ep; + + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + assert_non_null(resource->hints); + + resource->hints->caps = FI_MSG; + + /* enable zero-copy recv mode in ep */ + test_efa_rdm_ep_use_zcpy_rx_impl(resource, false, true, true); + + efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); + + /* Construct a recv buffer with mr */ + efa_unit_test_buff_construct(&recv_buff, resource, 16); + + for (i = 0; i < efa_rdm_ep->base_ep.info->rx_attr->size; i++) + assert_int_equal(fi_recv(resource->ep, recv_buff.buff, recv_buff.size, fi_mr_desc(recv_buff.mr), FI_ADDR_UNSPEC, NULL), 0); + + /* we should have rx number of rx entry before and after the extra recv post */ + assert_true(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list) == efa_rdm_ep->base_ep.info->rx_attr->size); + assert_int_equal(fi_recv(resource->ep, recv_buff.buff, recv_buff.size, fi_mr_desc(recv_buff.mr), FI_ADDR_UNSPEC, NULL), -FI_EAGAIN); + assert_true(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list) == efa_rdm_ep->base_ep.info->rx_attr->size); + + /** + * the buf is still posted to rdma-core, so unregistering mr can + * return non-zero. Currently ignore this failure. + */ + (void) fi_close(&recv_buff.mr->fid); + free(recv_buff.buff); +} + /** * @brief when efa_rdm_ep_post_handshake_error failed due to pkt pool exhaustion, * make sure both txe is cleaned diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 7c485058132..2232ea36059 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -112,6 +112,7 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_no_mr_local, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_close_discard_posted_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_zcpy_recv_cancel, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_ep_zcpy_recv_eagain, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_post_handshake_error_handling_pke_exhaustion, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_dgram_cq_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index c4605c8e962..d44368bc81f 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -126,6 +126,7 @@ void test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy(); void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_no_mr_local(); void test_efa_rdm_ep_close_discard_posted_recv(); void test_efa_rdm_ep_zcpy_recv_cancel(); +void test_efa_rdm_ep_zcpy_recv_eagain(); void test_efa_rdm_ep_post_handshake_error_handling_pke_exhaustion(); void test_dgram_cq_read_empty_cq(); void test_ibv_cq_ex_read_empty_cq();