From f8136cd2047a68946b499465bec35006e00e809c Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Mon, 11 Nov 2024 22:06:25 +0000 Subject: [PATCH] prov/efa: Fix the error handling for unsolicited recv When getting an wc error for rdma with imm + unsolicited recv, there is no wr_id / pkt_entry associated. Libfabric should write the eq error directly. Also added unit tests to cover both solicited and unsolicited recv error path. Signed-off-by: Shi Jin --- prov/efa/Makefile.include | 3 +- prov/efa/src/rdm/efa_rdm_cq.c | 6 ++ prov/efa/test/efa_unit_test_cq.c | 93 +++++++++++++++++++++++++++++ prov/efa/test/efa_unit_test_mocks.c | 16 +++++ prov/efa/test/efa_unit_test_mocks.h | 8 +++ prov/efa/test/efa_unit_tests.c | 3 + prov/efa/test/efa_unit_tests.h | 2 + 7 files changed, 130 insertions(+), 1 deletion(-) diff --git a/prov/efa/Makefile.include b/prov/efa/Makefile.include index 4963fe404e3..fc065cb42e4 100644 --- a/prov/efa/Makefile.include +++ b/prov/efa/Makefile.include @@ -161,7 +161,8 @@ prov_efa_test_efa_unit_test_LDFLAGS = $(cmocka_rpath) $(efa_LDFLAGS) $(cmocka_LD -Wl,--wrap=efadv_query_device \ -Wl,--wrap=ofi_cudaMalloc \ -Wl,--wrap=ofi_copy_from_hmem_iov \ - -Wl,--wrap=efa_rdm_pke_read + -Wl,--wrap=efa_rdm_pke_read \ + -Wl,--wrap=efa_device_support_unsolicited_write_recv if HAVE_EFADV_CQ_EX prov_efa_test_efa_unit_test_LDFLAGS += -Wl,--wrap=efadv_create_cq diff --git a/prov/efa/src/rdm/efa_rdm_cq.c b/prov/efa/src/rdm/efa_rdm_cq.c index 67a02e55f3d..2d8c1d8811f 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.c +++ b/prov/efa/src/rdm/efa_rdm_cq.c @@ -487,6 +487,12 @@ void efa_rdm_cq_poll_ibv_cq(ssize_t cqe_to_process, struct efa_ibv_cq *ibv_cq) break; case IBV_WC_RECV: /* fall through */ case IBV_WC_RECV_RDMA_WITH_IMM: + if (efa_rdm_cq_wc_is_unsolicited(ibv_cq->ibv_cq_ex)) { + EFA_WARN(FI_LOG_CQ, "Receive error %s (%d) for unsolicited write recv", + efa_strerror(prov_errno), prov_errno); + efa_base_ep_write_eq_error(&ep->base_ep, to_fi_errno(prov_errno), prov_errno); + break; + } efa_rdm_pke_handle_rx_error(pkt_entry, prov_errno); break; default: diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index 76d45368e87..0c823d0f15b 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -345,6 +345,99 @@ void test_ibv_cq_ex_read_bad_recv_status(struct efa_resource **state) assert_int_equal(eq_err_entry.prov_errno, EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE); } +/** + * @brief verify that fi_cq_read/fi_eq_read works properly when rdma-core return bad status for + * recv rdma with imm. + * + * When getting a wc error of op code IBV_WC_RECV_RDMA_WITH_IMM, libfabric cannot find the + * corresponding application operation to write a cq error. + * It will write an EQ error instead. + * + * @param[in] state struct efa_resource that is managed by the framework + * @param[in] use_unsolicited_recv whether to use unsolicited write recv + */ +void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_impl(struct efa_resource **state, bool use_unsolicited_recv) +{ + struct efa_rdm_ep *efa_rdm_ep; + struct efa_resource *resource = *state; + struct fi_cq_data_entry cq_entry; + struct fi_eq_err_entry eq_err_entry; + int ret; + struct efa_rdm_cq *efa_rdm_cq; + + + efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); + + efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, util_cq.cq_fid.fid); + + efa_rdm_cq->ibv_cq.ibv_cq_ex->start_poll = &efa_mock_ibv_start_poll_return_mock; + efa_rdm_cq->ibv_cq.ibv_cq_ex->end_poll = &efa_mock_ibv_end_poll_check_mock; + efa_rdm_cq->ibv_cq.ibv_cq_ex->read_opcode = &efa_mock_ibv_read_opcode_return_mock; + efa_rdm_cq->ibv_cq.ibv_cq_ex->read_vendor_err = &efa_mock_ibv_read_vendor_err_return_mock; + efa_rdm_cq->ibv_cq.ibv_cq_ex->read_qp_num = &efa_mock_ibv_read_qp_num_return_mock; + + will_return(efa_mock_ibv_start_poll_return_mock, 0); + will_return(efa_mock_ibv_end_poll_check_mock, NULL); + /* efa_mock_ibv_read_opcode_return_mock() will be called once in release mode, + * but will be called twice in debug mode. because there is an assertion that called ibv_read_opcode(), + * therefore use will_return_always() + */ + will_return_always(efa_mock_ibv_read_opcode_return_mock, IBV_WC_RECV_RDMA_WITH_IMM); + will_return_always(efa_mock_ibv_read_qp_num_return_mock, efa_rdm_ep->base_ep.qp->qp_num); + will_return(efa_mock_ibv_read_vendor_err_return_mock, EFA_IO_COMP_STATUS_FLUSHED); + + g_efa_unit_test_mocks.efa_device_support_unsolicited_write_recv = &efa_mock_efa_device_support_unsolicited_write_recv; + +#if HAVE_CAPS_UNSOLICITED_WRITE_RECV + if (use_unsolicited_recv) { + efadv_cq_from_ibv_cq_ex(efa_rdm_cq->ibv_cq.ibv_cq_ex)->wc_is_unsolicited = &efa_mock_efadv_wc_is_unsolicited; + will_return(efa_mock_efa_device_support_unsolicited_write_recv, true); + will_return(efa_mock_efadv_wc_is_unsolicited, true); + efa_rdm_cq->ibv_cq.ibv_cq_ex->wr_id = 0; + } else { + /* + * For solicited write recv, it will consume an internal rx pkt + */ + will_return(efa_mock_efa_device_support_unsolicited_write_recv, false); + struct efa_rdm_pke *pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL); + assert_non_null(pkt_entry); + efa_rdm_ep->efa_rx_pkts_posted = efa_rdm_ep_get_rx_pool_size(efa_rdm_ep); + efa_rdm_cq->ibv_cq.ibv_cq_ex->wr_id = (uintptr_t)pkt_entry; + } +#else + /* + * Always test with solicited recv + */ + will_return(efa_mock_efa_device_support_unsolicited_write_recv, false); + struct efa_rdm_pke *pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL); + assert_non_null(pkt_entry); + efa_rdm_ep->efa_rx_pkts_posted = efa_rdm_ep_get_rx_pool_size(efa_rdm_ep); + efa_rdm_cq->ibv_cq.ibv_cq_ex->wr_id = (uintptr_t)pkt_entry; +#endif + /* the recv rdma with imm will not populate to application cq because it's an EFA internal error and + * and not related to any application operations. Currently we can only read the error from eq. + */ + efa_rdm_cq->ibv_cq.ibv_cq_ex->status = IBV_WC_GENERAL_ERR; + ret = fi_cq_read(resource->cq, &cq_entry, 1); + assert_int_equal(ret, -FI_EAGAIN); + + ret = fi_eq_readerr(resource->eq, &eq_err_entry, 0); + assert_int_equal(ret, sizeof(eq_err_entry)); + assert_int_not_equal(eq_err_entry.err, FI_SUCCESS); + assert_int_equal(eq_err_entry.prov_errno, EFA_IO_COMP_STATUS_FLUSHED); +} + +void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_unsolicited_recv(struct efa_resource **state) +{ + test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_impl(state, true); +} + +void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_solicited_recv(struct efa_resource **state) +{ + test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_impl(state, false); +} + /** * @brief verify that fi_cq_read/fi_cq_readerr works properly when ibv_start_poll failed. * diff --git a/prov/efa/test/efa_unit_test_mocks.c b/prov/efa/test/efa_unit_test_mocks.c index ee97098d001..75dd2bad732 100644 --- a/prov/efa/test/efa_unit_test_mocks.c +++ b/prov/efa/test/efa_unit_test_mocks.c @@ -182,6 +182,11 @@ uint32_t efa_mock_ibv_read_wc_flags_return_mock(struct ibv_cq_ex *current) return mock(); } +bool efa_mock_efadv_wc_is_unsolicited(struct efadv_cq *efadv_cq) +{ + return mock(); +} + int g_ofi_copy_from_hmem_iov_call_counter; ssize_t efa_mock_ofi_copy_from_hmem_iov_inc_counter(void *dest, size_t size, enum fi_hmem_iface hmem_iface, uint64_t device, @@ -197,6 +202,11 @@ int efa_mock_efa_rdm_pke_read_return_mock(struct efa_rdm_ope *ope) return mock(); } +bool efa_mock_efa_device_support_unsolicited_write_recv() +{ + return mock(); +} + struct efa_unit_test_mocks g_efa_unit_test_mocks = { .local_host_id = 0, .peer_host_id = 0, @@ -213,6 +223,7 @@ struct efa_unit_test_mocks g_efa_unit_test_mocks = { #endif .ofi_copy_from_hmem_iov = __real_ofi_copy_from_hmem_iov, .efa_rdm_pke_read = __real_efa_rdm_pke_read, + .efa_device_support_unsolicited_write_recv = __real_efa_device_support_unsolicited_write_recv, .ibv_is_fork_initialized = __real_ibv_is_fork_initialized, #if HAVE_EFADV_QUERY_MR .efadv_query_mr = __real_efadv_query_mr, @@ -347,6 +358,11 @@ int __wrap_efa_rdm_pke_read(struct efa_rdm_ope *ope) return g_efa_unit_test_mocks.efa_rdm_pke_read(ope); } +bool __wrap_efa_device_support_unsolicited_write_recv(void) +{ + return g_efa_unit_test_mocks.efa_device_support_unsolicited_write_recv(); +} + enum ibv_fork_status __wrap_ibv_is_fork_initialized(void) { return g_efa_unit_test_mocks.ibv_is_fork_initialized(); diff --git a/prov/efa/test/efa_unit_test_mocks.h b/prov/efa/test/efa_unit_test_mocks.h index ec9af71b7ec..3e764c91fb1 100644 --- a/prov/efa/test/efa_unit_test_mocks.h +++ b/prov/efa/test/efa_unit_test_mocks.h @@ -72,6 +72,8 @@ uint32_t efa_mock_ibv_read_qp_num_return_mock(struct ibv_cq_ex *current); uint32_t efa_mock_ibv_read_wc_flags_return_mock(struct ibv_cq_ex *current); +bool efa_mock_efadv_wc_is_unsolicited(struct efadv_cq *efadv_cq); + ssize_t __real_ofi_copy_from_hmem_iov(void *dest, size_t size, enum fi_hmem_iface hmem_iface, uint64_t device, const struct iovec *hmem_iov, @@ -85,8 +87,12 @@ ssize_t efa_mock_ofi_copy_from_hmem_iov_inc_counter(void *dest, size_t size, int __real_efa_rdm_pke_read(struct efa_rdm_ope *ope); +bool __real_efa_device_support_unsolicited_write_recv(); + int efa_mock_efa_rdm_pke_read_return_mock(struct efa_rdm_ope *ope); +bool efa_mock_efa_device_support_unsolicited_write_recv(void); + struct efa_unit_test_mocks { uint64_t local_host_id; @@ -118,6 +124,8 @@ struct efa_unit_test_mocks int (*efa_rdm_pke_read)(struct efa_rdm_ope *ope); + bool (*efa_device_support_unsolicited_write_recv)(void); + enum ibv_fork_status (*ibv_is_fork_initialized)(void); #if HAVE_EFADV_QUERY_MR diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index cf3bc976884..7c485058132 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -61,6 +61,7 @@ static int efa_unit_test_mocks_teardown(void **state) #endif .ofi_copy_from_hmem_iov = __real_ofi_copy_from_hmem_iov, .efa_rdm_pke_read = __real_efa_rdm_pke_read, + .efa_device_support_unsolicited_write_recv = __real_efa_device_support_unsolicited_write_recv, .ibv_is_fork_initialized = __real_ibv_is_fork_initialized, }; @@ -122,6 +123,8 @@ int main(void) cmocka_unit_test_setup_teardown(test_rdm_cq_read_bad_send_status_invalid_qpn, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_rdm_cq_read_bad_send_status_message_too_long, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_bad_recv_status, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_unsolicited_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_solicited_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_recover_forgotten_peer_ah, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_ignore_removed_peer, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_rdm_fallback_to_ibv_create_cq_ex_cq_read_ignore_forgotton_peer, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 5422295f1b3..c4605c8e962 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -137,6 +137,8 @@ void test_rdm_cq_read_bad_send_status_unreachable_receiver(); void test_rdm_cq_read_bad_send_status_invalid_qpn(); void test_rdm_cq_read_bad_send_status_message_too_long(); void test_ibv_cq_ex_read_bad_recv_status(); +void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_unsolicited_recv(); +void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_solicited_recv(); void test_ibv_cq_ex_read_recover_forgotten_peer_ah(); void test_rdm_fallback_to_ibv_create_cq_ex_cq_read_ignore_forgotton_peer(); void test_ibv_cq_ex_read_ignore_removed_peer();