Skip to content

Commit

Permalink
prov/efa: fix efa multi recv setopt segfault
Browse files Browse the repository at this point in the history
efa uses the util srx and sets the minimum multi receive size through
the srx. However, the srx is not initialized until the endpoint is
enabled. So if the application calls setopt before FI_ENABLE, this
will segfault because the srx has not been initialized. Instead, we need
to save the multi recv size in the efa endpoint, so that the value is valid
during setopt, and then pass it into the util_srx creation to set the
multi recv size.

Signed-off-by: Alexia Ingerson <[email protected]>
Signed-off-by: Shi Jin <[email protected]>
  • Loading branch information
shijin-aws committed Dec 10, 2024
1 parent 95a7e84 commit 38adf06
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 10 deletions.
3 changes: 0 additions & 3 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1650,7 +1650,6 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname,
{
struct efa_rdm_ep *efa_rdm_ep;
int intval, ret;
struct util_srx_ctx *srx;

efa_rdm_ep = container_of(fid, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid);

Expand All @@ -1663,8 +1662,6 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname,
return -FI_EINVAL;

efa_rdm_ep->min_multi_recv_size = *(size_t *)optval;
srx = util_get_peer_srx(efa_rdm_ep->peer_srx_ep)->ep_fid.fid.context;
srx->min_multi_recv_size = *(size_t *)optval;
break;
case FI_OPT_EFA_RNR_RETRY:
if (optlen != sizeof(size_t))
Expand Down
12 changes: 5 additions & 7 deletions prov/efa/test/efa_unit_test_srx.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,19 @@ void test_efa_srx_min_multi_recv_size(struct efa_resource **state)
struct util_srx_ctx *srx_ctx;
size_t min_multi_recv_size_new;

efa_unit_test_resource_construct(resource, FI_EP_RDM);
efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM);

efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid);
srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep);
/*
* After ep is enabled, the srx->min_multi_recv_size should be
* exactly the same with ep->min_multi_recv_size
*/
assert_true(efa_rdm_ep->min_multi_recv_size == srx_ctx->min_multi_recv_size);
/* Set a new min_multi_recv_size via setopt*/
min_multi_recv_size_new = 1024;
assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV,
&min_multi_recv_size_new, sizeof(min_multi_recv_size_new)), 0);

/* Enable EP */
assert_int_equal(fi_enable(resource->ep), FI_SUCCESS);

/* Check whether srx->min_multi_recv_size is set correctly */
srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep);
assert_true(srx_ctx->min_multi_recv_size == min_multi_recv_size_new);
}

Expand Down

0 comments on commit 38adf06

Please sign in to comment.