diff --git a/prov/efa/src/efa_cntr.c b/prov/efa/src/efa_cntr.c index 776a7da4530..a1a3444146b 100644 --- a/prov/efa/src/efa_cntr.c +++ b/prov/efa/src/efa_cntr.c @@ -161,13 +161,13 @@ static void efa_rdm_cntr_progress(struct util_cntr *cntr) * some idle endpoints and never poll completions for them. Move these initial posts to * the first polling before having a long term fix. */ - if (!efa_cntr->initial_rx_to_all_eps_posted) { + if (efa_cntr->need_to_scan_ep_list) { dlist_foreach(&cntr->ep_list, item) { fid_entry = container_of(item, struct fid_list_entry, entry); efa_rdm_ep = container_of(fid_entry->fid, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid); efa_rdm_ep_post_internal_rx_pkts(efa_rdm_ep); } - efa_cntr->initial_rx_to_all_eps_posted = true; + efa_cntr->need_to_scan_ep_list = false; } dlist_foreach(&efa_cntr->ibv_cq_poll_list, item) { @@ -193,7 +193,7 @@ int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, return -FI_ENOMEM; dlist_init(&cntr->ibv_cq_poll_list); - cntr->initial_rx_to_all_eps_posted = false; + cntr->need_to_scan_ep_list = false; efa_domain = container_of(domain, struct efa_domain, util_domain.domain_fid); diff --git a/prov/efa/src/efa_cntr.h b/prov/efa/src/efa_cntr.h index 05227159d49..bcfde8784a2 100644 --- a/prov/efa/src/efa_cntr.h +++ b/prov/efa/src/efa_cntr.h @@ -13,7 +13,7 @@ struct efa_cntr { struct fid_cntr *shm_cntr; struct dlist_entry ibv_cq_poll_list; /* Only used by RDM EP type */ - bool initial_rx_to_all_eps_posted; + bool need_to_scan_ep_list; }; int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, diff --git a/prov/efa/src/rdm/efa_rdm_cq.c b/prov/efa/src/rdm/efa_rdm_cq.c index ae48a37807b..d7380623cd9 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.c +++ b/prov/efa/src/rdm/efa_rdm_cq.c @@ -631,13 +631,13 @@ static void efa_rdm_cq_progress(struct util_cq *cq) * some idle endpoints and never poll completions for them. Move these initial posts to * the first cq read call before having a long term fix. */ - if (!efa_rdm_cq->initial_rx_to_all_eps_posted) { + if (efa_rdm_cq->need_to_scan_ep_list) { dlist_foreach(&cq->ep_list, item) { fid_entry = container_of(item, struct fid_list_entry, entry); efa_rdm_ep = container_of(fid_entry->fid, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid); efa_rdm_ep_post_internal_rx_pkts(efa_rdm_ep); } - efa_rdm_cq->initial_rx_to_all_eps_posted = true; + efa_rdm_cq->need_to_scan_ep_list = false; } dlist_foreach(&efa_rdm_cq->ibv_cq_poll_list, item) { @@ -683,7 +683,7 @@ int efa_rdm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, attr->size = MAX(efa_domain->rdm_cq_size, attr->size); dlist_init(&cq->ibv_cq_poll_list); - cq->initial_rx_to_all_eps_posted = false; + cq->need_to_scan_ep_list = false; ret = ofi_cq_init(&efa_prov, domain, attr, &cq->util_cq, &efa_rdm_cq_progress, context); diff --git a/prov/efa/src/rdm/efa_rdm_cq.h b/prov/efa/src/rdm/efa_rdm_cq.h index 5bb7b2b80c0..4e88a8b7f63 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.h +++ b/prov/efa/src/rdm/efa_rdm_cq.h @@ -12,7 +12,7 @@ struct efa_rdm_cq { struct fid_cq *shm_cq; struct efa_ibv_cq ibv_cq; struct dlist_entry ibv_cq_poll_list; - bool initial_rx_to_all_eps_posted; + bool need_to_scan_ep_list; }; /* diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 282a384e325..5adcec05dff 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -1174,6 +1174,9 @@ int efa_rdm_ep_insert_cntr_ibv_cq_poll_list(struct efa_rdm_ep *ep) if (ret) return ret; } + ofi_genlock_lock(&efa_cntr->util_cntr.ep_list_lock); + efa_cntr->need_to_scan_ep_list = true; + ofi_genlock_unlock(&efa_cntr->util_cntr.ep_list_lock); } } @@ -1199,6 +1202,9 @@ int efa_rdm_ep_insert_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep) if (ret) return ret; } + ofi_genlock_lock(&tx_cq->util_cq.ep_list_lock); + tx_cq->need_to_scan_ep_list = true; + ofi_genlock_unlock(&tx_cq->util_cq.ep_list_lock); } if (rx_cq) { @@ -1211,6 +1217,9 @@ int efa_rdm_ep_insert_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep) if (ret) return ret; } + ofi_genlock_lock(&rx_cq->util_cq.ep_list_lock); + rx_cq->need_to_scan_ep_list = true; + ofi_genlock_unlock(&rx_cq->util_cq.ep_list_lock); } return FI_SUCCESS; diff --git a/prov/efa/test/efa_unit_test_cntr.c b/prov/efa/test/efa_unit_test_cntr.c index aeb44d51195..2aa2ea60927 100644 --- a/prov/efa/test/efa_unit_test_cntr.c +++ b/prov/efa/test/efa_unit_test_cntr.c @@ -121,7 +121,8 @@ void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state) efa_cntr = container_of(cntr, struct efa_cntr, util_cntr.cntr_fid); - assert_false(efa_cntr->initial_rx_to_all_eps_posted); + /* cntr read need to scan the ep list since a ep is bind */ + assert_true(efa_cntr->need_to_scan_ep_list); cnt = fi_cntr_read(cntr); /* No completion should be read */ @@ -132,7 +133,8 @@ void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state) assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, 0); assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0); - assert_true(efa_cntr->initial_rx_to_all_eps_posted); + /* scan is done */ + assert_false(efa_cntr->need_to_scan_ep_list); /* ep must be closed before cq/av/eq... */ fi_close(&resource->ep->fid); resource->ep = NULL; diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index a34f8e3f9bf..a130ad460c2 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -580,7 +580,8 @@ void test_efa_rdm_cq_post_initial_rx_pkts(struct efa_resource **state) assert_int_equal(efa_rdm_ep->efa_rx_pkts_posted, 0); assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0); - assert_false(efa_rdm_cq->initial_rx_to_all_eps_posted); + /* cq read need to scan the ep list since a ep is bind */ + assert_true(efa_rdm_cq->need_to_scan_ep_list); fi_cq_read(resource->cq, NULL, 0); /* At this time, rx pool size number of rx pkts are posted */ @@ -588,7 +589,8 @@ void test_efa_rdm_cq_post_initial_rx_pkts(struct efa_resource **state) assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, 0); assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0); - assert_true(efa_rdm_cq->initial_rx_to_all_eps_posted); + /* scan is done */ + assert_false(efa_rdm_cq->need_to_scan_ep_list); } #if HAVE_EFADV_CQ_EX /**