From e80fd9fb1b3caf19bdf4d9348ba13ed0220cd4eb Mon Sep 17 00:00:00 2001
From: Philip Marshall
Date: Tue, 10 Sep 2024 14:48:46 -0700
Subject: [PATCH] src: Continue to resolve build errors encountered in CI

Add SHMEM_TRANSPORT_PROBE_ALL() and SHMEM_TRANSPORT_PROBE_ALL_SPINLOCK()
to shmem_synchronization.h.  When USE_OFI is defined they call
shmem_transport_probe() once for every index below
shmem_transport_ofi_num_nics (the SPINLOCK variant also runs
SPINLOCK_BODY() after each probe); otherwise they call
shmem_transport_probe(0).  The SHMEM_*_POLL macros and the wait/test
templates in synchronization_c.c4 now use these macros instead of
open-coded loops over shmem_transport_ofi_num_nics.

The probe macros expand to "do { ... } while (0)" with no trailing
semicolon, so every call site supplies its own ';' and the macros stay
usable as a single statement in unbraced if/else bodies.

Transport signature updates:
 - transport_none.h, transport_ucx.h, transport_portals4.h: add a
   trailing "size_t nic_idx" parameter to shmem_transport_atomicv();
   transport_none.h and transport_ucx.h also add it to
   shmem_transport_put_nb().
 - transport_none.h: drop "size_t nic_idx" from
   shmem_transport_put_wait().
 - transport_portals4.h: forward nic_idx from the swap_nbi, cswap_nbi
   and fetch_atomic_nbi wrappers to the blocking calls they wrap.
 - transport_ucx.h: forward nic_idx from shmem_transport_mswap() to
   shmem_transport_atomic_fetch() and shmem_transport_cswap().
---
 src/shmem_synchronization.h | 81 ++++++++++++++++++++++---------------
 src/synchronization_c.c4    | 38 ++++++++---------
 src/transport_none.h        |  6 +--
 src/transport_portals4.h    |  8 ++--
 src/transport_ucx.h         |  8 ++--
 5 files changed, 79 insertions(+), 62 deletions(-)

diff --git a/src/shmem_synchronization.h b/src/shmem_synchronization.h
index 4e15a926..de9f0026 100644
--- a/src/shmem_synchronization.h
+++ b/src/shmem_synchronization.h
@@ -99,42 +99,59 @@ shmem_internal_fence(shmem_ctx_t ctx)
 
 #define SHMEM_TEST(type, a, b, ret) COMP(type, SYNC_LOAD(a), b, ret)
 
-#define SHMEM_WAIT_POLL(var, value)                                                       \
-    do {                                                                                  \
-        while (SYNC_LOAD(var) == value) {                                                 \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { \
-                shmem_transport_probe(nic_idx);                                           \
-                SPINLOCK_BODY();                                                          \
-            }                                                                             \
-        }                                                                                 \
+#ifdef USE_OFI
+#define SHMEM_TRANSPORT_PROBE_ALL()                                                   \
+    do {                                                                              \
+        for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { \
+            shmem_transport_probe(nic_idx);                                           \
+        }                                                                             \
+    } while (0)
+#define SHMEM_TRANSPORT_PROBE_ALL_SPINLOCK()                                          \
+    do {                                                                              \
+        for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { \
+            shmem_transport_probe(nic_idx);                                           \
+            SPINLOCK_BODY();                                                          \
+        }                                                                             \
+    } while (0)
+#else
+#define SHMEM_TRANSPORT_PROBE_ALL()                                                   \
+    do {                                                                              \
+        shmem_transport_probe(0);                                                     \
+    } while (0)
+#define SHMEM_TRANSPORT_PROBE_ALL_SPINLOCK()                                          \
+    do {                                                                              \
+        shmem_transport_probe(0);                                                     \
+        SPINLOCK_BODY();                                                              \
+    } while (0)
+#endif
+
+#define SHMEM_WAIT_POLL(var, value)                                         \
+    do {                                                                    \
+        while (SYNC_LOAD(var) == value) {                                   \
+            SHMEM_TRANSPORT_PROBE_ALL_SPINLOCK();                           \
+        }                                                                   \
     } while(0)
 
-#define SHMEM_WAIT_UNTIL_POLL(var, cond, value)                                           \
-    do {                                                                                  \
-        int cmpret;                                                                       \
-                                                                                          \
-        COMP(cond, SYNC_LOAD(var), value, cmpret);                                        \
-        while (!cmpret) {                                                                 \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { \
-                shmem_transport_probe(nic_idx);                                           \
-                SPINLOCK_BODY();                                                          \
-            }                                                                             \
-            COMP(cond, SYNC_LOAD(var), value, cmpret);                                    \
-        }                                                                                 \
+#define SHMEM_WAIT_UNTIL_POLL(var, cond, value)                             \
+    do {                                                                    \
+        int cmpret;                                                         \
+                                                                            \
+        COMP(cond, SYNC_LOAD(var), value, cmpret);                          \
+        while (!cmpret) {                                                   \
+            SHMEM_TRANSPORT_PROBE_ALL_SPINLOCK();                           \
+            COMP(cond, SYNC_LOAD(var), value, cmpret);                      \
+        }                                                                   \
     } while(0)
 
-#define SHMEM_SIGNAL_WAIT_UNTIL_POLL(var, cond, value, sat_value)                         \
-    do {                                                                                  \
-        int cmpret;                                                                       \
-                                                                                          \
-        COMP_SIGNAL(cond, SYNC_LOAD(var), value, cmpret, sat_value);                      \
-        while (!cmpret) {                                                                 \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { \
-                shmem_transport_probe(nic_idx);                                           \
-                SPINLOCK_BODY();                                                          \
-            }                                                                             \
-            COMP_SIGNAL(cond, SYNC_LOAD(var), value, cmpret, sat_value);                  \
-        }                                                                                 \
+#define SHMEM_SIGNAL_WAIT_UNTIL_POLL(var, cond, value, sat_value)           \
+    do {                                                                    \
+        int cmpret;                                                         \
+                                                                            \
+        COMP_SIGNAL(cond, SYNC_LOAD(var), value, cmpret, sat_value);        \
+        while (!cmpret) {                                                   \
+            SHMEM_TRANSPORT_PROBE_ALL_SPINLOCK();                           \
+            COMP_SIGNAL(cond, SYNC_LOAD(var), value, cmpret, sat_value);    \
+        }                                                                   \
     } while(0)
 
 #define SHMEM_WAIT_BLOCK(var, value) \
diff --git a/src/synchronization_c.c4 b/src/synchronization_c.c4
index 6fe67b64..271bc065 100644
--- a/src/synchronization_c.c4
+++ b/src/synchronization_c.c4
@@ -232,7 +232,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL')
             } \
         } \
         if (nelems == 0 || num_ignored == nelems) { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
             return; \
         } \
         \
@@ -268,7 +268,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ALL')
         } \
         \
         if (nelems == 0 || num_ignored == nelems) { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); } \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
             return; \
         } \
         \
@@ -304,7 +304,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ALL_VECTOR')
             } \
         } \
         if (nelems == 0 || num_ignored == nelems) { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
             return SIZE_MAX; \
         } \
         \
@@ -324,7 +324,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ALL_VECTOR')
                     } \
                 } \
             } \
-            if (!cmpret) for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            if (!cmpret) SHMEM_TRANSPORT_PROBE_ALL(); \
         } \
         \
         shmem_internal_membar_acq_rel(); \
@@ -354,7 +354,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ANY')
             } \
         } \
         if (nelems == 0 || num_ignored == nelems) { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
             return SIZE_MAX; \
         } \
         \
@@ -374,7 +374,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ANY')
                     } \
                 } \
             } \
-            if (!cmpret) for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            if (!cmpret) SHMEM_TRANSPORT_PROBE_ALL(); \
         } \
         \
         shmem_internal_membar_acq_rel(); \
@@ -408,7 +408,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ANY_VECTOR')
             } \
         } \
         if (nelems == 0 || num_ignored == nelems) { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
             return 0; \
         } \
         \
@@ -423,7 +423,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ANY_VECTOR')
                     } \
                 } \
             } \
-            if (!cmpret) for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            if (!cmpret) SHMEM_TRANSPORT_PROBE_ALL(); \
         } \
         shmem_internal_membar_acq_rel(); \
         shmem_transport_syncmem(); \
@@ -456,7 +456,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_SOME')
             } \
         } \
         if (nelems == 0 || num_ignored == nelems) { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
             return 0; \
         } \
         \
@@ -471,7 +471,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_SOME')
                     } \
                 } \
             } \
-            if (!cmpret) for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            if (!cmpret) SHMEM_TRANSPORT_PROBE_ALL(); \
         } \
         shmem_internal_membar_acq_rel(); \
         shmem_transport_syncmem(); \
@@ -495,7 +495,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_SOME_VECTOR')
             shmem_internal_membar_acq_rel(); \
             shmem_transport_syncmem(); \
         } else { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
         } \
         return cmpret; \
     }
@@ -520,7 +520,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST')
             int cmpret; \
             SHMEM_TEST(cond, &vars[i], value, cmpret); \
             if (!cmpret) { \
-                for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+                SHMEM_TRANSPORT_PROBE_ALL(); \
                 return 0; \
             } \
         } \
@@ -551,7 +551,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ALL')
             int cmpret; \
             SHMEM_TEST(cond, &vars[i], values[i], cmpret); \
             if (!cmpret) { \
-                for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+                SHMEM_TRANSPORT_PROBE_ALL(); \
                 return 0; \
             } \
         } \
@@ -596,7 +596,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ALL_VECTOR')
             shmem_internal_membar_acq_rel(); \
             shmem_transport_syncmem(); \
         } else \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
         \
         return found_idx; \
     }
@@ -635,7 +635,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ANY')
             shmem_internal_membar_acq_rel(); \
             shmem_transport_syncmem(); \
         } else \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
         \
         return found_idx; \
     }
@@ -666,7 +666,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ANY_VECTOR')
             } \
         } \
         if (nelems == 0 || num_ignored == nelems) { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
             return 0; \
         } \
         \
@@ -680,7 +680,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ANY_VECTOR')
                 } \
             } \
         } \
-        if (!cmpret) for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+        if (!cmpret) SHMEM_TRANSPORT_PROBE_ALL(); \
         shmem_internal_membar_acq_rel(); \
         shmem_transport_syncmem(); \
         return ncompleted; \
@@ -712,7 +712,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_SOME')
             } \
         } \
         if (nelems == 0 || num_ignored == nelems) { \
-            for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+            SHMEM_TRANSPORT_PROBE_ALL(); \
             return 0; \
         } \
         \
@@ -726,7 +726,7 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_SOME')
                 } \
             } \
         } \
-        if (!cmpret) for (size_t nic_idx = 0; nic_idx < shmem_transport_ofi_num_nics; nic_idx++) { shmem_transport_probe( nic_idx ); }; \
+        if (!cmpret) SHMEM_TRANSPORT_PROBE_ALL(); \
         shmem_internal_membar_acq_rel(); \
         shmem_transport_syncmem(); \
         return ncompleted; \
diff --git a/src/transport_none.h b/src/transport_none.h
index 3411be4a..63c6cb01 100644
--- a/src/transport_none.h
+++ b/src/transport_none.h
@@ -120,7 +120,7 @@ shmem_transport_put_scalar(shmem_transport_ctx_t* ctx, void *target, const void
 static inline
 void
 shmem_transport_put_nb(shmem_transport_ctx_t* ctx, void *target, const void *source, size_t len,
-                       int pe, long *completion)
+                       int pe, long *completion, size_t nic_idx)
 {
     RAISE_ERROR_STR("No path to peer");
 }
@@ -135,7 +135,7 @@ shmem_transport_put_signal_nbi(shmem_transport_ctx_t* ctx, void *target, const v
 
 static inline
 void
-shmem_transport_put_wait(shmem_transport_ctx_t* ctx, long *completion, size_t nic_idx)
+shmem_transport_put_wait(shmem_transport_ctx_t* ctx, long *completion)
 {
     /* No op */
 }
@@ -217,7 +217,7 @@ shmem_transport_atomic(shmem_transport_ctx_t* ctx, void *target, const void *sou
 static inline
 void
 shmem_transport_atomicv(shmem_transport_ctx_t* ctx, void *target, const void *source, size_t len,
-                        int pe, shm_internal_op_t op, shm_internal_datatype_t datatype, long *completion)
+                        int pe, shm_internal_op_t op, shm_internal_datatype_t datatype, long *completion, size_t nic_idx)
 {
     RAISE_ERROR_STR("No path to peer");
 }
diff --git a/src/transport_portals4.h b/src/transport_portals4.h
index 46f1aed8..c93bb624 100644
--- a/src/transport_portals4.h
+++ b/src/transport_portals4.h
@@ -762,7 +762,7 @@ shmem_transport_swap_nbi(shmem_transport_ctx_t* ctx, void *target,
                          int pe, ptl_datatype_t datatype, size_t nic_idx)
 {
     /* transport_swap already buffers the source argument */
-    shmem_transport_swap(ctx, target, source, dest, len, pe, datatype);
+    shmem_transport_swap(ctx, target, source, dest, len, pe, datatype, nic_idx);
 }
 
 
@@ -814,7 +814,7 @@ shmem_transport_cswap_nbi(shmem_transport_ctx_t* ctx, void *target,
                           ptl_datatype_t datatype, size_t nic_idx)
 {
     /* transport_cswap already buffers the source and operand arguments */
-    shmem_transport_cswap(ctx, target, source, dest, operand, len, pe, datatype);
+    shmem_transport_cswap(ctx, target, source, dest, operand, len, pe, datatype, nic_idx);
 }
 
 
@@ -894,7 +894,7 @@ shmem_transport_atomic(shmem_transport_ctx_t* ctx, void *target, const void *sou
 static inline
 void
 shmem_transport_atomicv(shmem_transport_ctx_t* ctx, void *target, const void *source, size_t len, int pe,
-                        ptl_op_t op, ptl_datatype_t datatype, long *completion)
+                        ptl_op_t op, ptl_datatype_t datatype, long *completion, size_t nic_idx)
 {
     int ret;
     ptl_pt_index_t pt;
@@ -1064,7 +1064,7 @@ shmem_transport_fetch_atomic_nbi(shmem_transport_ctx_t* ctx, void *target,
                                  ptl_datatype_t datatype, size_t nic_idx)
 {
     /* transport_fetch_atomic already buffers the source argument */
-    shmem_transport_fetch_atomic(ctx, target, source, dest, len, pe, op, datatype);
+    shmem_transport_fetch_atomic(ctx, target, source, dest, len, pe, op, datatype, nic_idx);
 }
 
 
diff --git a/src/transport_ucx.h b/src/transport_ucx.h
index 19ec9fe0..7e74710e 100644
--- a/src/transport_ucx.h
+++ b/src/transport_ucx.h
@@ -252,7 +252,7 @@ shmem_transport_put_scalar(shmem_transport_ctx_t* ctx, void *target, const void
 static inline
 void
 shmem_transport_put_nb(shmem_transport_ctx_t* ctx, void *target, const void *source, size_t len,
-                       int pe, long *completion)
+                       int pe, long *completion, size_t nic_idx)
 {
     ucs_status_t status;
     ucp_rkey_h rkey;
@@ -520,7 +520,7 @@ shmem_transport_atomic(shmem_transport_ctx_t* ctx, void *target, const void *sou
 static inline
 void
 shmem_transport_atomicv(shmem_transport_ctx_t* ctx, void *target, const void *source, size_t len,
-                        int pe, shm_internal_op_t op, shm_internal_datatype_t datatype, long *completion)
+                        int pe, shm_internal_op_t op, shm_internal_datatype_t datatype, long *completion, size_t nic_idx)
 {
     /* Used only by reductions, currently redirected to softwre reductions via
      * the shmem_transport_atomic_supported query below. */
@@ -690,11 +690,11 @@ shmem_transport_mswap(shmem_transport_ctx_t* ctx, void *target, const void *sour
     while (!done) {
         uint32_t v;
 
-        shmem_transport_atomic_fetch(ctx, &v, target, len, pe, datatype);
+        shmem_transport_atomic_fetch(ctx, &v, target, len, pe, datatype, nic_idx);
 
         uint32_t new = (v & ~*(uint32_t *)mask) | (*(uint32_t *)source & *(uint32_t *)mask);
 
-        shmem_transport_cswap(ctx, target, &new, dest, &v, len, pe, datatype);
+        shmem_transport_cswap(ctx, target, &new, dest, &v, len, pe, datatype, nic_idx);
 
         if (*(uint32_t *)dest == v) done = 1;
         /* Manual progress to avoid deadlock for application-level polling */