diff --git a/src/mpi/errhan/errnames.txt b/src/mpi/errhan/errnames.txt index 1f63d201396..334b7633378 100644 --- a/src/mpi/errhan/errnames.txt +++ b/src/mpi/errhan/errnames.txt @@ -950,6 +950,11 @@ is too big (> MPIU_SHMW_GHND_SZ) **set_thread_affinity:Failed to set the async thread affinity **set_thread_affinity %d:Failed to set the async thread affinity to the logical processor [%d] +## MPIX_Stream +**ch3nostream:Stream is not supported in ch3. +**ch4nostream:No streams available. Configure --enable-thread-cs=per-vci and --with-ch4-max-vcis=# to enable streams. +**outofstream:No streams available. Use MPIR_CVAR_CH4_RESERVE_VCIS to reserve the number of streams that can be allocated. + # ----------------------------------------------------------------------------- # The following names are defined but not used (see the -careful option # for extracterrmsgs) (still to do: move the undefined names here) diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h index c2e21435593..9d8e831edb6 100644 --- a/src/mpid/ch3/include/mpidpre.h +++ b/src/mpid/ch3/include/mpidpre.h @@ -532,6 +532,9 @@ int MPID_Init(int required, int *provided); int MPID_InitCompleted( void ); +int MPID_Allocate_vci(int *vci); +int MPID_Deallocate_vci(int vci); + int MPID_Finalize(void); int MPID_Abort( MPIR_Comm *comm, int mpi_errno, int exit_code, const char *error_msg ); diff --git a/src/mpid/ch3/src/mpid_init.c b/src/mpid/ch3/src/mpid_init.c index 3218c978ec7..61475aa2c3f 100644 --- a/src/mpid/ch3/src/mpid_init.c +++ b/src/mpid/ch3/src/mpid_init.c @@ -265,6 +265,19 @@ int MPID_InitCompleted( void ) /* --END ERROR HANDLING-- */ } +int MPID_Allocate_vci(int *vci) +{ + int mpi_errno = MPI_SUCCESS; + *vci = 0; + MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3nostream"); + return mpi_errno; +} + +int MPID_Deallocate_vci(int vci) +{ + MPIR_Assert(0); + return MPI_SUCCESS; +} /* * Initialize the process group structure by using PMI calls. 
* This routine initializes PMI and uses PMI calls to setup the diff --git a/src/mpid/ch4/include/mpidch4.h b/src/mpid/ch4/include/mpidch4.h index 59983db2cf0..c783a3158e4 100644 --- a/src/mpid/ch4/include/mpidch4.h +++ b/src/mpid/ch4/include/mpidch4.h @@ -14,6 +14,8 @@ int MPID_Init(int, int *); int MPID_InitCompleted(void); +int MPID_Allocate_vci(int *vci); +int MPID_Deallocate_vci(int vci); MPL_STATIC_INLINE_PREFIX int MPID_Cancel_recv(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX; MPL_STATIC_INLINE_PREFIX int MPID_Cancel_send(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX; int MPID_Comm_disconnect(MPIR_Comm *); diff --git a/src/mpid/ch4/netmod/ofi/ofi_impl.h b/src/mpid/ch4/netmod/ofi/ofi_impl.h index c98b6d3147e..40d68780460 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_impl.h +++ b/src/mpid/ch4/netmod/ofi/ofi_impl.h @@ -50,7 +50,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_get_vni(int flag, MPIR_Comm * comm_ptr, #if MPIDI_CH4_MAX_VCIS == 1 return 0; #else - return MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag) % MPIDI_OFI_global.num_vnis; + int vni = MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag); + MPIR_Assert(vni < MPIDI_OFI_global.num_vnis); + return vni; #endif } diff --git a/src/mpid/ch4/netmod/ofi/ofi_init.c b/src/mpid/ch4/netmod/ofi/ofi_init.c index 3c4e235502d..1328e533fe5 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_init.c +++ b/src/mpid/ch4/netmod/ofi/ofi_init.c @@ -242,18 +242,6 @@ categories : minor version of the OFI library used with MPICH. If using this CVAR, it is recommended that the user also specifies a specific OFI provider. - - name : MPIR_CVAR_CH4_OFI_MAX_VNIS - category : CH4_OFI - type : int - default : 0 - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_LOCAL - description : >- - If set to positive, this CVAR specifies the maximum number of CH4 VNIs - that OFI netmod exposes. If set to 0 (the default) or bigger than - MPIR_CVAR_CH4_NUM_VCIS, the number of exposed VNIs is set to MPIR_CVAR_CH4_NUM_VCIS. 
- - name : MPIR_CVAR_CH4_OFI_MAX_RMA_SEP_CTX category : CH4_OFI type : int @@ -613,22 +601,10 @@ int MPIDI_OFI_init_local(int *tag_bits) /* Create transport level communication contexts. */ /* ------------------------------------------------------------------------ */ - int num_vnis = 1; - if (MPIR_CVAR_CH4_OFI_MAX_VNIS == 0 || MPIR_CVAR_CH4_OFI_MAX_VNIS > MPIDI_global.n_vcis) { - num_vnis = MPIDI_global.n_vcis; - } else { - num_vnis = MPIR_CVAR_CH4_OFI_MAX_VNIS; - } - - /* TODO: update num_vnis according to provider capabilities, such as - * prov_use->domain_attr->{tx,rx}_ctx_cnt + /* TODO: check provider capabilities, such as prov_use->domain_attr->{tx,rx}_ctx_cnt, + * abort if we can't support the requested number of vnis. */ - if (num_vnis > MPIDI_OFI_MAX_VNIS) { - num_vnis = MPIDI_OFI_MAX_VNIS; - } - /* for best performance, we ensure 1-to-1 vci/vni mapping. ref: MPIDI_OFI_vci_to_vni */ - /* TODO: allow less num_vnis. Option 1. runtime MOD; 2. override MPIDI_global.n_vcis */ - MPIR_Assert(num_vnis == MPIDI_global.n_vcis); + int num_vnis = MPIDI_global.n_total_vcis; /* Multiple vni without using domain require MPIDI_OFI_ENABLE_SCALABLE_ENDPOINTS */ #ifndef MPIDI_OFI_VNI_USE_DOMAIN diff --git a/src/mpid/ch4/netmod/ofi/ofi_win.c b/src/mpid/ch4/netmod/ofi/ofi_win.c index 61b1b3afad1..9592ab02f14 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_win.c +++ b/src/mpid/ch4/netmod/ofi/ofi_win.c @@ -310,7 +310,7 @@ static int win_set_per_win_sync(MPIR_Win * win) static void win_init_am(MPIR_Win * win) { - MPIDI_WIN(win, am_vci) %= MPIDI_OFI_global.num_vnis; + MPIR_Assert(MPIDI_WIN(win, am_vci) < MPIDI_OFI_global.num_vnis); } /* diff --git a/src/mpid/ch4/netmod/ucx/ucx_am.h b/src/mpid/ch4/netmod/ucx/ucx_am.h index 35818ee5f87..5672d9c62e0 100644 --- a/src/mpid/ch4/netmod/ucx/ucx_am.h +++ b/src/mpid/ch4/netmod/ucx/ucx_am.h @@ -54,8 +54,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_isend(int rank, MPIR_FUNC_ENTER; - int src_vni = src_vci % MPIDI_UCX_global.num_vnis; - int dst_vni 
= dst_vci % MPIDI_UCX_global.num_vnis; + int src_vni = src_vci; + int dst_vni = dst_vci; + MPIR_Assert(src_vni < MPIDI_UCX_global.num_vnis); + MPIR_Assert(dst_vni < MPIDI_UCX_global.num_vnis); ep = MPIDI_UCX_COMM_TO_EP(comm, rank, src_vni, dst_vni); int dt_contig; @@ -186,8 +188,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_send_hdr(int rank, MPIR_FUNC_ENTER; - int src_vni = src_vci % MPIDI_UCX_global.num_vnis; - int dst_vni = dst_vci % MPIDI_UCX_global.num_vnis; + int src_vni = src_vci; + int dst_vni = dst_vci; + MPIR_Assert(src_vni < MPIDI_UCX_global.num_vnis); + MPIR_Assert(dst_vni < MPIDI_UCX_global.num_vnis); ep = MPIDI_UCX_COMM_TO_EP(comm, rank, src_vni, dst_vni); /* initialize our portion of the hdr */ diff --git a/src/mpid/ch4/netmod/ucx/ucx_impl.h b/src/mpid/ch4/netmod/ucx/ucx_impl.h index 471ff9bbcc2..eaee42e02bb 100644 --- a/src/mpid/ch4/netmod/ucx/ucx_impl.h +++ b/src/mpid/ch4/netmod/ucx/ucx_impl.h @@ -120,7 +120,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_UCX_vci_to_vni(int vci) MPL_STATIC_INLINE_PREFIX int MPIDI_UCX_get_vni(int flag, MPIR_Comm * comm_ptr, int src_rank, int dst_rank, int tag) { - return MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag) % MPIDI_UCX_global.num_vnis; + int vni = MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag); + /* no modulo: num_vnis equals n_total_vcis, so the vci is expected to be in range */ + MPIR_Assert(vni < MPIDI_UCX_global.num_vnis); + return vni; } /* for rma, we need ensure rkey is consistent with the per-vni ep, diff --git a/src/mpid/ch4/netmod/ucx/ucx_init.c b/src/mpid/ch4/netmod/ucx/ucx_init.c index 63113575ef2..771262c6b19 100644 --- a/src/mpid/ch4/netmod/ucx/ucx_init.c +++ b/src/mpid/ch4/netmod/ucx/ucx_init.c @@ -8,29 +8,6 @@ #include "mpidu_bc.h" #include -/* -=== BEGIN_MPI_T_CVAR_INFO_BLOCK === - -categories : - - name : CH4_UCX - description : A category for CH4 UCX netmod variables - -cvars: - - name : MPIR_CVAR_CH4_UCX_MAX_VNIS - category : CH4_UCX - type : int - default : 0 - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_LOCAL - 
description : >- - If set to positive, this CVAR specifies the maximum number of CH4 VNIs - that UCX netmod exposes. If set to 0 (the default) or bigger than - MPIR_CVAR_CH4_NUM_VCIS, the number of exposed VNIs is set to MPIR_CVAR_CH4_NUM_VCIS. - -=== END_MPI_T_CVAR_INFO_BLOCK === -*/ - static void request_init_callback(void *request); static void request_init_callback(void *request) @@ -43,18 +20,8 @@ static void request_init_callback(void *request) static void init_num_vnis(void) { - int num_vnis = 1; - if (MPIR_CVAR_CH4_UCX_MAX_VNIS == 0 || MPIR_CVAR_CH4_UCX_MAX_VNIS > MPIDI_global.n_vcis) { - num_vnis = MPIDI_global.n_vcis; - } else { - num_vnis = MPIR_CVAR_CH4_UCX_MAX_VNIS; - } - - /* for best performance, we ensure 1-to-1 vci/vni mapping. ref: MPIDI_OFI_vci_to_vni */ - /* TODO: allow less num_vnis. Option 1. runtime MOD; 2. override MPIDI_global.n_vcis */ - MPIR_Assert(num_vnis == MPIDI_global.n_vcis); - - MPIDI_UCX_global.num_vnis = num_vnis; + /* TODO: check capabilities, abort if we can't support the requested number of vnis. 
*/ + MPIDI_UCX_global.num_vnis = MPIDI_global.n_total_vcis; } static int init_worker(int vni) diff --git a/src/mpid/ch4/netmod/ucx/ucx_win.c b/src/mpid/ch4/netmod/ucx/ucx_win.c index d81cef1e82d..f6576f8a6be 100644 --- a/src/mpid/ch4/netmod/ucx/ucx_win.c +++ b/src/mpid/ch4/netmod/ucx/ucx_win.c @@ -176,7 +176,7 @@ static int win_init(MPIR_Win * win) int mpi_errno = MPI_SUCCESS; MPIR_FUNC_ENTER; - MPIDI_WIN(win, am_vci) %= MPIDI_UCX_global.num_vnis; + MPIR_Assert(MPIDI_WIN(win, am_vci) < MPIDI_UCX_global.num_vnis); memset(&MPIDI_UCX_WIN(win), 0, sizeof(MPIDI_UCX_win_t)); diff --git a/src/mpid/ch4/shm/posix/posix_init.c b/src/mpid/ch4/shm/posix/posix_init.c index 43fa4c8c5c1..ba6cb2853ef 100644 --- a/src/mpid/ch4/shm/posix/posix_init.c +++ b/src/mpid/ch4/shm/posix/posix_init.c @@ -150,7 +150,7 @@ int MPIDI_POSIX_init_local(int *tag_bits /* unused */) MPIDI_POSIX_global.local_rank_0 = local_rank_0; - MPIDI_POSIX_global.num_vsis = MPIDI_global.n_vcis; + MPIDI_POSIX_global.num_vsis = MPIDI_global.n_total_vcis; /* This is used to track messages that the eager submodule was not ready to send. */ for (int vsi = 0; vsi < MPIDI_CH4_MAX_VCIS; vsi++) { mpi_errno = MPIDU_genq_private_pool_create_unsafe(MPIDI_POSIX_AM_HDR_POOL_CELL_SIZE, diff --git a/src/mpid/ch4/src/ch4_init.c b/src/mpid/ch4/src/ch4_init.c index c54c2226301..15acbd51d37 100644 --- a/src/mpid/ch4/src/ch4_init.c +++ b/src/mpid/ch4/src/ch4_init.c @@ -82,7 +82,17 @@ verbosity : MPI_T_VERBOSITY_USER_BASIC scope : MPI_T_SCOPE_LOCAL description : >- - Sets the number of VCIs that user needs (should be a subset of MPIDI_CH4_MAX_VCIS). + Sets the number of VCIs to be implicitly used (should be a subset of MPIDI_CH4_MAX_VCIS). + + - name : MPIR_CVAR_CH4_RESERVE_VCIS + category : CH4 + type : int + default : 0 + class : none + verbosity : MPI_T_VERBOSITY_USER_BASIC + scope : MPI_T_SCOPE_LOCAL + description : >- + Sets the number of VCIs that user can explicitly allocate (should be a subset of MPIDI_CH4_MAX_VCIS). 
- name : MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE category : COLLECTIVE @@ -404,16 +414,17 @@ int MPID_Init(int requested, int *provided) /* Initialize multiple VCIs */ /* TODO: add checks to ensure MPIDI_vci_t is padded or aligned to MPL_CACHELINE_SIZE */ - MPIDI_global.n_vcis = 1; - if (MPIR_CVAR_CH4_NUM_VCIS > 1) { - MPIDI_global.n_vcis = MPIR_CVAR_CH4_NUM_VCIS; - /* There are configured maxes that we need observe. */ - /* TODO: check them at configure time to avoid discrepancy */ - MPIR_Assert(MPIDI_global.n_vcis <= MPIDI_CH4_MAX_VCIS); - MPIR_Assert(MPIDI_global.n_vcis <= MPIR_REQUEST_NUM_POOLS); - } + MPIR_Assert(MPIR_CVAR_CH4_NUM_VCIS >= 1); /* number of vcis used in implicit vci hashing */ + MPIR_Assert(MPIR_CVAR_CH4_RESERVE_VCIS >= 0); /* maximum number of vcis can be reserved */ + + MPIDI_global.n_vcis = MPIR_CVAR_CH4_NUM_VCIS; + MPIDI_global.n_total_vcis = MPIDI_global.n_vcis + MPIR_CVAR_CH4_RESERVE_VCIS; + MPIDI_global.n_reserved_vcis = 0; + + MPIR_Assert(MPIDI_global.n_total_vcis <= MPIDI_CH4_MAX_VCIS); + MPIR_Assert(MPIDI_global.n_total_vcis <= MPIR_REQUEST_NUM_POOLS); - for (int i = 0; i < MPIDI_global.n_vcis; i++) { + for (int i = 0; i < MPIDI_global.n_total_vcis; i++) { int err; MPID_Thread_mutex_create(&MPIDI_VCI(i).lock, &err); MPIR_Assert(err == 0); @@ -507,6 +518,40 @@ int MPID_InitCompleted(void) goto fn_exit; } +int MPID_Allocate_vci(int *vci) +{ + int mpi_errno = MPI_SUCCESS; + + *vci = 0; +#if MPIDI_CH4_MAX_VCIS == 1 + MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch4nostream"); +#else + + if (MPIDI_global.n_vcis + MPIDI_global.n_reserved_vcis >= MPIDI_global.n_total_vcis) { + MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**outofstream"); + } else { + MPIDI_global.n_reserved_vcis++; + for (int i = MPIDI_global.n_vcis; i < MPIDI_global.n_total_vcis; i++) { + if (!MPIDI_VCI(i).allocated) { + MPIDI_VCI(i).allocated = true; + *vci = i; + break; + } + } + } +#endif + return mpi_errno; +} + +int MPID_Deallocate_vci(int vci) +{ + MPIR_Assert(vci < 
MPIDI_global.n_total_vcis && vci >= MPIDI_global.n_vcis); + MPIR_Assert(MPIDI_VCI(vci).allocated); + MPIDI_VCI(vci).allocated = false; + MPIDI_global.n_reserved_vcis--; + return MPI_SUCCESS; +} + int MPID_Finalize(void) { int mpi_errno; @@ -539,7 +584,7 @@ int MPID_Finalize(void) MPIR_Assert(err == 0); } - for (int i = 0; i < MPIDI_global.n_vcis; i++) { + for (int i = 0; i < MPIDI_global.n_total_vcis; i++) { int err; MPID_Thread_mutex_destroy(&MPIDI_VCI(i).lock, &err); MPIR_Assert(err == 0); diff --git a/src/mpid/ch4/src/ch4_progress.h b/src/mpid/ch4/src/ch4_progress.h index 74bf74d2451..5ba74f8c59b 100644 --- a/src/mpid/ch4/src/ch4_progress.h +++ b/src/mpid/ch4/src/ch4_progress.h @@ -47,7 +47,8 @@ extern int global_vci_poll_count; MPL_STATIC_INLINE_PREFIX int MPIDI_do_global_progress(void) { - if (MPIDI_global.n_vcis == 1 || !MPIDI_global.is_initialized || !MPIR_CVAR_CH4_GLOBAL_PROGRESS) { + if (MPIDI_global.n_total_vcis == 1 || !MPIDI_global.is_initialized || + !MPIR_CVAR_CH4_GLOBAL_PROGRESS) { return 0; } else { global_vci_poll_count++; @@ -153,7 +154,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_progress_test(MPID_Progress_state * state, in #else /* multiple vci */ if (MPIDI_do_global_progress()) { - for (int vci = 0; vci < MPIDI_global.n_vcis; vci++) { + for (int vci = 0; vci < MPIDI_global.n_total_vcis; vci++) { MPIDI_PROGRESS(vci); if (wait) { MPIDI_check_progress_made_vci(state, vci); @@ -201,10 +202,10 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_progress_state_init(MPID_Progress_state * st state->vci_count = 1; } else { /* global progress by default */ - for (int i = 0; i < MPIDI_global.n_vcis; i++) { + for (int i = 0; i < MPIDI_global.n_total_vcis; i++) { state->vci[i] = i; } - state->vci_count = MPIDI_global.n_vcis; + state->vci_count = MPIDI_global.n_total_vcis; } } @@ -215,8 +216,9 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_progress_state_init_count(MPID_Progress_stat #if MPIDI_CH4_MAX_VCIS == 1 state->progress_counts[0] = 
MPL_atomic_relaxed_load_int(&MPIDI_VCI(0).progress_count); #else - for (int i = 0; i < MPIDI_global.n_vcis; i++) { - state->progress_counts[i] = MPL_atomic_relaxed_load_int(&MPIDI_VCI(i).progress_count); + for (int i = 0; i < state->vci_count; i++) { + state->progress_counts[i] = + MPL_atomic_relaxed_load_int(&MPIDI_VCI(state->vci[i]).progress_count); } #endif } diff --git a/src/mpid/ch4/src/ch4_types.h b/src/mpid/ch4/src/ch4_types.h index e12430eaa2e..45d47795090 100644 --- a/src/mpid/ch4/src/ch4_types.h +++ b/src/mpid/ch4/src/ch4_types.h @@ -254,6 +254,7 @@ typedef struct MPIDI_per_vci { MPL_atomic_uint64_t exp_seq_no; MPL_atomic_uint64_t nxt_seq_no; + bool allocated; /* true while this vci is handed out by MPID_Allocate_vci */ char pad MPL_ATTR_ALIGNED(MPL_CACHELINE_SIZE); } MPIDI_per_vci_t; @@ -279,7 +280,9 @@ typedef struct MPIDI_CH4_Global_t { int my_sigusr1_count; #endif - int n_vcis; + int n_vcis; /* num of vcis used for implicit hashing */ + int n_reserved_vcis; /* num of reserved vcis currently allocated */ + int n_total_vcis; /* total num of vcis, must be >= n_vcis + n_reserved_vcis */ MPIDI_per_vci_t per_vci[MPIDI_CH4_MAX_VCIS]; #if defined(MPIDI_CH4_USE_WORK_QUEUES) diff --git a/src/mpid/ch4/src/ch4_vci.h b/src/mpid/ch4/src/ch4_vci.h index 0d8a4258b50..646a9482243 100644 --- a/src/mpid/ch4/src/ch4_vci.h +++ b/src/mpid/ch4/src/ch4_vci.h @@ -13,12 +13,7 @@ #define MPIDI_Request_get_vci(req) MPIR_REQUEST_POOL(req) #define MPIDI_VCI_INVALID (-1) -/* VCI hashing function (fast path) - * NOTE: The returned vci should always MOD NUMVCIS, where NUMVCIS is - * the number of VCIs determined at init time - * Potentially, we'd like to make it config constants of power of 2 - * TODO: move the MOD here. - */ +/* VCI hashing function (fast path) */ /* For consistent hashing, we may need differentiate between src and dst vci and whether * it is being called from sender side or receiver side (consdier intercomm). 
We use an @@ -47,7 +42,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_get_vci(int flag, MPIR_Comm * comm_ptr, MPL_STATIC_INLINE_PREFIX int MPIDI_get_vci(int flag, MPIR_Comm * comm_ptr, int src_rank, int dst_rank, int tag) { - return comm_ptr->seq; + return comm_ptr->seq % MPIDI_global.n_vcis; } #elif MPIDI_CH4_VCI_METHOD == MPICH_VCI__TAG @@ -59,13 +54,15 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_get_vci(int flag, MPIR_Comm * comm_ptr, MPL_STATIC_INLINE_PREFIX int MPIDI_get_vci(int flag, MPIR_Comm * comm_ptr, int src_rank, int dst_rank, int tag) { + int vci; if (!(flag & 0x1)) { /* src */ - return (tag == MPI_ANY_TAG) ? 0 : ((tag >> 10) & 0x1f); + vci = (tag == MPI_ANY_TAG) ? 0 : ((tag >> 10) & 0x1f); } else { /* dst */ - return (tag == MPI_ANY_TAG) ? 0 : ((tag >> 5) & 0x1f); + vci = (tag == MPI_ANY_TAG) ? 0 : ((tag >> 5) & 0x1f); } + return vci % MPIDI_global.n_vcis; } #elif MPIDI_CH4_VCI_METHOD == MPICH_VCI__IMPLICIT @@ -183,7 +180,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_get_receiver_vci(MPIR_Comm * comm, if (is_vci_restricted_to_zero(comm)) { vci_idx = 0; } else if (use_user_defined_vci) { - vci_idx = comm->hints[MPIR_COMM_HINT_RECEIVER_VCI]; + vci_idx = comm->hints[MPIR_COMM_HINT_RECEIVER_VCI] % MPIDI_global.n_vcis; } else { /* If mpi_any_tag and mpi_any_source can be used for recv, all messages * should be received on a single vci. 
Otherwise, messages sent from a diff --git a/src/mpid/ch4/src/ch4_wait.h b/src/mpid/ch4/src/ch4_wait.h index bf9c8e244d1..fede099c44d 100644 --- a/src/mpid/ch4/src/ch4_wait.h +++ b/src/mpid/ch4/src/ch4_wait.h @@ -68,7 +68,7 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_set_progress_vci_n(int n, MPIR_Request ** re } if (!found) { state->vci[idx++] = vci; - MPIR_Assert(vci < MPIDI_global.n_vcis); + MPIR_Assert(vci < MPIDI_global.n_total_vcis); } } state->vci_count = idx; diff --git a/src/mpid/ch4/src/mpidig_init.c b/src/mpid/ch4/src/mpidig_init.c index 8290759b9a8..58991296d1e 100644 --- a/src/mpid/ch4/src/mpidig_init.c +++ b/src/mpid/ch4/src/mpidig_init.c @@ -132,7 +132,7 @@ int MPIDIG_am_init(void) int mpi_errno = MPI_SUCCESS; MPIR_FUNC_ENTER; - for (int vci = 0; vci < MPIDI_global.n_vcis; vci++) { + for (int vci = 0; vci < MPIDI_global.n_total_vcis; vci++) { MPIDI_global.per_vci[vci].posted_list = NULL; MPIDI_global.per_vci[vci].unexp_list = NULL; @@ -236,7 +236,7 @@ void MPIDIG_am_finalize(void) MPIR_FUNC_ENTER; MPIDIU_map_destroy(MPIDI_global.win_map); - for (int vci = 0; vci < MPIDI_global.n_vcis; vci++) { + for (int vci = 0; vci < MPIDI_global.n_total_vcis; vci++) { MPIDU_genq_private_pool_destroy_unsafe(MPIDI_global.per_vci[vci].request_pool); MPIDU_genq_private_pool_destroy_unsafe(MPIDI_global.per_vci[vci].unexp_pack_buf_pool); }