diff --git a/src/mpi/errhan/errnames.txt b/src/mpi/errhan/errnames.txt
index 1f63d201396..334b7633378 100644
--- a/src/mpi/errhan/errnames.txt
+++ b/src/mpi/errhan/errnames.txt
@@ -950,6 +950,11 @@ is too big (> MPIU_SHMW_GHND_SZ)
 **set_thread_affinity:Failed to set the async thread affinity
 **set_thread_affinity %d:Failed to set the async thread affinity to the logical processor [%d]
 
+## MPIX_Stream
+**ch3nostream:Stream is not supported in ch3.
+**ch4nostream:No streams available. Configure --enable-thread-cs=per-vci and --with-ch4-max-vcis=# to enable streams.
+**outofstream:No streams available. Use MPIR_CVAR_CH4_RESERVE_VCIS to reserve the number of streams that can be allocated.
+
 # -----------------------------------------------------------------------------
 # The following names are defined but not used (see the -careful option 
 # for extracterrmsgs) (still to do: move the undefined names here)
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index c2e21435593..9d8e831edb6 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -532,6 +532,9 @@ int MPID_Init(int required, int *provided);
 
 int MPID_InitCompleted( void );
 
+int MPID_Allocate_vci(int *vci);
+int MPID_Deallocate_vci(int vci);
+
 int MPID_Finalize(void);
 
 int MPID_Abort( MPIR_Comm *comm, int mpi_errno, int exit_code, const char *error_msg );
diff --git a/src/mpid/ch3/src/mpid_init.c b/src/mpid/ch3/src/mpid_init.c
index 3218c978ec7..61475aa2c3f 100644
--- a/src/mpid/ch3/src/mpid_init.c
+++ b/src/mpid/ch3/src/mpid_init.c
@@ -265,6 +265,19 @@ int MPID_InitCompleted( void )
     /* --END ERROR HANDLING-- */
 }
 
+int MPID_Allocate_vci(int *vci)        /* ch3 stub: always sets the **ch3nostream error */
+{
+    int mpi_errno = MPI_SUCCESS;
+    *vci = 0;                   /* leave the output defined even though the call fails */
+    MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3nostream");    /* executed unconditionally */
+    return mpi_errno;
+}
+
+int MPID_Deallocate_vci(int vci)       /* ch3 stub counterpart of MPID_Allocate_vci */
+{
+    MPIR_Assert(0);             /* presumably unreachable: ch3 allocation never succeeds */
+    return MPI_SUCCESS;
+}
 /*
  * Initialize the process group structure by using PMI calls.
  * This routine initializes PMI and uses PMI calls to setup the 
diff --git a/src/mpid/ch4/include/mpidch4.h b/src/mpid/ch4/include/mpidch4.h
index 59983db2cf0..c783a3158e4 100644
--- a/src/mpid/ch4/include/mpidch4.h
+++ b/src/mpid/ch4/include/mpidch4.h
@@ -14,6 +14,8 @@
 
 int MPID_Init(int, int *);
 int MPID_InitCompleted(void);
+int MPID_Allocate_vci(int *vci);
+int MPID_Deallocate_vci(int vci);
 MPL_STATIC_INLINE_PREFIX int MPID_Cancel_recv(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX;
 MPL_STATIC_INLINE_PREFIX int MPID_Cancel_send(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX;
 int MPID_Comm_disconnect(MPIR_Comm *);
diff --git a/src/mpid/ch4/netmod/ofi/ofi_impl.h b/src/mpid/ch4/netmod/ofi/ofi_impl.h
index c98b6d3147e..40d68780460 100644
--- a/src/mpid/ch4/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch4/netmod/ofi/ofi_impl.h
@@ -50,7 +50,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_get_vni(int flag, MPIR_Comm * comm_ptr,
 #if MPIDI_CH4_MAX_VCIS == 1
     return 0;
 #else
-    return MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag) % MPIDI_OFI_global.num_vnis;
+    int vni = MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag);
+    MPIR_Assert(vni < MPIDI_OFI_global.num_vnis);
+    return vni;
 #endif
 }
 
diff --git a/src/mpid/ch4/netmod/ofi/ofi_init.c b/src/mpid/ch4/netmod/ofi/ofi_init.c
index 3c4e235502d..1328e533fe5 100644
--- a/src/mpid/ch4/netmod/ofi/ofi_init.c
+++ b/src/mpid/ch4/netmod/ofi/ofi_init.c
@@ -242,18 +242,6 @@ categories :
         minor version of the OFI library used with MPICH. If using this CVAR,
         it is recommended that the user also specifies a specific OFI provider.
 
-    - name        : MPIR_CVAR_CH4_OFI_MAX_VNIS
-      category    : CH4_OFI
-      type        : int
-      default     : 0
-      class       : none
-      verbosity   : MPI_T_VERBOSITY_USER_BASIC
-      scope       : MPI_T_SCOPE_LOCAL
-      description : >-
-        If set to positive, this CVAR specifies the maximum number of CH4 VNIs
-        that OFI netmod exposes. If set to 0 (the default) or bigger than
-        MPIR_CVAR_CH4_NUM_VCIS, the number of exposed VNIs is set to MPIR_CVAR_CH4_NUM_VCIS.
-
     - name        : MPIR_CVAR_CH4_OFI_MAX_RMA_SEP_CTX
       category    : CH4_OFI
       type        : int
@@ -613,22 +601,10 @@ int MPIDI_OFI_init_local(int *tag_bits)
     /* Create transport level communication contexts.                           */
     /* ------------------------------------------------------------------------ */
 
-    int num_vnis = 1;
-    if (MPIR_CVAR_CH4_OFI_MAX_VNIS == 0 || MPIR_CVAR_CH4_OFI_MAX_VNIS > MPIDI_global.n_vcis) {
-        num_vnis = MPIDI_global.n_vcis;
-    } else {
-        num_vnis = MPIR_CVAR_CH4_OFI_MAX_VNIS;
-    }
-
-    /* TODO: update num_vnis according to provider capabilities, such as
-     * prov_use->domain_attr->{tx,rx}_ctx_cnt
+    /* TODO: check provider capabilities, such as prov_use->domain_attr->{tx,rx}_ctx_cnt,
+     *       abort if we can't support the requested number of vnis.
      */
-    if (num_vnis > MPIDI_OFI_MAX_VNIS) {
-        num_vnis = MPIDI_OFI_MAX_VNIS;
-    }
-    /* for best performance, we ensure 1-to-1 vci/vni mapping. ref: MPIDI_OFI_vci_to_vni */
-    /* TODO: allow less num_vnis. Option 1. runtime MOD; 2. override MPIDI_global.n_vcis */
-    MPIR_Assert(num_vnis == MPIDI_global.n_vcis);
+    int num_vnis = MPIDI_global.n_total_vcis;
 
     /* Multiple vni without using domain require MPIDI_OFI_ENABLE_SCALABLE_ENDPOINTS */
 #ifndef MPIDI_OFI_VNI_USE_DOMAIN
diff --git a/src/mpid/ch4/netmod/ofi/ofi_win.c b/src/mpid/ch4/netmod/ofi/ofi_win.c
index 61b1b3afad1..9592ab02f14 100644
--- a/src/mpid/ch4/netmod/ofi/ofi_win.c
+++ b/src/mpid/ch4/netmod/ofi/ofi_win.c
@@ -310,7 +310,7 @@ static int win_set_per_win_sync(MPIR_Win * win)
 
 static void win_init_am(MPIR_Win * win)
 {
-    MPIDI_WIN(win, am_vci) %= MPIDI_OFI_global.num_vnis;
+    MPIR_Assert(MPIDI_WIN(win, am_vci) < MPIDI_OFI_global.num_vnis);    /* must be in range */
 }
 
 /*
diff --git a/src/mpid/ch4/netmod/ucx/ucx_am.h b/src/mpid/ch4/netmod/ucx/ucx_am.h
index 35818ee5f87..5672d9c62e0 100644
--- a/src/mpid/ch4/netmod/ucx/ucx_am.h
+++ b/src/mpid/ch4/netmod/ucx/ucx_am.h
@@ -54,8 +54,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_isend(int rank,
 
     MPIR_FUNC_ENTER;
 
-    int src_vni = src_vci % MPIDI_UCX_global.num_vnis;
-    int dst_vni = dst_vci % MPIDI_UCX_global.num_vnis;
+    int src_vni = src_vci;
+    int dst_vni = dst_vci;
+    MPIR_Assert(src_vni < MPIDI_UCX_global.num_vnis);
+    MPIR_Assert(dst_vni < MPIDI_UCX_global.num_vnis);
     ep = MPIDI_UCX_COMM_TO_EP(comm, rank, src_vni, dst_vni);
 
     int dt_contig;
@@ -186,8 +188,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_send_hdr(int rank,
 
     MPIR_FUNC_ENTER;
 
-    int src_vni = src_vci % MPIDI_UCX_global.num_vnis;
-    int dst_vni = dst_vci % MPIDI_UCX_global.num_vnis;
+    int src_vni = src_vci;
+    int dst_vni = dst_vci;
+    MPIR_Assert(src_vni < MPIDI_UCX_global.num_vnis);
+    MPIR_Assert(dst_vni < MPIDI_UCX_global.num_vnis);
     ep = MPIDI_UCX_COMM_TO_EP(comm, rank, src_vni, dst_vni);
 
     /* initialize our portion of the hdr */
diff --git a/src/mpid/ch4/netmod/ucx/ucx_impl.h b/src/mpid/ch4/netmod/ucx/ucx_impl.h
index 471ff9bbcc2..eaee42e02bb 100644
--- a/src/mpid/ch4/netmod/ucx/ucx_impl.h
+++ b/src/mpid/ch4/netmod/ucx/ucx_impl.h
@@ -120,7 +120,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_UCX_vci_to_vni(int vci)
 MPL_STATIC_INLINE_PREFIX int MPIDI_UCX_get_vni(int flag, MPIR_Comm * comm_ptr,
                                                int src_rank, int dst_rank, int tag)
 {
-    return MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag) % MPIDI_UCX_global.num_vnis;
+    /* The vci is returned as the vni without a runtime MOD, so check that it is in range. */
+    int vni = MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag);
+    MPIR_Assert(vni < MPIDI_UCX_global.num_vnis);
+    return vni;
 }
 
 /* for rma, we need ensure rkey is consistent with the per-vni ep,
diff --git a/src/mpid/ch4/netmod/ucx/ucx_init.c b/src/mpid/ch4/netmod/ucx/ucx_init.c
index 63113575ef2..771262c6b19 100644
--- a/src/mpid/ch4/netmod/ucx/ucx_init.c
+++ b/src/mpid/ch4/netmod/ucx/ucx_init.c
@@ -8,29 +8,6 @@
 #include "mpidu_bc.h"
 #include <ucp/api/ucp.h>
 
-/*
-=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
-
-categories :
-    - name : CH4_UCX
-      description : A category for CH4 UCX netmod variables
-
-cvars:
-    - name        : MPIR_CVAR_CH4_UCX_MAX_VNIS
-      category    : CH4_UCX
-      type        : int
-      default     : 0
-      class       : none
-      verbosity   : MPI_T_VERBOSITY_USER_BASIC
-      scope       : MPI_T_SCOPE_LOCAL
-      description : >-
-        If set to positive, this CVAR specifies the maximum number of CH4 VNIs
-        that UCX netmod exposes. If set to 0 (the default) or bigger than
-        MPIR_CVAR_CH4_NUM_VCIS, the number of exposed VNIs is set to MPIR_CVAR_CH4_NUM_VCIS.
-
-=== END_MPI_T_CVAR_INFO_BLOCK ===
-*/
-
 static void request_init_callback(void *request);
 
 static void request_init_callback(void *request)
@@ -43,18 +20,8 @@ static void request_init_callback(void *request)
 
 static void init_num_vnis(void)
 {
-    int num_vnis = 1;
-    if (MPIR_CVAR_CH4_UCX_MAX_VNIS == 0 || MPIR_CVAR_CH4_UCX_MAX_VNIS > MPIDI_global.n_vcis) {
-        num_vnis = MPIDI_global.n_vcis;
-    } else {
-        num_vnis = MPIR_CVAR_CH4_UCX_MAX_VNIS;
-    }
-
-    /* for best performance, we ensure 1-to-1 vci/vni mapping. ref: MPIDI_OFI_vci_to_vni */
-    /* TODO: allow less num_vnis. Option 1. runtime MOD; 2. override MPIDI_global.n_vcis */
-    MPIR_Assert(num_vnis == MPIDI_global.n_vcis);
-
-    MPIDI_UCX_global.num_vnis = num_vnis;
+    /* TODO: check capabilities, abort if we can't support the requested number of vnis. */
+    MPIDI_UCX_global.num_vnis = MPIDI_global.n_total_vcis;      /* one vni per vci */
 }
 
 static int init_worker(int vni)
diff --git a/src/mpid/ch4/netmod/ucx/ucx_win.c b/src/mpid/ch4/netmod/ucx/ucx_win.c
index d81cef1e82d..f6576f8a6be 100644
--- a/src/mpid/ch4/netmod/ucx/ucx_win.c
+++ b/src/mpid/ch4/netmod/ucx/ucx_win.c
@@ -176,7 +176,7 @@ static int win_init(MPIR_Win * win)
     int mpi_errno = MPI_SUCCESS;
     MPIR_FUNC_ENTER;
 
-    MPIDI_WIN(win, am_vci) %= MPIDI_UCX_global.num_vnis;
+    MPIR_Assert(MPIDI_WIN(win, am_vci) < MPIDI_UCX_global.num_vnis);
 
     memset(&MPIDI_UCX_WIN(win), 0, sizeof(MPIDI_UCX_win_t));
 
diff --git a/src/mpid/ch4/shm/posix/posix_init.c b/src/mpid/ch4/shm/posix/posix_init.c
index 43fa4c8c5c1..ba6cb2853ef 100644
--- a/src/mpid/ch4/shm/posix/posix_init.c
+++ b/src/mpid/ch4/shm/posix/posix_init.c
@@ -150,7 +150,7 @@ int MPIDI_POSIX_init_local(int *tag_bits /* unused */)
 
     MPIDI_POSIX_global.local_rank_0 = local_rank_0;
 
-    MPIDI_POSIX_global.num_vsis = MPIDI_global.n_vcis;
+    MPIDI_POSIX_global.num_vsis = MPIDI_global.n_total_vcis;
     /* This is used to track messages that the eager submodule was not ready to send. */
     for (int vsi = 0; vsi < MPIDI_CH4_MAX_VCIS; vsi++) {
         mpi_errno = MPIDU_genq_private_pool_create_unsafe(MPIDI_POSIX_AM_HDR_POOL_CELL_SIZE,
diff --git a/src/mpid/ch4/src/ch4_init.c b/src/mpid/ch4/src/ch4_init.c
index c54c2226301..15acbd51d37 100644
--- a/src/mpid/ch4/src/ch4_init.c
+++ b/src/mpid/ch4/src/ch4_init.c
@@ -82,7 +82,17 @@
       verbosity   : MPI_T_VERBOSITY_USER_BASIC
       scope       : MPI_T_SCOPE_LOCAL
       description : >-
-        Sets the number of VCIs that user needs (should be a subset of MPIDI_CH4_MAX_VCIS).
+        Sets the number of VCIs to be implicitly used (should be a subset of MPIDI_CH4_MAX_VCIS).
+
+    - name        : MPIR_CVAR_CH4_RESERVE_VCIS
+      category    : CH4
+      type        : int
+      default     : 0
+      class       : none
+      verbosity   : MPI_T_VERBOSITY_USER_BASIC
+      scope       : MPI_T_SCOPE_LOCAL
+      description : >-
+        Sets the number of VCIs that user can explicitly allocate (should be a subset of MPIDI_CH4_MAX_VCIS).
 
     - name        : MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
       category    : COLLECTIVE
@@ -404,16 +414,17 @@ int MPID_Init(int requested, int *provided)
 
     /* Initialize multiple VCIs */
     /* TODO: add checks to ensure MPIDI_vci_t is padded or aligned to MPL_CACHELINE_SIZE */
-    MPIDI_global.n_vcis = 1;
-    if (MPIR_CVAR_CH4_NUM_VCIS > 1) {
-        MPIDI_global.n_vcis = MPIR_CVAR_CH4_NUM_VCIS;
-        /* There are configured maxes that we need observe. */
-        /* TODO: check them at configure time to avoid discrepancy */
-        MPIR_Assert(MPIDI_global.n_vcis <= MPIDI_CH4_MAX_VCIS);
-        MPIR_Assert(MPIDI_global.n_vcis <= MPIR_REQUEST_NUM_POOLS);
-    }
+    MPIR_Assert(MPIR_CVAR_CH4_NUM_VCIS >= 1);   /* number of vcis used in implicit vci hashing */
+    MPIR_Assert(MPIR_CVAR_CH4_RESERVE_VCIS >= 0);       /* maximum number of vcis that can be reserved */
+
+    MPIDI_global.n_vcis = MPIR_CVAR_CH4_NUM_VCIS;
+    MPIDI_global.n_total_vcis = MPIDI_global.n_vcis + MPIR_CVAR_CH4_RESERVE_VCIS;
+    MPIDI_global.n_reserved_vcis = 0;
+
+    MPIR_Assert(MPIDI_global.n_total_vcis <= MPIDI_CH4_MAX_VCIS);
+    MPIR_Assert(MPIDI_global.n_total_vcis <= MPIR_REQUEST_NUM_POOLS);
 
-    for (int i = 0; i < MPIDI_global.n_vcis; i++) {
+    for (int i = 0; i < MPIDI_global.n_total_vcis; i++) {
         int err;
         MPID_Thread_mutex_create(&MPIDI_VCI(i).lock, &err);
         MPIR_Assert(err == 0);
@@ -507,6 +518,40 @@ int MPID_InitCompleted(void)
     goto fn_exit;
 }
 
+int MPID_Allocate_vci(int *vci)        /* hand out one vci from the reserved range */
+{
+    int mpi_errno = MPI_SUCCESS;
+    /* NOTE(review): no lock is taken around the counters below -- confirm callers serialize. */
+    *vci = 0;                   /* default output, also what the error paths leave behind */
+#if MPIDI_CH4_MAX_VCIS == 1
+    MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch4nostream");    /* built with a single vci */
+#else
+    /* Only indices in [n_vcis, n_total_vcis) are handed out; n_reserved_vcis counts those in use. */
+    if (MPIDI_global.n_vcis + MPIDI_global.n_reserved_vcis >= MPIDI_global.n_total_vcis) {
+        MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**outofstream");        /* none left */
+    } else {
+        MPIDI_global.n_reserved_vcis++;
+        for (int i = MPIDI_global.n_vcis; i < MPIDI_global.n_total_vcis; i++) {
+            if (!MPIDI_VCI(i).allocated) {      /* take the first free slot */
+                MPIDI_VCI(i).allocated = true;
+                *vci = i;
+                break;
+            }
+        }
+    }
+#endif
+    return mpi_errno;
+}
+
+int MPID_Deallocate_vci(int vci)       /* release a vci obtained from MPID_Allocate_vci */
+{
+    MPIR_Assert(vci < MPIDI_global.n_total_vcis && vci >= MPIDI_global.n_vcis);
+    MPIR_Assert(MPIDI_VCI(vci).allocated);      /* must currently be allocated */
+    MPIDI_VCI(vci).allocated = false;
+    MPIDI_global.n_reserved_vcis--;     /* keep the count in step with the allocated flags */
+    return MPI_SUCCESS;
+}
+
 int MPID_Finalize(void)
 {
     int mpi_errno;
@@ -539,7 +584,7 @@ int MPID_Finalize(void)
         MPIR_Assert(err == 0);
     }
 
-    for (int i = 0; i < MPIDI_global.n_vcis; i++) {
+    for (int i = 0; i < MPIDI_global.n_total_vcis; i++) {
         int err;
         MPID_Thread_mutex_destroy(&MPIDI_VCI(i).lock, &err);
         MPIR_Assert(err == 0);
diff --git a/src/mpid/ch4/src/ch4_progress.h b/src/mpid/ch4/src/ch4_progress.h
index 74bf74d2451..5ba74f8c59b 100644
--- a/src/mpid/ch4/src/ch4_progress.h
+++ b/src/mpid/ch4/src/ch4_progress.h
@@ -47,7 +47,8 @@ extern int global_vci_poll_count;
 
 MPL_STATIC_INLINE_PREFIX int MPIDI_do_global_progress(void)
 {
-    if (MPIDI_global.n_vcis == 1 || !MPIDI_global.is_initialized || !MPIR_CVAR_CH4_GLOBAL_PROGRESS) {
+    if (MPIDI_global.n_total_vcis == 1 || !MPIDI_global.is_initialized ||
+        !MPIR_CVAR_CH4_GLOBAL_PROGRESS) {
         return 0;
     } else {
         global_vci_poll_count++;
@@ -153,7 +154,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_progress_test(MPID_Progress_state * state, in
 #else
     /* multiple vci */
     if (MPIDI_do_global_progress()) {
-        for (int vci = 0; vci < MPIDI_global.n_vcis; vci++) {
+        for (int vci = 0; vci < MPIDI_global.n_total_vcis; vci++) {
             MPIDI_PROGRESS(vci);
             if (wait) {
                 MPIDI_check_progress_made_vci(state, vci);
@@ -201,10 +202,10 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_progress_state_init(MPID_Progress_state * st
         state->vci_count = 1;
     } else {
         /* global progress by default */
-        for (int i = 0; i < MPIDI_global.n_vcis; i++) {
+        for (int i = 0; i < MPIDI_global.n_total_vcis; i++) {
             state->vci[i] = i;
         }
-        state->vci_count = MPIDI_global.n_vcis;
+        state->vci_count = MPIDI_global.n_total_vcis;
     }
 }
 
@@ -215,8 +216,9 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_progress_state_init_count(MPID_Progress_stat
 #if MPIDI_CH4_MAX_VCIS == 1
     state->progress_counts[0] = MPL_atomic_relaxed_load_int(&MPIDI_VCI(0).progress_count);
 #else
-    for (int i = 0; i < MPIDI_global.n_vcis; i++) {
-        state->progress_counts[i] = MPL_atomic_relaxed_load_int(&MPIDI_VCI(i).progress_count);
+    for (int i = 0; i < state->vci_count; i++) {
+        state->progress_counts[i] =
+            MPL_atomic_relaxed_load_int(&MPIDI_VCI(state->vci[i]).progress_count);
     }
 #endif
 }
diff --git a/src/mpid/ch4/src/ch4_types.h b/src/mpid/ch4/src/ch4_types.h
index e12430eaa2e..45d47795090 100644
--- a/src/mpid/ch4/src/ch4_types.h
+++ b/src/mpid/ch4/src/ch4_types.h
@@ -254,6 +254,7 @@ typedef struct MPIDI_per_vci {
     MPL_atomic_uint64_t exp_seq_no;
     MPL_atomic_uint64_t nxt_seq_no;
 
+    bool allocated;
     char pad MPL_ATTR_ALIGNED(MPL_CACHELINE_SIZE);
 } MPIDI_per_vci_t;
 
@@ -279,7 +280,9 @@ typedef struct MPIDI_CH4_Global_t {
     int my_sigusr1_count;
 #endif
 
-    int n_vcis;
+    int n_vcis;                 /* num of vcis used for implicit hashing */
+    int n_reserved_vcis;        /* num of reserved vcis */
+    int n_total_vcis;           /* total num of vcis, must be >= n_vcis + n_reserved_vcis */
     MPIDI_per_vci_t per_vci[MPIDI_CH4_MAX_VCIS];
 
 #if defined(MPIDI_CH4_USE_WORK_QUEUES)
diff --git a/src/mpid/ch4/src/ch4_vci.h b/src/mpid/ch4/src/ch4_vci.h
index 0d8a4258b50..646a9482243 100644
--- a/src/mpid/ch4/src/ch4_vci.h
+++ b/src/mpid/ch4/src/ch4_vci.h
@@ -13,12 +13,7 @@
 #define MPIDI_Request_get_vci(req) MPIR_REQUEST_POOL(req)
 #define MPIDI_VCI_INVALID (-1)
 
-/* VCI hashing function (fast path)
- * NOTE: The returned vci should always MOD NUMVCIS, where NUMVCIS is
- *       the number of VCIs determined at init time
- *       Potentially, we'd like to make it config constants of power of 2
- * TODO: move the MOD here.
- */
+/* VCI hashing function (fast path) */
 
 /* For consistent hashing, we may need differentiate between src and dst vci and whether
  * it is being called from sender side or receiver side (consdier intercomm). We use an
@@ -47,7 +42,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_get_vci(int flag, MPIR_Comm * comm_ptr,
 MPL_STATIC_INLINE_PREFIX int MPIDI_get_vci(int flag, MPIR_Comm * comm_ptr,
                                            int src_rank, int dst_rank, int tag)
 {
-    return comm_ptr->seq;
+    return comm_ptr->seq % MPIDI_global.n_vcis; /* result is always below n_vcis */
 }
 
 #elif MPIDI_CH4_VCI_METHOD == MPICH_VCI__TAG
@@ -59,13 +54,15 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_get_vci(int flag, MPIR_Comm * comm_ptr,
 MPL_STATIC_INLINE_PREFIX int MPIDI_get_vci(int flag, MPIR_Comm * comm_ptr,
                                            int src_rank, int dst_rank, int tag)
 {
+    int vci;
     if (!(flag & 0x1)) {
         /* src */
-        return (tag == MPI_ANY_TAG) ? 0 : ((tag >> 10) & 0x1f);
+        vci = (tag == MPI_ANY_TAG) ? 0 : ((tag >> 10) & 0x1f);  /* tag bits 10..14 */
     } else {
         /* dst */
-        return (tag == MPI_ANY_TAG) ? 0 : ((tag >> 5) & 0x1f);
+        vci = (tag == MPI_ANY_TAG) ? 0 : ((tag >> 5) & 0x1f);   /* tag bits 5..9 */
     }
+    return vci % MPIDI_global.n_vcis;   /* result is always below n_vcis */
 }
 
 #elif MPIDI_CH4_VCI_METHOD == MPICH_VCI__IMPLICIT
@@ -183,7 +180,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_get_receiver_vci(MPIR_Comm * comm,
     if (is_vci_restricted_to_zero(comm)) {
         vci_idx = 0;
     } else if (use_user_defined_vci) {
-        vci_idx = comm->hints[MPIR_COMM_HINT_RECEIVER_VCI];
+        vci_idx = comm->hints[MPIR_COMM_HINT_RECEIVER_VCI] % MPIDI_global.n_vcis;
     } else {
         /* If mpi_any_tag and mpi_any_source can be used for recv, all messages
          * should be received on a single vci. Otherwise, messages sent from a
diff --git a/src/mpid/ch4/src/ch4_wait.h b/src/mpid/ch4/src/ch4_wait.h
index bf9c8e244d1..fede099c44d 100644
--- a/src/mpid/ch4/src/ch4_wait.h
+++ b/src/mpid/ch4/src/ch4_wait.h
@@ -68,7 +68,7 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_set_progress_vci_n(int n, MPIR_Request ** re
         }
         if (!found) {
             state->vci[idx++] = vci;
-            MPIR_Assert(vci < MPIDI_global.n_vcis);
+            MPIR_Assert(vci < MPIDI_global.n_total_vcis);
         }
     }
     state->vci_count = idx;
diff --git a/src/mpid/ch4/src/mpidig_init.c b/src/mpid/ch4/src/mpidig_init.c
index 8290759b9a8..58991296d1e 100644
--- a/src/mpid/ch4/src/mpidig_init.c
+++ b/src/mpid/ch4/src/mpidig_init.c
@@ -132,7 +132,7 @@ int MPIDIG_am_init(void)
     int mpi_errno = MPI_SUCCESS;
     MPIR_FUNC_ENTER;
 
-    for (int vci = 0; vci < MPIDI_global.n_vcis; vci++) {
+    for (int vci = 0; vci < MPIDI_global.n_total_vcis; vci++) {
         MPIDI_global.per_vci[vci].posted_list = NULL;
         MPIDI_global.per_vci[vci].unexp_list = NULL;
 
@@ -236,7 +236,7 @@ void MPIDIG_am_finalize(void)
     MPIR_FUNC_ENTER;
 
     MPIDIU_map_destroy(MPIDI_global.win_map);
-    for (int vci = 0; vci < MPIDI_global.n_vcis; vci++) {
+    for (int vci = 0; vci < MPIDI_global.n_total_vcis; vci++) {
         MPIDU_genq_private_pool_destroy_unsafe(MPIDI_global.per_vci[vci].request_pool);
         MPIDU_genq_private_pool_destroy_unsafe(MPIDI_global.per_vci[vci].unexp_pack_buf_pool);
     }