Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pt2pt: add MPID_Allocate_vci #5904

Merged
merged 5 commits into from
Apr 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/mpi/errhan/errnames.txt
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,11 @@ is too big (> MPIU_SHMW_GHND_SZ)
**set_thread_affinity:Failed to set the async thread affinity
**set_thread_affinity %d:Failed to set the async thread affinity to the logical processor [%d]

## MPIX_Stream
**ch3nostream:Stream is not supported in ch3.
**ch4nostream:No streams available. Configure --enable-thread-cs=per-vci and --with-ch4-max-vcis=# to enable streams.
**outofstream:No streams available. Use MPIR_CVAR_CH4_RESERVE_VCIS to reserve the number of streams can be allocated.
Comment on lines +954 to +956
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: it's not obvious that vci is what is meant by "stream" in these error messages

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you suggest?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the issue is that this error message is meant for callers of MPIX_Stream_create? In that case it makes sense; it's just odd to try to allocate X and get an error message that there are no more Y.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Right, the message is meant for MPIX_Stream_create. Only the lower layer has the details on what is wrong, so we have to craft the message there.


# -----------------------------------------------------------------------------
# The following names are defined but not used (see the -careful option
# for extracterrmsgs) (still to do: move the undefined names here)
3 changes: 3 additions & 0 deletions src/mpid/ch3/include/mpidpre.h
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,9 @@ int MPID_Init(int required, int *provided);

int MPID_InitCompleted( void );

/* Device hooks for explicit VCI allocation.
 * MPID_Allocate_vci writes a VCI index through `vci` and returns an MPI error
 * code; MPID_Deallocate_vci takes an index and returns an MPI error code.
 * NOTE(review): the ch3 definitions in mpid_init.c always fail allocation with
 * "**ch3nostream" -- confirm no ch3 caller expects a usable index. */
int MPID_Allocate_vci(int *vci);
int MPID_Deallocate_vci(int vci);

int MPID_Finalize(void);

int MPID_Abort( MPIR_Comm *comm, int mpi_errno, int exit_code, const char *error_msg );
Expand Down
13 changes: 13 additions & 0 deletions src/mpid/ch3/src/mpid_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,19 @@ int MPID_InitCompleted( void )
/* --END ERROR HANDLING-- */
}

/*
 * ch3 implementation of VCI allocation: always fails.
 *
 * vci - output; unconditionally set to 0.
 *
 * Returns the error code produced by MPIR_ERR_SET with class MPI_ERR_OTHER
 * and message key "**ch3nostream".
 */
int MPID_Allocate_vci(int *vci)
{
int mpi_errno = MPI_SUCCESS;
/* give the caller a defined value even though the call reports an error */
*vci = 0;
MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3nostream");
return mpi_errno;
}

/*
 * ch3 implementation of VCI deallocation.
 *
 * vci - unused.
 *
 * Trips MPIR_Assert(0) on entry; returns MPI_SUCCESS if the assertion does
 * not abort.
 * NOTE(review): presumably unreachable because the ch3 MPID_Allocate_vci never
 * succeeds -- confirm against callers.
 */
int MPID_Deallocate_vci(int vci)
{
MPIR_Assert(0);
return MPI_SUCCESS;
}
/*
* Initialize the process group structure by using PMI calls.
* This routine initializes PMI and uses PMI calls to setup the
Expand Down
2 changes: 2 additions & 0 deletions src/mpid/ch4/include/mpidch4.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

int MPID_Init(int, int *);
int MPID_InitCompleted(void);
/* Explicit VCI allocation hooks.
 * MPID_Allocate_vci writes a VCI index through `vci` and returns an MPI error
 * code; MPID_Deallocate_vci releases an index previously handed out and
 * returns an MPI error code. */
int MPID_Allocate_vci(int *vci);
int MPID_Deallocate_vci(int vci);
MPL_STATIC_INLINE_PREFIX int MPID_Cancel_recv(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX;
MPL_STATIC_INLINE_PREFIX int MPID_Cancel_send(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX;
int MPID_Comm_disconnect(MPIR_Comm *);
Expand Down
4 changes: 3 additions & 1 deletion src/mpid/ch4/netmod/ofi/ofi_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_get_vni(int flag, MPIR_Comm * comm_ptr,
#if MPIDI_CH4_MAX_VCIS == 1
return 0;
#else
return MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag) % MPIDI_OFI_global.num_vnis;
int vni = MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag);
MPIR_Assert(vni < MPIDI_OFI_global.num_vnis);
return vni;
#endif
}

Expand Down
30 changes: 3 additions & 27 deletions src/mpid/ch4/netmod/ofi/ofi_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,18 +242,6 @@ categories :
minor version of the OFI library used with MPICH. If using this CVAR,
it is recommended that the user also specifies a specific OFI provider.

- name : MPIR_CVAR_CH4_OFI_MAX_VNIS
category : CH4_OFI
type : int
default : 0
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_LOCAL
description : >-
If set to positive, this CVAR specifies the maximum number of CH4 VNIs
that OFI netmod exposes. If set to 0 (the default) or bigger than
MPIR_CVAR_CH4_NUM_VCIS, the number of exposed VNIs is set to MPIR_CVAR_CH4_NUM_VCIS.

- name : MPIR_CVAR_CH4_OFI_MAX_RMA_SEP_CTX
category : CH4_OFI
type : int
Expand Down Expand Up @@ -613,22 +601,10 @@ int MPIDI_OFI_init_local(int *tag_bits)
/* Create transport level communication contexts. */
/* ------------------------------------------------------------------------ */

int num_vnis = 1;
if (MPIR_CVAR_CH4_OFI_MAX_VNIS == 0 || MPIR_CVAR_CH4_OFI_MAX_VNIS > MPIDI_global.n_vcis) {
num_vnis = MPIDI_global.n_vcis;
} else {
num_vnis = MPIR_CVAR_CH4_OFI_MAX_VNIS;
}

/* TODO: update num_vnis according to provider capabilities, such as
* prov_use->domain_attr->{tx,rx}_ctx_cnt
/* TODO: check provider capabilities, such as prov_use->domain_attr->{tx,rx}_ctx_cnt,
* abort if we can't support the requested number of vnis.
*/
if (num_vnis > MPIDI_OFI_MAX_VNIS) {
num_vnis = MPIDI_OFI_MAX_VNIS;
}
/* for best performance, we ensure 1-to-1 vci/vni mapping. ref: MPIDI_OFI_vci_to_vni */
/* TODO: allow less num_vnis. Option 1. runtime MOD; 2. override MPIDI_global.n_vcis */
MPIR_Assert(num_vnis == MPIDI_global.n_vcis);
int num_vnis = MPIDI_global.n_total_vcis;

/* Multiple vni without using domain require MPIDI_OFI_ENABLE_SCALABLE_ENDPOINTS */
#ifndef MPIDI_OFI_VNI_USE_DOMAIN
Expand Down
2 changes: 1 addition & 1 deletion src/mpid/ch4/netmod/ofi/ofi_win.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ static int win_set_per_win_sync(MPIR_Win * win)

/*
 * Per-window active-message setup for the OFI netmod: relates the window's
 * am_vci to MPIDI_OFI_global.num_vnis.
 *
 * NOTE(review): as shown here the body both wraps am_vci with `%=` and then
 * asserts it is below num_vnis. These look like the removed and added sides
 * of a single diff hunk; if so, only the assert should remain in the file,
 * because after the `%=` the assert cannot fire for non-negative values.
 * Confirm against the merged source.
 */
static void win_init_am(MPIR_Win * win)
{
MPIDI_WIN(win, am_vci) %= MPIDI_OFI_global.num_vnis;
MPIR_Assert(MPIDI_WIN(win, am_vci) < MPIDI_OFI_global.num_vnis);
}

/*
Expand Down
12 changes: 8 additions & 4 deletions src/mpid/ch4/netmod/ucx/ucx_am.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_isend(int rank,

MPIR_FUNC_ENTER;

int src_vni = src_vci % MPIDI_UCX_global.num_vnis;
int dst_vni = dst_vci % MPIDI_UCX_global.num_vnis;
int src_vni = src_vci;
int dst_vni = dst_vci;
MPIR_Assert(src_vni < MPIDI_UCX_global.num_vnis);
MPIR_Assert(dst_vni < MPIDI_UCX_global.num_vnis);
ep = MPIDI_UCX_COMM_TO_EP(comm, rank, src_vni, dst_vni);

int dt_contig;
Expand Down Expand Up @@ -186,8 +188,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_send_hdr(int rank,

MPIR_FUNC_ENTER;

int src_vni = src_vci % MPIDI_UCX_global.num_vnis;
int dst_vni = dst_vci % MPIDI_UCX_global.num_vnis;
int src_vni = src_vci;
int dst_vni = dst_vci;
MPIR_Assert(src_vni < MPIDI_UCX_global.num_vnis);
MPIR_Assert(dst_vni < MPIDI_UCX_global.num_vnis);
ep = MPIDI_UCX_COMM_TO_EP(comm, rank, src_vni, dst_vni);

/* initialize our portion of the hdr */
Expand Down
5 changes: 4 additions & 1 deletion src/mpid/ch4/netmod/ucx/ucx_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_UCX_vci_to_vni(int vci)
/*
 * Return the UCX VNI to use for a communication.
 *
 * flag, comm_ptr, src_rank, dst_rank, tag - forwarded unchanged to
 *     MPIDI_get_vci, which picks the VCI.
 *
 * The VCI is used directly as the VNI index. It is no longer wrapped with
 * `% num_vnis`; instead it is asserted to already be below
 * MPIDI_UCX_global.num_vnis, so an out-of-range VCI is caught rather than
 * silently aliased onto another VNI.
 */
MPL_STATIC_INLINE_PREFIX int MPIDI_UCX_get_vni(int flag, MPIR_Comm * comm_ptr,
                                               int src_rank, int dst_rank, int tag)
{
    /* Capture the result first: returning it directly would skip the range
     * check below and leave `vni` uninitialized. */
    int vni = MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag);
    MPIR_Assert(vni < MPIDI_UCX_global.num_vnis);
    return vni;
}

/* for rma, we need ensure rkey is consistent with the per-vni ep,
Expand Down
37 changes: 2 additions & 35 deletions src/mpid/ch4/netmod/ucx/ucx_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,6 @@
#include "mpidu_bc.h"
#include <ucp/api/ucp.h>

/*
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===

categories :
- name : CH4_UCX
description : A category for CH4 UCX netmod variables

cvars:
- name : MPIR_CVAR_CH4_UCX_MAX_VNIS
category : CH4_UCX
type : int
default : 0
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_LOCAL
description : >-
If set to positive, this CVAR specifies the maximum number of CH4 VNIs
that UCX netmod exposes. If set to 0 (the default) or bigger than
MPIR_CVAR_CH4_NUM_VCIS, the number of exposed VNIs is set to MPIR_CVAR_CH4_NUM_VCIS.

=== END_MPI_T_CVAR_INFO_BLOCK ===
*/

static void request_init_callback(void *request);

static void request_init_callback(void *request)
Expand All @@ -43,18 +20,8 @@ static void request_init_callback(void *request)

/*
 * Set MPIDI_UCX_global.num_vnis for the UCX netmod.
 *
 * The last statement stores MPIDI_global.n_total_vcis, which is the value
 * that remains in effect when the function returns.
 *
 * NOTE(review): everything before the final TODO (the MPIR_CVAR_CH4_UCX_MAX_VNIS
 * selection, the num_vnis == n_vcis assert and the first store) looks like the
 * removed side of a diff hunk shown next to its replacement. If so, those
 * lines should not coexist with the final assignment in the file: the first
 * store is immediately overwritten and the CVAR they read is dropped elsewhere
 * in the same change. Confirm against the merged source.
 */
static void init_num_vnis(void)
{
int num_vnis = 1;
if (MPIR_CVAR_CH4_UCX_MAX_VNIS == 0 || MPIR_CVAR_CH4_UCX_MAX_VNIS > MPIDI_global.n_vcis) {
num_vnis = MPIDI_global.n_vcis;
} else {
num_vnis = MPIR_CVAR_CH4_UCX_MAX_VNIS;
}

/* for best performance, we ensure 1-to-1 vci/vni mapping. ref: MPIDI_OFI_vci_to_vni */
/* TODO: allow less num_vnis. Option 1. runtime MOD; 2. override MPIDI_global.n_vcis */
MPIR_Assert(num_vnis == MPIDI_global.n_vcis);

MPIDI_UCX_global.num_vnis = num_vnis;
/* TODO: check capabilities, abort if we can't support the requested number of vnis. */
MPIDI_UCX_global.num_vnis = MPIDI_global.n_total_vcis;
}

static int init_worker(int vni)
Expand Down
2 changes: 1 addition & 1 deletion src/mpid/ch4/netmod/ucx/ucx_win.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ static int win_init(MPIR_Win * win)
int mpi_errno = MPI_SUCCESS;
MPIR_FUNC_ENTER;

MPIDI_WIN(win, am_vci) %= MPIDI_UCX_global.num_vnis;
MPIR_Assert(MPIDI_WIN(win, am_vci) < MPIDI_UCX_global.num_vnis);

memset(&MPIDI_UCX_WIN(win), 0, sizeof(MPIDI_UCX_win_t));

Expand Down
2 changes: 1 addition & 1 deletion src/mpid/ch4/shm/posix/posix_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ int MPIDI_POSIX_init_local(int *tag_bits /* unused */)

MPIDI_POSIX_global.local_rank_0 = local_rank_0;

MPIDI_POSIX_global.num_vsis = MPIDI_global.n_vcis;
MPIDI_POSIX_global.num_vsis = MPIDI_global.n_total_vcis;
/* This is used to track messages that the eager submodule was not ready to send. */
for (int vsi = 0; vsi < MPIDI_CH4_MAX_VCIS; vsi++) {
mpi_errno = MPIDU_genq_private_pool_create_unsafe(MPIDI_POSIX_AM_HDR_POOL_CELL_SIZE,
Expand Down
67 changes: 56 additions & 11 deletions src/mpid/ch4/src/ch4_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,17 @@
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_LOCAL
description : >-
Sets the number of VCIs that user needs (should be a subset of MPIDI_CH4_MAX_VCIS).
Sets the number of VCIs to be implicitly used (should be a subset of MPIDI_CH4_MAX_VCIS).

- name : MPIR_CVAR_CH4_RESERVE_VCIS
category : CH4
type : int
default : 0
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_LOCAL
description : >-
Sets the number of VCIs that user can explicitly allocate (should be a subset of MPIDI_CH4_MAX_VCIS).

- name : MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
category : COLLECTIVE
Expand Down Expand Up @@ -404,16 +414,17 @@ int MPID_Init(int requested, int *provided)

/* Initialize multiple VCIs */
/* TODO: add checks to ensure MPIDI_vci_t is padded or aligned to MPL_CACHELINE_SIZE */
MPIDI_global.n_vcis = 1;
if (MPIR_CVAR_CH4_NUM_VCIS > 1) {
MPIDI_global.n_vcis = MPIR_CVAR_CH4_NUM_VCIS;
/* There are configured maxes that we need observe. */
/* TODO: check them at configure time to avoid discrepancy */
MPIR_Assert(MPIDI_global.n_vcis <= MPIDI_CH4_MAX_VCIS);
MPIR_Assert(MPIDI_global.n_vcis <= MPIR_REQUEST_NUM_POOLS);
}
MPIR_Assert(MPIR_CVAR_CH4_NUM_VCIS >= 1); /* number of vcis used in implicit vci hashing */
MPIR_Assert(MPIR_CVAR_CH4_RESERVE_VCIS >= 0); /* maximum number of vcis can be reserved */

MPIDI_global.n_vcis = MPIR_CVAR_CH4_NUM_VCIS;
MPIDI_global.n_total_vcis = MPIDI_global.n_vcis + MPIR_CVAR_CH4_RESERVE_VCIS;
MPIDI_global.n_reserved_vcis = 0;

MPIR_Assert(MPIDI_global.n_total_vcis <= MPIDI_CH4_MAX_VCIS);
MPIR_Assert(MPIDI_global.n_total_vcis <= MPIR_REQUEST_NUM_POOLS);

for (int i = 0; i < MPIDI_global.n_vcis; i++) {
for (int i = 0; i < MPIDI_global.n_total_vcis; i++) {
int err;
MPID_Thread_mutex_create(&MPIDI_VCI(i).lock, &err);
MPIR_Assert(err == 0);
Expand Down Expand Up @@ -507,6 +518,40 @@ int MPID_InitCompleted(void)
goto fn_exit;
}

/*
 * Hand out one VCI from the reserved range [n_vcis, n_total_vcis).
 *
 * vci - output; set to 0 on entry and overwritten with the index of the
 *       first slot in the reserved range whose `allocated` flag is clear.
 *
 * Returns MPI_SUCCESS, or an MPI_ERR_OTHER code with key
 *   "**ch4nostream"  when built with MPIDI_CH4_MAX_VCIS == 1, or
 *   "**outofstream"  when n_vcis + n_reserved_vcis has reached n_total_vcis.
 */
int MPID_Allocate_vci(int *vci)
{
    int mpi_errno = MPI_SUCCESS;

    /* default result, also what the caller sees on the error paths */
    *vci = 0;

#if MPIDI_CH4_MAX_VCIS == 1
    MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch4nostream");
#else
    int in_use = MPIDI_global.n_vcis + MPIDI_global.n_reserved_vcis;

    if (in_use < MPIDI_global.n_total_vcis) {
        /* the counter is bumped before the scan, whether or not a slot is found */
        MPIDI_global.n_reserved_vcis++;

        /* skip over slots that are already taken */
        int slot = MPIDI_global.n_vcis;
        while (slot < MPIDI_global.n_total_vcis && MPIDI_VCI(slot).allocated) {
            slot++;
        }

        if (slot < MPIDI_global.n_total_vcis) {
            MPIDI_VCI(slot).allocated = true;
            *vci = slot;
        }
    } else {
        MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**outofstream");
    }
#endif
    return mpi_errno;
}

/*
 * Return a VCI previously handed out by MPID_Allocate_vci.
 *
 * vci - index to release; asserted to lie in [n_vcis, n_total_vcis) and to
 *       be currently marked allocated.
 *
 * Clears the slot's `allocated` flag and decrements n_reserved_vcis.
 * Always returns MPI_SUCCESS.
 */
int MPID_Deallocate_vci(int vci)
{
    /* precondition checks: in the reserved range, and actually in use */
    MPIR_Assert(vci < MPIDI_global.n_total_vcis && vci >= MPIDI_global.n_vcis);
    MPIR_Assert(MPIDI_VCI(vci).allocated);

    /* bookkeeping: one fewer reserved VCI, slot becomes free again */
    MPIDI_global.n_reserved_vcis--;
    MPIDI_VCI(vci).allocated = false;

    return MPI_SUCCESS;
}

int MPID_Finalize(void)
{
int mpi_errno;
Expand Down Expand Up @@ -539,7 +584,7 @@ int MPID_Finalize(void)
MPIR_Assert(err == 0);
}

for (int i = 0; i < MPIDI_global.n_vcis; i++) {
for (int i = 0; i < MPIDI_global.n_total_vcis; i++) {
int err;
MPID_Thread_mutex_destroy(&MPIDI_VCI(i).lock, &err);
MPIR_Assert(err == 0);
Expand Down
14 changes: 8 additions & 6 deletions src/mpid/ch4/src/ch4_progress.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ extern int global_vci_poll_count;

MPL_STATIC_INLINE_PREFIX int MPIDI_do_global_progress(void)
{
if (MPIDI_global.n_vcis == 1 || !MPIDI_global.is_initialized || !MPIR_CVAR_CH4_GLOBAL_PROGRESS) {
if (MPIDI_global.n_total_vcis == 1 || !MPIDI_global.is_initialized ||
!MPIR_CVAR_CH4_GLOBAL_PROGRESS) {
return 0;
} else {
global_vci_poll_count++;
Expand Down Expand Up @@ -153,7 +154,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_progress_test(MPID_Progress_state * state, in
#else
/* multiple vci */
if (MPIDI_do_global_progress()) {
for (int vci = 0; vci < MPIDI_global.n_vcis; vci++) {
for (int vci = 0; vci < MPIDI_global.n_total_vcis; vci++) {
MPIDI_PROGRESS(vci);
if (wait) {
MPIDI_check_progress_made_vci(state, vci);
Expand Down Expand Up @@ -201,10 +202,10 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_progress_state_init(MPID_Progress_state * st
state->vci_count = 1;
} else {
/* global progress by default */
for (int i = 0; i < MPIDI_global.n_vcis; i++) {
for (int i = 0; i < MPIDI_global.n_total_vcis; i++) {
state->vci[i] = i;
}
state->vci_count = MPIDI_global.n_vcis;
state->vci_count = MPIDI_global.n_total_vcis;
}
}

Expand All @@ -215,8 +216,9 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_progress_state_init_count(MPID_Progress_stat
#if MPIDI_CH4_MAX_VCIS == 1
state->progress_counts[0] = MPL_atomic_relaxed_load_int(&MPIDI_VCI(0).progress_count);
#else
for (int i = 0; i < MPIDI_global.n_vcis; i++) {
state->progress_counts[i] = MPL_atomic_relaxed_load_int(&MPIDI_VCI(i).progress_count);
for (int i = 0; i < state->vci_count; i++) {
state->progress_counts[i] =
MPL_atomic_relaxed_load_int(&MPIDI_VCI(state->vci[i]).progress_count);
}
#endif
}
Expand Down
5 changes: 4 additions & 1 deletion src/mpid/ch4/src/ch4_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ typedef struct MPIDI_per_vci {
MPL_atomic_uint64_t exp_seq_no;
MPL_atomic_uint64_t nxt_seq_no;

bool allocated;
char pad MPL_ATTR_ALIGNED(MPL_CACHELINE_SIZE);
} MPIDI_per_vci_t;

Expand All @@ -279,7 +280,9 @@ typedef struct MPIDI_CH4_Global_t {
int my_sigusr1_count;
#endif

int n_vcis;
int n_vcis; /* num of vcis used for implicit hashing */
int n_reserved_vcis; /* num of reserved vcis */
int n_total_vcis; /* total num of vcis, must be >= n_vcis + n_reserved_vcis */
MPIDI_per_vci_t per_vci[MPIDI_CH4_MAX_VCIS];

#if defined(MPIDI_CH4_USE_WORK_QUEUES)
Expand Down
Loading