Skip to content

Commit

Permalink
Merge pull request #12960 from hppritcha/fix_for_issue10895
Browse files Browse the repository at this point in the history
comm: beef up use of PMIx_Group_construct
  • Loading branch information
hppritcha authored Dec 9, 2024
2 parents 8d71197 + 46ff698 commit 99bec5a
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 12 deletions.
32 changes: 21 additions & 11 deletions ompi/communicator/comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1784,22 +1784,22 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
leader_procs[1] = tmp;
}

/* create a unique tag for allocating the leader communicator. we can eliminate this step
* if we take a CID from the newly allocated block belonging to local_comm. this is
* a note to make this change at a later time. */
opal_asprintf (&sub_tag, "%s-OMPIi-LC", tag);
if (OPAL_UNLIKELY(NULL == sub_tag)) {
ompi_comm_free (&local_comm);
leader_group = ompi_group_allocate_plist_w_procs (NULL, leader_procs, 2);
ompi_set_group_rank (leader_group, my_proc);
if (OPAL_UNLIKELY(NULL == leader_group)) {
free(leader_procs);
ompi_comm_free (&local_comm);
return OMPI_ERR_OUT_OF_RESOURCE;
}

leader_group = ompi_group_allocate_plist_w_procs (NULL, leader_procs, 2);
ompi_set_group_rank (leader_group, my_proc);
if (OPAL_UNLIKELY(NULL == leader_group)) {
free (sub_tag);
/* create a unique tag for allocating the leader communicator. we can eliminate this step
* if we take a CID from the newly allocated block belonging to local_comm. this is
* a note to make this change at a later time. */
opal_asprintf (&sub_tag, "%s-OMPIi-LC-%s", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (leader_group, 0)));
if (OPAL_UNLIKELY(NULL == sub_tag)) {
free(leader_procs);
ompi_comm_free (&local_comm);
OBJ_RELEASE(leader_group);
return OMPI_ERR_OUT_OF_RESOURCE;
}

Expand All @@ -1809,6 +1809,7 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
rc = ompi_comm_create_from_group (leader_group, sub_tag, info, errhandler, &leader_comm);
OBJ_RELEASE(leader_group);
free (sub_tag);
sub_tag = NULL;
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
free(leader_procs);
ompi_comm_free (&local_comm);
Expand Down Expand Up @@ -1864,7 +1865,16 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
return rc;
}

rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) tag, NULL, false, OMPI_COMM_CID_GROUP_NEW);
/*
* Append the PMIx CONTEXT_ID obtained when creating the leader comm as a discriminator.
*/
opal_asprintf (&sub_tag, "%s-%ld", tag, data[1]);
if (OPAL_UNLIKELY(NULL == sub_tag)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}

rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) sub_tag, NULL, false, OMPI_COMM_CID_GROUP_NEW);
free (sub_tag);
if ( OMPI_SUCCESS != rc ) {
OBJ_RELEASE(newcomp);
return rc;
Expand Down
17 changes: 16 additions & 1 deletion ompi/mpi/c/comm_create_from_group.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ static const char FUNC_NAME[] = "MPI_Comm_create_from_group";
int MPI_Comm_create_from_group (MPI_Group group, const char *tag, MPI_Info info, MPI_Errhandler errhandler,
MPI_Comm *newcomm) {
int rc;
char *pmix_group_tag = NULL;

MEMCHECKER(
memchecker_comm(comm);
Expand Down Expand Up @@ -89,8 +90,22 @@ int MPI_Comm_create_from_group (MPI_Group group, const char *tag, MPI_Info info,
}


rc = ompi_comm_create_from_group ((ompi_group_t *) group, tag, &info->super, errhandler,
/*
* We use PMIx group operations to implement comm/intercomm create from group/groups.
* PMIx group constructors require a tag that is unique across the processes using the same
* PMIx server. This is not equivalent to the uniqueness requirements of the tag argument
* to MPI_Comm_create_from_group and MPI_Intercomm_create_from_groups, hence an
* additional discriminator needs to be added to the user-supplied tag argument.
*/
opal_asprintf (&pmix_group_tag, "%s-%s.%d", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (group, 0)),
ompi_group_size(group));
if (OPAL_UNLIKELY(NULL == pmix_group_tag)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}

rc = ompi_comm_create_from_group ((ompi_group_t *) group, pmix_group_tag, &info->super, errhandler,
(ompi_communicator_t **) newcomm);
free(pmix_group_tag);
if (MPI_SUCCESS != rc) {
return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type,
rc, FUNC_NAME);
Expand Down

0 comments on commit 99bec5a

Please sign in to comment.