diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 2a9afd352be..bfb16202e43 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -1784,22 +1784,22 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead leader_procs[1] = tmp; } - /* create a unique tag for allocating the leader communicator. we can eliminate this step - * if we take a CID from the newly allocated block belonging to local_comm. this is - * a note to make this change at a later time. */ - opal_asprintf (&sub_tag, "%s-OMPIi-LC", tag); - if (OPAL_UNLIKELY(NULL == sub_tag)) { - ompi_comm_free (&local_comm); + leader_group = ompi_group_allocate_plist_w_procs (NULL, leader_procs, 2); + ompi_set_group_rank (leader_group, my_proc); + if (OPAL_UNLIKELY(NULL == leader_group)) { free(leader_procs); + ompi_comm_free (&local_comm); return OMPI_ERR_OUT_OF_RESOURCE; } - leader_group = ompi_group_allocate_plist_w_procs (NULL, leader_procs, 2); - ompi_set_group_rank (leader_group, my_proc); - if (OPAL_UNLIKELY(NULL == leader_group)) { - free (sub_tag); + /* create a unique tag for allocating the leader communicator. we can eliminate this step + * if we take a CID from the newly allocated block belonging to local_comm. this is + * a note to make this change at a later time. */ + opal_asprintf (&sub_tag, "%s-OMPIi-LC-%s", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (leader_group, 0))); + if (OPAL_UNLIKELY(NULL == sub_tag)) { free(leader_procs); ompi_comm_free (&local_comm); + OBJ_RELEASE(leader_group); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -1809,6 +1809,7 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead rc = ompi_comm_create_from_group (leader_group, sub_tag, info, errhandler, &leader_comm); OBJ_RELEASE(leader_group); free (sub_tag); + sub_tag = NULL; if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { free(leader_procs); ompi_comm_free (&local_comm); @@ -1864,7 +1865,16 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead return rc; } - rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) tag, NULL, false, OMPI_COMM_CID_GROUP_NEW); + /* + * append the pmix CONTEXT_ID obtained when creating the leader comm as discriminator + */ + opal_asprintf (&sub_tag, "%s-%ld", tag, data[1]); + if (OPAL_UNLIKELY(NULL == sub_tag)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) sub_tag, NULL, false, OMPI_COMM_CID_GROUP_NEW); + free (sub_tag); if ( OMPI_SUCCESS != rc ) { OBJ_RELEASE(newcomp); return rc; diff --git a/ompi/mpi/c/comm_create_from_group.c b/ompi/mpi/c/comm_create_from_group.c index 0101f84a1f8..96cf895ee07 100644 --- a/ompi/mpi/c/comm_create_from_group.c +++ b/ompi/mpi/c/comm_create_from_group.c @@ -46,6 +46,7 @@ static const char FUNC_NAME[] = "MPI_Comm_create_from_group"; int MPI_Comm_create_from_group (MPI_Group group, const char *tag, MPI_Info info, MPI_Errhandler errhandler, MPI_Comm *newcomm) { int rc; + char *pmix_group_tag = NULL; MEMCHECKER( memchecker_comm(comm); @@ -89,8 +90,22 @@ int MPI_Comm_create_from_group (MPI_Group group, const char *tag, MPI_Info info, } - rc = ompi_comm_create_from_group ((ompi_group_t *) group, tag, &info->super, errhandler, + /* + * we use PMIx group operations to implement comm/intercomm create from group/groups. + * PMIx group constructors require a unique tag across the processes using the same + * PMIx server. This is not equivalent to the uniqueness requirements of the tag argument + * to MPI_Comm_create_from_group and MPI_Intercomm_create_from_groups, hence an + * additional discriminator needs to be added to the user supplied tag argument. + */ + opal_asprintf (&pmix_group_tag, "%s-%s.%d", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (group, 0)), + ompi_group_size(group)); + if (OPAL_UNLIKELY(NULL == pmix_group_tag)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + rc = ompi_comm_create_from_group ((ompi_group_t *) group, pmix_group_tag, &info->super, errhandler, (ompi_communicator_t **) newcomm); + free(pmix_group_tag); if (MPI_SUCCESS != rc) { return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, rc, FUNC_NAME);