From e7db71295898ab5687a09f54f836d3d0089bce05 Mon Sep 17 00:00:00 2001
From: Sonja Happ
Date: Wed, 24 May 2023 09:45:38 +0200
Subject: [PATCH] mpir_pmi: Move PMI(x)_Finalize to atexit handler to enable re-init.

Move the call of PMI(x)_Finalize to an atexit handler to keep the
connection to the PM open between two consecutive MPI sessions. Unlike
the solution implemented in PR #6337, this solution is generic and does
not require changes in the PM's implementation of PMI(x)_Finalize.
---
 src/pmi/errnames.txt |  5 +++++
 src/util/mpir_pmi.c  | 31 ++++++++++++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/pmi/errnames.txt b/src/pmi/errnames.txt
index d1ba360f71a..1cdd89c2902 100644
--- a/src/pmi/errnames.txt
+++ b/src/pmi/errnames.txt
@@ -107,3 +107,8 @@
 **pmix_resolve_nodes %d: PMIx_Resolve_nodes returned %d
 **pmix_resolve_peers: PMIx_Resolve_peers failed
 **pmix_resolve_peers %d: PMIx_Resolve_peers returned %d
+#
+# PMI finalize exit handler registration
+#
+**atexit_pmi_finalize: Registration of PMI finalize function in exit handler failed
+**atexit_pmi_finalize %d: Registration of PMI finalize function in exit handler failed with %d
\ No newline at end of file
diff --git a/src/util/mpir_pmi.c b/src/util/mpir_pmi.c
index 034bcd8dd03..1d37e78ed0f 100644
--- a/src/util/mpir_pmi.c
+++ b/src/util/mpir_pmi.c
@@ -111,10 +111,25 @@ static pmix_proc_t pmix_wcproc;
 
 static char *hwloc_topology_xmlfile;
 
+/* Exit handler that finalizes the PMI(x) connection to the process manager.
+ * MPIR_pmi_init registers it with atexit() so that the connection stays
+ * open between two consecutive MPI sessions. */
+static void MPIR_pmi_finalize_on_exit(void)
+{
+#ifdef USE_PMI1_API
+    PMI_Finalize();
+#elif defined(USE_PMI2_API)
+    PMI2_Finalize();
+#elif defined(USE_PMIX_API)
+    PMIx_Finalize(NULL, 0);
+#endif
+}
+
 int MPIR_pmi_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int pmi_errno;
+    static bool pmi_connected = false;
 
     /* See if the user wants to override our default values */
     MPL_env2int("PMI_VERSION", &pmi_version);
@@ -212,6 +227,17 @@ int MPIR_pmi_init(void)
     world_id = 0;
 #endif
 
+    if (!pmi_connected) {
+        /* Register finalization of PM connection in exit handler. The
+         * atexit() result is kept in a separate variable so that mpi_errno
+         * only ever holds MPI error codes. */
+        int atexit_ret = atexit(MPIR_pmi_finalize_on_exit);
+        MPIR_ERR_CHKANDJUMP1(atexit_ret != 0, mpi_errno, MPI_ERR_OTHER,
+                             "**atexit_pmi_finalize", "**atexit_pmi_finalize %d", atexit_ret);
+
+        pmi_connected = true;
+    }
+
     MPIR_Process.has_parent = has_parent;
     MPIR_Process.rank = rank;
     MPIR_Process.size = size;
@@ -234,14 +260,13 @@ int MPIR_pmi_init(void)
 
 void MPIR_pmi_finalize(void)
 {
+    /* PMI(x)_Finalize is called from the exit handler;
+     * only free allocated memory here. */
 #ifdef USE_PMI1_API
-    PMI_Finalize();
     MPL_free(pmi_kvs_name);
 #elif defined(USE_PMI2_API)
-    PMI2_Finalize();
     MPL_free(pmi_jobid);
 #elif defined(USE_PMIX_API)
-    PMIx_Finalize(NULL, 0);
     /* pmix_proc does not need free */
 #endif
 