Skip to content

Commit

Permalink
Preserve MPI-I/O file hints when fapl is closed (#3755)
Browse files Browse the repository at this point in the history
* Fix for issue #3025: Save the MPI info in the file struct so H5Fget_access_plist() can retrieve it from there.
  • Loading branch information
vchoi-hdfgroup authored Oct 25, 2023
1 parent fbf77a8 commit 6578c45
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 5 deletions.
23 changes: 18 additions & 5 deletions src/H5Fint.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,18 +402,15 @@ H5F_get_access_plist(H5F_t *f, bool app_ref)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set collective metadata read flag");
if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
MPI_Comm mpi_comm;
MPI_Info mpi_info;

/* Retrieve and set MPI communicator */
if (MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(f)))
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI communicator");
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &mpi_comm) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI communicator");

/* Retrieve and set MPI info object */
if (H5P_get(old_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI info object");
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0)
/* Set MPI info object from the copy saved in the shared file struct */
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info object");
}
#endif /* H5_HAVE_PARALLEL */
Expand Down Expand Up @@ -1133,6 +1130,12 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
/* initialize point of no return */
f->shared->point_of_no_return = false;

#ifdef H5_HAVE_PARALLEL
/* Initialize this just in case we fail before setting this field and */
/* we try to call H5_mpi_info_free() on uninitialized memory in H5F__dest() */
f->shared->mpi_info = MPI_INFO_NULL;
#endif /* H5_HAVE_PARALLEL */

/* Copy the file creation and file access property lists into the
* new file handle. We do this early because some values might need
* to change as the file is being opened.
Expand Down Expand Up @@ -1209,6 +1212,8 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata read flag");
if (H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &(f->shared->coll_md_write)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata write flag");
if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't set MPI info object");
#endif /* H5_HAVE_PARALLEL */
if (H5P_get(plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) <
0)
Expand Down Expand Up @@ -1414,6 +1419,14 @@ H5F__dest(H5F_t *f, bool flush, bool free_on_failure)
f->shared->efc = NULL;
} /* end if */

#ifdef H5_HAVE_PARALLEL
if (f->shared->mpi_info != MPI_INFO_NULL) {
/* Free MPI info saved in the file struct */
if (H5_mpi_info_free(&f->shared->mpi_info) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL, "can't free MPI info");
}
#endif

/* With the shutdown modifications, the contents of the metadata cache
* should be clean at this point, with the possible exception of the
* the superblock and superblock extension.
Expand Down
1 change: 1 addition & 0 deletions src/H5Fpkg.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ struct H5F_shared_t {
#ifdef H5_HAVE_PARALLEL
H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */
bool coll_md_write; /* Do all metadata writes collectively */
MPI_Info mpi_info; /* MPI info */
#endif /* H5_HAVE_PARALLEL */
};

Expand Down
102 changes: 102 additions & 0 deletions testpar/t_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1119,3 +1119,105 @@ test_evict_on_close_parallel_unsupp(void)
ret = H5Pclose(fapl_id);
VRFY((SUCCEED == ret), "H5Pclose");
}

/*
* Verify that MPI I/O hints are preserved after closing the file access property list
* as described in issue #3025
* This is a test program from the user.
*/
void
test_fapl_preserve_hints(void)
{
hid_t fid = H5I_INVALID_HID; /* HDF5 file ID */
hid_t fapl_id = H5I_INVALID_HID; /* File access plist */
const char *filename;

int nkeys_used;
bool same = false;

MPI_Info info = MPI_INFO_NULL;
const char *key = "hdf_info_fapl";
const char *value = "xyz";

MPI_Info info_used = MPI_INFO_NULL;
int flag = -1;
char value_used[20];
char key_used[20];

int i;
herr_t ret; /* Generic return value */
int mpi_ret; /* MPI return value */

filename = (const char *)GetTestParameters();

/* set up MPI parameters */
mpi_ret = MPI_Info_create(&info);
VRFY((mpi_ret >= 0), "MPI_Info_create succeeded");

mpi_ret = MPI_Info_set(info, key, value);
VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_set succeeded");

fapl_id = H5Pcreate(H5P_FILE_ACCESS);
VRFY((fapl_id != H5I_INVALID_HID), "H5Pcreate");

ret = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, info);
VRFY((ret >= 0), "H5Pset_fapl_mpio");

fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
VRFY((fid != H5I_INVALID_HID), "H5Fcreate succeeded");

ret = H5Pclose(fapl_id);
VRFY((ret >= 0), "H5Pclose succeeded");

fapl_id = H5Fget_access_plist(fid);
VRFY((fapl_id != H5I_INVALID_HID), "H5Fget_access_plist succeeded");

ret = H5Pget_fapl_mpio(fapl_id, NULL, &info_used);
VRFY((ret >= 0), "H5Pget_fapl_mpio succeeded");

VRFY((info_used != MPI_INFO_NULL), "H5Pget_fapl_mpio");

mpi_ret = MPI_Info_get_nkeys(info_used, &nkeys_used);
VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_get_nkeys succeeded");

/* Loop over the # of keys */
for (i = 0; i < nkeys_used; i++) {

/* Memset the buffers to zero */
memset(key_used, 0, 20);
memset(value_used, 0, 20);

/* Get the nth key */
mpi_ret = MPI_Info_get_nthkey(info_used, i, key_used);
VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_get_nthkey succeeded");

if (!strcmp(key_used, key)) {

mpi_ret = MPI_Info_get(info_used, key_used, 20, value_used, &flag);
VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_get succeeded");

if (!strcmp(value_used, value)) {

/* Both key_used and value_used are the same */
same = true;
break;
}
}
} /* end for */

VRFY((same == true), "key_used and value_used are the same");

ret = H5Pclose(fapl_id);
VRFY((ret >= 0), "H5Pclose succeeded");

ret = H5Fclose(fid);
VRFY((ret >= 0), "H5Fclose succeeded");

/* Free the MPI info object */
mpi_ret = MPI_Info_free(&info);
VRFY((mpi_ret >= 0), "MPI_Info_free succeeded");

mpi_ret = MPI_Info_free(&info_used);
VRFY((mpi_ret >= 0), "MPI_Info_free succeeded");

} /* end test_fapl_preserve_hints() */
2 changes: 2 additions & 0 deletions testpar/testphdf5.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,8 @@ main(int argc, char **argv)

AddTest("evictparassert", test_evict_on_close_parallel_unsupp, NULL, "Evict on close in parallel failure",
PARATESTFILE);
AddTest("fapl_preserve", test_fapl_preserve_hints, NULL, "preserve MPI I/O hints after fapl closed",
PARATESTFILE);

AddTest("idsetw", dataset_writeInd, NULL, "dataset independent write", PARATESTFILE);
AddTest("idsetr", dataset_readInd, NULL, "dataset independent read", PARATESTFILE);
Expand Down
1 change: 1 addition & 0 deletions testpar/testphdf5.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ void test_file_properties(void);
void test_delete(void);
void test_invalid_libver_bounds_file_close_assert(void);
void test_evict_on_close_parallel_unsupp(void);
void test_fapl_preserve_hints(void);
void multiple_dset_write(void);
void multiple_group_write(void);
void multiple_group_read(void);
Expand Down

0 comments on commit 6578c45

Please sign in to comment.