From 9bebc54b7909b93f6b9650118c0b34af669692de Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 11 Jun 2024 16:46:04 -0600 Subject: [PATCH] Line decomp parallel (#459) * IOSS: refactor to reduce strings; give flexibility and efficiency * IOSS: Start of support for parallel line decomp * IOSS: Explicit template instantiation fix * IOSS: Allow passing filename down into line_decomp * IOSS: initial line_decompose(); compile/link does not run * IOSS: Refactor to try to eliminate duplicate code * IOSS: Add some missing includes * SLICE: Fix data_storage type * Share decomp code between slice and ioss line decomp * IOSS: Fix serial build * Pull element centroid into common utils class * Templative line_decompose; pass correct vector * Handle specified in guided_decompose * Remove unused function * elementToProc does not need 64-bit range * EXPLORE: Fix behavior after bad parse warning * EXPLORE: Better warning/info message on SELECT * Minor rearrange include files * IOSS: See if this fixes/affects msys2 build * IOSS: Add some logging/hwm code to line decomp * IOSS: Another try to see how affects msys2 build * IOSS: Better hwm logging output * Unify Slice and DecompositionUtils zoltan_decompose * IOSS: Fix msys2 build * clang-format run * SLICE: Version should be updated for latest changes * IOSS: Reduce storage potentially; fix zoltan free call * SLICE: Fix order of file close and mpi_finalize * IOSS: Enable decomposition statistics for line decomp * IOSS: io_shell - add_processor_id_field works for exodus also * IOSS: compose output will add a proc_id map to output * IOSS: thread-safe output_processor_id_map * CI: safer variable naming --------- Co-authored-by: Greg Sjaardema --- cmake-config | 10 +- .../seacas/applications/slice/SL_Decompose.C | 495 +-------------- .../seacas/applications/slice/SL_Decompose.h | 16 +- .../seacas/applications/slice/SL_Version.h | 4 +- packages/seacas/applications/slice/Slice.C | 13 +- .../libraries/ioss/src/Ioss_ChainGenerator.C | 
58 +- .../libraries/ioss/src/Ioss_Decomposition.C | 26 +- .../libraries/ioss/src/Ioss_Decomposition.h | 5 +- .../ioss/src/Ioss_DecompositionUtils.C | 581 ++++++++++++++++++ .../ioss/src/Ioss_DecompositionUtils.h | 48 ++ .../libraries/ioss/src/Ioss_FaceGenerator.C | 21 +- .../libraries/ioss/src/Ioss_FaceGenerator.h | 10 +- .../seacas/libraries/ioss/src/Ioss_Property.h | 3 +- .../ioss/src/exodus/Ioex_DatabaseIO.C | 4 +- .../ioss/src/exodus/Ioex_DecompositionData.C | 86 ++- .../ioss/src/exodus/Ioex_DecompositionData.h | 14 +- .../ioss/src/exodus/Ioex_ParallelDatabaseIO.C | 31 +- .../ioss/src/exodus/Ioex_ParallelDatabaseIO.h | 2 + .../libraries/ioss/src/main/shell_interface.C | 8 +- 19 files changed, 853 insertions(+), 582 deletions(-) create mode 100644 packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C create mode 100644 packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h diff --git a/cmake-config b/cmake-config index b786fc8800..1b9662c3ed 100755 --- a/cmake-config +++ b/cmake-config @@ -442,9 +442,9 @@ if [ "$SANITIZER" != "NO" ] ; then #sanitizer=dataflow #: DataFlowSanitizer, a general data flow analysis. #sanitizer=cfi #: control flow integrity checks. Requires -flto. #sanitizer=safe-stack #: safe stack protection against stack-based memory corruption errors. 
-SANITIZE="-fsanitize=${SANITIZER} -fno-omit-frame-pointer -fPIC" +OPT_SANITIZE="-fsanitize=${SANITIZER} -fno-omit-frame-pointer -fPIC" if [ "$SANITIZER" == "integer" ] ; then - SANITIZE="$SANITIZE -fno-sanitize=unsigned-integer-overflow" + OPT_SANITIZE="$OPT_SANITIZE -fno-sanitize=unsigned-integer-overflow" fi fi @@ -488,9 +488,9 @@ cmake -G "${GENERATOR}" \ -D CMAKE_CXX_COMPILER:FILEPATH=${CXX} \ -D CMAKE_C_COMPILER:FILEPATH=${CC} \ -D CMAKE_Fortran_COMPILER:FILEPATH=${FC} \ --D CMAKE_CXX_FLAGS="${CXXFLAGS} ${CXX_WARNING_FLAGS} ${SANITIZE}" \ --D CMAKE_C_FLAGS="${CFLAGS} ${C_WARNING_FLAGS} ${SANITIZE}" \ --D CMAKE_Fortran_FLAGS="${FFLAGS} ${F77_WARNING_FLAGS} ${SANITIZE}" \ +-D CMAKE_CXX_FLAGS="${CXXFLAGS} ${CXX_WARNING_FLAGS} ${OPT_SANITIZE}" \ +-D CMAKE_C_FLAGS="${CFLAGS} ${C_WARNING_FLAGS} ${OPT_SANITIZE}" \ +-D CMAKE_Fortran_FLAGS="${FFLAGS} ${F77_WARNING_FLAGS} ${OPT_SANITIZE}" \ -D Seacas_ENABLE_STRONG_C_COMPILE_WARNINGS=${EXTRA_WARNINGS} \ -D Seacas_ENABLE_STRONG_CXX_COMPILE_WARNINGS=${EXTRA_WARNINGS} \ -D CMAKE_INSTALL_RPATH:PATH=${INSTALL_PATH}/lib \ diff --git a/packages/seacas/applications/slice/SL_Decompose.C b/packages/seacas/applications/slice/SL_Decompose.C index cb27614147..a93ef45866 100644 --- a/packages/seacas/applications/slice/SL_Decompose.C +++ b/packages/seacas/applications/slice/SL_Decompose.C @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -35,11 +36,6 @@ using idx_t = int; #endif -#if USE_ZOLTAN -#include // for Zoltan_Initialize -#include // for Zoltan -#endif - extern int debug_level; extern double seacas_timer(); extern void progress(const std::string &output); @@ -126,248 +122,6 @@ namespace { [](char a, char b) { return std::tolower(a) == std::tolower(b); }); } -#if USE_ZOLTAN - template - std::tuple, std::vector, std::vector> - get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) - { - size_t element_count = region.get_property("element_count").get_int(); - - // The zoltan methods 
supported in slice are all geometry based - // and use the element centroid. - std::vector x(element_count); - std::vector y(element_count); - std::vector z(element_count); - - const auto *nb = region.get_node_blocks()[0]; - std::vector coor; - nb->get_field_data("mesh_model_coordinates", coor); - - const auto &blocks = region.get_element_blocks(); - size_t el = 0; - for (auto &eb : blocks) { - std::vector connectivity; - eb->get_field_data("connectivity_raw", connectivity); - size_t blk_element_count = eb->entity_count(); - size_t blk_element_nodes = eb->topology()->number_nodes(); - - for (size_t j = 0; j < blk_element_count; j++) { - for (size_t k = 0; k < blk_element_nodes; k++) { - auto node = connectivity[j * blk_element_nodes + k] - 1; - x[el] += coor[node * 3 + 0]; - y[el] += coor[node * 3 + 1]; - z[el] += coor[node * 3 + 2]; - } - x[el] /= blk_element_nodes; - y[el] /= blk_element_nodes; - z[el] /= blk_element_nodes; - el++; - } - } - return {x, y, z}; - } - /*****************************************************************************/ - /***** Global data structure used by Zoltan callbacks. *****/ - /***** Could implement Zoltan callbacks without global data structure, *****/ - /***** but using the global data structure makes implementation quick. *****/ - struct - { - size_t ndot; /* Length of x, y, z, and part (== # of elements) */ - int *vwgt; /* vertex weights */ - double *x; /* x-coordinates */ - double *y; /* y-coordinates */ - double *z; /* z-coordinates */ - } Zoltan_Data; - - /*****************************************************************************/ - /***** ZOLTAN CALLBACK FUNCTIONS *****/ - int zoltan_num_dim(void * /*data*/, int *ierr) - { - /* Return dimensionality of coordinate data. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. 
- */ - *ierr = ZOLTAN_OK; - if (Zoltan_Data.z != nullptr) { - return 3; - } - if (Zoltan_Data.y != nullptr) { - return 2; - } - return 1; - } - - int zoltan_num_obj(void * /*data*/, int *ierr) - { - /* Return number of objects. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - *ierr = ZOLTAN_OK; - return Zoltan_Data.ndot; - } - - void zoltan_obj_list(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, ZOLTAN_ID_PTR gids, - ZOLTAN_ID_PTR /*lids*/, int wdim, float *wgts, int *ierr) - { - /* Return list of object IDs. - * Return only global IDs; don't need local IDs since running in serial. - * gids are array indices for coordinate and vwgts arrays. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - std::iota(gids, gids + Zoltan_Data.ndot, 0); - if (wdim != 0) { - for (size_t i = 0; i < Zoltan_Data.ndot; i++) { - wgts[i] = static_cast(Zoltan_Data.vwgt[i]); - } - } - - *ierr = ZOLTAN_OK; - } - - void zoltan_geom(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, int nobj, - const ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR /*lids*/, int ndim, double *geom, - int *ierr) - { - /* Return coordinates for objects. - * gids are array indices for coordinate arrays. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. 
- */ - - for (size_t i = 0; i < static_cast(nobj); i++) { - size_t j = gids[i]; - geom[i * ndim] = Zoltan_Data.x[j]; - if (ndim > 1) { - geom[i * ndim + 1] = Zoltan_Data.y[j]; - } - if (ndim > 2) { - geom[i * ndim + 2] = Zoltan_Data.z[j]; - } - } - - *ierr = ZOLTAN_OK; - } - - template - void decompose_zoltan(const Ioss::Region ®ion, int ranks, SystemInterface &interFace, - std::vector &elem_to_proc, const std::vector &weights, - IOSS_MAYBE_UNUSED INT dummy) - { - if (ranks == 1) { - return; - } - - size_t element_count = region.get_property("element_count").get_int(); - if (element_count != static_cast(static_cast(element_count))) { - fmt::print(stderr, "ERROR: Cannot have a mesh with more than 2.1 Billion elements in a " - "Zoltan decomposition.\n"); - exit(EXIT_FAILURE); - } - - auto [x, y, z] = get_element_centroid(region, dummy); - - // Copy mesh data and pointers into structure accessible from callback fns. - Zoltan_Data.ndot = element_count; - Zoltan_Data.vwgt = const_cast(Data(weights)); - - if (interFace.ignore_x_ && interFace.ignore_y_) { - Zoltan_Data.x = Data(z); - } - else if (interFace.ignore_x_ && interFace.ignore_z_) { - Zoltan_Data.x = Data(y); - } - else if (interFace.ignore_y_ && interFace.ignore_z_) { - Zoltan_Data.x = Data(x); - } - else if (interFace.ignore_x_) { - Zoltan_Data.x = Data(y); - Zoltan_Data.y = Data(z); - } - else if (interFace.ignore_y_) { - Zoltan_Data.x = Data(x); - Zoltan_Data.y = Data(z); - } - else if (!interFace.ignore_z_) { - Zoltan_Data.x = Data(x); - Zoltan_Data.y = Data(y); - } - else { - Zoltan_Data.x = Data(x); - Zoltan_Data.y = Data(y); - Zoltan_Data.z = Data(z); - } - - // Initialize Zoltan - int argc = 0; - char **argv = nullptr; - - float ver = 0.0; - Zoltan_Initialize(argc, argv, &ver); - fmt::print("Using Zoltan version {:.2}, method {}\n", static_cast(ver), - interFace.decomposition_method()); - - Zoltan zz(Ioss::ParallelUtils::comm_world()); - - // Register Callback functions - // Using global Zoltan_Data; 
could register it here instead as data field. - zz.Set_Num_Obj_Fn(zoltan_num_obj, nullptr); - zz.Set_Obj_List_Fn(zoltan_obj_list, nullptr); - zz.Set_Num_Geom_Fn(zoltan_num_dim, nullptr); - zz.Set_Geom_Multi_Fn(zoltan_geom, nullptr); - - // Set parameters for Zoltan - zz.Set_Param("DEBUG_LEVEL", "0"); - std::string str = fmt::format("{}", ranks); - zz.Set_Param("NUM_GLOBAL_PARTS", str); - zz.Set_Param("OBJ_WEIGHT_DIM", "1"); - zz.Set_Param("LB_METHOD", interFace.decomposition_method()); - zz.Set_Param("NUM_LID_ENTRIES", "0"); - zz.Set_Param("REMAP", "0"); - zz.Set_Param("RETURN_LISTS", "PARTITION_ASSIGNMENTS"); - zz.Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); - - int num_global = sizeof(INT) / sizeof(ZOLTAN_ID_TYPE); - num_global = num_global < 1 ? 1 : num_global; - - // Call partitioner - int changes = 0; - int num_local = 0; - int num_import = 1; - int num_export = 1; - ZOLTAN_ID_PTR import_global_ids = nullptr; - ZOLTAN_ID_PTR import_local_ids = nullptr; - ZOLTAN_ID_PTR export_global_ids = nullptr; - ZOLTAN_ID_PTR export_local_ids = nullptr; - int *import_procs = nullptr; - int *import_to_part = nullptr; - int *export_procs = nullptr; - int *export_to_part = nullptr; - int rc = zz.LB_Partition(changes, num_global, num_local, num_import, import_global_ids, - import_local_ids, import_procs, import_to_part, num_export, - export_global_ids, export_local_ids, export_procs, export_to_part); - - if (rc != ZOLTAN_OK) { - fmt::print(stderr, "ERROR: Problem during call to Zoltan LB_Partition.\n"); - goto End; - } - - // Sanity check - if (element_count != static_cast(num_export)) { - fmt::print(stderr, "Sanity check failed; ndot {} != num_export {}.\n", element_count, - static_cast(num_export)); - goto End; - } - - elem_to_proc.resize(element_count); - for (size_t i = 0; i < element_count; i++) { - elem_to_proc[i] = export_to_part[i]; - } - - End: - /* Clean up */ - Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); - 
Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); - } -#endif - #if USE_METIS int get_common_node_count(const Ioss::Region ®ion) { @@ -464,64 +218,6 @@ namespace { } #endif - void output_histogram(const std::vector &proc_work, size_t avg_work, size_t median) - { - fmt::print("Work-per-processor Histogram\n"); - std::array histogram{}; - - auto wmin = *std::min_element(proc_work.begin(), proc_work.end()); - auto wmax = *std::max_element(proc_work.begin(), proc_work.end()); - - size_t hist_size = std::min(size_t(16), (wmax - wmin)); - hist_size = std::min(hist_size, proc_work.size()); - - if (hist_size <= 1) { - fmt::print("\tWork is the same on all processors; no histogram needed.\n\n"); - return; - } - - auto delta = double(wmax + 1 - wmin) / hist_size; - for (const auto &pw : proc_work) { - auto bin = size_t(double(pw - wmin) / delta); - SMART_ASSERT(bin < hist_size)(bin)(hist_size); - histogram[bin]++; - } - - size_t proc_width = Ioss::Utils::number_width(proc_work.size(), true); - size_t work_width = Ioss::Utils::number_width(wmax, true); - - fmt::print("\n\t{:^{}} {:^{}}\n", "Work Range", 2 * work_width + 2, "#", proc_width); - auto hist_max = *std::max_element(histogram.begin(), histogram.end()); - for (size_t i = 0; i < hist_size; i++) { - int max_star = 50; - int star_cnt = ((double)histogram[i] / hist_max * max_star); - std::string stars(star_cnt, '*'); - for (int j = 9; j < star_cnt;) { - stars[j] = '|'; - j += 10; - } - if (histogram[i] > 0 && star_cnt == 0) { - stars = '.'; - } - size_t w1 = wmin + size_t(i * delta); - size_t w2 = wmin + size_t((i + 1) * delta); - std::string postfix; - if (w1 <= avg_work && avg_work < w2) { - postfix += "average"; - } - if (w1 <= median && median < w2) { - if (!postfix.empty()) { - postfix += ", "; - } - postfix += "median"; - } - fmt::print("\t{:{}}..{:{}} ({:{}}):\t{:{}} {}\n", fmt::group_digits(w1), work_width, - fmt::group_digits(w2), work_width, 
fmt::group_digits(histogram[i]), proc_width, - stars, max_star, postfix); - } - fmt::print("\n"); - } - void scale_decomp(std::vector &elem_to_proc, int iscale, size_t num_proc) { // Do the scaling (integer division...) @@ -576,15 +272,15 @@ namespace { } // namespace template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, - IOSS_MAYBE_UNUSED int dummy); + const std::vector &weights, + IOSS_MAYBE_UNUSED int dummy); template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, + const std::vector &weights, IOSS_MAYBE_UNUSED int64_t dummy); template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, IOSS_MAYBE_UNUSED INT dummy) + const std::vector &weights, IOSS_MAYBE_UNUSED INT dummy) { progress(__func__); // Populate the 'elem_to_proc' vector with a mapping from element to processor. @@ -648,13 +344,9 @@ std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface else if (interFace.decomposition_method() == "rcb" || interFace.decomposition_method() == "rib" || interFace.decomposition_method() == "hsfc") { -#if USE_ZOLTAN - decompose_zoltan(region, interFace.processor_count(), interFace, elem_to_proc, weights, dummy); -#else - fmt::print(stderr, "ERROR: Zoltan library not enabled in this version of slice.\n" - " The 'rcb', 'rib', and 'hsfc' methods are not available.\n\n"); - std::exit(1); -#endif + Ioss::DecompUtils::decompose_zoltan( + region, interFace.processor_count(), interFace.decomposition_method(), elem_to_proc, + weights, interFace.ignore_x_, interFace.ignore_y_, interFace.ignore_z_, dummy); } else if (interFace.decomposition_method() == "rb" || interFace.decomposition_method() == "kway") { @@ -824,174 +516,3 @@ std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface assert(elem_to_proc.size() == element_count); return elem_to_proc; } - 
-template -std::map> string_chains(const Ioss::chain_t &element_chains) -{ - std::map> chains; - - for (size_t i = 0; i < element_chains.size(); i++) { - auto &chain_entry = element_chains[i]; - if (chain_entry.link >= 0) { - chains[chain_entry.element].push_back(i + 1); - } - } - return chains; -} - -template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); -template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); - -template -std::vector line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) -{ - auto chains = string_chains(element_chains); - - if ((debug_level & 16) != 0) { - for (const auto &[chain_root, chain_elements] : chains) { - fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); - } - } - - std::vector weights(element_count, 1); - // Now, for each chain... - for (const auto &[chain_root, chain_elements] : chains) { - // * Set the weights of all elements in the chain... - // * non-root = 0, root = length of chain. - for (const auto &element : chain_elements) { - weights[element - 1] = 0; - } - weights[chain_root - 1] = static_cast(chain_elements.size()); - } - return weights; -} - -template void line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); -template void line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); - -template -void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, - int proc_count) -{ - // Get a map of all chains and the elements in the chains. Map key will be root. - auto chains = string_chains(element_chains); - - // Delta: elements added/removed from each processor... - std::vector delta(proc_count); - - // Now, for each chain... 
- for (const auto &[chain_root, chain_elements] : chains) { - if ((debug_level & 16) != 0) { - fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); - } - - std::vector chain_proc_count(proc_count); - - // * get processors used by elements in the chain... - for (const auto &element : chain_elements) { - auto proc = elem_to_proc[element - 1]; - chain_proc_count[proc]++; - } - - // * Now, subtract the `delta` from each count - for (int i = 0; i < proc_count; i++) { - chain_proc_count[i] -= delta[i]; - } - - // * Assign all elements in the chain to processor at chain root - // * Update the deltas for all processors that gain/lose elements... - auto root_proc = elem_to_proc[chain_root - 1]; - for (const auto &element : chain_elements) { - if (elem_to_proc[element - 1] != root_proc) { - auto old_proc = elem_to_proc[element - 1]; - elem_to_proc[element - 1] = root_proc; - delta[root_proc]++; - delta[old_proc]--; - } - } - } - - std::vector proc_element_count(proc_count); - for (auto proc : elem_to_proc) { - proc_element_count[proc]++; - } - if ((debug_level & 32) != 0) { - fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); - fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); - } -} - -template void output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements); -template void output_decomposition_statistics(const std::vector &elem_to_proc, - int proc_count, size_t number_elements); -template -void output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements) -{ - // Output histogram of elements / rank... 
- std::vector elem_per_rank(proc_count); - for (INT proc : elem_to_proc) { - elem_per_rank[proc]++; - } - - size_t proc_width = Ioss::Utils::number_width(proc_count, false); - size_t work_width = Ioss::Utils::number_width(number_elements, true); - - auto min_work = *std::min_element(elem_per_rank.begin(), elem_per_rank.end()); - auto max_work = *std::max_element(elem_per_rank.begin(), elem_per_rank.end()); - size_t median = 0; - { - auto pw_copy(elem_per_rank); - std::nth_element(pw_copy.begin(), pw_copy.begin() + pw_copy.size() / 2, pw_copy.end()); - median = pw_copy[pw_copy.size() / 2]; - fmt::print("\nElements per processor:\n\tMinimum = {}, Maximum = {}, Median = {}, Ratio = " - "{:.3}\n\n", - fmt::group_digits(min_work), fmt::group_digits(max_work), fmt::group_digits(median), - (double)(max_work) / min_work); - } - if (min_work == max_work) { - fmt::print("\nWork on all processors is {}\n\n", fmt::group_digits(min_work)); - } - else { - int max_star = 40; - int min_star = max_star * ((double)min_work / (double)(max_work)); - min_star = std::max(1, min_star); - int delta = max_star - min_star; - - double avg_work = (double)number_elements / (double)proc_count; - for (size_t i = 0; i < elem_per_rank.size(); i++) { - int star_cnt = - (double)(elem_per_rank[i] - min_work) / (max_work - min_work) * delta + min_star; - std::string stars(star_cnt, '*'); - std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; - if (elem_per_rank[i] == max_work) { - fmt::print( -#if !defined __NVCC__ - fg(fmt::color::red), -#endif - format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, - (double)elem_per_rank[i] / avg_work, stars); - } - else if (elem_per_rank[i] == min_work) { - fmt::print( -#if !defined __NVCC__ - fg(fmt::color::green), -#endif - format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, - elem_per_rank[i] / avg_work, stars); - } - else { - fmt::print(format, i, proc_width, fmt::group_digits(elem_per_rank[i]), 
work_width, - elem_per_rank[i] / avg_work, stars); - } - } - - // Output Histogram... - output_histogram(elem_per_rank, (size_t)avg_work, median); - } -} diff --git a/packages/seacas/applications/slice/SL_Decompose.h b/packages/seacas/applications/slice/SL_Decompose.h index f2e49c6361..8310570193 100644 --- a/packages/seacas/applications/slice/SL_Decompose.h +++ b/packages/seacas/applications/slice/SL_Decompose.h @@ -1,4 +1,4 @@ -// Copyright(C) 1999-2023 National Technology & Engineering Solutions +// Copyright(C) 1999-2024 National Technology & Engineering Solutions // of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with // NTESS, the U.S. Government retains certain rights in this software. // @@ -12,16 +12,4 @@ #pragma once template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, IOSS_MAYBE_UNUSED INT dummy); - -template -void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, - int proc_count); - -template -void output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements); - -template -std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); + const std::vector &weights, IOSS_MAYBE_UNUSED INT dummy); diff --git a/packages/seacas/applications/slice/SL_Version.h b/packages/seacas/applications/slice/SL_Version.h index 72fb5eaf86..1560a65a3c 100644 --- a/packages/seacas/applications/slice/SL_Version.h +++ b/packages/seacas/applications/slice/SL_Version.h @@ -9,6 +9,6 @@ static const std::array qainfo{ "slice", - "2024/04/03", - "2.2.01", + "2024/06/10", + "2.3.00", }; diff --git a/packages/seacas/applications/slice/Slice.C b/packages/seacas/applications/slice/Slice.C index 9cb38e7e79..be73865ceb 100644 --- a/packages/seacas/applications/slice/Slice.C +++ b/packages/seacas/applications/slice/Slice.C @@ -11,6 +11,7 @@ #include #include #include +#include #include #include 
#include @@ -1373,7 +1374,7 @@ namespace { Ioss::PropertyManager properties = set_properties(interFace); Ioss::chain_t element_chains; - std::vector weights; + std::vector weights; if (interFace.lineDecomp_) { element_chains = Ioss::generate_element_chains(region, interFace.lineSurfaceList_, debug_level, dummy); @@ -1381,8 +1382,8 @@ namespace { if (interFace.decomposition_method() == "rcb" || interFace.decomposition_method() == "rib" || interFace.decomposition_method() == "hsfc") { - weights = - line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + weights = Ioss::DecompUtils::line_decomp_weights( + element_chains, region.get_property("element_count").get_int()); progress("generate_element_weights"); } } @@ -1399,12 +1400,12 @@ namespace { if (interFace.lineDecomp_) { // Make sure all elements on a chain are on the same processor rank... - line_decomp_modify(element_chains, elem_to_proc, interFace.processor_count()); + Ioss::DecompUtils::line_decomp_modify(element_chains, elem_to_proc, + interFace.processor_count()); } if (debug_level & 32) { - output_decomposition_statistics(elem_to_proc, interFace.processor_count(), - elem_to_proc.size()); + Ioss::DecompUtils::output_decomposition_statistics(elem_to_proc, interFace.processor_count()); } if (!create_split_files) { diff --git a/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C b/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C index 8f16e84524..92d762eae9 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C +++ b/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C @@ -84,14 +84,14 @@ namespace { template void get_line_front(Ioss::SideSet *fs, const Ioss::ElementBlock *block, - const std::string &adj_block, Ioss::chain_t &element_chains, - front_t &front) + Ioss::chain_t &element_chains, front_t &front) { + const auto adj_block_name = block->name(); Ioss::NameList blocks; fs->block_membership(blocks); for (const auto &fs_block : blocks) { - if 
(fs_block == adj_block) { - // This faceset has some elements that are in `adj_block` -- put those in the `front` + if (fs_block == adj_block_name) { + // This faceset has some elements that are in `adj_block_name` -- put those in the `front` // list. Get list of "sides" in this faceset... std::vector element_side; assert(fs->side_block_count() == 1); @@ -117,23 +117,19 @@ namespace { } template - front_t get_line_front(Ioss::Region ®ion, const std::string &adj_block, + front_t get_line_front(Ioss::Region ®ion, const Ioss::ElementBlock *block, Ioss::chain_t &element_chains, const std::string &surface_list) { front_t front; // Since lines can not cross element blocks, we can process everything a block at a time. - const auto *block = region.get_element_block(adj_block); assert(block != nullptr); - if (block->topology()->shape() != Ioss::ElementShape::HEX) { - fmt::print("Skipping Element Block {}; it does not contain HEX elements.\n", adj_block); - return front; - } + assert(block->topology()->shape() == Ioss::ElementShape::HEX); if (surface_list == "ALL") { const Ioss::SideSetContainer &fss = region.get_sidesets(); for (const auto &fs : fss) { - get_line_front(fs, block, adj_block, element_chains, front); + get_line_front(fs, block, element_chains, front); } } else { @@ -141,7 +137,7 @@ namespace { for (const auto &surface : selected_surfaces) { auto *sset = region.get_sideset(surface); if (sset != nullptr) { - get_line_front(sset, block, adj_block, element_chains, front); + get_line_front(sset, block, element_chains, front); } } } @@ -201,33 +197,47 @@ namespace Ioss { size_t numel = region.get_property("element_count").get_int(); Ioss::chain_t element_chains(numel); - // Generate the faces for use later... 
- Ioss::FaceGenerator face_generator(region); - face_generator.generate_faces((INT)0, true, true); - // Determine which element block(s) are adjacent to the faceset specifying "lines" // The `adjacent_blocks` contains the names of all element blocks that are adjacent to the // surface(s) that specify the faces at the 'root' of the lines... - Ioss::NameList adjacent_blocks = get_adjacent_blocks(region, surface_list); - if (adjacent_blocks.empty()) { + Ioss::NameList adjacent_block_names = get_adjacent_blocks(region, surface_list); + if (adjacent_block_names.empty()) { fmt::print("WARNING: No surfaces in the model matched the input surface list ({}).\n\tNo " "chains will be generated.\n", surface_list); } - for (const auto &adj_block : adjacent_blocks) { + + // Get the EB* corresponding to the EB names... + Ioss::ElementBlockContainer adjacent_blocks; + adjacent_blocks.reserve(adjacent_block_names.size()); + for (const auto &blk_name : adjacent_block_names) { + auto *eb = region.get_element_block(blk_name); + assert(eb != nullptr); + if (eb->topology()->shape() != Ioss::ElementShape::HEX) { + fmt::print("Skipping Element Block {}; it does not contain HEX elements.\n", blk_name); + } + else { + adjacent_blocks.push_back(eb); + } + } + + // Generate the faces for use later... (only generate on the blocks touching the front) + Ioss::FaceGenerator face_generator(region); + face_generator.generate_block_faces(adjacent_blocks, (INT)0, true); + + for (const auto *block : adjacent_blocks) { // Get the offset into the element_chains vector... 
- const auto *block = region.get_element_block(adj_block); - auto offset = block->get_offset() + 1; - auto count = block->entity_count(); + auto offset = block->get_offset() + 1; + auto count = block->entity_count(); - auto front = get_line_front(region, adj_block, element_chains, surface_list); + auto front = get_line_front(region, block, element_chains, surface_list); if (front.empty()) { continue; } // We want a vector giving us the Face for each face of each element in the block... connectivity_t face_connectivity(count); - generate_face_connectivity(face_generator.faces(adj_block), static_cast(offset), + generate_face_connectivity(face_generator.faces(block), static_cast(offset), face_connectivity); // For each face on the "front" (at the beginning the boundary sideset faces) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C index efda361b47..48df8029e0 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C @@ -301,6 +301,13 @@ namespace Ioss { props.get("PARMETIS_COMMON_NODE_COUNT").get_int() > 0) { m_commonNodeCount = props.get("PARMETIS_COMMON_NODE_COUNT").get_int(); } + + if (props.exists("LINE_DECOMPOSITION")) { + // The value of the property should be a comma-separated list of surface/sideset names from + // which the lines will grow, or the value "ALL" for all surfaces in the model. + m_lineDecomp = true; + m_decompExtra = props.get("LINE_DECOMPOSITION").get_string(); + } } template IOSS_EXPORT void @@ -452,6 +459,12 @@ namespace Ioss { if (m_method == "MAP") { guided_decompose(); } + if (m_method == "SPECIFIED") { + // Currently used for line decomposition with another decomposition type. + // The line-modified decomposition is done prior to this and builds the + // `m_elementToProc` which is then used here to decompose the elements... 
+ guided_decompose(); + } show_progress("\tfinished with decomposition method"); Ioss::sort(importElementMap); @@ -666,7 +679,7 @@ namespace Ioss { template void Decomposition::guided_decompose() { show_progress(__func__); - assert(m_method == "MAP" || m_method == "VARIABLE"); + assert(m_method == "MAP" || m_method == "VARIABLE" || m_method == "SPECIFIED"); // - Read my portion of the map / variable. // - count # of exports to each rank // -- exportElementCount[proc] @@ -675,13 +688,7 @@ namespace Ioss { // - communicate to all proc -- becomes importElementMap. // Create `exportElementIndex` from `exportElementCount` - std::string label; - if (m_method == "MAP") { - label = "map"; - } - else { - label = "variable"; - } + std::string label = m_method; // If the "m_decompExtra" string contains a comma, then the // value following the comma is either an integer "scale" @@ -699,7 +706,7 @@ namespace Ioss { // [0..m_processorCount). double scale = 1.0; auto pos = m_decompExtra.find(","); - if (pos != std::string::npos) { + if (m_method != "SPECIFIED" && pos != std::string::npos) { // Extract the string following the comma... 
auto scale_str = m_decompExtra.substr(pos + 1); if (scale_str == "AUTO" || scale_str == "auto") { @@ -1080,6 +1087,7 @@ namespace Ioss { #endif #if !defined(NO_ZOLTAN_SUPPORT) + template void Decomposition::zoltan_decompose(Zoltan &zz) { show_progress(__func__); diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h index 5bae8e227b..14ca00bd4e 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h @@ -277,6 +277,7 @@ namespace Ioss { void simple_decompose(); void simple_node_decompose(); void guided_decompose(); + void line_decompose(); void calculate_element_centroids(const std::vector &x, const std::vector &y, const std::vector &z); @@ -801,11 +802,13 @@ namespace Ioss { size_t m_importPreLocalNodeIndex{0}; bool m_retainFreeNodes{true}; + bool m_lineDecomp{false}; bool m_showProgress{false}; bool m_showHWM{false}; - std::vector m_elementToProc; // Used by "MAP" scheme... + std::vector m_elementToProc; // Used by "MAP" scheme... std::vector m_centroids; + std::vector m_weights; std::vector m_pointer; // Index into adjacency, processor list for each element... std::vector m_adjacency; // Size is sum of element connectivity sizes diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C new file mode 100644 index 0000000000..b065b56154 --- /dev/null +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -0,0 +1,581 @@ +/* + * Copyright(C) 2024 National Technology & Engineering Solutions + * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with + * NTESS, the U.S. Government retains certain rights in this software. 
+ * + * See packages/seacas/LICENSE for details + */ + +#include +#include +#include +#include + +#include "Ioss_ChainGenerator.h" +#include "Ioss_CodeTypes.h" +#include "Ioss_DecompositionUtils.h" +#include "Ioss_ElementBlock.h" +#include "Ioss_NodeBlock.h" +#include "Ioss_ParallelUtils.h" +#include "Ioss_Region.h" +#include "Ioss_SmartAssert.h" + +#include +#include +#include +#if !defined __NVCC__ +#include +#endif + +#if !defined(NO_ZOLTAN_SUPPORT) +#include // for Zoltan_Initialize +#include // for Zoltan +#endif + +namespace { +#if !defined(NO_ZOLTAN_SUPPORT) + /*****************************************************************************/ + /***** Global data structure used by Zoltan callbacks. *****/ + /***** Could implement Zoltan callbacks without global data structure, *****/ + /***** but using the global data structure makes implementation quick. *****/ + struct + { + size_t ndot; /* Length of x, y, z, and part (== # of elements) */ + float *vwgt; /* vertex weights */ + double *x; /* x-coordinates */ + double *y; /* y-coordinates */ + double *z; /* z-coordinates */ + } Zoltan_Data; + + /*****************************************************************************/ + /***** ZOLTAN CALLBACK FUNCTIONS *****/ + int zoltan_num_dim(void * /*data*/, int *ierr) + { + /* Return dimensionality of coordinate data. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + *ierr = ZOLTAN_OK; + if (Zoltan_Data.z != nullptr) { + return 3; + } + if (Zoltan_Data.y != nullptr) { + return 2; + } + return 1; + } + + int zoltan_num_obj(void * /*data*/, int *ierr) + { + /* Return number of objects. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + *ierr = ZOLTAN_OK; + return Zoltan_Data.ndot; + } + + void zoltan_obj_list(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, ZOLTAN_ID_PTR gids, + ZOLTAN_ID_PTR /*lids*/, int wdim, float *wgts, int *ierr) + { + /* Return list of object IDs. 
+ * Return only global IDs; don't need local IDs since running in serial. + * gids are array indices for coordinate and vwgts arrays. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + std::iota(gids, gids + Zoltan_Data.ndot, 0); + if (wdim != 0) { + for (size_t i = 0; i < Zoltan_Data.ndot; i++) { + wgts[i] = static_cast(Zoltan_Data.vwgt[i]); + } + } + + *ierr = ZOLTAN_OK; + } + + void zoltan_geom(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, int nobj, + const ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR /*lids*/, int ndim, double *geom, + int *ierr) + { + /* Return coordinates for objects. + * gids are array indices for coordinate arrays. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + + for (size_t i = 0; i < static_cast(nobj); i++) { + size_t j = gids[i]; + geom[i * ndim] = Zoltan_Data.x[j]; + if (ndim > 1) { + geom[i * ndim + 1] = Zoltan_Data.y[j]; + } + if (ndim > 2) { + geom[i * ndim + 2] = Zoltan_Data.z[j]; + } + } + + *ierr = ZOLTAN_OK; + } +#endif + + template + std::map> string_chains(const Ioss::chain_t &element_chains) + { + std::map> chains; + + for (size_t i = 0; i < element_chains.size(); i++) { + auto &chain_entry = element_chains[i]; + if (chain_entry.link >= 0) { + chains[chain_entry.element].push_back(i + 1); + } + } + return chains; + } + + void output_histogram(const std::vector &proc_work, size_t avg_work, size_t median) + { + fmt::print("Work-per-processor Histogram\n"); + std::array histogram{}; + + auto wmin = *std::min_element(proc_work.begin(), proc_work.end()); + auto wmax = *std::max_element(proc_work.begin(), proc_work.end()); + + size_t hist_size = std::min(size_t(16), (wmax - wmin)); + hist_size = std::min(hist_size, proc_work.size()); + + if (hist_size <= 1) { + fmt::print("\tWork is the same on all processors; no histogram needed.\n\n"); + return; + } + + auto delta = double(wmax + 1 - wmin) / hist_size; + for (const auto &pw : proc_work) { + auto bin = 
size_t(double(pw - wmin) / delta); + SMART_ASSERT(bin < hist_size)(bin)(hist_size); + histogram[bin]++; + } + + size_t proc_width = Ioss::Utils::number_width(proc_work.size(), true); + size_t work_width = Ioss::Utils::number_width(wmax, true); + + fmt::print("\n\t{:^{}} {:^{}}\n", "Work Range", 2 * work_width + 2, "#", proc_width); + auto hist_max = *std::max_element(histogram.begin(), histogram.end()); + for (size_t i = 0; i < hist_size; i++) { + int max_star = 50; + int star_cnt = ((double)histogram[i] / hist_max * max_star); + std::string stars(star_cnt, '*'); + for (int j = 9; j < star_cnt;) { + stars[j] = '|'; + j += 10; + } + if (histogram[i] > 0 && star_cnt == 0) { + stars = '.'; + } + size_t w1 = wmin + size_t(i * delta); + size_t w2 = wmin + size_t((i + 1) * delta); + std::string postfix; + if (w1 <= avg_work && avg_work < w2) { + postfix += "average"; + } + if (w1 <= median && median < w2) { + if (!postfix.empty()) { + postfix += ", "; + } + postfix += "median"; + } + fmt::print("\t{:{}}..{:{}} ({:{}}):\t{:{}} {}\n", fmt::group_digits(w1), work_width, + fmt::group_digits(w2), work_width, fmt::group_digits(histogram[i]), proc_width, + stars, max_star, postfix); + } + fmt::print("\n"); + } +} // namespace + +namespace Ioss { + template IOSS_EXPORT void + DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED int dummy); + template IOSS_EXPORT void + DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED int64_t dummy); + + template + void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, + const std::string &method, std::vector &elem_to_proc, + const std::vector &weights, bool ignore_x, + bool ignore_y, bool ignore_z, 
IOSS_MAYBE_UNUSED INT dummy) + { +#if defined(NO_ZOLTAN_SUPPORT) + fmt::print(stderr, "ERROR: Zoltan library not enabled in this version of slice.\n" + " The 'rcb', 'rib', and 'hsfc' methods are not available.\n\n"); + std::exit(1); +#else + if (ranks == 1) { + return; + } + + size_t element_count = region.get_property("element_count").get_int(); + if (element_count != static_cast(static_cast(element_count))) { + fmt::print(stderr, "ERROR: Cannot have a mesh with more than 2.1 Billion elements in a " + "Zoltan decomposition.\n"); + exit(EXIT_FAILURE); + } + + auto [x, y, z] = Ioss::DecompUtils::get_element_centroid(region, dummy); + + // Copy mesh data and pointers into structure accessible from callback fns. + Zoltan_Data.ndot = element_count; + Zoltan_Data.vwgt = const_cast(Data(weights)); + + if (ignore_x && ignore_y) { + x.clear(); + y.clear(); + Zoltan_Data.x = Data(z); + } + else if (ignore_x && ignore_z) { + x.clear(); + z.clear(); + Zoltan_Data.x = Data(y); + } + else if (ignore_y && ignore_z) { + y.clear(); + z.clear(); + Zoltan_Data.x = Data(x); + } + else if (ignore_x) { + x.clear(); + Zoltan_Data.x = Data(y); + Zoltan_Data.y = Data(z); + } + else if (ignore_y) { + y.clear(); + Zoltan_Data.x = Data(x); + Zoltan_Data.y = Data(z); + } + else if (ignore_z) { + z.clear(); + Zoltan_Data.x = Data(x); + Zoltan_Data.y = Data(y); + } + else { + Zoltan_Data.x = Data(x); + Zoltan_Data.y = Data(y); + Zoltan_Data.z = Data(z); + } + + // Initialize Zoltan + int argc = 0; + char **argv = nullptr; + + float ver = 0.0; + Zoltan_Initialize(argc, argv, &ver); + fmt::print("Using Zoltan version {:.2}, method {}\n", static_cast(ver), method); + + Zoltan zz(Ioss::ParallelUtils::comm_self()); + + // Register Callback functions + // Using global Zoltan_Data; could register it here instead as data field. 
+ zz.Set_Num_Obj_Fn(zoltan_num_obj, nullptr); + zz.Set_Obj_List_Fn(zoltan_obj_list, nullptr); + zz.Set_Num_Geom_Fn(zoltan_num_dim, nullptr); + zz.Set_Geom_Multi_Fn(zoltan_geom, nullptr); + + // Set parameters for Zoltan + zz.Set_Param("DEBUG_LEVEL", "0"); + std::string str = fmt::format("{}", ranks); + zz.Set_Param("NUM_GLOBAL_PARTS", str); + zz.Set_Param("OBJ_WEIGHT_DIM", "1"); + zz.Set_Param("LB_METHOD", method); + zz.Set_Param("NUM_LID_ENTRIES", "0"); + zz.Set_Param("REMAP", "0"); + zz.Set_Param("RETURN_LISTS", "PARTITION_ASSIGNMENTS"); + zz.Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); + + int num_global = sizeof(INT) / sizeof(ZOLTAN_ID_TYPE); + num_global = num_global < 1 ? 1 : num_global; + + // Call partitioner + int changes = 0; + int num_local = 0; + int num_import = 1; + int num_export = 1; + ZOLTAN_ID_PTR import_global_ids = nullptr; + ZOLTAN_ID_PTR import_local_ids = nullptr; + ZOLTAN_ID_PTR export_global_ids = nullptr; + ZOLTAN_ID_PTR export_local_ids = nullptr; + int *import_procs = nullptr; + int *import_to_part = nullptr; + int *export_procs = nullptr; + int *export_to_part = nullptr; + int rc = zz.LB_Partition(changes, num_global, num_local, num_import, import_global_ids, + import_local_ids, import_procs, import_to_part, num_export, + export_global_ids, export_local_ids, export_procs, export_to_part); + + if (rc != ZOLTAN_OK) { + fmt::print(stderr, "ERROR: Problem during call to Zoltan LB_Partition.\n"); + goto End; + } + + // Sanity check + if (element_count != static_cast(num_export)) { + fmt::print(stderr, "Sanity check failed; ndot {} != num_export {}.\n", element_count, + static_cast(num_export)); + goto End; + } + + elem_to_proc.resize(element_count); + for (size_t i = 0; i < element_count; i++) { + elem_to_proc[i] = export_to_part[i]; + } + + End: + /* Clean up */ + Zoltan::LB_Free_Part(&import_global_ids, &import_local_ids, &import_procs, &import_to_part); + Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, 
&export_to_part); +#endif + } + + template + int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, + const std::string &surface_list, + std::vector &element_to_proc, INT dummy) + { + + Ioss::chain_t element_chains = + Ioss::generate_element_chains(region, surface_list, 0, dummy); + region.get_database()->progress("Ioss::generate_element_chains"); + + std::vector weights = + line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + region.get_database()->progress("generate_element_weights"); + + double start = Ioss::Utils::timer(); + decompose_zoltan(region, num_ranks, method, element_to_proc, weights, false, false, false, + dummy); + double end = Ioss::Utils::timer(); + fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); + region.get_database()->progress("exit decompose_elements"); + + // Make sure all elements on a chain are on the same processor rank... + line_decomp_modify(element_chains, element_to_proc, num_ranks); + + return 1; + } + + template IOSS_EXPORT int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, + const std::string &method, + const std::string &surface_list, + std::vector &element_to_proc, + int dummy); + template IOSS_EXPORT int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, + const std::string &method, + const std::string &surface_list, + std::vector &element_to_proc, + int64_t dummy); + + template + std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count) + { + int debug_level = 0; + auto chains = string_chains(element_chains); + + if ((debug_level & 16) != 0) { + for (const auto &[chain_root, chain_elements] : chains) { + fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); + } + } + + std::vector weights(element_count, 1); + // Now, for each chain... 
+ for (const auto &[chain_root, chain_elements] : chains) { + // * Set the weights of all elements in the chain... + // * non-root = 0, root = length of chain. + for (const auto &element : chain_elements) { + weights[element - 1] = 0; + } + weights[chain_root - 1] = static_cast(chain_elements.size()); + } + return weights; + } + template IOSS_EXPORT std::vector + DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count); + template IOSS_EXPORT std::vector + DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); + + template + void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count) + { + int debug_level = 0; + // Get a map of all chains and the elements in the chains. Map key will be root. + auto chains = string_chains(element_chains); + + // Delta: elements added/removed from each processor... + std::vector delta(proc_count); + + // Now, for each chain... + for (const auto &[chain_root, chain_elements] : chains) { + if ((debug_level & 16) != 0) { + fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); + } + + std::vector chain_proc_count(proc_count); + + // * get processors used by elements in the chain... + for (const auto &element : chain_elements) { + auto proc = elem_to_proc[element - 1]; + chain_proc_count[proc]++; + } + + // * Now, subtract the `delta` from each count + for (int i = 0; i < proc_count; i++) { + chain_proc_count[i] -= delta[i]; + } + + // * Assign all elements in the chain to processor at chain root + // * Update the deltas for all processors that gain/lose elements... 
+ auto root_proc = elem_to_proc[chain_root - 1]; + for (const auto &element : chain_elements) { + if (elem_to_proc[element - 1] != root_proc) { + auto old_proc = elem_to_proc[element - 1]; + elem_to_proc[element - 1] = root_proc; + delta[root_proc]++; + delta[old_proc]--; + } + } + } + + std::vector proc_element_count(proc_count); + for (auto proc : elem_to_proc) { + proc_element_count[proc]++; + } + if ((debug_level & 32) != 0) { + fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); + fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); + } + } + + template IOSS_EXPORT void + DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); + template IOSS_EXPORT void + DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); + + void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, + int proc_count) + { + // Output histogram of elements / rank... 
+ std::vector elem_per_rank(proc_count); + for (int proc : elem_to_proc) { + elem_per_rank[proc]++; + } + + size_t number_elements = elem_to_proc.size(); + size_t proc_width = Ioss::Utils::number_width(proc_count, false); + size_t work_width = Ioss::Utils::number_width(number_elements, true); + + auto min_work = *std::min_element(elem_per_rank.begin(), elem_per_rank.end()); + auto max_work = *std::max_element(elem_per_rank.begin(), elem_per_rank.end()); + size_t median = 0; + { + auto pw_copy(elem_per_rank); + std::nth_element(pw_copy.begin(), pw_copy.begin() + pw_copy.size() / 2, pw_copy.end()); + median = pw_copy[pw_copy.size() / 2]; + fmt::print("\nElements per processor:\n\tMinimum = {}, Maximum = {}, Median = {}, Ratio = " + "{:.3}\n\n", + fmt::group_digits(min_work), fmt::group_digits(max_work), + fmt::group_digits(median), (double)(max_work) / min_work); + } + if (min_work == max_work) { + fmt::print("Work on all processors is {}\n\n", fmt::group_digits(min_work)); + } + else { + int max_star = 40; + int min_star = max_star * ((double)min_work / (double)(max_work)); + min_star = std::max(1, min_star); + int delta = max_star - min_star; + + double avg_work = (double)number_elements / (double)proc_count; + for (size_t i = 0; i < elem_per_rank.size(); i++) { + int star_cnt = + (double)(elem_per_rank[i] - min_work) / (max_work - min_work) * delta + min_star; + std::string stars(star_cnt, '*'); + std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; + if (elem_per_rank[i] == max_work) { + fmt::print( +#if !defined __NVCC__ + fg(fmt::color::red), +#endif + format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + (double)elem_per_rank[i] / avg_work, stars); + } + else if (elem_per_rank[i] == min_work) { + fmt::print( +#if !defined __NVCC__ + fg(fmt::color::green), +#endif + format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + elem_per_rank[i] / avg_work, stars); + } + else { + fmt::print(format, i, proc_width, 
fmt::group_digits(elem_per_rank[i]), work_width, + elem_per_rank[i] / avg_work, stars); + } + } + + // Output Histogram... + output_histogram(elem_per_rank, (size_t)avg_work, median); + } + } + + template + std::tuple, std::vector, std::vector> + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) + { + size_t element_count = region.get_property("element_count").get_int(); + + // The zoltan methods supported in slice are all geometry based + // and use the element centroid. + std::vector x(element_count); + std::vector y(element_count); + std::vector z(element_count); + + const auto *nb = region.get_node_blocks()[0]; + std::vector coor; + nb->get_field_data("mesh_model_coordinates", coor); + + const auto &blocks = region.get_element_blocks(); + size_t el = 0; + for (auto &eb : blocks) { + std::vector connectivity; + eb->get_field_data("connectivity_raw", connectivity); + size_t blk_element_count = eb->entity_count(); + size_t blk_element_nodes = eb->topology()->number_nodes(); + + for (size_t j = 0; j < blk_element_count; j++) { + for (size_t k = 0; k < blk_element_nodes; k++) { + auto node = connectivity[j * blk_element_nodes + k] - 1; + x[el] += coor[node * 3 + 0]; + y[el] += coor[node * 3 + 1]; + z[el] += coor[node * 3 + 2]; + } + x[el] /= blk_element_nodes; + y[el] /= blk_element_nodes; + z[el] /= blk_element_nodes; + el++; + } + } + return {x, y, z}; + } + + template IOSS_EXPORT std::tuple, std::vector, std::vector> + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int dummy); + + template IOSS_EXPORT std::tuple, std::vector, std::vector> + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int64_t dummy); + +} // namespace Ioss diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h new file mode 100644 index 0000000000..30b1b35a2c --- /dev/null +++ 
b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -0,0 +1,48 @@ +/* + * Copyright(C) 2024 National Technology & Engineering Solutions + * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with + * NTESS, the U.S. Government retains certain rights in this software. + * + * See packages/seacas/LICENSE for details + */ +#pragma once + +#include "Ioss_ChainGenerator.h" +#include "Ioss_CodeTypes.h" +#include "Ioss_Region.h" +#include +#include + +#include "ioss_export.h" + +namespace Ioss { + class IOSS_EXPORT DecompUtils + { + public: + template + static void line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &element_to_proc, int proc_count); + + static void output_decomposition_statistics(const std::vector &element_to_proc, + int proc_count); + + template + static std::vector line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); + + template + static int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, + const std::string &surface_list, std::vector &element_to_proc, + INT dummy); + + template + static void decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED INT dummy); + + template + static std::tuple, std::vector, std::vector> + get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy); + }; +} // namespace Ioss diff --git a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C index 0badfc563b..b167d1aae8 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C +++ b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C @@ -1,4 +1,4 @@ -// Copyright(C) 1999-2023 National Technology & Engineering Solutions +// Copyright(C) 1999-2024 National Technology & Engineering Solutions // of Sandia, LLC (NTESS). 
Under the terms of Contract DE-NA0003525 with // NTESS, the U.S. Government retains certain rights in this software. // @@ -327,6 +327,12 @@ namespace Ioss { FaceGenerator::FaceGenerator(Ioss::Region ®ion) : region_(region) {} + FaceUnorderedSet &FaceGenerator::faces(const Ioss::ElementBlock *block) + { + auto name = block->name(); + return faces_[name]; + } + template IOSS_EXPORT void FaceGenerator::generate_faces(int, bool, bool); template IOSS_EXPORT void FaceGenerator::generate_faces(int64_t, bool, bool); @@ -334,7 +340,8 @@ namespace Ioss { void FaceGenerator::generate_faces(INT /*dummy*/, bool block_by_block, bool local_ids) { if (block_by_block) { - generate_block_faces(INT(0), local_ids); + const auto &ebs = region_.get_element_blocks(); + generate_block_faces(ebs, INT(0), local_ids); } else { generate_model_faces(INT(0), local_ids); @@ -349,7 +356,14 @@ namespace Ioss { } } - template void FaceGenerator::generate_block_faces(INT /*dummy*/, bool local_ids) + template IOSS_EXPORT void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &, + int, bool); + template IOSS_EXPORT void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &, + int64_t, bool); + + template + void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &ebs, INT /*dummy*/, + bool local_ids) { // Convert ids into hashed-ids Ioss::NodeBlock *nb = region_.get_node_blocks()[0]; @@ -370,7 +384,6 @@ namespace Ioss { auto endh = std::chrono::steady_clock::now(); #endif - const auto &ebs = region_.get_element_blocks(); for (const auto &eb : ebs) { const std::string &name = eb->name(); size_t numel = eb->entity_count(); diff --git a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h index d88745ffff..2881b08f43 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h +++ b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h @@ -14,6 +14,7 @@ #include #include 
+#include "Ioss_Region.h" #include "ioss_export.h" #define FG_USE_ROBIN @@ -28,8 +29,7 @@ #include namespace Ioss { - class Region; - + class ElementBlock; class IOSS_EXPORT Face { public: @@ -123,14 +123,18 @@ namespace Ioss { template void generate_faces(INT /*dummy*/, bool block_by_block = false, bool local_ids = false); + template + void generate_block_faces(const ElementBlockContainer &ebs, INT /*dummy*/, + bool local_ids = false); + FaceUnorderedSet &faces(const std::string &name = "ALL") { return faces_[name]; } + FaceUnorderedSet &faces(const ElementBlock *block); //! Given a local node id (0-based), return the hashed value. size_t node_id_hash(size_t local_node_id) const { return hashIds_[local_node_id]; } private: template void hash_node_ids(const std::vector &node_ids); - template void generate_block_faces(INT /*dummy*/, bool local_ids); template void generate_model_faces(INT /*dummy*/, bool local_ids); Ioss::Region ®ion_; diff --git a/packages/seacas/libraries/ioss/src/Ioss_Property.h b/packages/seacas/libraries/ioss/src/Ioss_Property.h index 2ef5656778..fdb065e695 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Property.h +++ b/packages/seacas/libraries/ioss/src/Ioss_Property.h @@ -97,6 +97,7 @@ namespace Ioss { IOSS_NODISCARD bool operator!=(const Ioss::Property &rhs) const; IOSS_NODISCARD bool operator==(const Ioss::Property &rhs) const; +#if 0 friend void swap(Ioss::Property &first, Ioss::Property &second) noexcept { using std::swap; @@ -105,7 +106,7 @@ namespace Ioss { swap(first.origin_, second.origin_); swap(first.data_, second.data_); } - +#endif private: std::string name_{}; BasicType type_{INVALID}; diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C index 9a38458021..5e0540cc7e 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C @@ -282,8 +282,8 @@ namespace Ioex { 
double t_end = Ioss::Utils::timer(); double duration = util().global_minmax(t_end - t_begin, Ioss::ParallelUtils::DO_MAX); if (myProcessor == 0) { - fmt::print(Ioss::DebugOut(), "Input File Open Time = {} ({})\n", duration, - decoded_filename()); + fmt::print(Ioss::DebugOut(), "Input File Open Time = {} ({})\n", duration, + decoded_filename()); } } diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index 0c4ab1e54b..734316cf2c 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -5,12 +5,16 @@ // See packages/seacas/LICENSE for details #include "Ioss_CodeTypes.h" +#include "Ioss_DecompositionUtils.h" #include "exodus/Ioex_DecompositionData.h" + #if defined PARALLEL_AWARE_EXODUS -#include "Ioss_ElementTopology.h" // for ElementTopology -#include "Ioss_Field.h" // for Field, etc -#include "Ioss_Map.h" // for Map, MapContainer -#include "Ioss_PropertyManager.h" // for PropertyManager +#include "Ioss_ElementTopology.h" +#include "Ioss_Field.h" +#include "Ioss_IOFactory.h" +#include "Ioss_Map.h" +#include "Ioss_PropertyManager.h" +#include "Ioss_Region.h" #include "Ioss_SmartAssert.h" #include "Ioss_Sort.h" #include "Ioss_Utils.h" @@ -39,8 +43,8 @@ #endif namespace { - // ZOLTAN Callback functions... + // ZOLTAN Callback functions... 
#if !defined(NO_ZOLTAN_SUPPORT) int zoltan_num_dim(void *data, int *ierr) { @@ -79,7 +83,12 @@ namespace { } if (wdim != 0) { - std::fill(wgts, wgts + element_count, 1.0); + if (zdata->weights().empty()) { + std::fill(wgts, wgts + element_count, 1.0); + } + else { + std::copy(zdata->weights().begin(), zdata->weights().end(), &wgts[0]); + } } if (ngid_ent == 1) { @@ -107,6 +116,7 @@ namespace { *ierr = ZOLTAN_OK; } #endif + } // namespace namespace Ioex { @@ -125,7 +135,8 @@ namespace Ioex { m_processorCount = pu.parallel_size(); } - template void DecompositionData::decompose_model(int filePtr) + template + void DecompositionData::decompose_model(int filePtr, const std::string &filename) { m_decomposition.show_progress(__func__); // Initial decomposition is linear where processor #p contains @@ -191,8 +202,16 @@ namespace Ioex { for (int i = 0; i < map_count; i++) { if (std::string(names[i]) == map_name) { m_decomposition.m_elementToProc.resize(decomp_elem_count()); - ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, - decomp_elem_count(), Data(m_decomposition.m_elementToProc)); + if (sizeof(INT) == 4) { + ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, + decomp_elem_count(), Data(m_decomposition.m_elementToProc)); + } + else { + std::vector tmp_map(decomp_elem_count()); + ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, + decomp_elem_count(), Data(tmp_map)); + std::copy(tmp_map.begin(), tmp_map.end(), m_decomposition.m_elementToProc.begin()); + } map_read = true; break; } @@ -256,6 +275,55 @@ namespace Ioex { } } + if (m_decomposition.m_lineDecomp) { + // For first iteration of this, we do the line-decomp modified decomposition on a single rank + // and then communicate the m_elementToProc vector to each of the ranks. This is then used + // do do the parallel distributions/decomposition of the elements assuming a "guided" + // decomposition. 
+ std::vector element_to_proc_global{}; + + m_decomposition.show_progress("***LINE_DECOMPOSE BEGIN***"); + if (m_processor == 0) { + Ioss::PropertyManager properties; + Ioss::DatabaseIO *dbi = Ioss::IOFactory::create( + "exodus", filename, Ioss::READ_RESTART, Ioss::ParallelUtils::comm_self(), properties); + Ioss::Region region(dbi, "line_decomp_region"); + + int status = Ioss::DecompUtils::line_decompose( + region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, + element_to_proc_global, INT(0)); + + if (m_decomposition.m_showHWM || m_decomposition.m_showProgress) { + Ioss::DecompUtils::output_decomposition_statistics(element_to_proc_global, m_processorCount); + } + } + // Now broadcast the parts of the `element_to_proc_global` + // vector to the owning ranks in the initial linear + // decomposition... + + std::vector sendcounts(m_processorCount); + std::vector displs(m_processorCount); + m_decomposition.m_elementToProc.resize(decomp_elem_count()); + + // calculate send counts and displacements + int sum = 0; + int rem = globalElementCount % m_processorCount; + for (int i = 0; i < m_processorCount; i++) { + sendcounts[i] = globalElementCount / m_processorCount; + if (rem > 0) { + sendcounts[i]++; + rem--; + } + displs[i] = sum; + sum += sendcounts[i]; + } + MPI_Scatterv(Data(element_to_proc_global), Data(sendcounts), Data(displs), MPI_INT, + Data(m_decomposition.m_elementToProc), decomp_elem_count(), MPI_INT, 0, + m_decomposition.m_comm); + m_decomposition.m_method = "SPECIFIED"; + m_decomposition.show_progress("***LINE_DECOMPOSE END***"); + } + #if !defined(NO_ZOLTAN_SUPPORT) float version = 0.0; Zoltan_Initialize(0, nullptr, &version); diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h index 651476b517..5faa8dc3c2 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h +++ 
b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h @@ -50,11 +50,11 @@ namespace Ioex { DecompositionDataBase(const DecompositionDataBase &) = delete; DecompositionDataBase &operator=(const DecompositionDataBase &) = delete; - virtual ~DecompositionDataBase() = default; - IOSS_NODISCARD virtual int int_size() const = 0; - virtual void decompose_model(int filePtr) = 0; - IOSS_NODISCARD virtual size_t ioss_node_count() const = 0; - IOSS_NODISCARD virtual size_t ioss_elem_count() const = 0; + virtual ~DecompositionDataBase() = default; + IOSS_NODISCARD virtual int int_size() const = 0; + virtual void decompose_model(int filePtr, const std::string &filename) = 0; + IOSS_NODISCARD virtual size_t ioss_node_count() const = 0; + IOSS_NODISCARD virtual size_t ioss_elem_count() const = 0; IOSS_NODISCARD virtual int spatial_dimension() const = 0; IOSS_NODISCARD virtual size_t global_node_count() const = 0; @@ -66,6 +66,7 @@ namespace Ioex { IOSS_NODISCARD virtual size_t decomp_elem_count() const = 0; IOSS_NODISCARD virtual std::vector &centroids() = 0; + IOSS_NODISCARD virtual std::vector &weights() = 0; Ioss_MPI_Comm comm_; @@ -120,7 +121,7 @@ namespace Ioex { IOSS_NODISCARD int int_size() const { return sizeof(INT); } - void decompose_model(int filePtr); + void decompose_model(int filePtr, const std::string &filename); IOSS_NODISCARD int spatial_dimension() const { return m_decomposition.m_spatialDimension; } @@ -136,6 +137,7 @@ namespace Ioex { IOSS_NODISCARD size_t decomp_elem_count() const { return m_decomposition.file_elem_count(); } IOSS_NODISCARD std::vector &centroids() { return m_decomposition.m_centroids; } + IOSS_NODISCARD std::vector &weights() { return m_decomposition.m_weights; } template void communicate_element_data(T *file_data, T *ioss_data, size_t comp_count) const diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C index f819428074..b0d9f4304d
100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C @@ -356,6 +356,14 @@ namespace { return total_data_size; } + void add_processor_id_map(Ioss::Region *region) + { + const auto &blocks = region->get_element_blocks(); + for (const auto &block : blocks) { + block->field_add(Ioss::Field("proc_id", block->field_int_type(), "scalar", Ioss::Field::MAP)); + } + } + } // namespace namespace Ioex { @@ -779,11 +787,11 @@ namespace Ioex { decomp = std::make_unique>(properties, util().communicator()); } assert(decomp != nullptr); - decomp->decompose_model(exoid); + decomp->decompose_model(exoid, get_filename()); read_region(); - Ioex::read_exodus_basis(get_file_pointer()); - Ioex::read_exodus_quadrature(get_file_pointer()); + Ioex::read_exodus_basis(exoid); + Ioex::read_exodus_quadrature(exoid); get_elemblocks(); @@ -4797,6 +4805,16 @@ namespace Ioex { return num_to_get; } + template + void ParallelDatabaseIO::output_processor_id_map(Ioss::Region *region, INT /*dummy*/) + { + std::vector proc_id(elementCount, myProcessor); + const auto &blocks = region->get_element_blocks(); + for (const auto &block : blocks) { + put_field_internal(block, block->get_field("proc_id"), Data(proc_id), -1); + } + } + void ParallelDatabaseIO::write_meta_data(Ioss::IfDatabaseExistsBehavior behavior) { Ioss::Region *region = get_region(); @@ -4849,7 +4867,14 @@ namespace Ioex { if (behavior != Ioss::DB_APPEND && behavior != Ioss::DB_MODIFY) { output_node_map(); + add_processor_id_map(region); output_other_metadata(); + if (int_byte_size_api() == 8) { + output_processor_id_map(region, int64_t(0)); + } + else { + output_processor_id_map(region, int(0)); + } } } diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.h b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.h index b61bead2ef..2ab679de54 100644 --- 
a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.h +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.h @@ -197,6 +197,8 @@ namespace Ioex { void write_entity_transient_field(const Ioss::Field &field, const Ioss::GroupingEntity *ge, int64_t count, void *variables) const; void write_meta_data(Ioss::IfDatabaseExistsBehavior behavior) override; + template + void output_processor_id_map(Ioss::Region *region, INT /*dummy*/); // Read related metadata and store it in the region... void read_region(); diff --git a/packages/seacas/libraries/ioss/src/main/shell_interface.C b/packages/seacas/libraries/ioss/src/main/shell_interface.C index ce293550ea..02724f4781 100644 --- a/packages/seacas/libraries/ioss/src/main/shell_interface.C +++ b/packages/seacas/libraries/ioss/src/main/shell_interface.C @@ -207,12 +207,10 @@ void IOShell::Interface::enroll_options() "Files are decomposed externally into a file-per-processor in a parallel run.", nullptr); -#if defined(SEACAS_HAVE_CGNS) options_.enroll( "add_processor_id_field", Ioss::GetLongOption::NoValue, - "For CGNS, add a cell-centered field whose value is the processor id of that cell", nullptr); -#endif - + "Add a cell-centered field whose value is the processor id of that cell", nullptr); + options_.enroll("serialize_io_size", Ioss::GetLongOption::MandatoryValue, "Number of processors that can perform simultaneous IO operations in " "a parallel run; 0 to disable", @@ -510,9 +508,7 @@ bool IOShell::Interface::parse_options(int argc, char **argv, int my_processor) } #if defined(SEACAS_HAVE_MPI) -#if defined(SEACAS_HAVE_CGNS) add_processor_id_field = (options_.retrieve("add_processor_id_field") != nullptr); -#endif #if !defined(NO_ZOLTAN_SUPPORT) if (options_.retrieve("rcb") != nullptr) {